def __init__(self, database, language="english", filename="english_lang_cds.txt"):
    """Extract ``lang_<language>.cds`` from ``database`` and dump its strings.

    The .cds file is pulled into memory, each entry is decompressed through
    ``riftdecomp.dll`` and the decoded string of every entry is written as one
    line to ``filename`` (UTF-8).

    Args:
        database: object providing ``extractByNameToMemory(name, stream)``.
        language: language name used to build the ``lang_<language>.cds`` name.
        filename: path of the text file that receives the decoded strings.
    """
    self.database = database
    self.language = language

    # load decompression DLL
    decompDLL = ctypes.CDLL("riftdecomp.dll")

    stream = BytesIO()
    self.database.extractByNameToMemory("lang_" + language + ".cds", stream)
    # seek to start
    stream.seek(0)

    dis = LittleEndianDataInputStream(stream)

    entryCount = dis.read_int()

    # read the frequency table
    frequencyTable = stream.read(1024)

    print("entryCount:" + str(entryCount))

    # not sure what these are: one 4-byte key and one LEB128 value per entry.
    # They are not used, but must be consumed to reach the entry data.
    for _ in range(entryCount):
        stream.read(4)
        readUnsignedLeb128(stream)

    # The context manager guarantees the output file is closed even when
    # decompression or UTF-8 decoding of an entry raises.
    with open(filename, "w", encoding='UTF-8') as f:
        for _ in range(entryCount):
            compressedSize = readUnsignedLeb128(stream)
            uncompressedSize = readUnsignedLeb128(stream)
            entryData = stream.read(compressedSize)

            # create a buffer to decompress into
            outputData = ctypes.create_string_buffer(uncompressedSize)
            # call a DLL to do the actual decompress. The ASM code to decompress was too
            # complicated to reverse engineer, so it was lifted into a DLL as-is.
            decompDLL.decompressData(frequencyTable, entryData, compressedSize,
                                     outputData, uncompressedSize)

            # And the results are in!

            # - The first 10 bytes we don't know, they seem to be the same between files though?
            buffer = BytesIO(outputData.raw)
            buffer.read(10)
            # - Then a LEB128 with length of string
            strLength = readUnsignedLeb128(buffer)
            # - Then string
            finalString = buffer.read(strLength).decode("utf-8")

            print(finalString, file=f)
# Example #2
# 0
def readCodeAndExtract(stream, indent):
    """Read one unsigned LEB128 value from ``stream`` and split it via ``splitCode``.

    Returns None when the value read is 0. Otherwise the split result is
    logged at the given ``indent`` and returned.
    """
    rawValue = readUnsignedLeb128(stream)
    if rawValue != 0:
        extracted = splitCode(rawValue)
        log("READ result " + str(extracted), indent)
        return extracted
    return None
def decompress(frequencyTable, valueData):
    """Decompress ``valueData`` with the decompression DLL and return a BytesIO.

    ``valueData`` starts with the uncompressed size as an unsigned LEB128;
    every remaining byte is handed to ``decompDLL.decompressData`` together
    with ``frequencyTable``.
    """
    reader = BytesIO(valueData)
    expandedLength = readUnsignedLeb128(reader)
    # whatever follows the size prefix is the compressed payload
    packedLength = len(valueData) - reader.tell()
    packed = reader.read(packedLength)
    # destination buffer the DLL decompresses into
    target = ctypes.create_string_buffer(expandedLength)
    decompDLL.decompressData(
        frequencyTable, packed, packedLength, target, expandedLength
    )
    return BytesIO(target.raw)
# Example #4
# 0
def readCodeThenReadTwice(stream, indent):
    """Read one LEB128 value and split it twice into two codes plus remaining data.

    The value is split with ``splitCode``; the ``data`` part of that result is
    split again. The two codes and the data of the second split are returned
    as a ``CodeResult2``.

    Args:
        stream: source passed to ``readUnsignedLeb128``.
        indent: unused here; kept so the signature matches the other readers.

    Returns:
        A ``CodeResult2(codeA, codeB, data)``, or None when the value read is 0
        or either split yields None.
    """
    value = readUnsignedLeb128(stream)
    if value == 0:
        return None

    codeResult = splitCode(value)
    if codeResult is None:
        return None

    codeResultB = splitCode(codeResult.data)
    # The first split is checked for None, so the second one can fail the
    # same way; report it as None instead of raising AttributeError.
    if codeResultB is None:
        return None

    return CodeResult2(codeResult.code, codeResultB.code, codeResultB.data)
# Example #5
# 0
def parse(file):
    """Parse the binary file at path ``file``, printing progress for each member.

    Reads the class code, then repeatedly reads a member code and passes it to
    ``handleCode`` until that returns False or no member code can be read.

    Args:
        file: path of the file to open in binary mode.
    """
    # The context manager closes the file even when parsing raises.
    with open(file, "rb") as handle:
        stream = LittleEndianDataInputStream(handle)

        classCode = readUnsignedLeb128(stream)
        print("Found class code:", classCode)

        i = 1
        done = False

        while not done:
            print("do member " + str(i))
            codeResult = readCodeAndExtract(stream, 1)
            if codeResult is None:
                # readCodeAndExtract returns None when it reads a zero value;
                # stop here rather than fail on ``codeResult.code``.
                print("no member code read; stopping")
                break
            done = not handleCode(stream, codeResult.code, 1)
            i = i + 1
# Example #6
# 0
def extractUnencryptedTelaraDB(unencryptedDBFilename, extractDirectory):
    """Decompress rows of the ``dataset`` table into files in ``extractDirectory``.

    Each row's ``value`` blob is decompressed through ``riftdecomp.dll`` using
    the ``frequencies`` blob found in ``dataset_compression`` for the same
    dataset id, and written to ``<extractDirectory>/<datasetId>_<datasetKey>``.

    Args:
        unencryptedDBFilename: path of the unencrypted SQLite database.
        extractDirectory: existing directory that receives the output files;
            a trailing path separator is optional.
    """
    print("Begin extracting of " + unencryptedDBFilename)
    # load decompression DLL
    decompDLL = ctypes.CDLL("riftdecomp.dll")

    # DatasetID appears to be a "category" of sorts, with the datasetkey being subcategories
    # For example, dataset 7701 has different keys for different languages.
    # Guesses at some randomly chosen dataset id contents:
    # 83 - ability formulas
    # 84 - worlds? contains NIF references
    # 111 - Scene?
    # 114 - sound bank reference
    # 4307 - profanity block?
    # 7701 - EULA

    # In test mode only the first row for each datasetid will be extracted, disable it to extract more than one row per datasetid
    ###############
    #
    # WARNING:    BE AWARE IF YOU DISABLE TEST MODE WITHOUT CHANGING THE SQL QUERY YOU WILL PULL **EVERY RECORD** FROM THE DATABASE.
    # WARNING:    THERE ARE 400,000+ AND MOST ARE UNDER 1KB. BE AWARE THAT your filesystem might not appreciate 400,000 1KB files suddenly appearing
    # WARNING:    You may wish to filter the first query by a specific datasetId, eg:
    # WARNING:    ds.execute('SELECT * from dataset where datasetId=?', (7701,))
    #
    ###############
    TEST_MODE = True
    test_mode_ids = set()

    conn = sqlite3.connect(unencryptedDBFilename)
    try:
        conn.row_factory = sqlite3.Row
        ds = conn.cursor()
        # One reusable cursor for the per-row frequency lookup, instead of a
        # new cursor per row that was left open on the TEST_MODE skip path.
        dsc = conn.cursor()

        ds.execute('SELECT * from dataset order by length(value) desc')
        for rowA in ds:
            dsid = rowA["datasetId"]
            dskey = rowA["datasetKey"]
            # Some entries also have a "name" column that can be useful to identify them,
            # but it often has funny characters in it so it can't be used as a filename.

            if TEST_MODE:
                if dsid in test_mode_ids:
                    continue
                test_mode_ids.add(dsid)

            dsc.execute("select * from dataset_compression where datasetid= ?",
                        (dsid, ))
            freqRow = dsc.fetchone()
            if freqRow is None:
                # Without a frequency table the row cannot be decompressed.
                print("No frequency table for datasetId " + str(dsid) +
                      ", skipping")
                continue

            valueData = rowA['value']
            frequencyTable = freqRow["frequencies"]

            buffer = BytesIO(valueData)

            # The blob starts with the uncompressed size (LEB128); the rest is
            # the compressed payload.
            uncompressedSize = readUnsignedLeb128(buffer)
            compressedSize = len(valueData) - buffer.tell()

            # create a buffer to decompress into
            inputData = buffer.read(compressedSize)
            outputData = ctypes.create_string_buffer(uncompressedSize)
            decompDLL.decompressData(frequencyTable, inputData, compressedSize,
                                     outputData, uncompressedSize)

            # Pass directory and file name as separate components so the
            # separator is inserted when extractDirectory lacks a trailing one.
            outputPath = os.path.join(extractDirectory,
                                      str(dsid) + "_" + str(dskey))
            with open(outputPath, "wb") as f:
                f.write(outputData)
    finally:
        # Closing the connection also releases both cursors, on errors too.
        conn.close()
# Example #7
# 0
def handleCode(stream, code, indent):
    """Consume from ``stream`` the value that belongs to type ``code``, logging it.

    Container codes (9, 10, 11, 12) recurse into this function for their
    members.

    Args:
        stream: input exposing ``read_byte``, ``read_int``, ``read_long`` and
            ``read_string``, and accepted by ``readUnsignedLeb128``.
        code: numeric type code of the value to consume.
        indent: indentation level passed on to ``log``.

    Returns:
        True when the value was consumed and parsing can continue; False on an
        end-of-object marker (code 8), a bad value, or an overrun.

    Raises:
        SystemExit: with status 1 when ``code`` is not a known code.
    """
    if code == 0 or code == 1:
        log("handleCode:" + str(code) + ", boolean?", indent)
        log("bool? " + str(code), indent + 1)
        return True

    if code == 2 or code == 3:
        # some kind of float/double. Can be UP to 80 bits long
        # IEEE 754 extended precision format maybe?
        log("handleCode:" + str(code) + ", (1)float/double?", indent)
        # Keep reading while the byte read is negative, at most 10 bytes.
        i = 1
        while stream.read_byte() < 0 and i < 10:
            i = i + 1
        return True

    if code == 4:
        log("handleCode:" + str(code) + ", int?", indent)
        log("int:" + str(stream.read_int()), indent + 1)
        return True

    if code == 5:
        log("handleCode:" + str(code) + ", long?", indent)
        log("long:" + str(stream.read_long()), indent + 1)
        return True

    if code == 6:
        log("handleCode:" + str(code) + ", string/data", indent)
        slen = readUnsignedLeb128(stream)
        sstr = stream.read_string(slen)
        log("string:" + sstr, indent + 1)
        return True

    if code == 10 or code == 9:
        if code == 10:
            # a value then some kind of array?
            log("handleCode:" + str(code) + ", array?", indent)
            value = readUnsignedLeb128(stream)
            log("value:" + str(value), indent + 1)
            if value > 0xFFFF or value == 0:
                log("bad value code 10", indent + 1)
                return False
        log("handleCode:" + str(code) + ", array2?", indent)

        # Members follow until a code 8 terminator; a missing code or a
        # failing member falls through to the overrun path.
        while True:
            rr = readCodeAndExtract(stream, indent + 1)
            if rr is None:
                break
            if rr.code == 8:
                return True
            if not handleCode(stream, rr.code, indent + 1):
                break
        log("overrun while code 9", indent + 1)
        return False

    if code == 11:
        log("handleCode:" + str(code) + ", array?", indent)
        result = readCodeAndExtract(stream, indent + 1)
        if result is None:
            log("bad result code 11", indent + 1)
            return False
        count = result.data
        if count == 0:
            return True
        i = 0
        log(
            "handle array count[" + str(count) + "], startingCode[" +
            str(result.code) + "]", indent + 1)
        # ``count`` elements follow, all of type ``result.code``.
        while handleCode(stream, result.code, indent + 1):
            i = i + 1
            if i >= count:
                return True
        log("overrun while code 11", indent + 1)
        return False

    if code == 12:
        log("handleCode:" + str(code) + ", array3?", indent)
        result = readCodeThenReadTwice(stream, indent + 1)
        # readCodeThenReadTwice returns None on a zero value; treat that as a
        # bad result, as code 11 does, instead of failing on ``result.count``.
        if result is None:
            log("bad result code 12", indent + 1)
            return False

        # NOTE(review): assumes CodeResult2 exposes the element count as
        # ``count`` - confirm against its definition.
        count = result.count
        if count == 0:
            return True
        i = 0
        log("handle Multidimensional array?:" + str(result), indent + 1)
        # Each element is a codeA value followed by a codeB value.
        while (handleCode(stream, result.codeA, indent + 1)
               and handleCode(stream, result.codeB, indent + 1)):
            i = i + 1
            if i >= count:
                return True
        log("overrun while code 12", indent + 1)
        return False

    if code == 8:
        log("handleCode:" + str(code) + ", end of object", indent)
        return False

    # Pass ``indent`` like every other log call in this function.
    log("UNKNOWN CODE:" + str(code), indent)
    # Same exit status as exit(1), without depending on the site module.
    raise SystemExit(1)