Example #1
0
    def dist(self, src, tar):
        """Compute the normalized compression distance (NCD) under LZSS.

        The two inputs are compressed individually and in both concatenation
        orders; the distance is the smaller concatenated size minus the
        smaller individual size, divided by the larger individual size.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Compression distance; 0.0 when ``src`` and ``tar`` are equal

        Raises
        ------
        ValueError
            Install the PyLZSS module in order to use LZSS

        Examples
        --------
        >>> cmp = NCDlzss()
        >>> cmp.dist('cat', 'hat')
        0.75
        >>> cmp.dist('Niall', 'Neil')
        1.0
        >>> cmp.dist('aluminum', 'Catalan')
        1.0
        >>> cmp.dist('ATCG', 'TAGC')
        0.8


        .. versionadded:: 0.4.0

        """
        # Equal inputs short-circuit before the compressor is touched at all.
        if src == tar:
            return 0.0

        if lzss is None:  # pragma: no cover
            raise ValueError('Install the PyLZSS module in order to use LZSS')

        # Compress in the same order as always: each input alone, then both
        # concatenation orders.
        src_size = len(lzss.encode(src))
        tar_size = len(lzss.encode(tar))
        forward_size = len(lzss.encode(src + tar))
        backward_size = len(lzss.encode(tar + src))

        best_joint = min(forward_size, backward_size)
        smaller_single = min(src_size, tar_size)
        larger_single = max(src_size, tar_size)
        return (best_joint - smaller_single) / larger_single
Example #2
0
    def dist(self, src, tar):
        """Compute the normalized compression distance (NCD) under LZSS.

        The two inputs are compressed individually and in both concatenation
        orders; the distance is the smaller concatenated size minus the
        smaller individual size, divided by the larger individual size.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Compression distance; 0.0 when ``src`` and ``tar`` are equal

        Raises
        ------
        ValueError
            Install the PyLZSS module in order to use LZSS

        Examples
        --------
        >>> cmp = NCDlzss()
        >>> cmp.dist('cat', 'hat')
        0.75
        >>> cmp.dist('Niall', 'Neil')
        1.0
        >>> cmp.dist('aluminum', 'Catalan')
        1.0
        >>> cmp.dist('ATCG', 'TAGC')
        0.8


        .. versionadded:: 0.4.0

        """
        # Equal inputs short-circuit before the compressor is touched at all.
        if src == tar:
            return 0.0

        if lzss is None:  # pragma: no cover
            raise ValueError('Install the PyLZSS module in order to use LZSS')

        # Compress in the same order as always: each input alone, then both
        # concatenation orders.
        src_size = len(lzss.encode(src))
        tar_size = len(lzss.encode(tar))
        forward_size = len(lzss.encode(src + tar))
        backward_size = len(lzss.encode(tar + src))

        best_joint = min(forward_size, backward_size)
        smaller_single = min(src_size, tar_size)
        larger_single = max(src_size, tar_size)
        return (best_joint - smaller_single) / larger_single
Example #3
0
 def pack(self, data):
     """Fill in the derived ``wvf_*`` fields of *data* and pack it.

     The ``"wvf"`` entry of *data* is hashed with MD5 and LZSS-encoded
     (using the module-level ``EI``/``EJ`` settings). The digest, the
     original length, the encoded length and the encoded bytes are stored
     back into *data* in place, which is then handed to the parent
     class's ``pack``.

     :param data: mapping that contains a ``"wvf"`` entry; it is mutated.
     :return: whatever the parent class's ``pack`` returns for *data*.
     """
     raw = data["wvf"]
     # Hash first, then encode, so a bad payload fails at the same step
     # as before.
     digest_state = hashlib.md5(raw)
     encoded = lzss.encode(raw, ei=EI, ej=EJ)
     data.update(
         wvf_md5=digest_state.digest(),
         wvf_full_length=len(raw),
         wvf_data_length=len(encoded),
         wvf_data=encoded,
     )
     return super(WfBlob, self).pack(data)
Example #4
0
def main(): 
    """Rebuild the script archive with translated text substituted in.

    All inputs and outputs are the hard-coded paths below; the function takes
    no arguments and returns nothing. This is Python 2 code (``ur''`` string
    literals, ``print`` statement).

    Steps, as performed by the code:

    1. Read the header of ``cpzFilename`` and XOR three 4-byte fields (entry
       count, index length, key mask) with fixed constants.
    2. Read header + index again, patch in the decoded fields, and run the
       project ``decrypt`` helper over the index region.
    3. Walk the index entries. Each entry's payload is read from the source
       archive. When the ``ScriptGetter`` built for that entry has a
       ``start_id`` attribute, the payload is decrypted, decompressed, its
       text pointers and text table are rewritten from
       ``source_path + <name> + '.txt'`` and the getter's translations, and it
       is recompressed and re-encrypted. Otherwise the payload is copied
       unchanged.
    4. The new offset/length of every payload is written into a copy of the
       index; the index is re-encrypted, the three header fields are XORed
       back, and header plus payloads are written to ``newFilename``.

    NOTE(review): the on-disk layout (field meanings, the 0x30-byte item
    header, the marker bytes) is inferred only from variable names here;
    confirm against the format documentation or the helper modules.
    """
    from struct import unpack, pack
    from array import array
    from decrypt import decrypt, encrypt, decryptPs2, encryptPs2
    from lzss import decode, encode
    import codecs
    from scriptgetter import ScriptGetter
    
    cpzFilename = ur"D:\eroge\明日の君と逢うために\data\pack\scriptback.cpz"
    newFilename = ur"D:\eroge\明日の君と逢うために\data\pack\script.cpz"
    
    relation_filename = ur"D:\data\workspace\asutame_text\etc\relation.txt"
    source_path = ur"D:\data\workspace\asutame_text\source" + u"\\"
    translated_path = ur"D:\data\workspace\asutame_text\已译" + u"\\"
    source_encoding = "shift-jis"
    translated_encoding = "gb18030"
    
    # Decrypt the file header.
    # NOTE(review): format 'L' has no byte-order prefix, so struct uses the
    # platform's native unsigned long size; the 4-byte slices below presumably
    # assume a platform where that is 4 bytes (e.g. Windows) -- confirm.
    with open(cpzFilename, 'rb') as cpz:
        header = array('B')
        header.fromfile(cpz, 0x18)
        
        indexCount = unpack('L', header[4:8])[0] ^ 0x5E9C4F37
        header[4:8] = array('B' , pack('L', indexCount))
        
        indexLength = unpack('L', header[0x8:0xC])[0] ^ 0xF32AED17
        header[0x8:0xC] = array('B' , pack('L', indexLength))
                                
        keyMask = unpack('L', header[0x10:0x14])[0] ^ 0xDDDDDDDD
        header[0x10:0x14] = array('B' , pack('L', keyMask))
        
    # Re-read from the start of the file, this time the 0x14-byte header plus
    # the whole index, and patch in the three decoded header fields.
    with open(cpzFilename, 'rb') as cpz:
        fullHeader = array('B')
        fullHeader.fromfile(cpz, indexLength + 0x14)
        
        fullHeader[4:8] = array('B' , pack('L', indexCount))
        fullHeader[0x8:0xC] = array('B' , pack('L', indexLength))
        fullHeader[0x10:0x14] = array('B' , pack('L', keyMask))
    
    # The key passed to decrypt()/encrypt() for the index is the header key
    # mask XORed with a second constant.
    keyMask = keyMask ^ 0x7BF4A539
    # Decrypt the index.
    delta = 12 - 20
    
    decrypt(fullHeader, 0x14, indexLength + 0x14, delta, keyMask)
    
    # Loop over the index entries, extracting each file.
    # newCpzHeader starts as a copy of the decrypted header+index and gets the
    # new offsets/lengths; newCpzContent accumulates the (re)packed payloads.
    newCpzHeader = array('B', fullHeader.tostring())
    newCpzContent = array('B')
    i = 0
    pos = 0x14
    while i < indexCount:
        # Entry fields read relative to pos: +0 entry size, +4 payload
        # length, +8 payload offset, +0x14 per-item key, +0x18.. file name
        # (NUL characters stripped from both ends).
        itemIndexLength = unpack('L', fullHeader[pos:pos + 4])[0]
        itemLength = unpack('L', fullHeader[pos + 4:pos + 8])[0]
        itemOffset = unpack('L', fullHeader[pos + 8:pos + 0x0C])[0]
        itemFilename = fullHeader[pos + 0x18:pos + itemIndexLength].tostring().strip('\0')
            
        itemKeyMask = unpack('L', fullHeader[pos + 0x14:pos + 0x18])[0]
        itemKeyMask = itemKeyMask ^ 0xC7F5DA63
        with open(cpzFilename, 'rb') as cpz:
            # Payload offsets are relative to the end of header + index.
            cpz.seek(itemOffset + indexLength + 0x14, 0)
            item = array('B')
            item.fromfile(cpz, itemLength)
            
            getter = ScriptGetter(itemFilename + '.txt', relation_filename, source_path, translated_path, source_encoding, translated_encoding)
            
            # Only repack the texts that are defined in the relation file.
            # Entries whose getter has no start_id fall through and are copied
            # into the new archive exactly as read.
            if hasattr(getter, 'start_id'):
                skipcount = getter.start_id
                decrypt(item, 0, itemLength, 12, itemKeyMask)
                
                # Decrypt & decompress the ps2 payload.
                
                decryptPs2(item, 0x30, itemLength - 0x30, unpack('L', item[0x0c:0x10])[0])
                itemHeader = item[0:0x30]
                itemContent = decode(item, 0x30, itemLength - 0x30)
                
                # The text table occupies the last scriptLength bytes of the
                # decompressed content; scriptOffset is where it starts.
                scriptLength = unpack('L', itemHeader[0x1C:0x20])[0]
                scriptOffset = len(itemContent) - scriptLength
                
                # Read the txt file and pack it into the archive.
                txtOffset = 0
                
                # Offset and length of each new text, keyed by its 1-based
                # index: textOffset[count] = (offset within `text`, length).
                textOffset = {}
                text = ''
                count = 1
                # Texts whose index is below this number are left untouched (297).
    #            skipcount = 297
                # Original offset of the first text that is not skipped.
                # New text is placed starting from this position.
                skipoffset = 0
                    
                with open(source_path + itemFilename + '.txt', 'rb') as inputTxtFile:
                    for t in inputTxtFile:
                        # Ignore ';' lines and blank lines.
                        if t.startswith(';'):
                            continue
                        if len(t.strip()) == 0:
                            continue
                        # Lines before start_id that contain '=' are counted
                        # but contribute no text.
                        if count < skipcount and t.find('=') >= 0:
                            count += 1
                            continue
                        eq = t.find('=')
                        # Default to the source text after '=', NUL-terminated;
                        # replaced below when a translation is available.
                        line = t[eq + 1:].strip('\r\n') + '\0'
                        newline = getter.get_script(codecs.decode(t.split('=')[0], source_encoding))
                        if newline:
                            try:
    #                                temp = codecs.decode(t[eq + 1:].strip('\r\n'), 'shift-jis')
                                line = codecs.encode(newline, translated_encoding) + '\0'
                                pass
                            except UnicodeEncodeError:
                                # Encoding failed: report it and keep the
                                # source-text line assigned above.
                                print "ERROR: can't encode " + newline
                        
                        textOffset[count] = (len(text), len(line))
                        text += line
                        count += 1
                count = 1
                lastOffset = (0, 0)
                j = 0
                # Snapshot of the content as a string, used for searching; the
                # pointer rewrites below go to the itemContent array and are
                # not reflected in this snapshot.
                itemContentString = itemContent.tostring()
                scriptEnd = scriptOffset - 8
                while j < scriptEnd:
                    # Jump to the next occurrence of the 4-byte marker; the 4
                    # bytes after it are read as an offset into the text table.
                    findJ = itemContentString.find('\x01\x02\x20\x01', j + 1)
                    if findJ >= 0:
                        j = findJ
                    else:
                        break
#                for j in xrange(0, scriptOffset - 8):
#                    if itemContent[j]+(itemContent[j + 1]<<8)+(itemContent[j + 2]<<16)+(itemContent[j + 3]<<24) == 0x01200201: # unpack is too slow
#                    if (itemContent[j] == 0x01) and (itemContent[j + 1] == 0x02) and (itemContent[j + 2] == 0x20) and (itemContent[j + 3] == 0x01): # the previous line is too slow
                        
                    sentenceOffset = unpack('L', itemContent[j + 4:j + 8])[0]
#                    sentence = itemContentString[scriptOffset + sentenceOffset:scriptOffset + sentenceOffset + 255].split('\0')[0]  #slow
                    sentenceEnd = itemContentString.find('\0', scriptOffset + sentenceOffset)
                    sentence = itemContentString[scriptOffset + sentenceOffset:sentenceEnd]
                    
                    if sentence == '':
                        # Empty original sentence: repoint it at the last byte
                        # (the terminating NUL) of the most recently placed
                        # text, unless that position would be 0 or less.
                        if lastOffset[0] + lastOffset[1] - 1 + skipoffset>0:
                            itemContent[j + 4:j + 8] = array('B', pack('L', lastOffset[0] + lastOffset[1] - 1 + skipoffset))
                        #itemContent[j + 4:j + 8] = array('B', pack('L', 0))
                        continue
                    if count < skipcount:
                        count += 1
                        continue
                    if count == skipcount:
                        skipoffset = sentenceOffset
                    
                    # Point this sentence at its new text, shifted by the
                    # offset at which the replaced region begins.
                    itemContent[j + 4:j + 8] = array('B', pack('L', textOffset[count][0] + skipoffset))
                    lastOffset = textOffset[count]
                    count += 1
                        
                
                textArray = array('B', text)
                itemHeader[0x1C:0x20] = array('B', pack('L', len(textArray)))
                # NOTE(review): the meaning of header field 0x28 and of the
                # `scriptOffset * 2` term is not evident from this code; the
                # commented-out line is an earlier variant -- confirm.
                #itemHeader[0x28:0x2C] = array('B', pack('L', scriptOffset + skipoffset + len(textArray)))
                itemHeader[0x28:0x2C] = array('B', pack('L', scriptOffset * 2 + skipoffset + len(textArray)))
    
                # Keep everything up to the first replaced text, then append
                # the new text table.
                #itemContent = itemContent[0:scriptOffset] + textArray
                itemContent = itemContent[0:scriptOffset + skipoffset] + textArray
        
                # Compress.
                itemContent = encode(itemContent, 0, len(itemContent))
                item = itemHeader + itemContent
                encryptPs2(item, 0x30, len(itemContent), unpack('L', item[0x0c:0x10])[0])
                
                # Encrypt.
                encrypt(item, 0, len(itemContent) + 0x30, 12, itemKeyMask)
            
            # Record where this payload lands in the new archive and how long
            # it now is.
            itemOffset = len(newCpzContent)
            newCpzHeader[pos + 8:pos + 0x0C] = array('B', pack('L', itemOffset))
            
            itemLength = len(item)
            newCpzHeader[pos + 4:pos + 8] = array('B', pack('L', itemLength))
            
            # Append this content to the new archive body.
            newCpzContent.fromstring(item.tostring())
            
            print itemFilename + ' added'
            
        pos += itemIndexLength
        i += 1
    
    
    # Re-encrypt the index with the same delta and key used to decrypt it,
    # then XOR the three header fields back with their constants.
    delta = 12 - 20
    
    encrypt(newCpzHeader, 0x14, indexLength + 0x14, delta, keyMask)
    
    indexCount = unpack('L', newCpzHeader[4:8])[0] ^ 0x5E9C4F37
    newCpzHeader[4:8] = array('B' , pack('L', indexCount))
    
    indexLength = unpack('L', newCpzHeader[0x8:0xC])[0] ^ 0xF32AED17
    newCpzHeader[0x8:0xC] = array('B' , pack('L', indexLength))
                            
    keyMask = unpack('L', newCpzHeader[0x10:0x14])[0] ^ 0xDDDDDDDD
    newCpzHeader[0x10:0x14] = array('B' , pack('L', keyMask))
    
    # Write header + index followed by all payloads.
    with open(newFilename, 'wb') as newCpz:
        newCpzHeader.tofile(newCpz)
    #    for i in (0x33 ,0x25 ,0xAD ,0xBA ,0xA5 ,0x22 ,0x31 ,0xF1 ,0x08 ,0x69 ,0x82 ,0xC2 ,0xFE ,0xCC ,0x16 ,0x47):
    #        newCpz.write(chr(i))
        newCpzContent.tofile(newCpz) 
Example #5
0
                    count += 1
                    continue
                if count == skipcount:
                    skipoffset = sentenceOffset
                
                itemContent[j + 4:j + 8] = array('B', pack('L', textOffset[count][0] + skipoffset))
                lastOffset = textOffset[count]
                count += 1
        
        textArray = array('B', text)
        itemHeader[0x1C:0x20] = array('B', pack('L', len(textArray)))
        #itemHeader[0x28:0x2C] = array('B', pack('L', scriptOffset + skipoffset + len(textArray)))
        itemHeader[0x28:0x2C] = array('B', pack('L', scriptOffset * 2 + skipoffset + len(textArray)))

        #itemContent = itemContent[0:scriptOffset] + textArray
        itemContent = itemContent[0:scriptOffset + skipoffset] + textArray

        #压缩
        itemContent = encode(itemContent, 0, len(itemContent))
        item = itemHeader + itemContent
        encryptPs2(item, 0x30, len(itemContent), unpack('L', item[0x0c:0x10])[0])
        
    with open(newFilename, 'wb') as newFile:
        item.tofile(newFile)
 
    

#写入
#with open(newFilename, 'wb') as newCpz:
#    fullHeader.write(newCpz)
Example #6
0
def test_buffer(data):
    """Report whether *data* survives an LZSS encode/decode round trip.

    :param data: buffer accepted by ``lzss.encode``.
    :return: result of comparing *data* with the decoded output
        (``True`` when they are equal).
    """
    round_tripped = lzss.decode(lzss.encode(data))
    return data == round_tripped
Example #7
0
def test_buffer(data):
    """Report whether *data* survives an LZSS encode/decode round trip.

    :param data: buffer accepted by ``lzss.encode``.
    :return: result of comparing *data* with the decoded output
        (``True`` when they are equal).
    """
    round_tripped = lzss.decode(lzss.encode(data))
    return data == round_tripped