Code example #1
File: utils.py Project: jsjyhzy/xelatex-chemfig-api
def bcompress(b):
    if b is None:
        return None
    com = LZMACompressor()
    chunk = com.compress(b)
    tail = com.flush()
    return chunk + tail
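The helper above only needs `from lzma import LZMACompressor`; with the default FORMAT_XZ container its output round-trips through lzma.decompress. A minimal check, assuming that import is in scope (the payload is illustrative):

import lzma

payload = b"some bytes" * 50
blob = bcompress(payload)           # complete xz stream: compress() output plus flush()
assert lzma.decompress(blob) == payload
assert bcompress(None) is None      # the None guard short-circuits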
Code example #2
def generate_lzma_stream(data: bytes, dict_size: int=33554432, lc: int=3, lp: int=1, pb: int=1) -> bytes:
    lzma_filters = [
        {'id': FILTER_LZMA1, 'dict_size': dict_size, 'lc': lc, 'lp': lp, 'pb': pb}
    ]
    compressor = LZMACompressor(format=FORMAT_ALONE, filters=lzma_filters)
    lzma_stream = compressor.compress(data)
    return b''.join((lzma_stream, compressor.flush()))
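This one builds a legacy .lzma ("alone") stream with an explicit LZMA1 filter chain; it assumes LZMACompressor, FORMAT_ALONE and FILTER_LZMA1 are imported from lzma. Because the alone header records the filter parameters, decompression needs no matching filter list. A quick round-trip sketch with an illustrative payload:

import lzma

payload = b"example payload" * 100
stream = generate_lzma_stream(payload)
assert lzma.decompress(stream, format=lzma.FORMAT_ALONE) == payload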
Code example #3
def compress( in_path, out_path, crc_to_spoof ):
	with open( in_path, 'rb' ) as in_file, open( out_path, 'wb' ) as out_file:
		lzmac = LZMACompressor( FORMAT_ALONE, filters = [ { 'id': FILTER_LZMA1, 'dict_size': 64 * 1024 } ] )
		
		size = 0
		crc = None
		
		out_file.write( pack( '<I', crc_to_spoof ) )
		
		_, file_name = split( in_path )
		data = '''
			do
				local f = file.Open( 'files.txt', 'ab', 'DATA' )
				if f then
					f:Write( string.format( '{0} = %s\\n', debug.getinfo( 1 ).short_src ) )
					f:Close()
				end
			end
		'''.format( file_name ).encode()
		data = in_file.read( 1024 )
		while len( data ):
			size = size + len( data )
			if crc == None:
				crc = crc32( data )
			else:
				crc = crc32( data, crc )
			out_file.write( lzmac.compress( data ) )
			data = in_file.read( 1024 )
			
		if crc != crc_to_spoof:
			fix = 0
			working_crc = ~crc_to_spoof
			
			for i in range( 32 ):
				if fix & 1:
					fix = ( fix >> 1 ) ^ 0xedb88320
				else:
					fix = fix >> 1
				
				if working_crc & 1:
					fix = fix ^ 0x5b358fd3
				
				working_crc = working_crc >> 1
			
			fix = ( fix ^ ~crc ) & 0xffffffff
			fix = pack( '<I', fix )
			#crc = crc32( fix, crc )
			size = size + len( fix )
			print( 'Fix: {0}'.format( fix ) )
			out_file.write( lzmac.compress( fix ) )
		
		out_file.write( lzmac.flush() )
		out_file.seek( 9, SEEK_SET )
		out_file.write( pack( '<q', size ) )
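The file written above is a 4-byte little-endian CRC followed by an LZMA_ALONE stream whose 8-byte uncompressed-size field (file offset 9) is patched afterwards; the 4-byte `fix` suffix is meant to force the decompressed payload's CRC-32 to the spoofed value. A hedged verification sketch for that layout, assuming the fix computation works as intended (names come from lzma, struct and zlib):

import lzma
from struct import unpack
from zlib import crc32

def verify(path):
    with open(path, 'rb') as f:
        blob = f.read()
    stored_crc, = unpack('<I', blob[:4])            # spoofed CRC written first
    payload = lzma.decompress(blob[4:], format=lzma.FORMAT_ALONE)
    return crc32(payload) & 0xffffffff == stored_crc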
Code example #4
File: block.py Project: Rogdham/python-xz
def __init__(
    self,
    fileobj: IOAbstract,
    check: int,
    preset: _LZMAPresetType,
    filters: _LZMAFiltersType,
) -> None:
    self.fileobj = fileobj
    self.check = check
    self.compressor = LZMACompressor(FORMAT_XZ, check, preset, filters)
    self.pos = 0
    if self.compressor.compress(b"") != create_xz_header(check):
        raise XZError("block: compressor header")
Code example #5
File: block.py Project: Rogdham/python-xz
class BlockWrite:
    def __init__(
        self,
        fileobj: IOAbstract,
        check: int,
        preset: _LZMAPresetType,
        filters: _LZMAFiltersType,
    ) -> None:
        self.fileobj = fileobj
        self.check = check
        self.compressor = LZMACompressor(FORMAT_XZ, check, preset, filters)
        self.pos = 0
        if self.compressor.compress(b"") != create_xz_header(check):
            raise XZError("block: compressor header")

    def _write(self, data: bytes) -> None:
        if data:
            self.fileobj.seek(self.pos)
            self.fileobj.write(data)
            self.pos += len(data)

    def compress(self, data: bytes) -> None:
        self._write(self.compressor.compress(data))

    def finish(self) -> Tuple[int, int]:
        data = self.compressor.flush()

        # footer
        check, backward_size = parse_xz_footer(data[-12:])
        if check != self.check:
            raise XZError("block: compressor footer check")

        # index
        records = parse_xz_index(data[-12 - backward_size:-12])
        if len(records) != 1:
            raise XZError("block: compressor index records length")

        # remaining block data
        self._write(data[:-12 - backward_size])

        return records[0]  # (unpadded_size, uncompressed_size)
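BlockWrite streams one xz block at a time and then splits the final flush() output into block data, index and footer via the project's parse_xz_index / parse_xz_footer helpers. The check in __init__ relies on liblzma emitting the 12-byte xz stream header on the first compress() call even for empty input; that is behaviour the class observes, not a documented guarantee, and a small probe looks like this:

import lzma

c = lzma.LZMACompressor(lzma.FORMAT_XZ, check=lzma.CHECK_CRC32)
header = c.compress(b"")
print(len(header), header[:6])   # typically: 12 b'\xfd7zXZ\x00'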
Code example #6
File: archive.py Project: mbr/ministryofbackup
def compress(srcfd, destfd, level=9, bufsize=DEFAULT_BUFSIZE):
    setproctitle('mob compression')
    log.debug("Starting compression in process %d" % os.getpid())
    compressor = LZMACompressor(options={'level': level})
    log.debug("Compression level %d" % level)

    src = os.fdopen(srcfd, 'rb')
    dest = os.fdopen(destfd, 'wb')

    while True:
        log.debug('Reading into buffer for compression')
        buf = src.read(bufsize)
        log.debug('Read %d bytes' % len(buf))

        if not buf:
            break
        dest.write(compressor.compress(buf))

    # clean up
    dest.write(compressor.flush())
    log.debug("Compression finished")
Code example #7
File: state.py Project: skoranda/SATOSA
    def urlstate(self, encryption_key):
        """
        Will return a url safe representation of the state.

        :type encryption_key: Key used for encryption.
        :rtype: str

        :return: Url representation av of the state.
        """
        lzma = LZMACompressor()
        urlstate_data = json.dumps(self.data)
        urlstate_data = lzma.compress(urlstate_data.encode("UTF-8"))
        urlstate_data += lzma.flush()
        urlstate_data = _AESCipher(encryption_key).encrypt(urlstate_data)
        lzma = LZMACompressor()
        urlstate_data = lzma.compress(urlstate_data)
        urlstate_data += lzma.flush()
        urlstate_data = base64.urlsafe_b64encode(urlstate_data)
        return urlstate_data.decode("utf-8")
Code example #8
File: state.py Project: SUNET/SATOSA
    def urlstate(self, encryption_key):
        """
        Will return a url safe representation of the state.

        :type encryption_key: Key used for encryption.
        :rtype: str

        :return: Url representation av of the state.
        """
        lzma = LZMACompressor()
        urlstate_data = json.dumps(self._state_dict)
        urlstate_data = lzma.compress(urlstate_data.encode("UTF-8"))
        urlstate_data += lzma.flush()
        urlstate_data = _AESCipher(encryption_key).encrypt(urlstate_data)
        lzma = LZMACompressor()
        urlstate_data = lzma.compress(urlstate_data)
        urlstate_data += lzma.flush()
        urlstate_data = base64.urlsafe_b64encode(urlstate_data)
        return urlstate_data.decode("utf-8")
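The two SATOSA variants above differ only in the attribute holding the state dict; both LZMA-compress the JSON, encrypt it with the project's _AESCipher helper, LZMA-compress again and URL-safe base64-encode the result. Leaving the encryption step aside, the compress-and-encode round trip looks like this (illustrative state dict, standard-library calls only):

import base64
import json
import lzma

state = {"user": "alice", "issuer": "https://idp.example.com"}
packed = base64.urlsafe_b64encode(lzma.compress(json.dumps(state).encode("utf-8")))
restored = json.loads(lzma.decompress(base64.urlsafe_b64decode(packed)).decode("utf-8"))
assert restored == state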
Code example #9
File: delta.py Project: MHZarei/elijah-provisioning
def _save_blob(start_index,
               delta_list,
               self_ref_dict,
               blob_name,
               blob_size,
               statistics=None):
    # mode = 2 indicates LZMA_SYNC_FLUSH, which shows all output right after input
    comp_option = {'format': 'xz', 'level': 9}
    comp = LZMACompressor(options=comp_option)
    disk_offset_list = list()
    memory_offset_list = list()
    comp_data = ''
    original_length = 0
    index = start_index
    item_count = 0

    memory_overlay_size = 0
    disk_overlay_size = 0

    while index < len(delta_list):
        delta_item = delta_list[index]

        if delta_item.ref_id != DeltaItem.REF_SELF:
            # Those deduped chunks will be put right after original data
            # using deduped_list
            delta_bytes = delta_item.get_serialized()
            original_length += len(delta_bytes)
            comp_delta_bytes = comp.compress(delta_bytes)
            comp_data += comp_delta_bytes
            item_count += 1
            if delta_item.delta_type == DeltaItem.DELTA_MEMORY:
                memory_offset_list.append(delta_item.offset)
                memory_overlay_size += len(comp_delta_bytes)
            elif delta_item.delta_type == DeltaItem.DELTA_DISK:
                disk_offset_list.append(delta_item.offset)
                disk_overlay_size += len(comp_delta_bytes)
            else:
                raise DeltaError("Delta should be either memory or disk")

            # remove dependece getting required index by finding reference
            deduped_list = self_ref_dict.get(delta_item.index, None)
            if deduped_list != None:
                #LOG.debug("moving %d deduped delta item" % (len(deduped_list)))
                for deduped_item in deduped_list:
                    deduped_bytes = deduped_item.get_serialized()
                    original_length += len(deduped_bytes)
                    comp_deduped_bytes = comp.compress(deduped_bytes)
                    comp_data += comp_deduped_bytes
                    item_count += 1
                    if deduped_item.delta_type == DeltaItem.DELTA_MEMORY:
                        memory_offset_list.append(deduped_item.offset)
                        memory_overlay_size += len(comp_deduped_bytes)
                    elif deduped_item.delta_type == DeltaItem.DELTA_DISK:
                        disk_offset_list.append(deduped_item.offset)
                        disk_overlay_size += len(comp_deduped_bytes)
                    else:
                        raise DeltaError(
                            "Delta should be either memory or disk")

        if len(comp_data) >= blob_size:
            LOG.debug("savefile for %s(%ld delta item) %ld --> %ld" % \
                    (blob_name, item_count, original_length, len(comp_data)))
            comp_data += comp.flush()
            blob_file = open(blob_name, "w+b")
            blob_file.write(comp_data)
            blob_file.close()
            if statistics != None:
                statistics['item_count'] = item_count
            return index, memory_offset_list, disk_offset_list
        index += 1

    comp_data += comp.flush()
    if len(comp_data) > 0:
        blob_file = open(blob_name, "w+b")
        blob_file.write(comp_data)
        blob_file.close()
        if statistics != None:
            statistics['item_count'] = item_count
        return index, memory_offset_list, disk_offset_list
    else:
        raise DeltaError("LZMA compression is zero")
Code example #10
    def extract(self, raw):
        sum = CharStatistics.stats('')
        compressor = LZMACompressor(format=FORMAT_ALONE)

        longest_word = 0
        #longest_conseq = 0
        collection = raw["rwords"] if "rwords" in raw else {}
        chrs = defaultdict(int)
        added_words = ""
        sign_sums = [0, 0]
        been_mixed = 0

        dropped_upper = defaultdict(int)
        added_upper = defaultdict(int)
        for word, diff in collection.items():  # type: str,int
            if diff < 0:
                dropped_upper[word.upper()] -= diff
            elif diff > 0:
                added_upper[word.upper()] += diff

        for word, diff in collection.items():  # type: str,int
            upper = word.upper()
            if dropped_upper[upper] == added_upper[upper]:
                continue

            if diff < 0:
                sign_sums[0] += -diff
                continue


            sign_sums[1] += diff

            added_words += word
            longest_word = max(longest_word, len(word))
            #longest_conseq = max(longest_conseq, CharStatistics.longest_conseq(word))

            been_latin = False
            been_non_latin = False
            for c in word:
                chrs[c] += 1

                if not been_latin or not been_non_latin:
                    if c == '-':
                        been_latin = False
                        been_non_latin = False
                    elif c.isalpha():
                        if ord(c) < 128:
                            been_latin = True
                        elif c.lower() in self.russian_alphabet:
                            been_non_latin = True

            if been_latin and been_non_latin:
                been_mixed += 1

            if word not in self.abbrs and not self.check_rgb(word):
                stats = CharStatistics.stats(word)
                for k, v in stats.items():
                    sum[k] += v

        revs = self.revs(raw)
        curr_text = (revs["current"]['text'] if revs['current'] is not None else '')  or ''
        prev_text = (revs['prev_user']['text'] if revs['prev_user'] is not None else '') or ''

        curr_text = curr_text.replace("\r\n","\n")
        prev_text = prev_text.replace("\r\n", "\n")
        nl_diff = (curr_text.count('\n') - prev_text.count('\n'))
        nl2_diff = (curr_text.count('\n\n') - prev_text.count('\n\n'))

        curr_cbr = [curr_text.count('{{'), curr_text.count('}}')]
        dbr_diff_o = (curr_cbr[0] - prev_text.count('{{'))
        dbr_diff_c = (curr_cbr[1] - prev_text.count('}}'))

        curr_rb = [curr_text.count('[['), curr_text.count(']]')]
        rbr_diff_o = (curr_rb[0] - prev_text.count('[['))
        rbr_diff_c = (curr_rb[1] - prev_text.count(']]'))

        punct_prev = 0
        for c in prev_text:
            if c in ['.', ',', '!', '?', ':', ';']:
                punct_prev += 1

        punct_now = 0
        for c in curr_text:
            if c in ['.', ',', '!', '?', ':', ';']:
                punct_now += 1

        main_curr = curr_text.count("{{main|")
        main_prev = prev_text.count("{{main|")

        longest_conseq = [CharStatistics.longest_conseq(prev_text), CharStatistics.longest_conseq(curr_text)]
        longest_conseq_upper = [CharStatistics.longest_upper(prev_text), CharStatistics.longest_upper(curr_text)]

        return {
            't_cap': sum['capitalized'] / (1 + sum['alpha']),
            't_cap_to_lwr': (sum['capitalized'] / (1 + sum['alpha'] - sum['capitalized'])),
            't_lgt_w': longest_word,
            #'t_cmpr': 1 if added_words == "" else len(compressor.compress(bytes(added_words, 'utf-8')) + compressor.flush())/(len(added_words) + 1),
            't_c_div': len(added_words) / (1 + len(chrs)),
            't_numalpha': sum['num'] / (1 + sum['alpha'] + sum['num']),
            't_lat': sum['latin'] / (1 + sum['alpha']),
            't_lgt_cs': longest_conseq[1],
            't_lgt_cs_rel': longest_conseq[1] / (longest_conseq[0] + 1),
            't_lgt_up': longest_conseq_upper[1],
            't_lgt_up_rel': longest_conseq_upper[1] / (longest_conseq_upper[0]  + 1),
            't_szdiff': (len(curr_text)- len(prev_text)),
            't_sz_rel': (1  + len(curr_text)) / (1+len(prev_text)),
            't_w_total': sign_sums[1] - sign_sums[0],
            't_w_added': sign_sums[1],
            't_w_deleted': sign_sums[0],
            't_mdf_wrds': min(sign_sums),
            't_nl_diff': nl_diff,
            't_nl2_diff': nl2_diff,
            't_nl_wrds': nl_diff / (1 + sign_sums[1]),
            't_dbr_o_diff': dbr_diff_o,
            't_dbr_c_diff': dbr_diff_c,
            't_dbr_diff': abs(dbr_diff_o-dbr_diff_c),
            't_dbr_curr': abs(curr_cbr[0] - curr_cbr[1]),
            't_rbr_o_diff': rbr_diff_o,
            't_rbr_c_diff': rbr_diff_c,
            't_rbr_diff': abs(rbr_diff_o - rbr_diff_c),
            't_rbr_curr': abs(curr_rb[0] - curr_rb[1]),
            't_w_mixed': been_mixed / (1 + sign_sums[1]),
            't_cut': 1 if check_cut(prev_text, curr_text) else 0,
            't_punct_diff': punct_now - punct_prev,
            't_punct_words': (punct_now - punct_prev)/(sign_sums[1] - sign_sums[0] + 0.9),
            't_main_diff': (main_curr - main_prev),
            't_diff_rel': (sign_sums[1] - sign_sums[0]) / (1 + min(sign_sums)),
            't_wikificated': 1 if self.detect_wikification(prev_text, curr_text) else 0
        }
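In this feature extractor the compressor is only referenced by the commented-out 't_cmpr' feature, which uses the size of the LZMA-compressed added words as a rough repetitiveness signal. A standalone sketch of that commented-out computation (FORMAT_ALONE as in the original; the function name is illustrative):

from lzma import FORMAT_ALONE, LZMACompressor

def compression_ratio(added_words: str) -> float:
    if not added_words:
        return 1.0
    compressor = LZMACompressor(format=FORMAT_ALONE)
    compressed = compressor.compress(added_words.encode("utf-8")) + compressor.flush()
    return len(compressed) / (len(added_words) + 1)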
Code example #11
File: tool.py Project: scolphoy/elijah-provisioning
            # get real data, which is HASH_CHUNKING_SIZE
            data = fd.read(HASH_CHUNKING_SIZE)
            #LOG.debug("data, recovering : %d %d" % (start_offset, end_offset))
        delta_list.append((start_offset, end_offset, ref_hashlist_id, data))

    fd.close()
    return delta_list


if __name__ == "__main__":
    import random
    import string

    if sys.argv[1] == "comp":
        base = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(2096))
        compressor = LZMACompressor(LZMA_OPTION)
        comp = compressor.compress(base)
        comp += compressor.flush()

        decompressor = LZMADecompressor()
        decomp = decompressor.decompress(comp)
        decomp += decompressor.flush()

        if base != decomp:
            print "result is wrong"
            print "%d == %d" % (len(base), len(decomp))
            sys.exit(1)
        print "success"

    elif sys.argv[1] == "xdelta":
        base = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(4096))
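This self-test targets Python 2 and, by the look of LZMA_OPTION (presumably a pyliblzma options dict) and the decompressor.flush() call, the pyliblzma API; the standard-library LZMADecompressor has no flush(). A Python 3 / standard-library rendering of the "comp" branch:

import lzma
import random
import string

base = ''.join(random.choice(string.ascii_uppercase + string.digits)
               for _ in range(2096)).encode()
compressor = lzma.LZMACompressor()
comp = compressor.compress(base) + compressor.flush()
decomp = lzma.LZMADecompressor().decompress(comp)
assert decomp == base, "%d == %d" % (len(base), len(decomp))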
Code example #12
async def lzma_compress(adata):
    loop = asyncio.get_running_loop()
    lz = LZMACompressor()
    async for chunk in adata:
        yield await loop.run_in_executor(None, lz.compress, chunk)
    yield await loop.run_in_executor(None, lz.flush)
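The async generator above offloads each compress() call (and the final flush()) to the default executor so the event loop is not blocked by CPU-bound work; it assumes `asyncio` and `from lzma import LZMACompressor` are in scope. A small usage sketch with an illustrative async source:

import asyncio
import lzma

async def produce():
    for chunk in (b"hello " * 1000, b"world " * 1000):
        yield chunk

async def main():
    parts = [part async for part in lzma_compress(produce())]
    assert lzma.decompress(b"".join(parts)) == b"hello " * 1000 + b"world " * 1000

asyncio.run(main())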
Code example #13
File: tool.py Project: MHZarei/elijah-provisioning
            #LOG.debug("data, recovering : %d %d" % (start_offset, end_offset))
        delta_list.append((start_offset, end_offset, ref_hashlist_id, data))

    fd.close()
    return delta_list


if __name__ == "__main__":
    import random
    import string

    if sys.argv[1] == "comp":
        base = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for x in range(2096))
        compressor = LZMACompressor(LZMA_OPTION)
        comp = compressor.compress(base)
        comp += compressor.flush()

        decompressor = LZMADecompressor()
        decomp = decompressor.decompress(comp)
        decomp += decompressor.flush()

        if base != decomp:
            print "result is wrong"
            print "%d == %d" % (len(base), len(decomp))
            sys.exit(1)
        print "success"

    elif sys.argv[1] == "xdelta":
        base = ''.join(
Code example #14
File: delta.py Project: scolphoy/elijah-provisioning
def _save_blob(start_index, delta_list, self_ref_dict, blob_name, blob_size, statistics=None):
    # mode = 2 indicates LZMA_SYNC_FLUSH, which shows all output right after input
    comp_option = {'format':'xz', 'level':9}
    comp = LZMACompressor(options=comp_option)
    disk_offset_list = list()
    memory_offset_list= list()
    comp_data = ''
    original_length = 0
    index = start_index
    item_count = 0

    memory_overlay_size = 0
    disk_overlay_size = 0
    
    while index < len(delta_list):
        delta_item = delta_list[index]

        if delta_item.ref_id != DeltaItem.REF_SELF:
            # Those deduped chunks will be put right after original data
            # using deduped_list
            delta_bytes = delta_item.get_serialized()
            original_length += len(delta_bytes)
            comp_delta_bytes = comp.compress(delta_bytes)
            comp_data += comp_delta_bytes
            item_count += 1
            if delta_item.delta_type == DeltaItem.DELTA_MEMORY:
                memory_offset_list.append(delta_item.offset)
                memory_overlay_size += len(comp_delta_bytes)
            elif delta_item.delta_type == DeltaItem.DELTA_DISK:
                disk_offset_list.append(delta_item.offset)
                disk_overlay_size += len(comp_delta_bytes)
            else:
                raise DeltaError("Delta should be either memory or disk")

            # remove dependece getting required index by finding reference
            deduped_list = self_ref_dict.get(delta_item.index, None)
            if deduped_list != None:
                #LOG.debug("moving %d deduped delta item" % (len(deduped_list)))
                for deduped_item in deduped_list:
                    deduped_bytes = deduped_item.get_serialized()
                    original_length += len(deduped_bytes)
                    comp_deduped_bytes = comp.compress(deduped_bytes)
                    comp_data += comp_deduped_bytes
                    item_count += 1
                    if deduped_item.delta_type == DeltaItem.DELTA_MEMORY:
                        memory_offset_list.append(deduped_item.offset)
                        memory_overlay_size += len(comp_deduped_bytes)
                    elif deduped_item.delta_type == DeltaItem.DELTA_DISK:
                        disk_offset_list.append(deduped_item.offset)
                        disk_overlay_size += len(comp_deduped_bytes)
                    else:
                        raise DeltaError("Delta should be either memory or disk")
            
        if len(comp_data) >= blob_size:
            LOG.debug("savefile for %s(%ld delta item) %ld --> %ld" % \
                    (blob_name, item_count, original_length, len(comp_data)))
            comp_data += comp.flush()
            blob_file = open(blob_name, "w+b")
            blob_file.write(comp_data)
            blob_file.close()
            if statistics != None:
                statistics['item_count'] = item_count
            return index, memory_offset_list, disk_offset_list
        index += 1

    comp_data += comp.flush()
    if len(comp_data) > 0 :
        blob_file = open(blob_name, "w+b")
        blob_file.write(comp_data)
        blob_file.close()
        if statistics != None:
            statistics['item_count'] = item_count
        return index, memory_offset_list, disk_offset_list
    else:
        raise DeltaError("LZMA compression is zero")