def find_key(s, key_length):
    """
    Find the key by scoring several "single byte xor" using the frequency letter.
    """
    s = base64.b64decode(s)
    transposed_blocks = utils.transpose_blocks(utils.get_blocks(s, key_length),
                                               key_length)
    letter_place = {}
    key = ""
    occurence = dict((letter, 0) for letter in string.printable)
    for i, tblock in enumerate(transposed_blocks):
        for letter in string.printable:
            occurence[letter] = 0
            xored_tblock = [byte ^ ord(letter) for byte in tblock]
            for byte in xored_tblock:
                # count hits on the six most frequent letters in English plaintext
                # http://letterfrequency.org/#english-language-letter-frequency
                if chr(byte) in "etaoinETAOIN":
                    occurence[letter] += 1
        # record the scores for this block once every candidate letter has been tried
        letter_place[i] = occurence.copy()
    # for each transposed block, append the best-scoring candidate letter to the key
    for scores in letter_place.values():
        best_letters = [k for k, v in scores.items() if v == max(scores.values())]
        key += max(best_letters)
    return key
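
The helper above pairs with the key_length function further down in this listing, which guesses the repeating-key length from normalized Hamming distances. A minimal sketch of how the two might be combined to break a repeating-key XOR ciphertext; the driver function below is an illustrative assumption, not part of the original code.

import base64

def break_repeating_xor(path):
    # guess the repeating-key length from normalized Hamming distances
    # (assumed driver, written for illustration only)
    length = key_length(path)
    with open(path, "rb") as txt:
        ciphertext_b64 = txt.read()
    # recover one key byte per transposed block
    key = find_key(ciphertext_b64, length)
    # decrypt by XOR-ing the ciphertext with the repeating key
    ciphertext = base64.b64decode(ciphertext_b64)
    plaintext = bytes(b ^ ord(key[i % len(key)]) for i, b in enumerate(ciphertext))
    return key, plaintext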
Example 2
    def construct_blocks_json(self):
        # Build a ProgramBlock from the form entries and persist it to blocks.json.
        new_block = ProgramBlock()
        new_block.name = self.builder.get_object('name_ent').get()
        new_block.command = self.builder.get_object('command_ent').get()
        new_block.inputs = [a.get() for a in self.input_entry_list]
        prelim_outputs = [a.get() for a in self.output_entry_list]

        # outputs may be written as "name.type"; record the name and optional type separately
        new_block.outputs = []
        prelim_output_types = {}
        for entry in prelim_outputs:
            parts = entry.split('.')
            new_block.outputs.append(parts[0])
            prelim_output_types[parts[0]] = parts[1] if len(parts) > 1 else ""
        new_block.output_types = prelim_output_types

        prelim_sbatch_text = self.builder.get_object('sbatch_txtbox').get("1.0",END)

        new_block.sbatch = prelim_sbatch_text.splitlines()

        blocks = utils.get_blocks()
        blocks[new_block.name] = new_block
        json_loader.write_config("./../programs/blocks.json", blocks)
        program_compiler.run(new_block.name)
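
ProgramBlock and json_loader.write_config are project-local helpers that are not shown in this example. A rough sketch of what a compatible write_config might look like, assuming each block is a plain attribute container with JSON-serializable fields (an assumption for illustration, not the project's actual implementation):

import json

def write_config(path, blocks):
    # Sketch only: serialize each block via its attribute dict so that the
    # name/command/inputs/outputs/sbatch fields end up as plain JSON values.
    serializable = {name: (blk if isinstance(blk, dict) else vars(blk))
                    for name, blk in blocks.items()}
    with open(path, "w") as fh:
        json.dump(serializable, fh, indent=2)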
Example 3
    def cut_base_on_dict(self, sentence):
        word_list, tag_list = [], []
        blocks = utils.get_blocks(sentence, utils.RE_NORMAL_HAN)
        for block in blocks:
            max_prob_route = self.trie_model.get_max_prob_route(block)
            max_prob_word_list = [block[max_prob_route[idx]: max_prob_route[idx+1]] \
                                  for idx in range(len(max_prob_route)-1)]
            continuos_singe_list = self.get_continuos_singe(max_prob_word_list)
            last_end = 0
            for start, end in continuos_singe_list:
                for pre_word in max_prob_word_list[last_end: start]:
                    word_list.append(pre_word)
                    tag_list.append(self.trie_model.word_value.get(pre_word, {}).get('tag', 'x'))
                last_end = end
                continuos_singe_str = ''.join(max_prob_word_list[start: end])
                for slices in utils.get_splits(continuos_singe_str, utils.RE_NUNMBER_ENG):
                    #print slices
                    if utils.is_number_or_eng(slices):
                        word_list.append(slices)
                        number_tag = 'm'
                        tag_list.append(number_tag)
                    else:
                        mid_word_list = tag.crf_tag.crfToken(slices)
                        mid_tag_list = tag.crf_tag.crfPos(mid_word_list)
                        word_list.extend(mid_word_list)
                        tag_list.extend(mid_tag_list)
            for word in max_prob_word_list[last_end: ]:
                word_list.append(word)
                tag_list.append(self.trie_model.word_value.get(word, {}).get('tag', 'x'))

        #tag_list = [self.trie_model.word_value.get(word, {}).get('tag', 'x') \
                    #for word in word_list]
        return SegmentPair(word_list, tag_list)
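
get_continuos_singe is not shown in this example; judging from how its return value is consumed, it appears to yield (start, end) index ranges covering runs of single-character words that the dictionary lookup failed to merge. A rough sketch under that assumption (not the project's actual implementation):

    def get_continuos_singe(self, word_list):
        # Sketch: collect (start, end) index ranges of runs of single-character
        # words so each run can be re-segmented by the CRF tagger.
        ranges = []
        start = None
        for idx, word in enumerate(word_list):
            if len(word) == 1:
                if start is None:
                    start = idx
            elif start is not None:
                ranges.append((start, idx))
                start = None
        if start is not None:
            ranges.append((start, len(word_list)))
        return ranges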
def key_length(s):
    with open(s, "rb") as txt:
        s = base64.b64decode(txt.read())
    distances = {}
    for k_length in range(2, 40):
        blocks = utils.get_blocks(s, k_length)
        # normalized Hamming distance of each block with its adjacent block
        distances[k_length] = sum(
            hamming_dist(a, b) / k_length for a, b in zip(blocks, blocks[1:]))
        # average of all normalized Hamming distances for the current key length
        distances[k_length] /= len(blocks)
    distances = sorted(distances.items(), key=lambda x: x[1])
    # if the two best candidates share a common divisor, the true key length is likely that divisor
    best, second = distances[0][0], distances[1][0]
    return best if utils.pgcd(best, second) == 1 else utils.pgcd(best, second)
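
key_length relies on a hamming_dist helper that is not included in the snippet. A minimal sketch, assuming it takes two equal-length byte sequences and returns the number of differing bits:

def hamming_dist(a, b):
    # count the differing bits between two equal-length byte sequences
    # (illustrative sketch; the original helper is not shown)
    return sum(bin(x ^ y).count("1") for x, y in zip(a, b))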
Example 8
if __name__ == '__main__':
    # Visualise the computed reading order: boxes were pre-detected for each page
    # image and stored as .npy files alongside the rendered pages.
    img_dir = "/home/mahad/tmp/01-protected-retirement-plan-customer-key-features/png"
    npy_dir = "/home/mahad/tmp/01-protected-retirement-plan-customer-key-features/npy"
    save_dir = "/tmp"
    img_files = os.listdir(img_dir)
    npy_files = os.listdir(npy_dir)
    for img_file in img_files:
        img_path = os.path.join(img_dir, img_file)
        npy_path = os.path.join(npy_dir, Path(img_file).stem + ".npy")
        img = cv2.imread(img_path)
        all_boxes = np.load(npy_path, allow_pickle=True).tolist()
        print()
        # all_boxes = para_boxes + table_boxes
        # all_texts = para_texts + table_texts
        column_blocks = get_blocks((img.shape[0], img.shape[1]), all_boxes)
        column_blocks_merged = merge_blocks(column_blocks, all_boxes)
        ordered_boxes = create_order(column_blocks_merged, all_boxes)
        # ordered_texts = []
        # for i in range(0, len(ordered_boxes)):
        #     idx = all_boxes.index(ordered_boxes[i])
        #     ordered_texts.append(all_texts[idx])
        # if idx:
        #     del idx
        for ordered_box in ordered_boxes:
            img_draw = draw_boxes(img, [ordered_box])
            cv2.imshow('', cv2.resize(img_draw, fx=0.25, fy=0.25, dsize=None))
            # cv2.imshow('', img_draw)
            cv2.waitKey()
        # # cv2.imwrite("/tmp/" + xml_file.replace("xml", "png"), img_draw)
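
draw_boxes comes from the same project and is not listed here. A plausible minimal version, assuming each box is an (x1, y1, x2, y2) tuple in pixel coordinates (an assumption made for illustration):

import cv2

def draw_boxes(img, boxes, color=(0, 0, 255), thickness=2):
    # Sketch: draw each (x1, y1, x2, y2) box on a copy of the image and return it.
    img_draw = img.copy()
    for x1, y1, x2, y2 in boxes:
        cv2.rectangle(img_draw, (int(x1), int(y1)), (int(x2), int(y2)), color, thickness)
    return img_draw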
Example 9
def setup_batch(request):
    """Setup method for posting batches and returning the 
       response
    """
    data = {}
    signer = get_signer()
    expected_trxn_ids = []
    expected_batch_ids = []
    initial_state_length = len(get_state_list())

    LOGGER.info("Creating intkey transactions with set operations")

    txns = [
        create_intkey_transaction("set", 'a', 0, [], signer),
    ]

    for txn in txns:
        data = MessageToDict(txn,
                             including_default_value_fields=True,
                             preserving_proto_field_name=True)

        trxn_id = data['header_signature']
        expected_trxn_ids.append(trxn_id)

    data['expected_trxn_ids'] = expected_trxn_ids

    LOGGER.info("Creating batches for transactions 1trn/batch")

    batches = [create_batch([txn], signer) for txn in txns]

    for batch in batches:
        data = MessageToDict(batch,
                             including_default_value_fields=True,
                             preserving_proto_field_name=True)

        batch_id = data['header_signature']
        expected_batch_ids.append(batch_id)

    data['expected_batch_ids'] = expected_batch_ids
    data['signer_key'] = signer.get_public_key().as_hex()

    post_batch_list = [
        BatchList(batches=[batch]).SerializeToString() for batch in batches
    ]

    LOGGER.info("Submitting batches to the handlers")

    for batch in post_batch_list:
        try:
            response = post_batch(batch)
        except urllib.error.HTTPError as error:
            LOGGER.info("Rest Api is not reachable")
            data = json.loads(error.fp.read().decode('utf-8'))
            LOGGER.info(data['error']['title'])
            LOGGER.info(data['error']['message'])

    block_list = get_blocks()
    data['block_list'] = block_list
    block_ids = [block['header_signature'] for block in block_list]
    data['block_ids'] = block_ids
    batch_ids = [block['header']['batch_ids'][0] for block in block_list]
    data['batch_ids'] = batch_ids
    expected_head_id = block_ids[0]
    data['expected_head_id'] = expected_head_id
    yield data
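
setup_batch takes request and yields data, which suggests it is used as a pytest fixture (the decorator is not visible in this snippet). A minimal sketch of a test that might consume it, under that assumption:

def test_batches_are_committed(setup_batch):
    # every submitted batch id should appear among the batch ids of the committed blocks
    for batch_id in setup_batch['expected_batch_ids']:
        assert batch_id in setup_batch['batch_ids']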
Example 10
xml_path = os.path.join(xml_dir, xml_file)
pdf_path = os.path.join(pdf_dir, Path(xml_file).stem + ".pdf")
xml_data = get_raw_data(xml_path)
for page in xml_data:
    para_boxes = page["para_boxes"]
    para_texts = page["para_texts"]
    para_boxes, para_texts = remove_empty(para_boxes, para_texts)
    tables = page["tables"]
    table_boxes = [tt["bbox"] for tt in tables]
    table_texts = [tt["rows"] for tt in tables]
    img = pdf2image.convert_from_path(pdf_path, size=(page["width"], page["height"]),
                                      first_page=page["page_number"], last_page=page["page_number"])
    img = np.asarray(img[0])
    all_boxes = para_boxes + table_boxes
    all_texts = para_texts + table_texts
    column_blocks = get_blocks((page["height"], page["width"]), all_boxes)
    column_blocks_merged = merge_blocks(column_blocks, all_boxes)
    ordered_boxes = create_order(column_blocks_merged, all_boxes)
    ordered_texts = []
    for i in range(0, len(ordered_boxes)):
        idx = all_boxes.index(ordered_boxes[i])
        ordered_texts.append(all_texts[idx])
    if idx:
        del idx
    for i in range(0, len(ordered_boxes)):
        if not isinstance(ordered_texts[i], list):
            img_draw = draw_boxes(img, [ordered_boxes[i]])
            cv2.imshow('', cv2.resize(img_draw, fx=0.25, fy=0.25, dsize=None))
            cv2.waitKey()
        else:
            for row in ordered_texts[i]: