コード例 #1
0
from utils import parse_file_name, float_try_parse, get_color


def pair_plot(dataset, mask, sizex, sizey):
    """Draw a grid of scatter plots, one per ordered pair of numeric columns.

    A column takes part when its index is in ``mask`` and its value in the
    first row of ``dataset.features`` is accepted by ``float_try_parse``.
    For every ordered pair ``(i, j)`` of such columns, column ``j`` is drawn
    on the x axis against column ``i`` on the y axis, using only the rows in
    which both values parse. Each point is coloured with ``get_color``
    applied to the first field of its row.

    Args:
        dataset: object exposing ``features``, a sequence of rows.
        mask: container of the column indices to consider.
        sizex: number of subplot rows in the grid.
        sizey: number of subplot columns in the grid.
    """
    first_feature = dataset.features[0]
    # squeeze=False keeps ``ax`` two-dimensional even when a grid dimension
    # is 1, so the (row, column) indexing below is always valid.
    _, ax = plt.subplots(sizex, sizey, squeeze=False)

    # The set of plottable columns is the same for both axes; compute it once.
    columns = [
        index for index, value in enumerate(first_feature)
        if index in mask and float_try_parse(value)
    ]

    ax_index = 0
    for i in columns:
        for j in columns:
            xval = []
            yval = []
            colors = []
            for f in dataset.features:
                if float_try_parse(f[i]) and float_try_parse(f[j]):
                    yval.append(float(f[i]))
                    xval.append(float(f[j]))
                    colors.append(get_color(f[0]))
            # Fill the grid row by row. The row length is the number of
            # columns (sizey), so both coordinates are derived from sizey;
            # dividing by sizex is only correct for square grids.
            row, col = divmod(ax_index, sizey)
            ax[row, col].scatter(xval, yval, color=colors)
            ax_index += 1
    plt.show()


if __name__ == '__main__':
    # Load the dataset named on the command line.
    dataset = Dataset(parse_file_name())
    column_count = len(dataset.features[0])
    # First an overview of every column, then a close-up of four columns.
    pair_plot(dataset, list(range(column_count)), 13, 13)
    pair_plot(dataset, [6, 7, 8, 11], 4, 4)
コード例 #2
0
ファイル: yarowsky.py プロジェクト: juhokallio/YarowskyWSD
 def print_help():
     """Print the command-line usage message to standard output."""
     # Single-argument print(...) behaves the same on Python 2 and 3,
     # whereas the bare print statement is a SyntaxError on Python 3.
     script = parse_file_name(sys.argv[0])
     print("Usage: python {} pattern seed1 seed2 ...".format(script))
     print("The output will be saved to the file \"log\".")
コード例 #3
0
ファイル: logic.py プロジェクト: doctoromer/haya-data
    def run(self):
        """Execute the logic thread.

        Block on ``self.logic_queue`` and dispatch every message on its
        ``'type'`` key until a ``'kill'`` message clears ``self.running``:

        * ``send_block``: write ``message['content']`` to a block file
          under ``self.data_path``.
        * ``ask_block``: queue every matching block for sending, then queue
          a ``file_sent`` notification.
        * ``delete_block``: remove every matching block file.
        * ``ask_disk_state``: queue the total and free disk space.
        * ``ask_storage_state``: queue the parsed names of all stored blocks.
        * ``kill``: forward the message to the network queue and stop.

        Messages of any other type are logged and dropped. File errors are
        logged and do not stop the thread.
        """
        while self.running:
            message = self.logic_queue.get()
            message_type = message['type']

            self.logger.info('received message of type %s', message_type)

            # store a block
            if message_type == 'send_block':
                file_name = build_file_name(
                    name=message['name'],
                    number=message['number'],
                    block_type=message['block_type'])

                file_path = os.path.join(self.data_path, file_name)

                content = message['content']

                try:
                    with open(file_path, 'wb') as f:
                        f.write(content)
                except Exception:
                    self.logger.exception(
                        'an error acurred while trying to write to file:\n')

            # send a block to the server
            elif message_type == 'ask_block':
                file_name = build_file_name(
                    name=message['name'],
                    number=message['number'],
                    block_type=message['block_type'])

                for block in glob(self.data_path, file_name):
                    try:
                        with open(block, 'rb') as f:
                            content = f.read()
                    except Exception:
                        self.logger.exception(
                            'an error acurred while reading file: %s', block)
                        continue

                    block_info = parse_file_name(os.path.basename(block))

                    net_message = protocol.client.block(
                        block_type=block_info['block_type'],
                        name=block_info['name'],
                        number=block_info['number'],
                        content=content)

                    thread_message = protocol.thread.send(
                        message=net_message)
                    self.network_queue.put(thread_message)

                # tell the server that all blocks were sent
                net_message_finished = protocol.client.file_sent(
                    message['name'])
                thread_message_finished = protocol.thread.send(
                    message=net_message_finished)
                self.network_queue.put(thread_message_finished)

            # delete blocks
            elif message_type == 'delete_block':
                file_name = build_file_name(
                    name=message['name'],
                    number=message['number'],
                    block_type=message['block_type'])

                for block in glob(self.data_path, file_name):
                    # a file that cannot be removed must not kill the thread
                    try:
                        os.remove(block)
                    except OSError:
                        self.logger.exception(
                            'an error occurred while deleting file: %s',
                            block)

            # send the disk state to the server
            elif message_type == 'ask_disk_state':
                net_message = protocol.client.disk_state(
                    total=diskutil.total(), free=diskutil.free())
                self.network_queue.put(
                    protocol.thread.send(message=net_message))

            # send the storage state to the server
            elif message_type == 'ask_storage_state':
                # build a real list so the result does not depend on
                # whether map() is lazy in the running interpreter
                block_list = [
                    parse_file_name(os.path.basename(path))
                    for path in glob(self.data_path, '*_*.*')
                ]
                net_message = protocol.client.storage_state(blocks=block_list)
                self.network_queue.put(
                    protocol.thread.send(message=net_message))

            # end the thread
            elif message_type == 'kill':
                self.network_queue.put(message)
                self.running = False

            else:
                self.logger.warning(
                    'unknown message type: %s. message not processed',
                    message_type)
コード例 #4
0
    def run(self):
        """Execute the logic thread.

        Messages are taken from ``self.logic_queue`` one at a time and
        handled according to their ``'type'`` key. The loop ends once a
        ``'kill'`` message sets ``self.running`` to False.

        Handled types: ``send_block`` (write a block file under
        ``self.data_path``), ``ask_block`` (queue matching blocks followed
        by a ``file_sent`` notification), ``delete_block`` (remove matching
        block files), ``ask_disk_state``, ``ask_storage_state`` and
        ``kill``. Any other type is logged and dropped. File errors are
        logged and do not stop the thread.
        """
        while self.running:
            message = self.logic_queue.get()
            message_type = message['type']

            self.logger.info('received message of type %s', message_type)

            # store a block
            if message_type == 'send_block':
                file_name = build_file_name(name=message['name'],
                                            number=message['number'],
                                            block_type=message['block_type'])

                file_path = os.path.join(self.data_path, file_name)

                content = message['content']

                try:
                    with open(file_path, 'wb') as f:
                        f.write(content)
                except Exception:
                    self.logger.exception(
                        'an error acurred while trying to write to file:\n')

            # send a block to the server
            elif message_type == 'ask_block':
                file_name = build_file_name(name=message['name'],
                                            number=message['number'],
                                            block_type=message['block_type'])

                for block in glob(self.data_path, file_name):
                    try:
                        with open(block, 'rb') as f:
                            content = f.read()
                    except Exception:
                        self.logger.exception(
                            'an error acurred while reading file: %s', block)
                        continue

                    block_info = parse_file_name(os.path.basename(block))

                    net_message = protocol.client.block(
                        block_type=block_info['block_type'],
                        name=block_info['name'],
                        number=block_info['number'],
                        content=content)

                    thread_message = protocol.thread.send(
                        message=net_message)
                    self.network_queue.put(thread_message)

                # tell the server that all blocks were sent
                net_message_finished = protocol.client.file_sent(
                    message['name'])
                thread_message_finished = protocol.thread.send(
                    message=net_message_finished)
                self.network_queue.put(thread_message_finished)

            # delete blocks
            elif message_type == 'delete_block':
                file_name = build_file_name(name=message['name'],
                                            number=message['number'],
                                            block_type=message['block_type'])

                for block in glob(self.data_path, file_name):
                    # a file that cannot be removed must not kill the thread
                    try:
                        os.remove(block)
                    except OSError:
                        self.logger.exception(
                            'an error occurred while deleting file: %s',
                            block)

            # send the disk state to the server
            elif message_type == 'ask_disk_state':
                net_message = protocol.client.disk_state(
                    total=diskutil.total(), free=diskutil.free())
                self.network_queue.put(
                    protocol.thread.send(message=net_message))

            # send the storage state to the server
            elif message_type == 'ask_storage_state':
                # build a real list so the result does not depend on
                # whether map() is lazy in the running interpreter
                block_list = [parse_file_name(os.path.basename(path))
                              for path in glob(self.data_path, '*_*.*')]
                net_message = protocol.client.storage_state(blocks=block_list)
                self.network_queue.put(
                    protocol.thread.send(message=net_message))

            # end the thread
            elif message_type == 'kill':
                self.network_queue.put(message)
                self.running = False

            else:
                self.logger.warning(
                    'unknown message type: %s. message not processed',
                    message_type)
コード例 #5
0
ファイル: merge_4folder.py プロジェクト: why702/read_sys_file
    fp.write("# Columns (tab separated):\n")
    fp.write("# Person ID (0 if unknown)\n")
    fp.write(
        "# 	Finger ID (= Finger Type if unspecified or 0 if unknown/unused)\n")
    fp.write("# 		Finger Type (according to ISO/IES 19794-2:2005 table 2)\n")
    fp.write(
        "# 			Sample ID (sometimes referred to as \"Attempt\" or \"Transaction\"\n"
    )
    fp.write("# 				Image file relative path\n")
    fp.write("#\n")

    count = 0
    verify_count = 0
    if len(list0) == len(list1):
        for i in range(len(list0)):
            log = parse_file_name(list0[i])

            info0, info1, info2, info3, info4 = list0[i].split('\t')
            info5, info6, info7, info8, info9 = list1[i].split('\t')

            if info4.find('20201002_142457_673') >= 0:
                print()
                pass

            # init count
            if int(info3) % 10000 == 0:
                count = int(info3)
            elif int(info3) == 0:
                count = 0

            if int(info3) < 1000:  # enroll
コード例 #6
0
    def run(self):
        """
        Execute the restore thread.

        The thread first collects all blocks from the clients.
        Then it maps the received blocks into a dict keyed by the
        metadata block number.

        Example:
            {1: {'blocks': {1: ['10.0.0.9/file.dat_1.data',
                                '10.0.0.10/file.dat_1.data',
                                '10.0.0.11/file.dat_1.data'],
                            2: ['10.0.0.9/file.dat_2.data',
                                '10.0.0.10/file.dat_2.data',
                                '10.0.0.11/file.dat_2.data'],
                            3: ['10.0.0.9/file.dat_3.data',
                                '10.0.0.10/file.dat_3.data',
                                '10.0.0.11/file.dat_3.data']},
                 'path': '10.0.0.10/file.dat_1.metadata'},
             2: {'blocks': {4: ['10.0.0.9/file.dat_4.data',
                                '10.0.0.10/file.dat_4.data',
                                '10.0.0.11/file.dat_4.data'],
                            5: ['10.0.0.9/file.dat_5.data',
                                '10.0.0.10/file.dat_5.data',
                                '10.0.0.11/file.dat_5.data'],
                            6: ['10.0.0.9/file.dat_6.data',
                                '10.0.0.10/file.dat_6.data',
                                '10.0.0.11/file.dat_6.data']},
                 'path': '10.0.0.9/file.dat_2.metadata'}}

        After every block is mapped, the thread examines whether the file
        can be restored. If it can, the thread recreates the missing parts
        and writes the data to the path in 'self.real_file'. Otherwise an
        error message is put on 'self.logic_queue'.

        In both cases 'self.callback()' and 'self.exit_thread()' are called
        at the end. An 'exit' message received while collecting blocks
        stops the thread immediately through 'self.exit_thread()' alone.
        """
        self.logger.info(self.name + ' thread started')

        # receive blocks until every client reported 'file_sent', or until
        # 30 seconds pass without any message arriving
        start_time = time.time()
        while not all(self.clients.values()) and time.time() < start_time + 30:
            try:
                message = self.restore_queue.get(timeout=3)
            except Exception:
                # NOTE(review): presumably the queue timeout - confirm
                self.logger.debug('waiting for blocks...')
                continue

            # a message was received, reset the timeout
            start_time = time.time()

            message_type = message['type']
            self.logger.debug(
                'received message of type \'%s\'' % message_type)

            if message_type == 'block':
                self.received_block(message)
            elif message_type == 'file_sent':
                self.clients[message['client']] = True
            elif message_type == 'exit':
                self.exit_thread(success=False)
                return
            else:
                log = 'unknown message type: %s. message not processed'
                self.logger.warning(log % message_type)

        self.logger.info('finished collecting blocks of file %s' %
                         self.virtual_file)

        # mapping the restored file blocks
        mapping = {}
        for path in self.get_blocks_names(block_type=protocol.METADATA_BLOCK):
            file_info = parse_file_name(os.path.basename(path))

            number = file_info['number']
            if number.isdigit():
                mapping[int(number)] = {'path': path, 'blocks': {}}

        # insert records for missing metadata blocks into the mapping
        for metadata_number in xrange(1, self.validation_number + 1):
            if metadata_number not in mapping:
                mapping[metadata_number] = {'path': None, 'blocks': {}}

            start_number = 1 + self.validation_level * (metadata_number - 1)
            end_number = self.validation_level * metadata_number

            # insert paths of the data blocks covered by this metadata block
            blocks = mapping[metadata_number]['blocks']
            for data_number in xrange(start_number, end_number + 1):
                if data_number <= self.block_number:
                    blocks[data_number] = self.get_blocks_names(
                        block_type=protocol.DATA_BLOCK, number=data_number)

        self.logger.debug('file blocks mapping:\n' + pprint.pformat(mapping))

        # mapping missing / corrupted blocks
        valid_blocks = {}
        missing_data = {}
        missing_metadata = []
        validation_warning = True

        for metadata_number in mapping:
            metadata_path = mapping[metadata_number]['path']
            blocks_dict = mapping[metadata_number]['blocks']

            missing_data[metadata_number] = []

            # check if the metadata can be used: it must be readable and
            # parse into a dict
            metadata = None
            if metadata_path is not None:
                try:
                    with open(metadata_path, 'rb') as f:
                        metadata = protocol.parse(f.read())
                except Exception:
                    metadata = None
            valid_metadata = isinstance(metadata, dict)

            if valid_metadata:
                # if the metadata can be used, validate the blocks against it
                hashes = metadata['hashes'] if 'hashes' in metadata else {}

                for block_number in blocks_dict:
                    block_list = blocks_dict[block_number]

                    if not block_list:
                        missing_data[metadata_number].append(block_number)
                        continue

                    # without a reference hash the block cannot be
                    # validated; never compare against the hash of a
                    # previously checked block. The block is recorded as
                    # missing by the loop below.
                    if block_number not in hashes:
                        continue
                    block_hash = hashes[block_number]

                    for path in block_list:
                        with open(path, 'rb') as f:
                            content = f.read()

                        validated_hash = encrypt.hash_string(content)
                        if validated_hash.lower() == block_hash.lower():
                            valid_blocks[block_number] = path
                            break

                # if all variants of a block were tested against the
                # metadata and none of them is valid, add the block to the
                # missing data. Later, try to recreate it.
                for block_number in blocks_dict:
                    if block_number not in valid_blocks:
                        if block_number not in missing_data[metadata_number]:
                            missing_data[metadata_number].append(block_number)
            else:
                # if the metadata is not usable, choose the most common block
                missing_metadata.append(metadata_number)

                for block_number in blocks_dict:
                    if blocks_dict[block_number]:
                        candidates = []
                        for path in blocks_dict[block_number]:
                            with open(path, 'rb') as f:
                                candidates.append((path, f.read()))

                        # vote on the content only: every copy has its own
                        # path, so counting (path, content) pairs would give
                        # each copy exactly one vote
                        contents = [data for _, data in candidates]
                        winner = max(set(contents), key=contents.count)
                        path = next(candidate_path
                                    for candidate_path, data in candidates
                                    if data == winner)

                        basename = os.path.basename(path)
                        number = parse_file_name(basename)['number']
                        number = int(number)
                        valid_blocks[number] = path
                    else:
                        missing_data[metadata_number].append(block_number)

                if validation_warning:
                    self.logger.warning('some blocks are not validated')
                    validation_warning = False

        self.logger.debug('valid blocks: %s' % pprint.pformat(valid_blocks))
        self.logger.debug('missing data: %s' % pprint.pformat(missing_data))
        self.logger.debug('missing metadata: %s' %
                          pprint.pformat(missing_metadata))

        # check if file can be restored, and recreate missing parts
        corrupted = False
        for metadata_number in missing_data:
            missing_count = len(missing_data[metadata_number])

            if missing_count == 0:
                continue

            # if more than one block is missing per metadata block, or the
            # metadata needed to rebuild a single block is missing too,
            # the file cannot be restored
            if missing_count > 1 or metadata_number in missing_metadata:
                corrupted = True
                break

            # exactly one block is missing: rebuild it from the metadata
            with open(mapping[metadata_number]['path'], 'rb') as f:
                metadata = protocol.parse(f.read())

            missing_block_number = missing_data[metadata_number][0]

            # the rebuilt block can only be produced and verified when the
            # metadata carries both the xor value and the block's hash
            hashes = metadata['hashes'] if 'hashes' in metadata else {}
            if 'xor' not in metadata or missing_block_number not in hashes:
                corrupted = True
                break

            # calculate the first and last number of data block
            # of the current metadata block
            start_number = 1 + (self.validation_level *
                                (metadata_number - 1))
            end_number = self.validation_level * metadata_number
            if end_number > self.block_number:
                end_number = self.block_number

            # restore the missing block using the metadata
            restored = metadata['xor']
            for block_number in xrange(start_number, end_number + 1):
                if block_number != missing_block_number:
                    with open(valid_blocks[block_number], 'rb') as f:
                        content = f.read()
                    restored = encrypt.xor_strings(restored, content)

            # if the hash of the block is valid, write it to file
            restored_hash = encrypt.hash_string(restored)
            metadata_hash = hashes[missing_block_number]

            if restored_hash.lower() != metadata_hash.lower():
                corrupted = True
                break

            file_name = build_file_name(
                block_type=protocol.DATA_BLOCK,
                name=self.virtual_file,
                number=missing_block_number)
            path = os.path.join(self.temp, file_name)
            with open(path, 'wb') as restored_block:
                restored_block.write(restored)

            valid_blocks[missing_block_number] = path

        # if file can be restored, restore it
        if not corrupted:
            with open(self.real_file, 'wb') as real_file:
                for block_number in xrange(1, self.block_number + 1):
                    with open(valid_blocks[block_number], 'rb') as f:
                        real_file.write(f.read())
            self.logger.info('\'%s\' restored successfully' %
                             self.virtual_file)

        # else, tell the logic thread
        else:
            err = '\'%s\' is corrupted, could not restore' % self.virtual_file
            self.logger.error(err)
            message = protocol.thread.error(thread_id=self.ident, message=err)
            self.logic_queue.put(message)

        # executing exit operations
        self.callback()
        self.exit_thread(success=not corrupted)