# stdlib imports used by this function; compute_damaged_segment_file_path,
# _damaged_segment_generator, _group_key_function and store_sized_pickle
# are helpers defined in the surrounding module
import gzip
import itertools
import logging

def _pull_damaged_segment_data(connection, work_dir, node_name):
    """
    write out a pickled dict for each damaged segment, grouping the damaged
    sequence numbers by (unified_id, conjoined_part)
    """
    log = logging.getLogger("_pull_damaged_segment_data")
    damaged_segment_count = 0
    damaged_segment_file_path = \
            compute_damaged_segment_file_path(work_dir, node_name)
    damaged_segment_file = \
            gzip.GzipFile(filename=damaged_segment_file_path, mode="wb")

    # group the damaged segment rows so that all sequence numbers for the
    # same (unified_id, conjoined_part) land in a single output entry
    group_object = itertools.groupby(_damaged_segment_generator(connection),
                                     _group_key_function)
    for (unified_id, conjoined_part, ), damaged_segment_group in group_object:

        sequence_numbers = list()
        for damaged_segment_row in damaged_segment_group:
            sequence_numbers.extend(damaged_segment_row.sequence_numbers)

        assert len(sequence_numbers) > 0
        damaged_segment_dict = {"unified_id"        : unified_id,
                                "conjoined_part"    : conjoined_part,
                                "sequence_numbers"  : sequence_numbers, }
        store_sized_pickle(damaged_segment_dict, damaged_segment_file)
        damaged_segment_count += 1

    damaged_segment_file.close()
    log.info("stored {0} damaged segment entries".format(damaged_segment_count))
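
Both the writer above and the reader in the example below depend on store_sized_pickle and retrieve_sized_pickle, which are project helpers not shown here. A minimal sketch of how such a pair could be implemented, assuming a simple length-prefix framing (an assumption for illustration, not the project's actual code):

import pickle
import struct

def store_sized_pickle(obj, file_object):
    # hypothetical sketch: write a 4-byte big-endian length prefix,
    # then the pickled bytes
    data = pickle.dumps(obj)
    file_object.write(struct.pack(">I", len(data)))
    file_object.write(data)

def retrieve_sized_pickle(file_object):
    # hypothetical sketch: read the length prefix, then unpickle exactly
    # that many bytes; raise EOFError when the file is exhausted, which is
    # the convention the reader below relies on
    prefix = file_object.read(4)
    if len(prefix) < 4:
        raise EOFError("no more pickled entries")
    (size, ) = struct.unpack(">I", prefix)
    data = file_object.read(size)
    if len(data) < size:
        raise EOFError("truncated pickle entry")
    return pickle.loads(data)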
Example #3
    def __init__(self, work_dir, node_name):
        # open the segment and damaged-segment files written for this node
        path = compute_segment_file_path(work_dir, node_name)
        self._segment_file = gzip.GzipFile(filename=path, mode="rb")
        path = compute_damaged_segment_file_path(work_dir, node_name)
        self._damaged_file = gzip.GzipFile(filename=path, mode="rb")
        self.segment_dict = None
        # pre-load the first damaged-segment entry; an empty file is
        # signalled by EOFError and leaves _damaged_dict as None
        try:
            self._damaged_dict = retrieve_sized_pickle(self._damaged_file)
        except EOFError:
            self._damaged_dict = None
        self.advance()
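
A hypothetical usage sketch (not part of the original example) showing how the damaged-segment file written by _pull_damaged_segment_data can be read back, using the same EOFError convention the constructor above relies on:

import gzip

def _iterate_damaged_segment_entries(work_dir, node_name):
    # hypothetical helper for illustration: yield every dict stored in the
    # damaged-segment file, stopping cleanly when retrieve_sized_pickle
    # signals end of file
    path = compute_damaged_segment_file_path(work_dir, node_name)
    damaged_file = gzip.GzipFile(filename=path, mode="rb")
    try:
        while True:
            try:
                yield retrieve_sized_pickle(damaged_file)
            except EOFError:
                return
    finally:
        damaged_file.close()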