import gzip
import logging

def _pull_segment_data(connection, work_dir, node_name):
    """
    write out a dict for each segment_sequence
    """
    log = logging.getLogger("_pull_segment_data")
    result_generator = connection.generate_all_rows("""
        select {0} from nimbusio_node.segment
        where status <> 'C'
        order by unified_id, conjoined_part, handoff_node_id nulls last
    """.format(",".join(segment_row_template._fields), []))

    segment_row_count = 0

    segment_file_path = compute_segment_file_path(work_dir, node_name)
    segment_file = gzip.GzipFile(filename=segment_file_path, mode="wb")
    handoff_rows = list()
    for result in result_generator:
        segment_row = segment_row_template._make(result)
        if segment_row.file_hash is not None:
            segment_row = segment_row._replace(
                file_hash=bytes(segment_row.file_hash))
        if segment_row.handoff_node_id is not None:
            handoff_rows.append(segment_row._asdict())
            continue
        segment_dict = segment_row._asdict()
        segment_dict["handoff_rows"] = handoff_rows 
        store_sized_pickle(segment_dict, segment_file)
        segment_row_count += 1 
        handoff_rows = list()
    segment_file.close()

    log.info("stored {0} segment rows".format(segment_row_count))
Example #3
def __init__(self, work_dir, node_name):
    path = compute_segment_file_path(work_dir, node_name)
    self._segment_file = gzip.GzipFile(filename=path, mode="rb")
    path = compute_damaged_segment_file_path(work_dir, node_name)
    self._damaged_file = gzip.GzipFile(filename=path, mode="rb")
    self.segment_dict = None
    try:
        self._damaged_dict = retrieve_sized_pickle(self._damaged_file)
    except EOFError:
        self._damaged_dict = None
    self.advance()
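
The constructor above shows the read-side idiom: retrieve_sized_pickle raises EOFError once a gzip stream is exhausted, so callers loop until that exception. As a usage sketch only, a generator like the hypothetical iter_segment_dicts below could replay every dict written by _pull_segment_data; it assumes the module-level compute_segment_file_path and retrieve_sized_pickle helpers are available.

import gzip

def iter_segment_dicts(work_dir, node_name):
    # walk the gzipped segment file written by _pull_segment_data,
    # yielding one dict per stored record until EOFError signals the end
    path = compute_segment_file_path(work_dir, node_name)
    segment_file = gzip.GzipFile(filename=path, mode="rb")
    try:
        while True:
            try:
                yield retrieve_sized_pickle(segment_file)
            except EOFError:
                break
    finally:
        segment_file.close()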