def _pull_segment_data(connection, work_dir, node_name):
    """
    Write out one sized-pickle dict for each segment sequence.

    Rows are ordered so that, within a (unified_id, conjoined_part) group,
    handoff rows (non-null handoff_node_id) come before the group's primary
    row (null handoff_node_id sorts last via 'nulls last').  Handoff rows
    are accumulated and attached to the primary row's dict under the
    "handoff_rows" key, then the dict is appended to a gzipped file in
    work_dir.

    :param connection: database connection exposing generate_all_rows
    :param work_dir: directory where the segment file is written
    :param node_name: node name used to compute the segment file path
    """
    log = logging.getLogger("_pull_segment_data")
    result_generator = connection.generate_all_rows("""
        select {0} from nimbusio_node.segment 
        where status <> 'C'
        order by unified_id, conjoined_part, handoff_node_id nulls last
        """.format(",".join(segment_row_template._fields)))

    segment_row_count = 0
    segment_file_path = compute_segment_file_path(work_dir, node_name)
    segment_file = gzip.GzipFile(filename=segment_file_path, mode="wb")
    try:
        handoff_rows = list()
        for result in result_generator:
            segment_row = segment_row_template._make(result)
            if segment_row.file_hash is not None:
                # the driver may hand back a buffer/memoryview; normalize
                # to bytes so the pickled dict round-trips cleanly
                segment_row = segment_row._replace(
                    file_hash=bytes(segment_row.file_hash))

            if segment_row.handoff_node_id is not None:
                handoff_rows.append(segment_row._asdict())
                continue

            segment_dict = segment_row._asdict()
            segment_dict["handoff_rows"] = handoff_rows
            store_sized_pickle(segment_dict, segment_file)
            segment_row_count += 1
            handoff_rows = list()

        if handoff_rows:
            # handoff rows with no following primary row would otherwise
            # be dropped silently
            log.warning("discarding {0} trailing handoff rows".format(
                len(handoff_rows)))
    finally:
        # close even when the row stream raises, so the gzip trailer is
        # written and the file handle is not leaked
        segment_file.close()

    log.info("stored {0} segment rows".format(segment_row_count))
def _pull_segment_data(connection, work_dir, node_name):
    """
    Write a dict for each segment sequence to a gzipped work file.

    NOTE(review): this is a byte-identical duplicate of an earlier
    definition in this file; Python keeps whichever is defined last.
    Consider removing one copy.
    """
    log = logging.getLogger("_pull_segment_data")
    # NOTE(review): the trailing [] argument to format() is unused --
    # the template only references {0}
    row_source = connection.generate_all_rows("""
        select {0} from nimbusio_node.segment 
        where status <> 'C'
        order by unified_id, conjoined_part, handoff_node_id nulls last
        """.format(",".join(segment_row_template._fields), []))

    rows_written = 0
    output_path = compute_segment_file_path(work_dir, node_name)
    output_file = gzip.GzipFile(filename=output_path, mode="wb")

    pending_handoffs = []
    for raw_row in row_source:
        row = segment_row_template._make(raw_row)
        if row.file_hash is not None:
            row = row._replace(file_hash=bytes(row.file_hash))

        if row.handoff_node_id is not None:
            # handoff rows sort ahead of their primary row; hold them
            # until the primary (null handoff_node_id) row arrives
            pending_handoffs.append(row._asdict())
        else:
            entry = row._asdict()
            entry["handoff_rows"] = pending_handoffs
            store_sized_pickle(entry, output_file)
            rows_written += 1
            pending_handoffs = []

    output_file.close()
    log.info("stored {0} segment rows".format(rows_written))
def __init__(self, work_dir, node_name):
    """
    Open this node's segment and damaged-segment files for reading.

    Pre-loads the first damaged-segment entry (None when that file is
    empty) and advances to the first segment dict.
    """
    self._segment_file = gzip.GzipFile(
        filename=compute_segment_file_path(work_dir, node_name),
        mode="rb")
    self._damaged_file = gzip.GzipFile(
        filename=compute_damaged_segment_file_path(work_dir, node_name),
        mode="rb")

    self.segment_dict = None

    # an empty damaged-segment file raises EOFError on the first read;
    # treat that as "no damaged segments"
    try:
        self._damaged_dict = retrieve_sized_pickle(self._damaged_file)
    except EOFError:
        self._damaged_dict = None

    self.advance()