# standard library modules used below; project-specific helpers such as
# store_sized_pickle, retrieve_sized_pickle, the compute_*_file_path
# functions, generate_work, segment_row_template, and the anti_entropy_*
# status constants are imported from the surrounding package and are not
# shown in this excerpt.
import gzip
import itertools
import logging
import os
import sys

def _manage_subprocesses(halt_event, merge_manager):
    log = logging.getLogger("_manage_subprocesses")

    group_object = itertools.groupby(merge_manager, _group_key_function)

    for (unified_id, conjoined_part, sequence_num), node_group in group_object:
        group_dict = {"unified_id"      : unified_id,
                      "conjoined_part"  : conjoined_part,
                      "sequence_num"    : sequence_num,
                      "segment_status"  : None,
                      "node_data"       : list()}

        if halt_event.is_set():
            log.warning("halt_event set, exiting")
            break

        # collect the per-node results for this sequence; every entry in a
        # group is expected to carry the same segment_status
        for (_sequence_key, segment_status, node_data, ) in node_group:
            if group_dict["segment_status"] is None:
                group_dict["segment_status"] = segment_status
            assert segment_status == group_dict["segment_status"]
            group_dict["node_data"].append(node_data)

        log.debug("group: unified_id={0}, conjoined_part={1}, "
                  "sequence_num={2}, segment_status={3}".format(
                      group_dict["unified_id"],
                      group_dict["conjoined_part"],
                      group_dict["sequence_num"],
                      group_dict["segment_status"]))

        store_sized_pickle(group_dict, sys.stdout.buffer)
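# _group_key_function is not defined in this excerpt.  For the grouping done
# by _manage_subprocesses above -- where each merge_manager item is a
# (sequence_key, segment_status, result) tuple and groups are keyed on the
# first three elements of sequence_key -- a sketch consistent with that usage
# might look like the hypothetical helper below.  Note that the damaged
# segment grouping further down keys on only (unified_id, conjoined_part),
# so the real helper(s) may differ or live in separate modules.
def _group_key_function_sketch(item):
    sequence_key, _segment_status, _node_data = item
    unified_id, conjoined_part, sequence_num, _segment_num = sequence_key
    return (unified_id, conjoined_part, sequence_num)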
def _pull_damaged_segment_data(connection, work_dir, node_name):
    """
    write out a dict for each damaged segment_sequence
    """
    log = logging.getLogger("_pull_damaged_segment_data")
    damaged_segment_count = 0

    damaged_segment_file_path = \
        compute_damaged_segment_file_path(work_dir, node_name)
    damaged_segment_file = \
        gzip.GzipFile(filename=damaged_segment_file_path, mode="wb")

    group_object = itertools.groupby(_damaged_segment_generator(connection),
                                     _group_key_function)

    for (unified_id, conjoined_part, ), damaged_segment_group in group_object:
        # collect every damaged sequence number reported for this
        # (unified_id, conjoined_part) pair
        sequence_numbers = list()
        for damaged_segment_row in damaged_segment_group:
            sequence_numbers.extend(damaged_segment_row.sequence_numbers)
        assert len(sequence_numbers) > 0
        damaged_segment_dict = {"unified_id"        : unified_id,
                                "conjoined_part"    : conjoined_part,
                                "sequence_numbers"  : sequence_numbers, }
        store_sized_pickle(damaged_segment_dict, damaged_segment_file)
        damaged_segment_count += 1

    damaged_segment_file.close()
    log.info("stored {0} damaged segment entries".format(
        damaged_segment_count))
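# Illustration only: the damaged segment file written above can be streamed
# back with the retrieve_sized_pickle counterpart used elsewhere in this
# excerpt (see _process_repair_entries, which stops on EOFError).  The
# function name below is hypothetical.
def _generate_damaged_segment_dicts(work_dir, node_name):
    damaged_segment_file_path = \
        compute_damaged_segment_file_path(work_dir, node_name)
    with gzip.GzipFile(filename=damaged_segment_file_path,
                       mode="rb") as damaged_segment_file:
        while True:
            try:
                yield retrieve_sized_pickle(damaged_segment_file)
            except EOFError:
                return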
def _pull_segment_data(connection, work_dir, node_name):
    """
    write out a dict for each segment_sequence
    """
    log = logging.getLogger("_pull_segment_data")
    result_generator = connection.generate_all_rows("""
        select {0} from nimbusio_node.segment
        where status <> 'C'
        order by unified_id, conjoined_part, handoff_node_id nulls last
        """.format(",".join(segment_row_template._fields)), [])

    segment_row_count = 0
    segment_file_path = compute_segment_file_path(work_dir, node_name)
    segment_file = gzip.GzipFile(filename=segment_file_path, mode="wb")

    handoff_rows = list()
    for result in result_generator:
        segment_row = segment_row_template._make(result)
        if segment_row.file_hash is not None:
            segment_row = segment_row._replace(
                file_hash=bytes(segment_row.file_hash))

        # handoff rows sort ahead of the row they belong to; defer them
        # until the non-handoff row for the same segment arrives
        if segment_row.handoff_node_id is not None:
            handoff_rows.append(segment_row._asdict())
            continue

        segment_dict = segment_row._asdict()
        segment_dict["handoff_rows"] = handoff_rows
        store_sized_pickle(segment_dict, segment_file)
        segment_row_count += 1
        handoff_rows = list()

    segment_file.close()
    log.info("stored {0} segment rows".format(segment_row_count))
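# store_sized_pickle and retrieve_sized_pickle come from the surrounding
# package and are not shown here.  Judging from their usage (write to a
# binary stream, read back until a clean EOFError), they implement a
# length-prefixed pickle framing.  The pair below is a minimal sketch of
# that pattern for illustration, not the project's implementation.
import pickle
import struct

def _store_sized_pickle_sketch(obj, output_stream):
    # write an 8 byte big-endian length header, then the pickled payload
    data = pickle.dumps(obj)
    output_stream.write(struct.pack("!Q", len(data)))
    output_stream.write(data)

def _retrieve_sized_pickle_sketch(input_stream):
    # a zero-length read at the header boundary is a clean end of stream
    header = input_stream.read(struct.calcsize("!Q"))
    if len(header) == 0:
        raise EOFError()
    (size, ) = struct.unpack("!Q", header)
    return pickle.loads(input_stream.read(size))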
def audit_segments(halt_event, work_dir):
    log = logging.getLogger("audit_segments")

    if not os.path.exists(anti_entropy_dir):
        log.info("creating {0}".format(anti_entropy_dir))
        os.mkdir(anti_entropy_dir)

    meta_repair_file_path = compute_meta_repair_file_path()
    meta_repair_file = \
        gzip.GzipFile(filename=meta_repair_file_path, mode="wb")

    data_repair_file_path = compute_data_repair_file_path()
    data_repair_file = \
        gzip.GzipFile(filename=data_repair_file_path, mode="wb")

    counts = {
        "total"                                 : 0,
        anti_entropy_missing_replicas           : 0,
        anti_entropy_incomplete_finalization    : 0,
        anti_entropy_damaged_records            : 0,
        anti_entropy_missing_tombstones         : 0,
        anti_entropy_database_inconsistancy     : 0, }

    current_time = create_timestamp()
    min_segment_age = parse_timedelta_str(_min_segment_age)
    newest_allowable_timestamp = current_time - min_segment_age
    log.info("newest allowable timestamp = {0}".format(
        newest_allowable_timestamp.isoformat()))

    for row_key, segment_status, segment_data in generate_work(work_dir):
        if halt_event.is_set():
            log.info("halt_event is set: exiting")
            return

        assert segment_status == anti_entropy_pre_audit

        counts["total"] += 1

        # missing replicas needs to run first, because the other tests
        # assume there are no missing replicas
        if _missing_replicas(segment_data, newest_allowable_timestamp):
            log.debug("missing_replicas {0}".format(row_key))
            counts[anti_entropy_missing_replicas] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_replicas, segment_data, ),
                data_repair_file)
            continue

        # _missing_tombstones needs to run ahead of _incomplete_finalization
        if _missing_tombstones(segment_data, newest_allowable_timestamp):
            log.debug("missing_tombstones {0}".format(row_key))
            counts[anti_entropy_missing_tombstones] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_tombstones, segment_data, ),
                meta_repair_file)
            continue

        if _incomplete_finalization(segment_data, newest_allowable_timestamp):
            log.debug("incomplete_finalization {0}".format(row_key))
            counts[anti_entropy_incomplete_finalization] += 1
            store_sized_pickle(
                (row_key, anti_entropy_incomplete_finalization,
                 segment_data, ),
                data_repair_file)
            continue

        if _damaged_records(segment_data):
            log.debug("damaged_records {0}".format(row_key))
            counts[anti_entropy_damaged_records] += 1
            store_sized_pickle(
                (row_key, anti_entropy_damaged_records, segment_data, ),
                data_repair_file)
            continue

        if _database_inconsistancy(row_key, segment_data):
            log.debug("database_inconsistancy {0}".format(row_key))
            counts[anti_entropy_database_inconsistancy] += 1
            store_sized_pickle(
                (row_key, anti_entropy_database_inconsistancy,
                 segment_data, ),
                data_repair_file)
            continue

    meta_repair_file.close()
    data_repair_file.close()

    keys = ["total",
            anti_entropy_missing_replicas,
            anti_entropy_incomplete_finalization,
            anti_entropy_damaged_records,
            anti_entropy_missing_tombstones,
            anti_entropy_database_inconsistancy]

    for key in keys:
        log.info("{0} {1:,}".format(key, counts[key]))
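# Hypothetical invocation sketch: audit_segments expects an event it can
# poll between work items and the work directory produced by the pull
# functions above.  The path below is made up for illustration; the real
# entry point that wires this together is not part of this excerpt.
#
#   import threading
#
#   halt_event = threading.Event()
#   audit_segments(halt_event, "/var/nimbusio/anti_entropy_work")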
def _process_repair_entries(index, source_node_name, req_socket):
    log = logging.getLogger("_process_repair_entries")

    repair_file_path = compute_data_repair_file_path()
    log.debug("opening {0}".format(repair_file_path))
    repair_file = gzip.GzipFile(filename=repair_file_path, mode="rb")

    record_number = 0
    while True:
        try:
            row_key, segment_status, segment_data = \
                retrieve_sized_pickle(repair_file)
        except EOFError:
            log.debug("EOF at record number {0}".format(record_number))
            repair_file.close()
            return record_number

        # gather the damaged sequence numbers reported by every node for
        # this segment
        damaged_sequence_numbers = list()
        for segment_row in segment_data:
            damaged_sequence_numbers.extend(
                segment_row["damaged_sequence_numbers"])

        # the row for this node (index selects this node's entry)
        segment_row = segment_data[index]

        record_number += 1
        result = {"record_number"       : record_number,
                  "action"              : None,
                  "part"                : None,
                  "zfec_padding_size"   : None,
                  "source_node_name"    : source_node_name,
                  "segment_num"         : segment_row["segment_num"],
                  "result"              : None,
                  "data"                : None, }

        expected_slice_count = \
            compute_expected_slice_count(segment_row["file_size"])

        for sequence_num in range(0, expected_slice_count):
            result["data"] = None
            if sequence_num in damaged_sequence_numbers:
                log.debug("{0} damaged sequence {1}".format(row_key,
                                                            sequence_num))
                result["action"] = "read"
                result["part"] = _compute_part_label(sequence_num,
                                                     expected_slice_count)
                try:
                    result["zfec_padding_size"], data = \
                        _get_sequence_from_data_reader(req_socket,
                                                       segment_row,
                                                       sequence_num)
                except Exception as instance:
                    log.exception("record #{0} sequence {1} {2}".format(
                        record_number, sequence_num, instance))
                    result["result"] = "error"
                else:
                    result["result"] = "success"
                    result["data"] = data
            else:
                result["action"] = "skip"
                result["result"] = None

            unified_id, conjoined_part = row_key
            sequence_key = (unified_id,
                            conjoined_part,
                            sequence_num,
                            segment_row["segment_num"])
            log.debug("storing {0} {1}".format(sequence_key,
                                               result["action"]))
            store_sized_pickle((sequence_key, segment_status, result, ),
                               sys.stdout.buffer)
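# _process_repair_entries emits its results as sized pickles on stdout, so a
# parent process can stream them back and merge the per-node streams in
# sequence_key order before handing them to _manage_subprocesses above.  The
# sketch below assumes retrieve_sized_pickle and Python 3.5+ heapq.merge with
# a key argument; the names and wiring are illustrative, not the project's.
import heapq
from operator import itemgetter

def _generate_repair_entries_sketch(subprocess_stdout):
    # stream (sequence_key, segment_status, result) tuples from one
    # subprocess until a clean end of stream
    while True:
        try:
            yield retrieve_sized_pickle(subprocess_stdout)
        except EOFError:
            return

def _make_merge_manager_sketch(subprocess_stdouts):
    # merge the already-sorted per-node streams on sequence_key
    return heapq.merge(*[_generate_repair_entries_sketch(s)
                         for s in subprocess_stdouts],
                       key=itemgetter(0))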