import gzip
import logging
import sys


def _process_repair_entries(index, source_node_name, req_socket):
    log = logging.getLogger("_process_repair_entries")

    repair_file_path = compute_data_repair_file_path()
    log.debug("opening {0}".format(repair_file_path))
    repair_file = gzip.GzipFile(filename=repair_file_path, mode="rb")

    record_number = 0
    while True:
        # each record in the repair file is a sized pickle of
        # (row_key, segment_status, segment_data)
        try:
            row_key, segment_status, segment_data = \
                retrieve_sized_pickle(repair_file)
        except EOFError:
            log.debug("EOF at record number {0}".format(record_number))
            repair_file.close()
            return record_number

        # collect the damaged sequence numbers reported by every segment row
        damaged_sequence_numbers = list()
        for segment_row in segment_data:
            damaged_sequence_numbers.extend(
                segment_row["damaged_sequence_numbers"])

        # the segment row belonging to this node
        segment_row = segment_data[index]

        record_number += 1
        result = {"record_number"     : record_number,
                  "action"            : None,
                  "part"              : None,
                  "zfec_padding_size" : None,
                  "source_node_name"  : source_node_name,
                  "segment_num"       : segment_row["segment_num"],
                  "result"            : None,
                  "data"              : None, }

        expected_slice_count = \
            compute_expected_slice_count(segment_row["file_size"])

        for sequence_num in range(0, expected_slice_count):
            result["data"] = None
            if sequence_num in damaged_sequence_numbers:
                # damaged slice: fetch it from the data reader
                log.debug("{0} damaged sequence {1}".format(row_key,
                                                            sequence_num))
                result["action"] = "read"
                result["part"] = _compute_part_label(sequence_num,
                                                     expected_slice_count)
                try:
                    result["zfec_padding_size"], data = \
                        _get_sequence_from_data_reader(req_socket,
                                                       segment_row,
                                                       sequence_num)
                except Exception as instance:
                    log.exception("record #{0} sequence {1} {2}".format(
                        record_number, sequence_num, instance))
                    result["result"] = "error"
                else:
                    result["result"] = "success"
                    result["data"] = data
            else:
                result["action"] = "skip"
                result["result"] = None

            # pass the result downstream as a sized pickle on stdout
            unified_id, conjoined_part = row_key
            sequence_key = (unified_id,
                            conjoined_part,
                            sequence_num,
                            segment_row["segment_num"])
            log.debug("storing {0} {1}".format(sequence_key,
                                               result["action"]))
            store_sized_pickle((sequence_key, segment_status, result, ),
                               sys.stdout.buffer)
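
# retrieve_sized_pickle and store_sized_pickle (imported elsewhere) frame the
# repair file and the stdout stream above as length-prefixed pickle records.
# The sketch below illustrates one plausible framing; the "!Q" big-endian
# 64-bit length prefix is an assumption, not necessarily the repository's
# actual wire format.

import pickle
import struct

_SKETCH_HEADER_FORMAT = "!Q"
_SKETCH_HEADER_SIZE = struct.calcsize(_SKETCH_HEADER_FORMAT)


def _sketch_store_sized_pickle(data, file_object):
    """write one length-prefixed pickle record (illustrative sketch)"""
    pickled_data = pickle.dumps(data)
    file_object.write(struct.pack(_SKETCH_HEADER_FORMAT, len(pickled_data)))
    file_object.write(pickled_data)


def _sketch_retrieve_sized_pickle(file_object):
    """read one length-prefixed pickle record, raising EOFError at the
    end of the stream (illustrative sketch)"""
    header = file_object.read(_SKETCH_HEADER_SIZE)
    if len(header) < _SKETCH_HEADER_SIZE:
        raise EOFError("end of sized-pickle stream")
    (size, ) = struct.unpack(_SKETCH_HEADER_FORMAT, header)
    pickled_data = file_object.read(size)
    if len(pickled_data) < size:
        raise EOFError("truncated sized-pickle record")
    return pickle.loads(pickled_data)
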
def _process_work_batch(connection, known_value_files, batch):
    log = logging.getLogger("_process_work_batch")
    assert len(batch) > 0

    batch_key = make_batch_key(batch[0])
    log.info("batch {0}".format(batch_key))

    missing_sequence_numbers = list()
    defective_sequence_numbers = list()

    # any expected sequence number that has no database entry is missing
    expected_slice_count = compute_expected_slice_count(batch[0].file_size)
    expected_sequence_numbers = set(range(0, expected_slice_count))
    actual_sequence_numbers = set([entry.sequence_num for entry in batch])
    missing_sequence_numbers.extend(
        list(expected_sequence_numbers - actual_sequence_numbers))

    for entry in batch:
        if entry.value_file_id not in known_value_files:
            known_value_files[entry.value_file_id] = \
                _value_file_status(connection, entry)
        value_file_status = known_value_files[entry.value_file_id]

        # if we don't have a value file for any sequence,
        # treat that as missing too
        if value_file_status == _value_file_missing:
            log.info("Missing value file {0} for {1} sequence {2}".format(
                entry.value_file_id, batch_key, entry.sequence_num))
            missing_sequence_numbers.append(entry.sequence_num)
            continue

        if not _always_check_entries:
            if value_file_status == _value_file_valid:
                continue

        # if none of the above branches were fruitful,
        # then every record in the database that points to this value file
        # must be verified by opening, seeking, reading, and hashing the
        # record pointed to in the value file. This will be terribly costly
        # in terms of IO, because our work is not sorted by value file.
        # Fortunately, data corruption should be rare enough that the
        # inefficiency will be irrelevant.
        # Note: when _always_check_entries is set, we fall through even for
        # valid value files, so the status may be valid or questionable here.
        assert value_file_status in (_value_file_valid,
                                     _value_file_questionable, )
        if not _verify_entry_against_value_file(entry):
            log.info("Defective value file {0} for {1} sequence {2}".format(
                entry.value_file_id, batch_key, entry.sequence_num))
            defective_sequence_numbers.append(entry.sequence_num)
            continue

    if len(missing_sequence_numbers) > 0:
        missing_sequence_numbers.sort()
        log.info("missing sequence numbers {0}".format(
            missing_sequence_numbers))
        _store_damaged_segment(connection,
                               batch[0],
                               damaged_segment_missing_sequence,
                               missing_sequence_numbers)

    if len(defective_sequence_numbers) > 0:
        defective_sequence_numbers.sort()
        log.info("defective sequence numbers {0}".format(
            defective_sequence_numbers))
        _store_damaged_segment(connection,
                               batch[0],
                               damaged_segment_defective_sequence,
                               defective_sequence_numbers)
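
# _verify_entry_against_value_file is referenced above but defined elsewhere;
# per the comment in _process_work_batch it opens, seeks, reads, and hashes
# the record the entry points to. The sketch below follows those assumptions.
# The entry attributes (value_file_path, value_file_offset, sequence_size,
# sequence_hash) and the choice of MD5 are illustrative guesses, not the
# repository's actual schema.

import hashlib


def _sketch_verify_entry_against_value_file(entry):
    """re-read the slice this entry points to and compare its digest
    against the hash stored with the entry (illustrative sketch)"""
    with open(entry.value_file_path, "rb") as value_file:
        value_file.seek(entry.value_file_offset)
        data = value_file.read(entry.sequence_size)
    if len(data) != entry.sequence_size:
        # short read: the value file is truncated
        return False
    return hashlib.md5(data).digest() == bytes(entry.sequence_hash)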