def generate_all_sequence_rows(
    self, segment_unified_id, segment_conjoined_part, segment_num
):
    """
    a generator to return sequence data for a segment in order
    """
    open_value_files = dict()

    sequence_rows = _all_sequence_rows_for_segment(self._connection,
                                                   segment_unified_id,
                                                   segment_conjoined_part,
                                                   segment_num)

    # first yield is count of sequences
    yield len(sequence_rows)

    for sequence_row in sequence_rows:
        if sequence_row.value_file_id not in open_value_files:
            # open in binary mode: sequence data is raw bytes
            open_value_files[sequence_row.value_file_id] = open(
                compute_value_file_path(self._repository_path,
                                        sequence_row.value_file_id),
                "rb")
        value_file = open_value_files[sequence_row.value_file_id]
        value_file.seek(sequence_row.value_file_offset)
        yield sequence_row, value_file.read(sequence_row.size)

    for value_file in open_value_files.values():
        value_file.close()
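# A minimal usage sketch (hypothetical caller; "reader" stands in for the
# object exposing generate_all_sequence_rows). The first item yielded is
# the sequence count; the rest are (sequence_row, data) pairs:
#
#   sequences = reader.generate_all_sequence_rows(unified_id,
#                                                 conjoined_part,
#                                                 segment_num)
#   sequence_count = next(sequences)
#   for sequence_row, data in sequences:
#       assert len(data) == sequence_row.size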
def __init__(self, connection, space_id, repository_path, expected_size=None):
    self._log = logging.getLogger("OutputValueFile")
    self._connection = connection
    assert space_id is not None
    self._space_id = space_id
    self._value_file_id = _get_next_value_file_id(connection)
    self._value_file_path = compute_value_file_path(
        repository_path, space_id, self._value_file_id)
    self._expected_size = expected_size
    self._log.debug("opening {0} expected size = {1}".format(
        self._value_file_path, self._expected_size))
    value_file_dir = os.path.dirname(self._value_file_path)
    if not os.path.exists(value_file_dir):
        os.makedirs(value_file_dir)
    flags = os.O_WRONLY | os.O_CREAT
    self._value_file_fd = os.open(self._value_file_path, flags)
    self._creation_time = create_timestamp()
    self._size = 0
    self._md5 = hashlib.md5()
    self._segment_sequence_count = 0
    self._min_segment_id = None
    self._max_segment_id = None
    self._collection_ids = set()
def unlink_unreachable_value_files(connection, repository_path):
    """
    clean out value files that are unreachable from the database
    """
    log = logging.getLogger("unlink_unreachable_value_files")

    # 1) build up an accumulator of value files seen in the file system.
    #    the repository contains some instances of directories 000-999,
    #    where 'nnn' is value_file.id mod 1000.
    #    in those directories, we're looking for filenames that are
    #    8 digit numbers, representing value_file.id
    filesystem_value_file_ids = set()
    for i in range(1000):
        dir_name = "{0:0>3}".format(i)
        dir_path = os.path.join(repository_path, dir_name)
        if os.path.isdir(dir_path):
            for name in os.listdir(dir_path):
                if len(name) != 8:
                    continue
                try:
                    value_file_id = int(name)
                except ValueError:
                    continue
                filesystem_value_file_ids.add(value_file_id)

    # 2) explicitly rollback the connection's current transaction
    connection.rollback()

    # 3) do the "select id from value_file" and store the ids in a set
    database_value_file_ids = set()
    for (value_file_id, ) in connection.fetch_all_rows(
        "select id from nimbusio_node.value_file", []
    ):
        database_value_file_ids.add(value_file_id)

    # 4) unlink things from #1 not present in #3.
    unreachable_value_file_ids = \
        filesystem_value_file_ids - database_value_file_ids

    total_value_file_size = 0
    for value_file_id in list(unreachable_value_file_ids):
        value_file_path = compute_value_file_path(repository_path,
                                                  value_file_id)
        value_file_size = os.path.getsize(value_file_path)
        log.info("unlinking unreachable value_file {0} size = {1}".format(
            value_file_path, value_file_size))
        total_value_file_size += value_file_size
        try:
            os.unlink(value_file_path)
        except Exception:
            log.exception(value_file_path)

    log.info("found {0:,} unreachable value files; savings={1:,}".format(
        len(unreachable_value_file_ids), total_value_file_size))

    return total_value_file_size
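# compute_value_file_path is referenced throughout but not defined in this
# section. A minimal sketch, assuming the flat layout described in the
# comments above (directory is value_file.id mod 1000, filename the
# zero-padded 8-digit id); the space-aware call sites elsewhere pass an
# additional space_id, which this sketch does not handle.
import os

def compute_value_file_path(repository_path, value_file_id):
    return os.path.join(repository_path,
                        "{0:0>3}".format(value_file_id % 1000),
                        "{0:0>8}".format(value_file_id))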
def __init__(self, local_connection, repository_path, value_file_row):
    self._log = logging.getLogger("InputValueFile")
    self._local_connection = local_connection
    self._repository_path = repository_path
    self._value_file_row = value_file_row
    self._value_file_path = compute_value_file_path(repository_path,
                                                    value_file_row.id)
    self._value_file = open(self._value_file_path, "rb")
def _remove_old_value_files(repository_path, value_file_ids):
    log = logging.getLogger("_remove_old_value_files")
    for value_file_id in value_file_ids:
        value_file_path = \
            compute_value_file_path(repository_path, value_file_id)
        try:
            os.unlink(value_file_path)
        except Exception:
            log.exception(value_file_path)
def __init__(self, local_connection, repository_path, value_file_row):
    self._log = logging.getLogger("InputValueFile")
    self._local_connection = local_connection
    self._repository_path = repository_path
    self._value_file_row = value_file_row
    self._value_file_path = \
        compute_value_file_path(repository_path,
                                value_file_row.space_id,
                                value_file_row.id)
    self._value_file = open(self._value_file_path, "rb")
def _unlink_value_files(connection, repository_path, unused_value_files):
    log = logging.getLogger("_unlink_value_files")
    for value_file_id, space_id in unused_value_files:
        connection.execute(_delete_value_file_query, [value_file_id, ])
        value_file_path = compute_value_file_path(repository_path,
                                                  space_id,
                                                  value_file_id)
        try:
            os.unlink(value_file_path)
        except Exception:
            log.exception(value_file_path)
def _remove_old_value_files(repository_path, value_file_keys):
    log = logging.getLogger("_remove_old_value_files")
    for value_file_id, space_id in value_file_keys:
        value_file_path = \
            compute_value_file_path(repository_path, space_id, value_file_id)
        try:
            os.unlink(value_file_path)
        except Exception:
            log.exception(value_file_path)
def _verify_entry_against_value_file(entry):
    log = logging.getLogger("_verify_entry_against_value_file")

    value_file_path = compute_value_file_path(_repository_path,
                                              entry.space_id,
                                              entry.value_file_id)

    md5_sum = hashlib.md5()
    try:
        with open(value_file_path, "rb") as input_file:
            input_file.seek(entry.value_file_offset)
            md5_sum.update(input_file.read(entry.sequence_size))
    except (OSError, IOError) as instance:
        log.error("Error seek/reading {0} {1}".format(value_file_path,
                                                      instance))
        return False

    return md5_sum.digest() == bytes(entry.sequence_hash)
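# A minimal usage sketch (hypothetical names): "entry" can be any object
# exposing the five attributes read above -- space_id, value_file_id,
# value_file_offset, sequence_size, and sequence_hash -- e.g. a row from
# the damage-audit query:
#
#   if not _verify_entry_against_value_file(entry):
#       log.warning("damaged sequence in value file {0}".format(
#           entry.value_file_id))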
def __init__(self, connection, space_id, repository_path):
    self._space_id = space_id
    self._value_file_id = _insert_value_file_default_row(connection,
                                                         space_id)
    self._log = logging.getLogger("VF%08d" % (self._value_file_id, ))
    self._connection = connection
    self._value_file_path = compute_value_file_path(
        repository_path, space_id, self._value_file_id)
    self._log.info("opening %s" % (self._value_file_path, ))
    self._value_file_fd = _open_value_file(self._value_file_path)
    self._creation_time = create_timestamp()
    self._size = 0
    self._md5 = hashlib.md5()
    self._segment_sequence_count = 0
    self._min_segment_id = None
    self._max_segment_id = None
    self._collection_ids = set()
    self._synced = True  # treat as synced until we write
def _process_request(resources):
    """
    Wait for a work request from the controller, process it, and push
    the reply to the client.
    """
    log = logging.getLogger("_process_request")

    log.debug("waiting for work request")
    try:
        request = resources.dealer_socket.recv_pyobj()
    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error):
            raise InterruptedSystemCall()
        raise

    assert resources.dealer_socket.rcvmore
    control = resources.dealer_socket.recv_pyobj()

    log.debug("user_request_id = {0}; control = {1}".format(
        request["user-request-id"], control))

    assert resources.dealer_socket.rcvmore
    sequence_row = resources.dealer_socket.recv_pyobj()

    value_file_path = compute_value_file_path(_repository_path,
                                              sequence_row["space_id"],
                                              sequence_row["value_file_id"])

    control["result"] = "success"
    control["error-message"] = ""

    if value_file_path in resources.file_cache:
        value_file, _ = resources.file_cache[value_file_path]
        del resources.file_cache[value_file_path]
    else:
        try:
            value_file = open(value_file_path, "rb")
        except Exception as instance:
            log.exception("user_request_id = {0}, read {1}".format(
                request["user-request-id"], value_file_path))
            resources.event_push_client.exception("error_opening_value_file",
                                                  str(instance))
            control["result"] = "error_opening_value_file"
            control["error-message"] = str(instance)

    if control["result"] != "success":
        _send_error_reply(resources, request, control)
        return

    read_offset = \
        sequence_row["value_file_offset"] + \
        (control["left-offset"] * encoded_block_slice_size)

    read_size = \
        sequence_row["size"] - \
        (control["left-offset"] * encoded_block_slice_size) - \
        (control["right-offset"] * encoded_block_slice_size)

    # Ticket #84 handle a short block
    # the last block in the file may be smaller than
    # encoded_block_slice_size, so we might have subtracted too much
    # for the right offset
    if control["right-offset"] > 0:
        block_modulus = sequence_row["size"] % encoded_block_slice_size
        last_block_size = (encoded_block_slice_size
                           if block_modulus == 0 else block_modulus)
        last_block_delta = encoded_block_slice_size - last_block_size
        read_size += last_block_delta

    try:
        value_file.seek(read_offset)
        encoded_data = value_file.read(read_size)
    except Exception as instance:
        log.exception("user_request_id = {0}, read {1}".format(
            request["user-request-id"], value_file_path))
        resources.event_push_client.exception("error_reading_value_file",
                                              str(instance))
        control["result"] = "error_reading_value_file"
        control["error-message"] = str(instance)

    if control["result"] != "success":
        value_file.close()
        _send_error_reply(resources, request, control)
        return

    resources.file_cache[value_file_path] = value_file, time.time()

    if len(encoded_data) != read_size:
        error_message = "{0} size mismatch {1} {2}".format(
            request["retrieve-id"], len(encoded_data), read_size)
        log.error("user_request_id = {0}, {1}".format(
            request["user-request-id"], error_message))
        resources.event_push_client.error("size_mismatch", error_message)
        control["result"] = "size_mismatch"
        control["error-message"] = error_message

    if control["result"] != "success":
        _send_error_reply(resources, request, control)
        return

    encoded_block_list = list(encoded_block_generator(encoded_data))

    segment_size = 0
    segment_adler32 = 0
    segment_md5 = hashlib.md5()
    for encoded_block in encoded_block_list:
        segment_size += len(encoded_block)
        segment_adler32 = zlib.adler32(encoded_block, segment_adler32)
        segment_md5.update(encoded_block)
    segment_md5_digest = segment_md5.digest()

    reply = {
        "message-type"      : "retrieve-key-reply",
        "user-request-id"   : request["user-request-id"],
        "client-tag"        : request["client-tag"],
        "message-id"        : request["message-id"],
        "retrieve-id"       : request["retrieve-id"],
        "segment-unified-id": request["segment-unified-id"],
        "segment-num"       : request["segment-num"],
        "segment-size"      : segment_size,
        "zfec-padding-size" : sequence_row["zfec_padding_size"],
        "segment-adler32"   : segment_adler32,
        "segment-md5-digest": b64encode(segment_md5_digest).decode("utf-8"),
        "sequence-num"      : None,
        "completed"         : control["completed"],
        "result"            : "success",
        "error-message"     : "",
    }

    push_socket = _get_reply_push_socket(resources,
                                         request["client-address"])
    push_socket.send_json(reply, zmq.SNDMORE)
    for encoded_block in encoded_block_list[:-1]:
        push_socket.send(encoded_block, zmq.SNDMORE)
    push_socket.send(encoded_block_list[-1])
def _value_file_status(connection, entry):
    log = logging.getLogger("_value_file_status")
    batch_key = make_batch_key(entry)

    value_file_path = compute_value_file_path(_repository_path,
                                              entry.space_id,
                                              entry.value_file_id)

    # Always do a stat on the value file.
    try:
        stat_result = os.stat(value_file_path)
    except OSError as instance:
        # If the value file is missing, consider all of the
        # segment_sequences to be missing, and handle it as such.
        if instance.errno == errno.ENOENT:
            log.error("value file missing {0} {1}".format(batch_key,
                                                          value_file_path))
            return _value_file_missing
        log.error("Error stat'ing value file {0} {1} {2}".format(
            str(instance), batch_key, value_file_path))
        raise

    # If the value file is still open, consider all data in it undamaged.
    if entry.value_file_close_time is None:
        return _value_file_valid

    # If the value file exists, is closed, has both an md5 and a size in
    # the database, the size from stat matches the size in the database,
    # and the close_time or last_integrity_check_time is younger than
    # MAX_TIME_BETWEEN_VALUE_FILE_INTEGRITY_CHECK, consider all records
    # in the file undamaged. (This is the common case.)
    if entry.value_file_hash is None:
        log.info("Value file row has no md5 hash {0} {1}".format(batch_key,
                                                                 entry))
        return _value_file_questionable

    if entry.value_file_size is None:
        log.info("Value file row has no size {0} {1}".format(batch_key,
                                                             entry))
        return _value_file_questionable

    if entry.value_file_size != stat_result.st_size:
        log.info("Value file row size {0} != stat size {1} {2}".format(
            entry.value_file_size, stat_result.st_size, batch_key))
        return _value_file_questionable

    current_time = create_timestamp()
    value_file_row_age = current_time - entry.value_file_close_time
    if entry.value_file_last_integrity_check_time is not None:
        value_file_row_age = \
            current_time - entry.value_file_last_integrity_check_time

    if value_file_row_age < _max_value_file_time:
        return _value_file_valid

    value_file_result = _value_file_valid

    # If the value file matches all the previous criteria EXCEPT the
    # MAX_TIME_BETWEEN_VALUE_FILE_INTEGRITY_CHECK, read the whole file
    # and calculate the md5. If it matches, consider the whole file good
    # as above. Update last_integrity_check_time regardless.
    md5_sum = hashlib.md5()
    try:
        with open(value_file_path, "rb") as input_file:
            while True:
                data = input_file.read(_read_buffer_size)
                if len(data) == 0:
                    break
                md5_sum.update(data)
    except (OSError, IOError) as instance:
        log.error("Error reading {0} {1}".format(value_file_path, instance))
        value_file_result = _value_file_questionable

    if value_file_result == _value_file_valid and \
       md5_sum.digest() != bytes(entry.value_file_hash):
        log.error("md5 mismatch {0} {1} {2} {3}".format(
            md5_sum.digest(), bytes(entry.value_file_hash), batch_key,
            value_file_path))
        value_file_result = _value_file_questionable

    # we're only supposed to update the check time after we've also read
    # the file and recorded any damage, not before. otherwise it's a race
    # condition -- we may crash before we finish checking the file, and
    # then the file never gets checked but is marked as checked.
    _update_value_file_last_integrity_check_time(connection,
                                                 entry.value_file_id,
                                                 create_timestamp())

    return value_file_result
def rewrite_value_files(options, connection, repository_path, ref_generator):
    log = logging.getLogger("_rewrite_value_files")
    max_sort_mem = options.max_sort_mem * 1024 ** 3

    total_batch_size = 0
    total_output_size = 0
    savings = 0
    batch_size = 0
    refs = list()
    value_file_data = dict()

    while True:
        try:
            ref = next(ref_generator)
        except StopIteration:
            break

        # this should be the start of a partition
        assert ref.value_row_num == 1, ref

        if batch_size + ref.value_file_size > max_sort_mem:
            connection.begin_transaction()
            try:
                output_size = _process_batch(connection, repository_path,
                                             refs, value_file_data)
            except Exception:
                connection.rollback()
                raise
            connection.commit()
            _remove_old_value_files(repository_path, value_file_data.keys())
            total_batch_size += batch_size
            total_output_size += output_size
            savings = batch_size - output_size
            log.debug(
                "batch_size={0:,}, output_size={1:,}, savings={2:,}".format(
                    batch_size, output_size, savings))
            batch_size = 0
            refs = list()
            value_file_data = dict()

        batch_size += ref.value_file_size

        # get the value file data
        # TODO: we should only read the actual referenced data from the
        # files into memory, not keep whole files in memory. Keeping the
        # whole file in memory means we're using memory for parts of the
        # files that are garbage, effectively decreasing the size of our
        # output sort batch. We could end up with very small outputs from
        # each batch if a large portion of the input value files are
        # garbage.
        value_file_key = (ref.value_file_id, ref.space_id, )
        assert value_file_key not in value_file_data
        value_file_path = \
            compute_value_file_path(repository_path, ref.space_id,
                                    ref.value_file_id)
        with open(value_file_path, "rb") as input_file:
            value_file_data[value_file_key] = input_file.read()

        # load up the refs for this partition
        refs.append(ref)
        for _ in range(ref.value_row_count - 1):
            refs.append(next(ref_generator))

    if len(refs) > 0:
        connection.begin_transaction()
        try:
            output_size = _process_batch(connection, repository_path,
                                         refs, value_file_data)
        except Exception:
            connection.rollback()
            raise
        connection.commit()
        _remove_old_value_files(repository_path, value_file_data.keys())
        total_batch_size += batch_size
        total_output_size += output_size
        savings = batch_size - output_size
        log.debug("batch_size={0:,}, output_size={1:,}, savings={2:,}".format(
            batch_size, output_size, savings))

    savings = total_batch_size - total_output_size
    log.info(
        "total_batch_size={0:,} total_output_size={1:,} "
        "savings={2:,}".format(total_batch_size, total_output_size, savings))

    return savings
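# The attributes rewrite_value_files reads from each ref, gathered into a
# hypothetical stand-in (the real ref_generator presumably yields database
# rows with at least these fields). value_row_num is the 1-based position
# of the ref within its partition; value_row_count is the partition total.
from collections import namedtuple

ValueFileRef = namedtuple("ValueFileRef",
                          ["value_file_id",
                           "space_id",
                           "value_file_size",
                           "value_row_num",
                           "value_row_count"])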
def unlink_unreachable_value_files(connection, repository_path):
    """
    clean out value files that are unreachable from the database
    """
    log = logging.getLogger("unlink_unreachable_value_files")

    # 1) build up an accumulator of value files seen in the file system.
    #    the repository contains some instances of directories 000-999,
    #    where 'nnn' is value_file.id mod 1000.
    #    in those directories, we're looking for filenames that are
    #    8 digit numbers, representing value_file.id.
    #    remember each path as we find it: an unreachable file has no
    #    database row, hence no space_id to recompute the path from.
    filesystem_value_file_ids = set()
    filesystem_value_file_paths = dict()
    for i in range(1000):
        dir_name = "{0:0>3}".format(i)
        dir_path = os.path.join(repository_path, dir_name)
        if os.path.isdir(dir_path):
            for name in os.listdir(dir_path):
                if len(name) != 8:
                    continue
                try:
                    value_file_id = int(name)
                except ValueError:
                    continue
                filesystem_value_file_ids.add(value_file_id)
                filesystem_value_file_paths[value_file_id] = \
                    os.path.join(dir_path, name)

    # 2) explicitly rolling back the connection's current transaction is
    #    not needed because we are in auto-commit mode.
    # connection.rollback()
    assert not connection._in_transaction

    # 3) do the "select id, space_id from value_file" and store the rows
    #    in a dict mapping value_file_id to space_id
    database_value_file_ids = dict()
    for (value_file_id, space_id, ) in connection.fetch_all_rows(
        "select id, space_id from nimbusio_node.value_file", []
    ):
        database_value_file_ids[value_file_id] = space_id

    # 4) unlink things from #1 not present in #3, using the paths found
    #    during the filesystem scan
    unreachable_value_file_ids = \
        filesystem_value_file_ids - set(database_value_file_ids)

    total_value_file_size = 0
    for value_file_id in list(unreachable_value_file_ids):
        value_file_path = filesystem_value_file_paths[value_file_id]
        value_file_size = os.path.getsize(value_file_path)
        log.info("unlinking unreachable value_file {0} size = {1}".format(
            value_file_path, value_file_size))
        total_value_file_size += value_file_size
        try:
            os.unlink(value_file_path)
        except Exception:
            log.exception(value_file_path)

    log.info("found {0:,} unreachable value files; savings={1:,}".format(
        len(unreachable_value_file_ids), total_value_file_size))

    return total_value_file_size