def __init__(self, connection, space_id, repository_path, expected_size=None):
    self._log = logging.getLogger("OutputValueFile")
    self._connection = connection
    assert space_id is not None
    self._space_id = space_id
    self._value_file_id = _get_next_value_file_id(connection)
    self._value_file_path = compute_value_file_path(
        repository_path, space_id, self._value_file_id)
    self._expected_size = expected_size
    self._log.debug("opening {0} expected size = {1}".format(
        self._value_file_path, self._expected_size))
    value_file_dir = os.path.dirname(self._value_file_path)
    if not os.path.exists(value_file_dir):
        os.makedirs(value_file_dir)
    flags = os.O_WRONLY | os.O_CREAT
    self._value_file_fd = os.open(self._value_file_path, flags)
    self._creation_time = create_timestamp()
    self._size = 0
    self._md5 = hashlib.md5()
    self._segment_sequence_count = 0
    self._min_segment_id = None
    self._max_segment_id = None
    self._collection_ids = set()
def test_handoff_small_content(self):
    """test handing off content that fits in a single message"""
    file_size = 10 * 64 * 1024
    file_content = random_string(file_size)
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 5

    file_adler32 = zlib.adler32(file_content)
    file_md5 = hashlib.md5(file_content)

    message = {
        "message-type"      : "archive-key-entire",
        "priority"          : archive_priority,
        "collection-id"     : collection_id,
        "key"               : key,
        "timestamp-repr"    : repr(timestamp),
        "segment-num"       : segment_num,
        "file-size"         : file_size,
        "file-adler32"      : file_adler32,
        "file-hash"         : b64encode(file_md5.digest()),
        "handoff-node-name" : None,
    }

    g = gevent.spawn(self._send_message_get_reply, message, file_content)
    g.join(timeout=10.0)
    self.assertEqual(g.ready(), True)
    reply = g.value
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    print >> sys.stderr, "archive successful"
    print >> sys.stderr, "press [Enter] to continue"
    raw_input()
def close(self):
    """close the file and make it visible in the database"""
    self.sync()
    os.close(self._value_file_fd)

    if self._segment_sequence_count == 0:
        self._log.info("removing empty file %s" % (self._value_file_path, ))
        try:
            os.unlink(self._value_file_path)
        except Exception:
            pass
        return

    self._log.info("closing %s size=%s segment_sequence_count=%s" % (
        self._value_file_path, self._size, self._segment_sequence_count))

    value_file_row = value_file_template(
        id=self._value_file_id,
        space_id=self._space_id,
        creation_time=self._creation_time,
        close_time=create_timestamp(),
        size=self._size,
        hash=psycopg2.Binary(self._md5.digest()),
        segment_sequence_count=self._segment_sequence_count,
        min_segment_id=self._min_segment_id,
        max_segment_id=self._max_segment_id,
        distinct_collection_count=len(self._collection_ids),
        collection_ids=sorted(list(self._collection_ids)),
        garbage_size_estimate=0,
        fragmentation_estimate=0,
        last_cleanup_check_time=None,
        last_integrity_check_time=None)

    _update_value_file_row(self._connection, value_file_row)
def close(self):
    """close the file and make it visible in the database"""
    self._log.debug("closing %s size=%s segment_sequence_count=%s" % (
        self._value_file_path, self._size, self._segment_sequence_count))

    os.fsync(self._value_file_fd)
    os.close(self._value_file_fd)

    value_file_row = value_file_template(
        id=self._value_file_id,
        space_id=self._space_id,
        creation_time=self._creation_time,
        close_time=create_timestamp(),
        size=self._size,
        hash=psycopg2.Binary(self._md5.digest()),
        segment_sequence_count=self._segment_sequence_count,
        min_segment_id=self._min_segment_id,
        max_segment_id=self._max_segment_id,
        distinct_collection_count=len(self._collection_ids),
        collection_ids=sorted(list(self._collection_ids)),
        garbage_size_estimate=0,
        fragmentation_estimate=0,
        last_cleanup_check_time=None,
        last_integrity_check_time=None)

    _insert_value_file_row(self._connection, value_file_row)
def _handle_anti_entropy_audit_request(state, message, _data):
    """handle a request to audit a specific collection, not some random one"""
    log = logging.getLogger("_handle_anti_entropy_audit_request")

    timestamp = create_timestamp()
    state_key = (message["collection-id"], timestamp, )

    database = AuditResultDatabase(state["central-database-connection"])
    row_id = database.start_audit(message["collection-id"], timestamp)
    database.close()

    state["active-requests"][state_key] = _request_state_tuple(
        client_tag=message["client-tag"],
        timestamp=timestamp,
        timeout=time.time()+_request_timeout,
        retry_count=max_retry_count,
        replies=dict(),
        row_id=row_id)

    request = {
        "message-type"  : "consistency-check",
        "collection-id" : message["collection-id"],
        "timestamp-repr": repr(timestamp),
    }

    for anti_entropy_client in state["anti-entropy-clients"]:
        anti_entropy_client.queue_message_for_send(request)
def run(self, halt_event):
    """send 'request-handoffs' to all remote handoff servers"""
    if halt_event.is_set():
        return

    if len(self._state["pending-handoffs"]) > 0:
        # run again later
        return [(self.run, time.time() + _delay_interval, )]

    self._log.debug("sending handoff requests")
    message = {
        "message-type"           : "request-handoffs",
        "request-timestamp-repr" : repr(create_timestamp()),
        "node-name"              : self._local_node_name,
        "node-id"                : self._local_node_id,
    }

    for handoff_server_client in self._state["handoff-server-clients"]:
        handoff_server_client.queue_message_for_send(message)

    return [(self.run, self.next_run(), )]
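# A minimal sketch of the scheduling convention run() appears to follow:
# a task returns a list of (callable, unix_time) pairs to be run again at
# or after that time. This driver is hypothetical, not the actual
# nimbus.io time queue.
import time

def drive(task, halt_event):
    pending = [(task, time.time())]
    while pending and not halt_event.is_set():
        func, deadline = pending.pop(0)
        delay = deadline - time.time()
        if delay > 0:
            time.sleep(delay)
        result = func(halt_event)
        if result is not None:
            pending.extend(result)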
def _start_consistency_check(state, collection_id, row_id=None, retry_count=0):
    log = logging.getLogger("_start_consistency_check")

    timestamp = create_timestamp()
    state_key = (collection_id, timestamp, )

    database = AuditResultDatabase(state["central-database-connection"])
    if row_id is None:
        row_id = database.start_audit(collection_id, timestamp)
    else:
        database.restart_audit(row_id, timestamp)
    database.close()

    state["active-requests"][state_key] = _request_state_tuple(
        client_tag=None,
        timestamp=timestamp,
        timeout=time.time()+_request_timeout,
        retry_count=retry_count,
        replies=dict(),
        row_id=row_id)

    request = {
        "message-type"  : "consistency-check",
        "collection-id" : collection_id,
        "timestamp-repr": repr(timestamp),
    }

    for anti_entropy_client in state["anti-entropy-clients"]:
        anti_entropy_client.queue_message_for_send(request)
def test_purge_nonexistent_key(self):
    """test purging a key that does not exist, with no complications"""
    collection_id = 1001
    key = self._key_generator.next()
    segment_num = 4
    timestamp = create_timestamp()
    reply = self._purge(collection_id, key, timestamp, segment_num)
    self.assertEqual(reply["result"], "success", reply)
def test_simple_segment(self):
    """test writing and reading a simple segment of one sequence"""
    collection_id = 1001
    key = "aaa/bbb/ccc"
    timestamp = create_timestamp()
    segment_num = 42
    sequence_num = 0
    data_size = 1024
    data = random_string(data_size)
    data_adler32 = zlib.adler32(data)
    data_md5 = hashlib.md5(data)
    file_tombstone = False

    writer = Writer(self._database_connection, _repository_path)

    # clean out any segments that are lying around for this (test) key
    reader = Reader(self._database_connection, _repository_path)

    writer.start_new_segment(
        collection_id, key, repr(timestamp), segment_num)
    writer.store_sequence(
        collection_id,
        key,
        repr(timestamp),
        segment_num,
        sequence_num,
        data)
    writer.finish_new_segment(
        collection_id,
        key,
        repr(timestamp),
        segment_num,
        data_size,
        data_adler32,
        data_md5.digest(),
        file_tombstone,
        handoff_node_id=None)
    writer.close()

    file_info = most_recent_timestamp_for_key(
        self._database_connection, collection_id, key)
    self.assertEqual(file_info.file_size, data_size)
    self.assertEqual(file_info.file_adler32, data_adler32)
    self.assertEqual(str(file_info.file_hash), data_md5.digest())
    self.assertEqual(file_info.file_tombstone, file_tombstone)

    reader = Reader(self._database_connection, _repository_path)
    sequence_generator = reader.generate_all_sequence_rows_for_segment(
        collection_id, key, file_info.timestamp, file_info.segment_num)

    # first yield should be a count
    sequence_count = sequence_generator.next()
    self.assertEqual(sequence_count, 1)

    sequence_data = sequence_generator.next()
    self.assertEqual(len(sequence_data), len(data))
    self.assertEqual(sequence_data, data)
def xxxtest_archive_key_entire_with_meta(self):
    """
    test archiving a key in a single message, including meta data
    """
    file_size = 10 * 64 * 1024
    content_item = random_string(file_size)
    user_request_id = uuid.uuid1().hex
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2

    meta_key = "".join([nimbus_meta_prefix, "test_key"])
    meta_value = "pork"

    file_adler32 = zlib.adler32(content_item)
    file_md5 = hashlib.md5(content_item)

    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()

    message = {
        "message-type"      : "archive-key-entire",
        "priority"          : archive_priority,
        "user-request-id"   : user_request_id,
        "collection-id"     : collection_id,
        "key"               : key,
        "unified-id"        : unified_id,
        "timestamp-repr"    : repr(timestamp),
        "conjoined-part"    : 0,
        "segment-num"       : segment_num,
        "segment-size"      : file_size,
        "zfec-padding-size" : 4,
        "segment-adler32"   : file_adler32,
        "segment-md5-digest": b64encode(file_md5.digest()),
        "file-size"         : file_size,
        "file-adler32"      : file_adler32,
        "file-hash"         : b64encode(file_md5.digest()),
        "source-node-name"  : _local_node_name,
        "handoff-node-name" : None,
        meta_key            : meta_value,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=content_item)

    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def run(self, halt_event):
    if halt_event.is_set():
        return

    # we want to dump everything for the previous hour
    current_time = create_timestamp()
    current_hour = floor_hour(current_time)
    prev_hour = current_hour - datetime.timedelta(hours=1)

    self._flush_to_database(prev_hour)

    return [(self.run, self.next_run(), )]
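# A quick, self-contained check of the flush-window arithmetic above,
# assuming floor_hour() truncates a datetime to the top of its hour.
import datetime

def floor_hour_sketch(timestamp):
    return timestamp.replace(minute=0, second=0, microsecond=0)

current_time = datetime.datetime(2012, 3, 15, 14, 37, 22)
prev_hour = floor_hour_sketch(current_time) - datetime.timedelta(hours=1)
assert prev_hour == datetime.datetime(2012, 3, 15, 13, 0, 0)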
def xxxtest_destroy_nonexistent_key(self):
    """test destroying a key that does not exist, with no complications"""
    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()
    collection_id = 1001
    key = self._key_generator.next()
    segment_num = 4
    timestamp = create_timestamp()
    reply = self._destroy(
        collection_id, key, unified_id, timestamp, segment_num)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def xxxtest_destroy_tombstone(self):
    """test destroying a key that has already been destroyed"""
    file_size = 10 * 64 * 1024
    content_item = random_string(file_size)
    message_id = uuid.uuid1().hex
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    archive_timestamp = create_timestamp()
    destroy_1_timestamp = archive_timestamp + timedelta(seconds=1)
    destroy_2_timestamp = destroy_1_timestamp + timedelta(seconds=1)
    segment_num = 2

    file_adler32 = zlib.adler32(content_item)
    file_md5 = hashlib.md5(content_item)

    message = {
        "message-type"      : "archive-key-entire",
        "message-id"        : message_id,
        "priority"          : archive_priority,
        "collection-id"     : collection_id,
        "key"               : key,
        "timestamp-repr"    : repr(archive_timestamp),
        "segment-num"       : segment_num,
        "segment-size"      : file_size,
        "segment-adler32"   : file_adler32,
        "segment-md5-digest": b64encode(file_md5.digest()),
        "file-size"         : file_size,
        "file-adler32"      : file_adler32,
        "file-hash"         : b64encode(file_md5.digest()),
        "handoff-node-name" : None,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=content_item)

    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    reply = self._destroy(
        collection_id, key, destroy_1_timestamp, segment_num)
    self.assertEqual(reply["result"], "success", reply["error-message"])

    reply = self._destroy(
        collection_id, key, destroy_2_timestamp, segment_num)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def retrieve(self, response, timeout):
    try:
        return self._retrieve(response, timeout)
    except Exception, instance:
        self._log.error("request {0} _retrieve exception".format(
            self.user_request_id))
        self._log.exception("request {0}".format(self.user_request_id))
        queue_entry = redis_queue_entry_tuple(
            timestamp=create_timestamp(),
            collection_id=self._collection_id,
            value=1)
        self._redis_queue.put(("retrieve_error", queue_entry))
        response.status_int = httplib.SERVICE_UNAVAILABLE
        response.retry_after = _retrieve_retry_interval
        raise RetrieveFailedError(instance)
def next_run(cls):
    log = logging.getLogger("StateCleaner")

    current_time = create_timestamp()
    next_time = datetime.datetime(
        year=current_time.year,
        month=current_time.month,
        day=current_time.day,
        hour=current_time.hour,
        minute=5,
        second=0,
        microsecond=0)
    if current_time.minute >= 5:
        next_time += datetime.timedelta(hours=1)

    log.info("next dump time = %s", next_time)

    return time.mktime(next_time.timetuple())
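# A self-contained check of the next_run() schedule: the dump fires at
# five minutes past each hour, rolling to the next hour once this hour's
# slot has passed.
import datetime

def next_dump_time(current_time):
    next_time = current_time.replace(minute=5, second=0, microsecond=0)
    if current_time.minute >= 5:
        next_time += datetime.timedelta(hours=1)
    return next_time

assert (next_dump_time(datetime.datetime(2012, 1, 1, 9, 4)) ==
        datetime.datetime(2012, 1, 1, 9, 5))
assert (next_dump_time(datetime.datetime(2012, 1, 1, 9, 5)) ==
        datetime.datetime(2012, 1, 1, 10, 5))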
def __init__(self, connection, space_id, repository_path):
    self._space_id = space_id
    self._value_file_id = _insert_value_file_default_row(
        connection, space_id)
    self._log = logging.getLogger("VF%08d" % (self._value_file_id, ))
    self._connection = connection
    self._value_file_path = compute_value_file_path(
        repository_path, space_id, self._value_file_id)
    self._log.info("opening %s" % (self._value_file_path, ))
    self._value_file_fd = _open_value_file(self._value_file_path)
    self._creation_time = create_timestamp()
    self._size = 0
    self._md5 = hashlib.md5()
    self._segment_sequence_count = 0
    self._min_segment_id = None
    self._max_segment_id = None
    self._collection_ids = set()
    self._synced = True  # treat as synced until we write
def test_archive_key_entire(self):
    """test archiving all data for a key in a single message"""
    file_size = 10 * 64 * 1024
    content_item = random_string(file_size)
    message_id = uuid.uuid1().hex
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2

    file_adler32 = zlib.adler32(content_item)
    file_md5 = hashlib.md5(content_item)

    message = {
        "message-type"      : "archive-key-entire",
        "message-id"        : message_id,
        "priority"          : archive_priority,
        "collection-id"     : collection_id,
        "key"               : key,
        "timestamp-repr"    : repr(timestamp),
        "segment-num"       : segment_num,
        "segment-size"      : file_size,
        "segment-adler32"   : file_adler32,
        "segment-md5-digest": b64encode(file_md5.digest()),
        "file-size"         : file_size,
        "file-adler32"      : file_adler32,
        "file-hash"         : b64encode(file_md5.digest()),
        "handoff-node-name" : None,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=content_item)

    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")
def _detail_generator(
    total_bytes_added, total_bytes_removed, total_bytes_retrieved
):
    current_time = create_timestamp()

    for i in xrange(1000):
        message = {
            "message-type"  : "space-accounting-detail",
            "collection-id" : _collection_id,
            "timestamp-repr": repr(current_time + timedelta(seconds=i)),
            "event"         : "bytes_added",
            "value"         : total_bytes_added / 1000,
        }
        # FIXME: losing messages if we pump them all in w/o delay
        time.sleep(.01)
        yield message, None

    for i in xrange(50):
        message = {
            "message-type"  : "space-accounting-detail",
            "collection-id" : _collection_id,
            "timestamp-repr": repr(current_time + timedelta(seconds=i)),
            "event"         : "bytes_removed",
            "value"         : total_bytes_removed / 50,
        }
        yield message, None

    for i in xrange(25):
        message = {
            "message-type"  : "space-accounting-detail",
            "collection-id" : _collection_id,
            "timestamp-repr": repr(current_time + timedelta(seconds=i)),
            "event"         : "bytes_retrieved",
            "value"         : total_bytes_retrieved / 25,
        }
        yield message, None
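# A minimal sketch of draining _detail_generator(); the byte totals are
# arbitrary but divide evenly by the message counts. Note the generator
# sleeps 10ms per "bytes_added" message, so a full drain takes ~10 seconds.
messages = []
for message, _body in _detail_generator(1000000, 50000, 25000):
    messages.append(message)
assert len(messages) == 1000 + 50 + 25
assert messages[0]["event"] == "bytes_added"
assert messages[0]["value"] == 1000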
            variable_value = urllib.unquote_plus(variable_value)
            variable_value = variable_value.decode("utf-8")
            kwargs[variable_name] = variable_value

        # translate version id to the form we use internally
        if "version_id_marker" in kwargs:
            kwargs["version_id_marker"] = self._id_translator.internal_id(
                kwargs["version_id_marker"])

        self._log.info(
            "request {0}: "
            "_list_versions: collection = ({1}) {2} {3} {4}".format(
                user_request_id,
                collection_row["id"],
                collection_row["name"],
                collection_row["versioning"],
                kwargs))

        queue_entry = redis_queue_entry_tuple(
            timestamp=create_timestamp(),
            collection_id=collection_row["id"],
            value=1)
        self._redis_queue.put(("listmatch_request", queue_entry))

        try:
            result_dict = list_versions(
                self._interaction_pool,
                collection_row["id"],
                collection_row["versioning"],
                **kwargs)
        # segment_visibility raises ValueError if it is unhappy
        except ValueError, instance:
            self._log.error("request {0}: {1}".format(
                user_request_id, instance))
            raise exc.HTTPBadRequest(instance)
        except Exception:
            self._log.exception("request {0}".format(user_request_id))
            queue_entry = redis_queue_entry_tuple(
                timestamp=create_timestamp(),
                collection_id=collection_row["id"],
                value=1)
                break

            if prev_data is None:
                if first_block:
                    assert len(data) > offset_into_first_block, (
                        len(data), offset_into_first_block)
                    prev_data = data[offset_into_first_block:]
                else:
                    prev_data = data
                continue

            self._log.debug("request {0} yielding {1} bytes".format(
                self.user_request_id, len(prev_data)))
            yield prev_data
            retrieve_bytes += len(prev_data)

            prev_data = data
            first_block = False

        urllib_response.close()
        self._log.debug("request {0} internal request complete".format(
            self.user_request_id))
        # end - for entry in self._generate_key_rows(self._key_rows):

    if response.status_int in [httplib.OK, httplib.PARTIAL_CONTENT]:
        redis_entries = [("retrieve_success", 1),
                         ("success_bytes_out", retrieve_bytes)]
    else:
        redis_entries = [("retrieve_error", 1),
                         ("error_bytes_out", retrieve_bytes)]

    timestamp = create_timestamp()
    for key, value in redis_entries:
        queue_entry = redis_queue_entry_tuple(
            timestamp=timestamp,
            collection_id=self._collection_id,
            value=value)
        self._redis_queue.put((key, queue_entry))
def _handle_consistency_check_reply(state, message, _data):
    log = logging.getLogger("_handle_consistency_check_reply")

    timestamp = parse_timestamp_repr(message["timestamp-repr"])
    state_key = (message["collection-id"], timestamp, )

    try:
        request_state = state["active-requests"][state_key]
    except KeyError:
        log.warn("Unknown state_key %s from %s" % (
            state_key, message["node-name"]))
        return

    if message["node-name"] in request_state.replies:
        error_message = "duplicate reply from %s %s" % (
            message["node-name"], state_key, )
        log.error(error_message)
        return

    if message["result"] != "success":
        log.error("%s (%s) %s from %s" % (
            state_key,
            message["result"],
            message["error-message"],
            message["node-name"], ))
        reply_value = _error_reply
    else:
        reply_value = (message["count"], message["encoded-md5-digest"], )

    request_state.replies[message["node-name"]] = reply_value

    # not done yet, wait for more replies
    if len(request_state.replies) < len(state["anti-entropy-clients"]):
        return

    # at this point we should have a reply from every node, so
    # we don't want to preserve state anymore
    del state["active-requests"][state_key]
    database = AuditResultDatabase(state["central-database-connection"])
    timestamp = create_timestamp()

    # push the results into a dict to see how many unique entries there are
    md5_digest_dict = dict()
    md5_digest_dict[_error_reply] = list()

    for node_name in request_state.replies.keys():
        node_reply = request_state.replies[node_name]
        if node_reply == _error_reply:
            md5_digest_dict[_error_reply].append(node_name)
            continue
        _count, encoded_md5_digest = node_reply
        if not encoded_md5_digest in md5_digest_dict:
            md5_digest_dict[encoded_md5_digest] = list()
        md5_digest_dict[encoded_md5_digest].append(node_name)

    # if this audit was started by an anti-entropy-audit-request message,
    # we want to send a reply
    if request_state.client_tag is not None:
        reply = {
            "message-type"  : "anti-entropy-audit-reply",
            "client-tag"    : request_state.client_tag,
            "collection-id" : message["collection-id"],
            "result"        : None,
            "error-message" : None,
        }
    else:
        reply = None

    error_reply_list = md5_digest_dict.pop(_error_reply)
    if reply is not None:
        reply["error-reply-nodes"] = error_reply_list

    if len(md5_digest_dict) > 1:
        log.error("found %s different hashes for (%s)" % (
            len(md5_digest_dict), message["collection-id"], ))
        for index, value in enumerate(md5_digest_dict.values()):
            log.info(str(value))
            if reply is not None:
                reply["mistmatch-nodes-%s" % (index+1, )] = value

    # ok = no errors and all nodes have the same hash for every collection
    if len(error_reply_list) == 0 and len(md5_digest_dict) == 1:
        description = "collection %s compares ok" % (
            message["collection-id"], )
        log.info(description)
        state["event-push-client"].info(
            "audit-ok", description, collection_id=message["collection-id"])
        database.successful_audit(request_state.row_id, timestamp)
        if reply is not None:
            reply["result"] = "success"
            state["resilient-server"].send_reply(reply)
        return

    # we have error(s), but the non-errors compare ok
    if len(error_reply_list) > 0 and len(md5_digest_dict) == 1:

        # if we come from anti-entropy-audit-request, don't retry
        if reply is not None:
            database.audit_error(request_state.row_id, timestamp)
            database.close()
            description = "There were error replies from %s nodes" % (
                len(error_reply_list), )
            log.error(description)
            state["event-push-client"].error(
                "consistency-check-errors-replies",
                description,
                collection_id=message["collection-id"],
                error_reply_nodes=error_reply_list)
            reply["result"] = "error"
            reply["error-message"] = description
            state["resilient-server"].send_reply(reply)
            return

        if request_state.retry_count >= max_retry_count:
            description = "collection %s %s errors, too many retries" % (
                message["collection-id"], len(error_reply_list))
            log.error(description)
            state["event-push-client"].error(
                "audit-errors",
                description,
                collection_id=message["collection-id"])
            database.audit_error(request_state.row_id, timestamp)
            # TODO: need to do something here
        else:
            description = "%s Error replies from %s nodes, will retry" % (
                message["collection-id"], len(error_reply_list))
            log.warn(description)
            state["event-push-client"].warn(
                "audit-retry",
                description,
                collection_id=message["collection-id"])
            state["retry-list"].append(
                retry_entry_tuple(
                    retry_time=retry_time(),
                    collection_id=message["collection-id"],
                    row_id=request_state.row_id,
                    retry_count=request_state.retry_count, ))
            database.wait_for_retry(request_state.row_id)

        database.close()
        return

    # if we make it here, we have some form of mismatch, possibly mixed
    # with errors
    description = "%s error replies from %s nodes; hash mismatch(es) = %r" % (
        message["collection-id"],
        len(error_reply_list),
        md5_digest_dict.values())
    log.error(description)
    state["event-push-client"].warn(
        "audit-retry", description, collection_id=message["collection-id"])

    # if we come from anti-entropy-audit-request, don't retry
    if reply is not None:
        database.audit_error(request_state.row_id, timestamp)
        database.close()
        reply["result"] = "audit-error"
        reply["error-message"] = description
        state["resilient-server"].send_reply(reply)
        return

    if request_state.retry_count >= max_retry_count:
        log.error("%s too many retries" % (message["collection-id"], ))
        database.audit_error(request_state.row_id, timestamp)
        # TODO: need to do something here
    else:
        state["retry-list"].append(
            retry_entry_tuple(
                retry_time=retry_time(),
                collection_id=message["collection-id"],
                row_id=request_state.row_id,
                retry_count=request_state.retry_count, ))
        database.wait_for_retry(request_state.row_id)

    database.close()
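# A minimal, self-contained sketch of the reply-grouping step above: node
# replies are bucketed by their encoded md5 digest, with failed nodes
# collected under a sentinel key. Node names and digests are made up.
_error_reply = "error"  # sentinel, assumed to match the module constant

replies = {
    "node-01": (10, "digest-a"),
    "node-02": (10, "digest-a"),
    "node-03": _error_reply,
}
md5_digest_dict = {_error_reply: []}
for node_name, node_reply in replies.items():
    if node_reply == _error_reply:
        md5_digest_dict[_error_reply].append(node_name)
        continue
    _count, encoded_md5_digest = node_reply
    md5_digest_dict.setdefault(encoded_md5_digest, []).append(node_name)

error_reply_list = md5_digest_dict.pop(_error_reply)
assert error_reply_list == ["node-03"]
# a single remaining bucket means all successful nodes agree
assert len(md5_digest_dict) == 1
assert set(md5_digest_dict["digest-a"]) == set(["node-01", "node-02"])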
def test_retrieve_large_content(self):
    """test retrieving content that spans multiple messages"""
    slice_size = 1024 * 1024
    slice_count = 10
    total_size = slice_size * slice_count
    test_data = random_string(total_size)

    collection_id = 1001
    archive_priority = create_priority()
    timestamp = create_timestamp()
    key = self._key_generator.next()
    segment_num = 4
    sequence_num = 0

    file_adler32 = zlib.adler32(test_data)
    file_md5 = hashlib.md5(test_data)

    slice_start = 0
    slice_end = slice_size

    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

    message_id = uuid.uuid1().hex
    message = {
        "message-type": "archive-key-start",
        "message-id": message_id,
        "priority": archive_priority,
        "collection-id": collection_id,
        "key": key,
        "conjoined-unified-id": None,
        "conjoined-part": 0,
        "timestamp-repr": repr(timestamp),
        "segment-num": segment_num,
        "segment-size": len(test_data[slice_start:slice_end]),
        "segment-adler32": segment_adler32,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "sequence-num": sequence_num,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end])

    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-start-reply")
    self.assertEqual(reply["result"], "success")

    for _ in range(slice_count - 2):
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message_id = uuid.uuid1().hex
        message = {
            "message-type": "archive-key-next",
            "message-id": message_id,
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "timestamp-repr": repr(timestamp),
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "segment-adler32": segment_adler32,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "sequence-num": sequence_num,
        }

        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])

        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-next-reply")
        self.assertEqual(reply["result"], "success")

    sequence_num += 1
    slice_start += slice_size
    slice_end += slice_size
    self.assertEqual(slice_end, total_size)

    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

    message_id = uuid.uuid1().hex
    message = {
        "message-type": "archive-key-final",
        "message-id": message_id,
        "priority": archive_priority,
        "collection-id": collection_id,
        "key": key,
        "conjoined-unified-id": None,
        "conjoined-part": 0,
        "timestamp-repr": repr(timestamp),
        "segment-num": segment_num,
        "segment-size": len(test_data[slice_start:slice_end]),
        "segment-adler32": segment_adler32,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "sequence-num": sequence_num,
        "file-size": total_size,
        "file-adler32": file_adler32,
        "file-hash": b64encode(file_md5.digest()),
        "handoff-node-name": None,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end])

    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # get file info from the local database
    _conjoined_row, segment_rows = current_status_of_key(
        self._database_connection, collection_id, key)

    self.assertEqual(len(segment_rows), 1)

    retrieved_data_list = list()

    message_id = uuid.uuid1().hex
    message = {
        "message-type": "retrieve-key-start",
        "message-id": message_id,
        "collection-id": collection_id,
        "key": key,
        "timestamp-repr": repr(timestamp),
        "conjoined-unified-id": None,
        "conjoined-part": 0,
        "segment-num": segment_num,
    }

    reply, data = send_request_and_get_reply_and_data(
        _local_node_name,
        _data_reader_address,
        _local_node_name,
        _client_address,
        message)

    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "retrieve-key-reply")
    self.assertEqual(reply["completed"], False)
    print "sequence-num =", reply["sequence-num"]
    retrieved_data_list.append(data)

    while True:
        message_id = uuid.uuid1().hex
        message = {
            "message-type": "retrieve-key-next",
            "message-id": message_id,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": repr(timestamp),
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "segment-num": segment_num,
        }

        reply, data = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address,
            _local_node_name,
            _client_address,
            message)

        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        retrieved_data_list.append(data)
        print "sequence-num =", reply["sequence-num"]

        if reply["completed"]:
            break

    retrieved_data = "".join(retrieved_data_list)
    self.assertEqual(len(retrieved_data), len(test_data))
    self.assertEqual(retrieved_data, test_data)
def xxxtest_large_archive(self):
    """
    test archiving a file that needs more than one message.
    For example, a 10 MB file: each node would get 10 120KB zfec shares.
    """
    slice_size = 1024 * 1024
    slice_count = 10
    total_size = slice_size * slice_count
    test_data = random_string(total_size)

    user_request_id = uuid.uuid1().hex
    collection_id = 1001
    archive_priority = create_priority()
    timestamp = create_timestamp()
    key = self._key_generator.next()
    segment_num = 4
    sequence_num = 0

    file_adler32 = zlib.adler32(test_data)
    file_md5 = hashlib.md5(test_data)

    slice_start = 0
    slice_end = slice_size

    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()

    message = {
        "message-type"      : "archive-key-start",
        "priority"          : archive_priority,
        "user-request-id"   : user_request_id,
        "collection-id"     : collection_id,
        "key"               : key,
        "unified-id"        : unified_id,
        "timestamp-repr"    : repr(timestamp),
        "conjoined-part"    : 0,
        "segment-num"       : segment_num,
        "segment-size"      : len(test_data[slice_start:slice_end]),
        "zfec-padding-size" : 4,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "segment-adler32"   : segment_adler32,
        "sequence-num"      : sequence_num,
        "source-node-name"  : _local_node_name,
        "handoff-node-name" : None,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end])

    self.assertEqual(reply["message-type"], "archive-key-start-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])

    for _ in range(slice_count - 2):
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message = {
            "message-type"      : "archive-key-next",
            "priority"          : archive_priority,
            "user-request-id"   : user_request_id,
            "collection-id"     : collection_id,
            "key"               : key,
            "unified-id"        : unified_id,
            "timestamp-repr"    : repr(timestamp),
            "conjoined-part"    : 0,
            "segment-num"       : segment_num,
            "segment-size"      : len(test_data[slice_start:slice_end]),
            "zfec-padding-size" : 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32"   : segment_adler32,
            "sequence-num"      : sequence_num,
            "source-node-name"  : _local_node_name,
            "handoff-node-name" : None,
        }

        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])

        self.assertEqual(reply["message-type"], "archive-key-next-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])

    sequence_num += 1
    slice_start += slice_size
    slice_end += slice_size

    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

    message = {
        "message-type"      : "archive-key-final",
        "priority"          : archive_priority,
        "user-request-id"   : user_request_id,
        "collection-id"     : collection_id,
        "key"               : key,
        "unified-id"        : unified_id,
        "timestamp-repr"    : repr(timestamp),
        "conjoined-part"    : 0,
        "segment-num"       : segment_num,
        "segment-size"      : len(test_data[slice_start:slice_end]),
        "zfec-padding-size" : 4,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "segment-adler32"   : segment_adler32,
        "sequence-num"      : sequence_num,
        "file-size"         : total_size,
        "file-adler32"      : file_adler32,
        "file-hash"         : b64encode(file_md5.digest()),
        "source-node-name"  : _local_node_name,
        "handoff-node-name" : None,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end])

    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def audit_segments(halt_event, work_dir):
    log = logging.getLogger("audit_segments")

    if not os.path.exists(anti_entropy_dir):
        log.info("creating {0}".format(anti_entropy_dir))
        os.mkdir(anti_entropy_dir)

    meta_repair_file_path = compute_meta_repair_file_path()
    meta_repair_file = \
        gzip.GzipFile(filename=meta_repair_file_path, mode="wb")

    data_repair_file_path = compute_data_repair_file_path()
    data_repair_file = \
        gzip.GzipFile(filename=data_repair_file_path, mode="wb")

    counts = {
        "total"                              : 0,
        anti_entropy_missing_replicas        : 0,
        anti_entropy_incomplete_finalization : 0,
        anti_entropy_damaged_records         : 0,
        anti_entropy_missing_tombstones      : 0,
        anti_entropy_database_inconsistancy  : 0,
    }

    current_time = create_timestamp()
    min_segment_age = parse_timedelta_str(_min_segment_age)
    newest_allowable_timestamp = current_time - min_segment_age
    log.info("newest allowable timestamp = {0}".format(
        newest_allowable_timestamp.isoformat()))

    for row_key, segment_status, segment_data in generate_work(work_dir):
        if halt_event.is_set():
            log.info("halt_event is set: exiting")
            return

        assert segment_status == anti_entropy_pre_audit

        counts["total"] += 1

        # missing replicas needs to run first, because the other tests
        # assume there are no missing replicas
        if _missing_replicas(segment_data, newest_allowable_timestamp):
            log.debug("missing_replicas {0}".format(row_key))
            counts[anti_entropy_missing_replicas] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_replicas, segment_data, ),
                data_repair_file)
            continue

        # _missing_tombstones needs to run ahead of _incomplete_finalization
        if _missing_tombstones(segment_data, newest_allowable_timestamp):
            log.debug("missing_tombstones {0}".format(row_key))
            counts[anti_entropy_missing_tombstones] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_tombstones, segment_data, ),
                meta_repair_file)
            continue

        if _incomplete_finalization(segment_data,
                                    newest_allowable_timestamp):
            log.debug("incomplete_finalization {0}".format(row_key))
            counts[anti_entropy_incomplete_finalization] += 1
            store_sized_pickle(
                (row_key,
                 anti_entropy_incomplete_finalization,
                 segment_data, ),
                data_repair_file)
            continue

        if _damaged_records(segment_data):
            log.debug("damaged_records {0}".format(row_key))
            counts[anti_entropy_damaged_records] += 1
            store_sized_pickle(
                (row_key, anti_entropy_damaged_records, segment_data, ),
                data_repair_file)
            continue

        if _database_inconsistancy(row_key, segment_data):
            log.debug("database_inconsistancy {0}".format(row_key))
            counts[anti_entropy_database_inconsistancy] += 1
            store_sized_pickle(
                (row_key, anti_entropy_database_inconsistancy,
                 segment_data, ),
                data_repair_file)
            continue

    meta_repair_file.close()
    data_repair_file.close()

    keys = ["total",
            anti_entropy_missing_replicas,
            anti_entropy_incomplete_finalization,
            anti_entropy_damaged_records,
            anti_entropy_missing_tombstones,
            anti_entropy_database_inconsistancy]

    for key in keys:
        log.info("{0} {1:,}".format(key, counts[key]))
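# A sketch of the length-prefixed framing that store_sized_pickle()
# presumably uses (an assumption, not the actual nimbus.io helper): a
# fixed-size length header followed by the pickled payload, so records
# can be streamed back out of the gzipped repair files one at a time.
import pickle
import struct

_header_format = "!Q"
_header_size = struct.calcsize(_header_format)

def store_sized_pickle_sketch(obj, file_object):
    data = pickle.dumps(obj)
    file_object.write(struct.pack(_header_format, len(data)))
    file_object.write(data)

def retrieve_sized_pickle_sketch(file_object):
    (size, ) = struct.unpack(_header_format, file_object.read(_header_size))
    return pickle.loads(file_object.read(size))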
def __init__(self):
    self._log = logging.getLogger("WebInternalReader")

    memcached_client = memcache.Client(_memcached_nodes)

    self._central_connection = get_central_connection()
    self._cluster_row = get_cluster_row(self._central_connection)
    self._node_local_connection = get_node_local_connection()
    self._deliverator = Deliverator()

    self._zeromq_context = zmq.Context()

    self._pull_server = GreenletPULLServer(
        self._zeromq_context,
        _web_internal_reader_pipeline_address,
        self._deliverator)
    self._pull_server.link_exception(self._unhandled_greenlet_exception)

    self._data_reader_clients = list()
    self._data_readers = list()
    for node_name, address in zip(_node_names, _data_reader_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_internal_reader_pipeline_address,
            self._deliverator,
            connect_messages=[])
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_reader_clients.append(resilient_client)
        data_reader = DataReader(node_name, resilient_client)
        self._data_readers.append(data_reader)

    self._space_accounting_dealer_client = GreenletDealerClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_server_address)
    self._space_accounting_dealer_client.link_exception(
        self._unhandled_greenlet_exception)

    push_client = GreenletPUSHClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_pipeline_address)

    self._accounting_client = SpaceAccountingClient(
        _local_node_name,
        self._space_accounting_dealer_client,
        push_client)

    self._event_push_client = EventPushClient(
        self._zeromq_context, "web-internal-reader")

    # message sent to data readers telling them the server
    # is (re)starting, thereby invalidating any archives or retrieves
    # that are in progress for this node
    timestamp = create_timestamp()
    self._event_push_client.info(
        "web-reader-start",
        "web reader (re)start",
        timestamp_repr=repr(timestamp),
        source_node_name=_local_node_name)

    self._watcher = Watcher(
        _stats, self._data_reader_clients, self._event_push_client)

    self.application = Application(
        memcached_client,
        self._central_connection,
        self._node_local_connection,
        self._cluster_row,
        self._data_readers,
        self._accounting_client,
        self._event_push_client,
        _stats)
    self.wsgi_server = WSGIServer(
        (_web_internal_reader_host, _web_internal_reader_port),
        application=self.application,
        backlog=_wsgi_backlog)
def test_retrieve_small_content(self):
    """test retrieving content that fits in a single message"""
    file_size = 10 * 64 * 1024
    file_content = random_string(file_size)
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2

    file_adler32 = zlib.adler32(file_content)
    file_md5 = hashlib.md5(file_content)

    message_id = uuid.uuid1().hex
    message = {
        "message-type"         : "archive-key-entire",
        "message-id"           : message_id,
        "priority"             : archive_priority,
        "collection-id"        : collection_id,
        "key"                  : key,
        "conjoined-unified-id" : None,
        "conjoined-part"       : 0,
        "timestamp-repr"       : repr(timestamp),
        "segment-num"          : segment_num,
        "segment-size"         : file_size,
        "segment-adler32"      : file_adler32,
        "segment-md5-digest"   : b64encode(file_md5.digest()),
        "file-size"            : file_size,
        "file-adler32"         : file_adler32,
        "file-hash"            : b64encode(file_md5.digest()),
        "handoff-node-name"    : None,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=file_content)

    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # get file info from the local database
    _conjoined_row, segment_rows = current_status_of_key(
        self._database_connection, collection_id, key)

    self.assertEqual(len(segment_rows), 1)

    message_id = uuid.uuid1().hex
    message = {
        "message-type"         : "retrieve-key-start",
        "message-id"           : message_id,
        "collection-id"        : collection_id,
        "key"                  : key,
        "timestamp-repr"       : repr(timestamp),
        "conjoined-unified-id" : None,
        "conjoined-part"       : 0,
        "segment-num"          : segment_num,
    }

    reply, data = send_request_and_get_reply_and_data(
        _local_node_name,
        _data_reader_address,
        _local_node_name,
        _client_address,
        message)

    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "retrieve-key-reply")
    self.assertEqual(reply["completed"], True)
    self.assertEqual(len(data), len(file_content))
    self.assertEqual(data, file_content)
def __init__(self, halt_event):
    self._log = logging.getLogger("WebWriter")
    memcached_client = memcache.Client(_memcached_nodes)

    self._interaction_pool = gdbpool.interaction_pool.DBInteractionPool(
        get_central_database_dsn(),
        pool_name=_central_pool_name,
        pool_size=_database_pool_size,
        do_log=True)

    authenticator = InteractionPoolAuthenticator(memcached_client,
                                                 self._interaction_pool)

    # Ticket #25: must run database operation in a greenlet
    greenlet = gevent.Greenlet.spawn(_get_cluster_row_and_node_row,
                                     self._interaction_pool)
    greenlet.join()
    self._cluster_row, node_row = greenlet.get()

    self._unified_id_factory = UnifiedIDFactory(node_row.id)

    self._deliverator = Deliverator()

    self._zeromq_context = zmq.Context()

    self._pull_server = GreenletPULLServer(self._zeromq_context,
                                           _web_writer_pipeliner_address,
                                           self._deliverator)
    self._pull_server.link_exception(self._unhandled_greenlet_exception)

    self._data_writer_clients = list()
    for node_name, address in zip(_node_names, _data_writer_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_writer_pipeliner_address,
            self._deliverator,
            connect_messages=[])
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_writer_clients.append(resilient_client)

    self._space_accounting_dealer_client = GreenletDealerClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_server_address)
    self._space_accounting_dealer_client.link_exception(
        self._unhandled_greenlet_exception)

    push_client = GreenletPUSHClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_pipeline_address,
    )

    self._accounting_client = SpaceAccountingClient(
        _local_node_name,
        self._space_accounting_dealer_client,
        push_client)

    self._event_push_client = EventPushClient(self._zeromq_context,
                                              "web-server")

    # message sent to data writers telling them the server
    # is (re)starting, thereby invalidating any archives
    # that are in progress for this node
    unified_id = self._unified_id_factory.next()
    timestamp = create_timestamp()
    self._event_push_client.info("web-writer-start",
                                 "web writer (re)start",
                                 unified_id=unified_id,
                                 timestamp_repr=repr(timestamp),
                                 source_node_name=_local_node_name)

    id_translator_keys_path = os.environ.get(
        "NIMBUS_IO_ID_TRANSLATION_KEYS",
        os.path.join(_repository_path, "id_translator_keys.pkl"))
    with open(id_translator_keys_path, "r") as input_file:
        id_translator_keys = pickle.load(input_file)

    self._id_translator = InternalIDTranslator(
        id_translator_keys["key"],
        id_translator_keys["hmac_key"],
        id_translator_keys["iv_key"],
        id_translator_keys["hmac_size"])

    redis_queue = gevent.queue.Queue()
    self._redis_sink = OperationalStatsRedisSink(halt_event,
                                                 redis_queue,
                                                 _local_node_name)
    self._redis_sink.link_exception(self._unhandled_greenlet_exception)

    self.application = Application(self._cluster_row,
                                   self._unified_id_factory,
                                   self._id_translator,
                                   self._data_writer_clients,
                                   authenticator,
                                   self._accounting_client,
                                   self._event_push_client,
                                   redis_queue)
    self.wsgi_server = WSGIServer((_web_writer_host, _web_writer_port),
                                  application=self.application,
                                  backlog=_wsgi_backlog)
def _retrieve(self, response, timeout):
    self._log.debug("request {0}: start _retrieve".format(
        self.user_request_id))
    self._cache_key_rows_in_memcached(self._key_rows)
    self.total_file_size = sum(
        [row["file_size"] for row in self._key_rows])
    self._log.debug("total_file_size = {0}".format(self.total_file_size))

    queue_entry = redis_queue_entry_tuple(timestamp=create_timestamp(),
                                          collection_id=self._collection_id,
                                          value=1)
    self._redis_queue.put(("retrieve_request", queue_entry, ))
    retrieve_bytes = 0L

    self._log.debug("start key_rows loop")
    first_block = True
    for entry in self._generate_key_rows(self._key_rows):
        key_row, \
        block_offset, \
        block_count, \
        offset_into_first_block, \
        offset_into_last_block = entry

        self._log.debug("request {0}: {1} {2}".format(
            self.user_request_id,
            key_row["unified_id"],
            key_row["conjoined_part"]))

        # if a cache port is defined, and this response isn't larger than
        # the configured maximum, send the request through the cache.
        target_port = _web_internal_reader_port
        if (_web_internal_reader_cache_port is not None and
            key_row["file_size"] <= _web_internal_reader_max_cache_size):
            target_port = _web_internal_reader_cache_port

        uri = "http://{0}:{1}/data/{2}/{3}".format(
            _web_internal_reader_host,
            target_port,
            key_row["unified_id"],
            key_row["conjoined_part"])

        self._log.info("request {0} internally requesting {1}".format(
            self.user_request_id, uri))

        headers = {"x-nimbus-io-user-request-id": self.user_request_id}

        if block_offset > 0 and block_count is None:
            headers["range"] = \
                "bytes={0}-".format(block_offset * block_size)
            headers["x-nimbus-io-expected-content-length"] = \
                str(key_row["file_size"] - (block_offset * block_size))
            expected_status = httplib.PARTIAL_CONTENT
        elif block_count is not None:
            headers["range"] = \
                "bytes={0}-{1}".format(
                    block_offset * block_size,
                    (block_offset + block_count) * block_size - 1)
            headers["x-nimbus-io-expected-content-length"] = \
                str(block_count * block_size)
            expected_status = httplib.PARTIAL_CONTENT
        else:
            headers["x-nimbus-io-expected-content-length"] = \
                str(key_row["file_size"])
            expected_status = httplib.OK

        request = urllib2.Request(uri, headers=headers)

        self._log.debug(
            "request {0} start internal; expected={1}; headers={2}".format(
                self.user_request_id, repr(expected_status), headers))
        try:
            urllib_response = urllib2.urlopen(request, timeout=timeout)
        except urllib2.HTTPError, instance:
            if instance.code == httplib.NOT_FOUND:
                self._log.error("request {0}: got 404".format(
                    self.user_request_id))
                response.status_int = httplib.NOT_FOUND
                break
            if instance.code == httplib.PARTIAL_CONTENT and \
               expected_status == httplib.PARTIAL_CONTENT:
                urllib_response = instance
            else:
                message = "urllib2.HTTPError '{0}' '{1}'".format(
                    instance.code, instance)
                self._log.error("request {0}: exception {1}".format(
                    self.user_request_id, message))
                self._log.exception(message)
                response.status_int = httplib.SERVICE_UNAVAILABLE
                response.retry_after = _retrieve_retry_interval
                break
        except (httplib.HTTPException, socket.error, ) as instance:
            message = "{0}, '{1}'".format(instance.__class__.__name__,
                                          instance.message)
            self._log.error("request {0}: exception {1}".format(
                self.user_request_id, message))
            self._log.exception(message)
            response.status_int = httplib.SERVICE_UNAVAILABLE
            response.retry_after = _retrieve_retry_interval
            break
# translate version id to the form we use internally
if "version_id_marker" in kwargs:
    kwargs["version_id_marker"] = self._id_translator.internal_id(
        kwargs["version_id_marker"])

self._log.info("request {0}: "
               "_list_versions: collection = ({1}) {2} {3} {4}".format(
                   user_request_id,
                   collection_row["id"],
                   collection_row["name"],
                   collection_row["versioning"],
                   kwargs))

queue_entry = redis_queue_entry_tuple(timestamp=create_timestamp(),
                                      collection_id=collection_row["id"],
                                      value=1)
self._redis_queue.put(("listmatch_request", queue_entry, ))

try:
    result_dict = list_versions(self._interaction_pool,
                                collection_row["id"],
                                collection_row["versioning"],
                                **kwargs)
# segment_visibility raises ValueError if it is unhappy
except ValueError, instance:
    self._log.error("request {0}: {1}".format(user_request_id, instance))
    first_block = False
    urllib_response.close()
    self._log.debug("request {0} internal request complete".format(
        self.user_request_id))
# end - for entry in self._generate_key_rows(self._key_rows):

if response.status_int in [httplib.OK, httplib.PARTIAL_CONTENT, ]:
    redis_entries = [("retrieve_success", 1),
                     ("success_bytes_out", retrieve_bytes)]
else:
    redis_entries = [("retrieve_error", 1),
                     ("error_bytes_out", retrieve_bytes)]

timestamp = create_timestamp()
for key, value in redis_entries:
    queue_entry = redis_queue_entry_tuple(timestamp=timestamp,
                                          collection_id=self._collection_id,
                                          value=value)
    self._redis_queue.put((key, queue_entry, ))
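# redis_queue_entry_tuple is constructed with keyword arguments everywhere
# above, which suggests a namedtuple. A minimal sketch of that assumption,
# plus a consumer that folds queued entries into per-collection totals the
# way a stats sink might (both names below are illustrative):
from collections import namedtuple

_demo_redis_queue_entry_tuple = namedtuple("RedisQueueEntry",
                                           ["timestamp",
                                            "collection_id",
                                            "value"])

def _demo_fold_entries(entries):
    """aggregate (key, entry) pairs into {(key, collection_id): total}"""
    totals = dict()
    for key, entry in entries:
        bucket = (key, entry.collection_id)
        totals[bucket] = totals.get(bucket, 0) + entry.value
    return totals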
    for data in data_list:
        yield data
        sent += len(data)
except RetrieveFailedError, instance:
    self._log.error('retrieve failed: {0} {1}'.format(
        description, instance))
    self._stats["retrieves"] -= 1
    response.status_int = 503
    return

end_time = time.time()
self._stats["retrieves"] -= 1

self.accounting_client.retrieved(collection_id,
                                 create_timestamp(),
                                 sent)
self._log.info(
    "request {0} successful retrieve".format(user_request_id))

response_headers = dict()
if "range" in req.headers:
    status_int = httplib.PARTIAL_CONTENT
    response_headers["Content-Range"] = \
        _content_range_header(lower_bound, upper_bound, total_file_size)
    response_headers["Content-Length"] = slice_size
else:
    status_int = httplib.OK
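# _content_range_header is defined elsewhere; RFC 7233 specifies the value
# as "bytes first-last/complete" with an inclusive last byte position.
# A hedged sketch of such a helper (whether the real upper_bound is
# inclusive or exclusive is an assumption; this treats it as exclusive):
def _demo_content_range_header(lower_bound, upper_bound, total_file_size):
    """build a Content-Range value, e.g. 'bytes 0-499/1234'"""
    return "bytes {0}-{1}/{2}".format(lower_bound,
                                      upper_bound - 1,
                                      total_file_size)

assert _demo_content_range_header(0, 500, 1234) == "bytes 0-499/1234"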
def audit_segments(halt_event, work_dir):
    log = logging.getLogger("audit_segments")

    if not os.path.exists(anti_entropy_dir):
        log.info("creating {0}".format(anti_entropy_dir))
        os.mkdir(anti_entropy_dir)

    meta_repair_file_path = compute_meta_repair_file_path()
    meta_repair_file = gzip.GzipFile(filename=meta_repair_file_path,
                                     mode="wb")

    data_repair_file_path = compute_data_repair_file_path()
    data_repair_file = gzip.GzipFile(filename=data_repair_file_path,
                                     mode="wb")

    counts = {
        "total"                              : 0,
        anti_entropy_missing_replicas        : 0,
        anti_entropy_incomplete_finalization : 0,
        anti_entropy_damaged_records         : 0,
        anti_entropy_missing_tombstones      : 0,
        anti_entropy_database_inconsistancy  : 0,
    }

    current_time = create_timestamp()
    min_segment_age = parse_timedelta_str(_min_segment_age)
    newest_allowable_timestamp = current_time - min_segment_age
    log.info("newest allowable timestamp = {0}".format(
        newest_allowable_timestamp.isoformat()))

    for row_key, segment_status, segment_data in generate_work(work_dir):
        if halt_event.is_set():
            log.info("halt_event is set: exiting")
            return

        assert segment_status == anti_entropy_pre_audit

        counts["total"] += 1

        # missing replicas needs to run first, because the other tests
        # assume there are no missing replicas
        if _missing_replicas(segment_data, newest_allowable_timestamp):
            log.debug("missing_replicas {0}".format(row_key))
            counts[anti_entropy_missing_replicas] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_replicas, segment_data, ),
                data_repair_file)
            continue

        # _missing_tombstones needs to run ahead of _incomplete_finalization
        if _missing_tombstones(segment_data, newest_allowable_timestamp):
            log.debug("missing_tombstones {0}".format(row_key))
            counts[anti_entropy_missing_tombstones] += 1
            store_sized_pickle(
                (row_key, anti_entropy_missing_tombstones, segment_data, ),
                meta_repair_file)
            continue

        if _incomplete_finalization(segment_data,
                                    newest_allowable_timestamp):
            log.debug("incomplete_finalization {0}".format(row_key))
            counts[anti_entropy_incomplete_finalization] += 1
            store_sized_pickle(
                (row_key,
                 anti_entropy_incomplete_finalization,
                 segment_data, ),
                data_repair_file)
            continue

        if _damaged_records(segment_data):
            log.debug("damaged_records {0}".format(row_key))
            counts[anti_entropy_damaged_records] += 1
            store_sized_pickle(
                (row_key, anti_entropy_damaged_records, segment_data, ),
                data_repair_file)
            continue

        if _database_inconsistancy(row_key, segment_data):
            log.debug("database_inconsistancy {0}".format(row_key))
            counts[anti_entropy_database_inconsistancy] += 1
            store_sized_pickle(
                (row_key,
                 anti_entropy_database_inconsistancy,
                 segment_data, ),
                data_repair_file)
            continue

    meta_repair_file.close()
    data_repair_file.close()

    keys = ["total",
            anti_entropy_missing_replicas,
            anti_entropy_incomplete_finalization,
            anti_entropy_damaged_records,
            anti_entropy_missing_tombstones,
            anti_entropy_database_inconsistancy]

    for key in keys:
        log.info("{0} {1:,}".format(key, counts[key]))
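# store_sized_pickle and the generate_work reader are defined elsewhere.
# The repair files written above are gzip streams of discrete records,
# which implies some framing. One plausible framing, offered only as a
# sketch (the length-prefix format is an assumption, not the project's
# actual wire format), is a length-prefixed pickle:
import pickle
import struct

_demo_length_format = "!Q"      # assumed 8-byte big-endian length prefix
_demo_length_size = struct.calcsize(_demo_length_format)

def _demo_store_sized_pickle(obj, output_file):
    """write one length-prefixed pickle record"""
    data = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
    output_file.write(struct.pack(_demo_length_format, len(data)))
    output_file.write(data)

def _demo_generate_sized_pickles(input_file):
    """yield records until EOF; a short read on the prefix ends the stream"""
    while True:
        prefix = input_file.read(_demo_length_size)
        if len(prefix) < _demo_length_size:
            return
        (length, ) = struct.unpack(_demo_length_format, prefix)
        yield pickle.loads(input_file.read(length))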
def test_retrieve_small_content(self):
    """test retrieving content that fits in a single message"""
    file_size = 10 * 64 * 1024
    file_content = random_string(file_size)
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2

    file_adler32 = zlib.adler32(file_content)
    file_md5 = hashlib.md5(file_content)

    message_id = uuid.uuid1().hex
    message = {
        "message-type"          : "archive-key-entire",
        "message-id"            : message_id,
        "priority"              : archive_priority,
        "collection-id"         : collection_id,
        "key"                   : key,
        "conjoined-unified-id"  : None,
        "conjoined-part"        : 0,
        "timestamp-repr"        : repr(timestamp),
        "segment-num"           : segment_num,
        "segment-size"          : file_size,
        "segment-adler32"       : file_adler32,
        "segment-md5-digest"    : b64encode(file_md5.digest()),
        "file-size"             : file_size,
        "file-adler32"          : file_adler32,
        "file-hash"             : b64encode(file_md5.digest()),
        "handoff-node-name"     : None,
    }
    reply = send_request_and_get_reply(_local_node_name,
                                       _data_writer_address,
                                       _local_node_name,
                                       _client_address,
                                       message,
                                       data=file_content)
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # get file info from the local database
    _conjoined_row, segment_rows = current_status_of_key(
        self._database_connection, collection_id, key)
    self.assertEqual(len(segment_rows), 1)

    message_id = uuid.uuid1().hex
    message = {
        "message-type"          : "retrieve-key-start",
        "message-id"            : message_id,
        "collection-id"         : collection_id,
        "key"                   : key,
        "timestamp-repr"        : repr(timestamp),
        "conjoined-unified-id"  : None,
        "conjoined-part"        : 0,
        "segment-num"           : segment_num
    }
    reply, data = send_request_and_get_reply_and_data(
        _local_node_name,
        _data_reader_address,
        _local_node_name,
        _client_address,
        message)
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "retrieve-key-reply")
    self.assertEqual(reply["completed"], True)
    self.assertEqual(len(data), len(file_content))
    self.assertEqual(data, file_content)
def __init__(self): self._log = logging.getLogger("WebInternalReader") memcached_client = memcache.Client(_memcached_nodes) self._central_connection = get_central_connection() self._cluster_row = get_cluster_row(self._central_connection) self._node_local_connection = get_node_local_connection() self._deliverator = Deliverator() self._zeromq_context = zmq.Context() self._pull_server = GreenletPULLServer( self._zeromq_context, _web_internal_reader_pipeline_address, self._deliverator) self._pull_server.link_exception(self._unhandled_greenlet_exception) self._data_reader_clients = list() self._data_readers = list() for node_name, address in zip(_node_names, _data_reader_addresses): resilient_client = GreenletResilientClient( self._zeromq_context, node_name, address, _client_tag, _web_internal_reader_pipeline_address, self._deliverator, connect_messages=[]) resilient_client.link_exception(self._unhandled_greenlet_exception) self._data_reader_clients.append(resilient_client) data_reader = DataReader(node_name, resilient_client) self._data_readers.append(data_reader) self._space_accounting_dealer_client = GreenletDealerClient( self._zeromq_context, _local_node_name, _space_accounting_server_address) self._space_accounting_dealer_client.link_exception( self._unhandled_greenlet_exception) push_client = GreenletPUSHClient( self._zeromq_context, _local_node_name, _space_accounting_pipeline_address, ) self._accounting_client = SpaceAccountingClient( _local_node_name, self._space_accounting_dealer_client, push_client) self._event_push_client = EventPushClient(self._zeromq_context, "web-internal-reader") # message sent to data readers telling them the server # is (re)starting, thereby invalidating any archvies or retrieved # that are in progress for this node timestamp = create_timestamp() self._event_push_client.info("web-reader-start", "web reader (re)start", timestamp_repr=repr(timestamp), source_node_name=_local_node_name) self._watcher = Watcher(_stats, self._data_reader_clients, self._event_push_client) self.application = Application(memcached_client, self._central_connection, self._node_local_connection, self._cluster_row, self._data_readers, self._accounting_client, self._event_push_client, _stats) self.wsgi_server = WSGIServer( (_web_internal_reader_host, _web_internal_reader_port), application=self.application, backlog=_wsgi_backlog)
def __init__(self): self._log = logging.getLogger("WebServer") authenticator = SqlAuthenticator() self._central_connection = get_central_connection() self._cluster_row = get_cluster_row(self._central_connection) self._node_local_connection = get_node_local_connection() self._unified_id_factory = UnifiedIDFactory( self._central_connection, _get_shard_id(self._central_connection, self._cluster_row.id) ) self._deliverator = Deliverator() self._zeromq_context = zmq.Context() self._pull_server = GreenletPULLServer( self._zeromq_context, _web_server_pipeline_address, self._deliverator ) self._pull_server.link_exception(self._unhandled_greenlet_exception) # message sent to data readers and writers telling them the server # is (re)starting, thereby invalidating any archvies or retrieved # that are in progress for this node timestamp = create_timestamp() start_message = { "message-type" : "web-server-start", "priority" : create_priority(), "unified-id" : self._unified_id_factory.next(), "timestamp-repr" : repr(timestamp), "source-node-name" : _local_node_name, } self._data_writer_clients = list() for node_name, address in zip(_node_names, _data_writer_addresses): resilient_client = GreenletResilientClient( self._zeromq_context, node_name, address, _client_tag, _web_server_pipeline_address, self._deliverator, connect_messages=[start_message, ] ) resilient_client.link_exception(self._unhandled_greenlet_exception) self._data_writer_clients.append(resilient_client) self._data_reader_clients = list() self._data_readers = list() for node_name, address in zip(_node_names, _data_reader_addresses): resilient_client = GreenletResilientClient( self._zeromq_context, node_name, address, _client_tag, _web_server_pipeline_address, self._deliverator, connect_messages=[start_message, ] ) resilient_client.link_exception(self._unhandled_greenlet_exception) self._data_reader_clients.append(resilient_client) data_reader = DataReader( node_name, resilient_client ) self._data_readers.append(data_reader) self._space_accounting_dealer_client = GreenletDealerClient( self._zeromq_context, _local_node_name, _space_accounting_server_address ) self._space_accounting_dealer_client.link_exception( self._unhandled_greenlet_exception ) push_client = GreenletPUSHClient( self._zeromq_context, _local_node_name, _space_accounting_pipeline_address, ) self._accounting_client = SpaceAccountingClient( _local_node_name, self._space_accounting_dealer_client, push_client ) self._event_push_client = EventPushClient( self._zeromq_context, "web-server" ) self._watcher = Watcher( _stats, self._data_reader_clients, self._data_writer_clients, self._event_push_client ) id_translator_keys_path = os.path.join( _repository_path, "id_translator_keys.pkl" ) with open(id_translator_keys_path, "r") as input_file: id_translator_keys = pickle.load(input_file) self._id_translator = InternalIDTranslator( id_translator_keys["key"], id_translator_keys["hmac_key"], id_translator_keys["iv_key"], id_translator_keys["hmac_size"] ) self.application = Application( self._central_connection, self._node_local_connection, self._cluster_row, self._unified_id_factory, self._id_translator, self._data_writer_clients, self._data_readers, authenticator, self._accounting_client, self._event_push_client, _stats ) self.wsgi_server = WSGIServer( (_web_server_host, _web_server_port), application=self.application, backlog=_wsgi_backlog )
def _value_file_status(connection, entry):
    log = logging.getLogger("_value_file_status")
    batch_key = make_batch_key(entry)

    value_file_path = compute_value_file_path(_repository_path,
                                              entry.space_id,
                                              entry.value_file_id)

    # Always do a stat on the value file.
    try:
        stat_result = os.stat(value_file_path)
    except OSError as instance:
        # If the value file is missing, consider all of the
        # segment_sequences to be missing, and handle it as such.
        if instance.errno == errno.ENOENT:
            log.error("value file missing {0} {1}".format(batch_key,
                                                          value_file_path))
            return _value_file_missing
        log.error("Error stat'ing value file {0} {1} {2}".format(
            str(instance), batch_key, value_file_path))
        raise

    # If the value file is still open, consider all data in it undamaged.
    if entry.value_file_close_time is None:
        return _value_file_valid

    # If the value file exists, is closed, has both an md5 and a size in
    # the database, the size from stat matches the size in the database,
    # and the close_time or last_integrity_check_time is younger than
    # MAX_TIME_BETWEEN_VALUE_FILE_INTEGRITY_CHECK, consider all records
    # in the file undamaged. (This is the common case.)
    if entry.value_file_hash is None:
        log.info("Value file row has no md5 hash {0} {1}".format(batch_key,
                                                                 entry))
        return _value_file_questionable

    if entry.value_file_size is None:
        log.info("Value file row has no size {0} {1}".format(batch_key,
                                                             entry))
        return _value_file_questionable

    if entry.value_file_size != stat_result.st_size:
        log.info("Value file row size {0} != stat size {1} {2}".format(
            entry.value_file_size, stat_result.st_size, batch_key))
        return _value_file_questionable

    current_time = create_timestamp()
    value_file_row_age = current_time - entry.value_file_close_time
    if entry.value_file_last_integrity_check_time is not None:
        value_file_row_age = \
            current_time - entry.value_file_last_integrity_check_time

    if value_file_row_age < _max_value_file_time:
        return _value_file_valid

    value_file_result = _value_file_valid

    # If the value file matches all of the previous criteria EXCEPT
    # MAX_TIME_BETWEEN_VALUE_FILE_INTEGRITY_CHECK, read the whole file
    # and calculate the md5. If it matches, consider the whole file good
    # as above. Update last_integrity_check_time regardless.
    md5_sum = hashlib.md5()
    try:
        with open(value_file_path, "rb") as input_file:
            while True:
                data = input_file.read(_read_buffer_size)
                if len(data) == 0:
                    break
                md5_sum.update(data)
    except (OSError, IOError) as instance:
        log.error("Error reading {0} {1}".format(value_file_path, instance))
        value_file_result = _value_file_questionable

    if value_file_result == _value_file_valid and \
       md5_sum.digest() != bytes(entry.value_file_hash):
        log.error("md5 mismatch {0} {1} {2} {3}".format(
            md5_sum.digest(),
            bytes(entry.value_file_hash),
            batch_key,
            value_file_path))
        value_file_result = _value_file_questionable

    # Update the check time only after the file has been read and any
    # damage recorded. Doing it earlier is a race: we could crash before
    # finishing the check, leaving the file marked as checked when it
    # never was.
    _update_value_file_last_integrity_check_time(connection,
                                                 entry.value_file_id,
                                                 create_timestamp())

    return value_file_result