def handle_export_test_msg(msg):
    header = msg.get('header', {})
    assert_message_attributes(header, ["catalogue"])

    catalogue = header['catalogue']

    start_timestamp = int(datetime.datetime.utcnow().replace(microsecond=0).timestamp())
    process_id = header.get('process_id', f"{start_timestamp}.export_test.{catalogue}")

    msg["header"].update({
        'process_id': process_id,
        'application': "GOBExportTest",
        'entity': catalogue
    })

    logger.configure(msg, "EXPORT_TEST")

    test(catalogue)

    summary = logger.get_summary()
    msg = {"header": msg.get("header"), "summary": summary, "contents": None}

    # To overcome distribute problems caused by locked files,
    # distribute is decoupled and starts at a fixed time,
    # triggered by Jenkins.
    #
    # Send out a notification for a successful export test
    #
    # if len(summary['errors']) == 0:
    #     add_notification(msg, ExportTestNotification(header['catalogue'],
    #                                                  header.get('collection'),
    #                                                  header.get('product')))

    return msg
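
# Illustrative only (the catalogue name below is a hypothetical example):
# the handler requires just 'catalogue' in the header; 'process_id' is
# generated when absent.
#
#   msg = {"header": {"catalogue": "meetbouten"}}
#   result = handle_export_test_msg(msg)
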
def handle_export_msg(msg):
    header = msg.get('header', {})
    assert_message_attributes(header, ["catalogue", "collection", "destination"])

    catalogue = header['catalogue']
    collection = header['collection']
    product = header.get('product')
    destination = header['destination']
    application = header.get('application', "GOBExport")

    msg["header"].update({
        'destination': destination,
        'application': application,
        'catalogue': catalogue,
        'entity': collection,
        'product': product,
    })

    if destination == "Database":
        handle_export_dump_msg(msg)
    elif destination in ["Objectstore", "File"]:
        handle_export_file_msg(msg)
    else:
        logger.error(f"Unrecognized destination for export {catalogue} {collection}: {destination}")

    return {
        **msg,
        "header": msg.get("header"),
        "summary": logger.get_summary(),
        "contents": None
    }
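
# Illustrative message shape, with assumed catalogue/collection names;
# 'destination' must be "Database", "Objectstore" or "File":
#
#   msg = {"header": {
#       "catalogue": "meetbouten",
#       "collection": "meetbouten",
#       "destination": "Objectstore",
#   }}
#   result = handle_export_msg(msg)
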
def publish_result(msg, relates):
    result_msg = {
        'header': msg['header'],
        'summary': logger.get_summary(),
        'contents': relates
    }
    return result_msg

def apply(msg):
    mode = msg['header'].get('mode', FULL_UPLOAD)

    logger.configure(msg, "UPDATE")
    logger.info("Apply events")

    storage = GOBStorageHandler()
    combinations = _get_source_catalog_entity_combinations(storage, msg)

    # Gather statistics of update process
    stats = UpdateStatistics()

    before = None
    after = None
    for result in combinations:
        model = f"{result.source} {result.catalogue} {result.entity}"
        logger.info(f"Apply events {model}")
        storage = GOBStorageHandler(result)

        # Track eventId before event application
        entity_max_eventid, last_eventid = get_event_ids(storage)
        before = min(entity_max_eventid or 0, before or sys.maxsize)

        if is_corrupted(entity_max_eventid, last_eventid):
            logger.error(f"Model {model} is inconsistent! Data is more recent than events")
        elif entity_max_eventid == last_eventid:
            logger.info(f"Model {model} is up to date")
            apply_confirm_events(storage, stats, msg)
        else:
            logger.info(f"Start application of unhandled {model} events")
            with storage.get_session():
                last_events = storage.get_last_events()  # { tid: last_event, ... }

            apply_events(storage, last_events, entity_max_eventid, stats)
            apply_confirm_events(storage, stats, msg)

        # Track eventId after event application
        entity_max_eventid, last_eventid = get_event_ids(storage)
        after = max(entity_max_eventid or 0, after or 0)

        # Build result message
        results = stats.results()

        if mode == FULL_UPLOAD and _should_analyze(stats):
            logger.info("Running VACUUM ANALYZE on table")
            storage.analyze_table()

        stats.log()
        logger.info(f"Apply events {model} completed", {'data': results})

    msg['summary'] = logger.get_summary()

    # Add an events notification telling which event types have been applied
    if not msg['header'].get('suppress_notifications', False):
        add_notification(msg, EventNotification(stats.applied, [before, after]))

    return msg
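
# Sketch of a triggering message (header values are assumptions; which
# source/catalog/entity combinations are processed is resolved by
# _get_source_catalog_entity_combinations):
#
#   msg = {"header": {"mode": FULL_UPLOAD, "suppress_notifications": False}}
#   result = apply(msg)
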
def handle_brp_regression_test_msg(msg):
    logger.configure(msg, 'BRP Regression test')
    results = BrpRegression(logger).run()

    writer = ObjectstoreResultsWriter(results, 'regression_tests/results/brp')
    writer.write()
    logger.info("Written test results to Objectstore at regression_tests/results/brp")

    return {
        'header': {
            **msg.get('header', {}),
            'timestamp': datetime.datetime.utcnow().isoformat(),
        },
        'summary': logger.get_summary(),
    }

def end_to_end_wait_handler(msg):
    logger.configure(msg, 'E2E Test')
    process_id = msg['header'].get('process_id')
    wait_for_process_id = msg['header'].get('wait_for_process_id')
    seconds = msg['header'].get('seconds')

    assert all([process_id, wait_for_process_id, seconds]), \
        "Expecting attributes 'process_id', 'wait_for_process_id' and 'seconds' in header"

    E2ETest(process_id).wait(wait_for_process_id, seconds)

    return {
        'header': {
            **msg.get('header', {}),
        },
        'summary': logger.get_summary(),
    }
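
# Illustrative call with assumed process ids; all three header attributes
# are required by the assert above:
#
#   msg = {"header": {
#       "process_id": "e2e.1",
#       "wait_for_process_id": "import.1",
#       "seconds": 60,
#   }}
#   result = end_to_end_wait_handler(msg)
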
def handle_import_object_msg(msg):
    logger.configure(msg, "IMPORT OBJECT")
    logger.info("Start import object")

    importer = MappinglessConverterAdapter(msg['header'].get('catalogue'),
                                           msg['header'].get('entity'),
                                           msg['header'].get('entity_id_attr'))
    entity = importer.convert(msg['contents'])

    return {
        'header': {
            **msg['header'],
            'mode': ImportMode.SINGLE_OBJECT.value,
            'collection': msg['header'].get('entity'),
        },
        'summary': logger.get_summary(),
        'contents': [entity]
    }
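
# Illustrative message (names are assumptions); 'contents' holds the raw
# object to convert and 'entity_id_attr' names its identifying attribute:
#
#   msg = {
#       "header": {"catalogue": "nap", "entity": "peilmerken",
#                  "entity_id_attr": "identificatie"},
#       "contents": {...},
#   }
#   result = handle_import_object_msg(msg)
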
def process_relate(msg: dict):
    """Start the actual relate process.

    The message is checked for completeness, after which the Relater builds the
    new or updated relations. The result is returned as if it were the result
    of an import job, so it can be compared downstream.

    :param msg: a message from the broker containing the catalog and collections (optional)
    :return: the result message of the relate process
    """
    logger.configure(msg, "RELATE SRC")

    _check_message(msg)

    header = msg.get('header')

    logger.info("Relate table started")

    full_update = header.get('mode', "update") == "full"
    if full_update:
        logger.info("Full relate requested")

    updater = Relater(header[CATALOG_KEY], header[COLLECTION_KEY], header[ATTRIBUTE_KEY])
    filename, confirms = updater.update(full_update)

    logger.info("Relate table completed")

    relation_name = get_relation_name(GOBModel(), header[CATALOG_KEY],
                                      header[COLLECTION_KEY], header[ATTRIBUTE_KEY])

    result_msg = {
        "header": {
            **msg["header"],
            "catalogue": "rel",
            "collection": relation_name,
            "entity": relation_name,
            "source": "GOB",
            "application": "GOB",
            "version": RELATE_VERSION,
            "timestamp": msg.get("timestamp", datetime.datetime.utcnow().isoformat()),
        },
        "summary": logger.get_summary(),
        "contents_ref": filename,
        "confirms": confirms,
    }

    return result_msg
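
# Illustrative trigger message; CATALOG_KEY/COLLECTION_KEY/ATTRIBUTE_KEY are
# the header keys checked by _check_message, and the values below are assumptions:
#
#   msg = {"header": {
#       CATALOG_KEY: "gebieden",
#       COLLECTION_KEY: "buurten",
#       ATTRIBUTE_KEY: "ligt_in_wijk",
#       "mode": "full",   # omit for a regular (non-full) relate
#   }}
#   result_msg = process_relate(msg)
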
def end_to_end_check_handler(msg):
    logger.configure(msg, 'E2E Test')
    endpoint = msg['header'].get('endpoint')
    expect = msg['header'].get('expect')
    description = msg['header'].get('description')
    process_id = msg['header'].get('process_id')

    assert all([endpoint, expect, description, process_id]), \
        "Expecting attributes 'endpoint', 'expect', 'description' and 'process_id' in header"

    E2ETest(process_id).check(endpoint, expect, description)

    return {
        'header': {
            **msg.get('header', {}),
        },
        'summary': logger.get_summary(),
    }
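
# Illustrative call with assumed values; 'expect' is whatever E2ETest.check
# compares the endpoint response against:
#
#   msg = {"header": {
#       "process_id": "e2e.1",
#       "endpoint": "/gob/...",
#       "expect": ...,
#       "description": "check import result",
#   }}
#   result = end_to_end_check_handler(msg)
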
def end_to_end_execute_workflow_handler(msg):
    logger.configure(msg, 'E2E Test')
    workflow_to_execute = msg['header'].get('execute')
    workflow_process_id = msg['header'].get('execute_process_id')
    process_id = msg['header'].get('process_id')

    assert all([workflow_to_execute, workflow_process_id, process_id]), \
        "Expecting attributes 'execute', 'execute_process_id' and 'process_id' in header"

    E2ETest(process_id).execute_workflow(workflow_to_execute, workflow_process_id)

    return {
        'header': {
            **msg.get('header', {}),
        },
        'summary': logger.get_summary(),
    }
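
# Illustrative call (the workflow name and ids are assumptions):
#
#   msg = {"header": {
#       "process_id": "e2e.1",
#       "execute": "import",
#       "execute_process_id": "e2e.1.import",
#   }}
#   result = end_to_end_execute_workflow_handler(msg)
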
def full_update(msg):
    """Store the events for the current dataset

    :param msg: the result of the application of the events
    :return: Result message
    """
    logger.configure(msg, "UPDATE")
    logger.info(f"Update to GOB Database {GOBStorageHandler.user_name} started")

    # Interpret the message header
    message = ImportMessage(msg)
    metadata = message.metadata

    storage = GOBStorageHandler(metadata)
    model = f"{metadata.source} {metadata.catalogue} {metadata.entity}"
    logger.info(f"Store events {model}")

    # Get events from message
    events = msg["contents"]

    # Gather statistics of update process
    stats = UpdateStatistics()

    _process_events(storage, events, stats)

    # Build result message
    results = stats.results()

    stats.log()
    logger.info(f"Store events {model} completed", {'data': results})

    results.update(logger.get_summary())

    # Return the result message: no contents, but pass through any confirms
    message = {
        "header": msg["header"],
        "summary": results,
        "contents": None,
        "confirms": msg.get('confirms')
    }
    return message
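
# Sketch of the expected message: 'contents' is the list of events to store
# and 'confirms' is passed through untouched (the shape of the individual
# events is defined elsewhere and not assumed here):
#
#   msg = {
#       "header": {...},   # ImportMessage metadata (source, catalogue, entity, ...)
#       "contents": [...],
#       "confirms": None,
#   }
#   result = full_update(msg)
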
def check_relation(msg):
    """Check for any dangling relations

    :param msg: the incoming message; its header identifies the original
        catalog, collection and attribute of the relation to check
    :return: the result message
    """
    header = msg.get('header', {})
    catalog_name = header.get('original_catalogue')
    collection_name = header.get('original_collection')
    attribute_name = header.get('original_attribute')

    model = GOBModel()

    logger.configure(msg, "RELATE_CHECK")
    logger.info("Relate check started")

    collection = model.get_collection(catalog_name, collection_name)
    assert collection is not None, f"Invalid catalog/collection combination {catalog_name}/{collection_name}"

    reference = model._extract_references(collection['attributes']).get(attribute_name)

    try:
        is_very_many = reference['type'] == fully_qualified_type_name(VeryManyReference)
        check_function = check_very_many_relations if is_very_many else check_relations
        check_function(catalog_name, collection_name, attribute_name)
    except Exception as e:
        _log_exception(f"{attribute_name} check FAILED", e)

    logger.info("Relation conflicts check started")
    check_relation_conflicts(catalog_name, collection_name, attribute_name)

    logger.info("Relate check completed")

    return {
        "header": msg["header"],
        "summary": logger.get_summary(),
        "contents": None
    }
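
# Illustrative message; the 'original_*' header fields identify the relation
# to check (the values below are assumptions):
#
#   msg = {"header": {
#       "original_catalogue": "gebieden",
#       "original_collection": "buurten",
#       "original_attribute": "ligt_in_wijk",
#   }}
#   result = check_relation(msg)
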
def data_consistency_test_handler(msg):
    """Request to run data consistency tests.

    :param msg: the incoming message, with 'catalogue', 'collection' and
        optionally 'application' in its header
    :return: the result message; test results are captured by the logger
    """
    catalog = msg['header'].get('catalogue')
    collection = msg['header'].get('collection')
    application = msg['header'].get('application')
    msg['header']['entity'] = msg['header'].get('entity', collection)

    logger.configure(msg, 'Data consistency test')

    assert all([catalog, collection]), "Expecting header attributes 'catalogue' and 'collection'"

    test_id = f"{catalog} {collection} {application or ''}"

    # No return value. Results are captured by logger.
    logger.info(f"Data consistency test {test_id} started")
    try:
        DataConsistencyTest(catalog, collection, application).run()
    except GOBConfigException as e:
        logger.error(f"Dataset connection failed: {str(e)}")
    except (NotImplementedCatalogError, NotImplementedApplicationError, GOBException) as e:
        logger.error(f"Dataset test failed: {str(e)}")
    else:
        logger.info(f"Data consistency test {test_id} ended")

    return {
        'header': {
            **msg.get('header', {}),
            'timestamp': datetime.datetime.utcnow().isoformat(),
        },
        'summary': logger.get_summary(),
    }
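
# Illustrative call; 'application' is optional and narrows the test to one
# source application (the values below are assumptions):
#
#   msg = {"header": {
#       "catalogue": "nap",
#       "collection": "peilmerken",
#       "application": "Grondslag",
#   }}
#   result = data_consistency_test_handler(msg)
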
def compare(msg):
    """Compare new data in msg (contents) with the current data

    :param msg: The new data, including header and summary
    :return: result message
    """
    logger.configure(msg, "COMPARE")
    header = msg.get('header', {})
    mode = header.get('mode', FULL_UPLOAD)
    logger.info(f"Compare (mode = {mode}) to GOB Database {GOBStorageHandler.user_name} started")

    # Parse the message header
    message = ImportMessage(msg)
    metadata = message.metadata

    # Get the model for the collection to be compared
    gob_model = GOBModel()
    entity_model = gob_model.get_collection(metadata.catalogue, metadata.entity)

    # Initialize a storage handler for the collection
    storage = GOBStorageHandler(metadata)
    model = f"{metadata.source} {metadata.catalogue} {metadata.entity}"
    logger.info(f"Compare {model}")

    stats = CompareStatistics()

    tmp_table_name = None
    with storage.get_session():
        with ProgressTicker("Collect compare events", 10000) as progress:
            # Check any dependencies
            if not meets_dependencies(storage, msg):
                return {
                    "header": msg["header"],
                    "summary": logger.get_summary(),
                    "contents": None
                }

            enricher = Enricher(storage, msg)
            populator = Populator(entity_model, msg)

            # If there are no records in the database, all data result in ADD events
            initial_add = not storage.has_any_entity()
            if initial_add:
                logger.info("Initial load of new collection detected")
                # Write ADD events directly, without using a temporary table
                contents_writer = ContentsWriter()
                contents_writer.open()
                # Pass a None confirms_writer because only ADD events are written
                collector = EventCollector(contents_writer, confirms_writer=None,
                                           version=entity_model['version'])
                collect = collector.collect_initial_add
            else:
                # Collect entities in a temporary table
                collector = EntityCollector(storage)
                collect = collector.collect
                tmp_table_name = collector.tmp_table_name

            for entity in msg["contents"]:
                progress.tick()
                stats.collect(entity)
                enricher.enrich(entity)
                populator.populate(entity)
                collect(entity)

            collector.close()

    if initial_add:
        filename = contents_writer.filename
        confirms = None
        contents_writer.close()
    else:
        # Compare entities from temporary table
        with storage.get_session():
            diff = storage.compare_temporary_data(tmp_table_name, mode)
            filename, confirms = _process_compare_results(storage, entity_model, diff, stats)

    # Build result message
    results = stats.results()

    logger.info(f"Compare {model} completed", {'data': results})

    results.update(logger.get_summary())

    message = {
        "header": msg["header"],
        "summary": results,
        "contents_ref": filename,
        "confirms": confirms
    }
    return message
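
# Sketch of the incoming message; 'contents' holds the imported records that
# are compared against the current database state, and the header must also
# carry the ImportMessage metadata (source, catalogue, entity, ...):
#
#   msg = {
#       "header": {"mode": FULL_UPLOAD, ...},
#       "contents": [{...}, {...}],
#   }
#   result = compare(msg)
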