Example 1
def handle_export_test_msg(msg):
    header = msg.get('header', {})
    assert_message_attributes(header, ["catalogue"])

    catalogue = header['catalogue']

    start_timestamp = int(
        datetime.datetime.utcnow().replace(microsecond=0).timestamp())
    process_id = header.get('process_id',
                            f"{start_timestamp}.export_test.{catalogue}")

    msg["header"].update({
        'process_id': process_id,
        'application': "GOBExportTest",
        'entity': catalogue
    })

    logger.configure(msg, "EXPORT_TEST")

    test(catalogue)

    summary = logger.get_summary()
    msg = {"header": msg.get("header"), "summary": summary, "contents": None}

    # To overcome distribute problems with locked files,
    # distribute is decoupled and starts at a fixed
    # time, triggered by Jenkins.
    #
    # Send out a notification for a successful export test
    #
    # if len(summary['errors']) == 0:
    #     add_notification(msg, ExportTestNotification(header['catalogue'],
    #                                                  header.get('collection'),
    #                                                  header.get('product')))
    return msg
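
A minimal input that satisfies this handler might look like the sketch below; the catalogue name is an assumption, purely for illustration.

# Hypothetical input for handle_export_test_msg: assert_message_attributes
# only requires the 'catalogue' header attribute; everything else is filled
# in by the handler itself.
msg = {"header": {"catalogue": "gebieden"}}  # assumed catalogue name

result = handle_export_test_msg(msg)
# result["header"] now also contains process_id
# ("<timestamp>.export_test.gebieden"), application ("GOBExportTest")
# and entity ("gebieden").
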
Example 2
def handle_export_msg(msg):
    header = msg.get('header', {})
    assert_message_attributes(header,
                              ["catalogue", "collection", "destination"])

    catalogue = header['catalogue']
    collection = header['collection']
    product = header.get('product', None)
    destination = header['destination']
    application = header.get('application', "GOBExport")

    msg["header"].update({
        'destination': destination,
        'application': application,
        'catalogue': catalogue,
        'entity': collection,
        'product': product,
    })

    if destination == "Database":
        handle_export_dump_msg(msg)
    elif destination in ["Objectstore", "File"]:
        handle_export_file_msg(msg)
    else:
        logger.error(
            f"Unrecognized destination for export {catalogue} {collection}: {destination}"
        )

    return {
        **msg, "header": msg.get("header"),
        "summary": logger.get_summary(),
        "contents": None
    }
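
The handler dispatches on the 'destination' header attribute; a minimal sketch of a valid message, with assumed catalogue and collection names:

# Hypothetical input for handle_export_msg; 'destination' selects the code
# path: "Database" -> handle_export_dump_msg, "Objectstore" or "File" ->
# handle_export_file_msg, anything else logs an error.
msg = {
    "header": {
        "catalogue": "gebieden",       # assumed values, for illustration
        "collection": "buurten",
        "destination": "Objectstore",  # or "Database" / "File"
    }
}
result = handle_export_msg(msg)  # result["contents"] is always None
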
Example 3
def publish_result(msg, relates):
    result_msg = {
        'header': msg['header'],
        'summary': logger.get_summary(),
        'contents': relates
    }
    return result_msg
Example 4
def apply(msg):
    mode = msg['header'].get('mode', FULL_UPLOAD)

    logger.configure(msg, "UPDATE")
    logger.info("Apply events")

    storage = GOBStorageHandler()
    combinations = _get_source_catalog_entity_combinations(storage, msg)

    # Gather statistics of update process
    stats = UpdateStatistics()
    before = None
    after = None
    for result in combinations:
        model = f"{result.source} {result.catalogue} {result.entity}"
        logger.info(f"Apply events {model}")
        storage = GOBStorageHandler(result)

        # Track eventId before event application
        entity_max_eventid, last_eventid = get_event_ids(storage)
        before = min(entity_max_eventid or 0, before or sys.maxsize)

        if is_corrupted(entity_max_eventid, last_eventid):
            logger.error(
                f"Model {model} is inconsistent! Data is more recent than events"
            )
        elif entity_max_eventid == last_eventid:
            logger.info(f"Model {model} is up to date")
            apply_confirm_events(storage, stats, msg)
        else:
            logger.info(f"Start application of unhandled {model} events")
            with storage.get_session():
                # { tid: last_event, ... }
                last_events = storage.get_last_events()

            apply_events(storage, last_events, entity_max_eventid, stats)
            apply_confirm_events(storage, stats, msg)

        # Track eventId after event application
        entity_max_eventid, last_eventid = get_event_ids(storage)
        after = max(entity_max_eventid or 0, after or 0)

        # Build result message
        results = stats.results()
        if mode == FULL_UPLOAD and _should_analyze(stats):
            logger.info("Running VACUUM ANALYZE on table")
            storage.analyze_table()

        stats.log()
        logger.info(f"Apply events {model} completed", {'data': results})

    msg['summary'] = logger.get_summary()

    # Add an events notification telling which types of event have been applied
    if not msg['header'].get('suppress_notifications', False):
        add_notification(msg, EventNotification(stats.applied,
                                                [before, after]))

    return msg
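
The before/after bookkeeping above keeps the lowest entity event id seen before application and the highest one seen after it, across all source/catalogue/entity combinations; note that a missing event id (None) collapses to 0 on both bounds. A standalone sketch, with hypothetical event ids:

import sys

# Standalone sketch of the before/after accumulation in apply();
# the event ids are hypothetical.
before, after = None, None
for entity_max_eventid in (120, 80, None):
    before = min(entity_max_eventid or 0, before or sys.maxsize)
    after = max(entity_max_eventid or 0, after or 0)
print(before, after)  # 0 120: the None id counts as 0 on both bounds
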
Example 5
def handle_brp_regression_test_msg(msg):
    logger.configure(msg, 'BRP Regression test')

    results = BrpRegression(logger).run()
    writer = ObjectstoreResultsWriter(results, 'regression_tests/results/brp')
    writer.write()
    logger.info(
        "Wrote test results to Objectstore at regression_tests/results/brp")

    return {
        'header': {
            **msg.get('header', {}),
            'timestamp': datetime.datetime.utcnow().isoformat(),
        },
        'summary': logger.get_summary(),
    }
Example 6
def end_to_end_wait_handler(msg):
    logger.configure(msg, 'E2E Test')
    process_id = msg['header'].get('process_id')
    wait_for_process_id = msg['header'].get('wait_for_process_id')
    seconds = msg['header'].get('seconds')

    assert all([process_id, wait_for_process_id, seconds]), \
        "Expecting attributes 'process_id', 'wait_for_process_id' and 'seconds' in header"

    E2ETest(process_id).wait(wait_for_process_id, seconds)

    return {
        'header': {
            **msg.get('header', {}),
        },
        'summary': logger.get_summary(),
    }
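
A sketch of the header this handler expects; note that all([...]) treats any falsy value as missing, so e.g. seconds=0 would also trip the assertion. The ids below are assumptions:

# Hypothetical input for end_to_end_wait_handler.
msg = {
    "header": {
        "process_id": "e2e.1",             # assumed ids, for illustration
        "wait_for_process_id": "import.7",
        "seconds": 30,                     # seconds=0 would fail all([...])
    }
}
result = end_to_end_wait_handler(msg)
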
Example 7
def handle_import_object_msg(msg):
    logger.configure(msg, "IMPORT OBJECT")
    logger.info("Start import object")
    importer = MappinglessConverterAdapter(msg['header'].get('catalogue'),
                                           msg['header'].get('entity'),
                                           msg['header'].get('entity_id_attr'))
    entity = importer.convert(msg['contents'])

    return {
        'header': {
            **msg['header'],
            'mode': ImportMode.SINGLE_OBJECT.value,
            'collection': msg['header'].get('entity'),
        },
        'summary': logger.get_summary(),
        'contents': [entity]
    }
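
A sketch of a message this handler could receive; the attribute names in 'contents' depend on the catalogue's model and are assumed here:

# Hypothetical input for handle_import_object_msg; 'contents' carries the
# raw object that MappinglessConverterAdapter converts to an entity.
msg = {
    "header": {
        "catalogue": "gebieden",            # assumed values, for illustration
        "entity": "buurten",
        "entity_id_attr": "identificatie",
    },
    "contents": {"identificatie": "0363001"},  # illustrative raw object
}
result = handle_import_object_msg(msg)  # result["contents"] == [entity]
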
Example 8
def process_relate(msg: dict):
    """
    This function starts the actual relate process. The message is checked for completeness and the Relater
    builds the new or updated relations, returning the result to be compared as if it were the result
    of an import job.

    :param msg: a message from the broker containing the catalog and collections (optional)
    :return: the result message of the relate process
    """
    logger.configure(msg, "RELATE SRC")

    _check_message(msg)
    header = msg.get('header')

    logger.info("Relate table started")

    full_update = header.get('mode', "update") == "full"

    if full_update:
        logger.info("Full relate requested")

    updater = Relater(header[CATALOG_KEY], header[COLLECTION_KEY], header[ATTRIBUTE_KEY])

    filename, confirms = updater.update(full_update)

    logger.info("Relate table completed")

    relation_name = get_relation_name(GOBModel(), header[CATALOG_KEY], header[COLLECTION_KEY], header[ATTRIBUTE_KEY])

    result_msg = {
        "header": {
            **msg["header"],
            "catalogue": "rel",
            "collection": relation_name,
            "entity": relation_name,
            "source": "GOB",
            "application": "GOB",
            "version": RELATE_VERSION,
            "timestamp": msg.get("timestamp", datetime.datetime.utcnow().isoformat()),
        },
        "summary": logger.get_summary(),
        "contents_ref": filename,
        "confirms": confirms,
    }

    return result_msg
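
A sketch of the header process_relate works from; the header keys are the module's CATALOG_KEY, COLLECTION_KEY and ATTRIBUTE_KEY constants, and the values below are assumptions:

# Hypothetical input for process_relate.
msg = {
    "header": {
        CATALOG_KEY: "meetbouten",     # assumed values, for illustration
        COLLECTION_KEY: "metingen",
        ATTRIBUTE_KEY: "ligt_in_buurt",
        "mode": "full",                # default is "update"; "full" forces a full relate
    }
}
result = process_relate(msg)  # events are offloaded to result["contents_ref"]
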
Example 9
def end_to_end_check_handler(msg):
    logger.configure(msg, 'E2E Test')

    endpoint = msg['header'].get('endpoint')
    expect = msg['header'].get('expect')
    description = msg['header'].get('description')
    process_id = msg['header'].get('process_id')

    assert all([endpoint, expect, description, process_id]), \
        "Expecting attributes 'endpoint', 'expect', 'description' and 'process_id' in header"

    E2ETest(process_id).check(endpoint, expect, description)
    return {
        'header': {
            **msg.get('header', {}),
        },
        'summary': logger.get_summary(),
    }
Example 10
def end_to_end_execute_workflow_handler(msg):
    logger.configure(msg, 'E2E Test')
    workflow_to_execute = msg['header'].get('execute')
    workflow_process_id = msg['header'].get('execute_process_id')
    process_id = msg['header'].get('process_id')

    assert all([workflow_to_execute, workflow_process_id, process_id]), \
        "Expecting attributes 'execute', 'execute_process_id' and 'process_id' in header"

    E2ETest(process_id).execute_workflow(workflow_to_execute,
                                         workflow_process_id)

    return {
        'header': {
            **msg.get('header', {}),
        },
        'summary': logger.get_summary(),
    }
Example 11
def full_update(msg):
    """Store the events for the current dataset

    :param msg: the result of the application of the events
    :return: Result message
    """
    logger.configure(msg, "UPDATE")
    logger.info(
        f"Update to GOB Database {GOBStorageHandler.user_name} started")

    # Interpret the message header
    message = ImportMessage(msg)
    metadata = message.metadata

    storage = GOBStorageHandler(metadata)
    model = f"{metadata.source} {metadata.catalogue} {metadata.entity}"
    logger.info(f"Store events {model}")

    # Get events from message
    events = msg["contents"]

    # Gather statistics of update process
    stats = UpdateStatistics()

    _process_events(storage, events, stats)

    # Build result message
    results = stats.results()

    stats.log()
    logger.info(f"Store events {model} completed", {'data': results})

    results.update(logger.get_summary())

    # Return the result message: no log, no contents, but pass through any confirms
    message = {
        "header": msg["header"],
        "summary": results,
        "contents": None,
        "confirms": msg.get('confirms')
    }
    return message
Example 12
def check_relation(msg):
    """
    Check for any dangling relations

    :param msg: a message whose header contains the original catalogue, collection and attribute
    :return: the result message with the check summary
    """
    header = msg.get('header', {})
    catalog_name = header.get('original_catalogue')
    collection_name = header.get('original_collection')
    attribute_name = header.get('original_attribute')

    model = GOBModel()

    logger.configure(msg, "RELATE_CHECK")
    logger.info("Relate check started")

    collection = model.get_collection(catalog_name, collection_name)
    assert collection is not None, f"Invalid catalog/collection combination {catalog_name}/{collection_name}"

    reference = model._extract_references(collection['attributes']).get(attribute_name)

    try:
        is_very_many = reference['type'] == fully_qualified_type_name(VeryManyReference)
        check_function = check_very_many_relations if is_very_many else check_relations
        check_function(catalog_name, collection_name, attribute_name)
    except Exception as e:
        _log_exception(f"{attribute_name} check FAILED", e)

    logger.info("Relation conflicts check started")
    check_relation_conflicts(catalog_name, collection_name, attribute_name)

    logger.info("Relate check completed")

    return {
        "header": msg["header"],
        "summary": logger.get_summary(),
        "contents": None
    }
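
A sketch of the header check_relation expects; the 'original_*' attributes identify the relation to check, and the values below are assumptions:

# Hypothetical input for check_relation.
msg = {
    "header": {
        "original_catalogue": "meetbouten",   # assumed values, for illustration
        "original_collection": "metingen",
        "original_attribute": "ligt_in_buurt",
    }
}
result = check_relation(msg)  # failures surface in result["summary"]
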
Example 13
def data_consistency_test_handler(msg):
    """Request to run data consistency tests.

    :param msg:
    :return:
    """
    catalog = msg['header'].get('catalogue')
    collection = msg['header'].get('collection')
    application = msg['header'].get('application')
    msg['header']['entity'] = msg['header'].get('entity', collection)

    logger.configure(msg, 'Data consistency test')

    assert all([catalog, collection]), \
        "Expecting header attributes 'catalogue' and 'collection'"
    test_id = f"{catalog} {collection} {application or ''}"
    # No return value. Results are captured by the logger.
    logger.info(f"Data consistency test {test_id} started")
    try:
        DataConsistencyTest(catalog, collection, application).run()
    except GOBConfigException as e:
        logger.error(f"Dataset connection failed: {str(e)}")
    except (NotImplementedCatalogError, NotImplementedApplicationError,
            GOBException) as e:
        logger.error(f"Dataset test failed: {str(e)}")
    else:
        logger.info(f"Data consistency test {id} ended")

    return {
        'header': {
            **msg.get('header', {}),
            'timestamp': datetime.datetime.utcnow().isoformat(),
        },
        'summary': logger.get_summary(),
    }
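
A sketch of the expected header; 'catalogue' and 'collection' are asserted, 'application' is optional, and all values below are assumptions:

# Hypothetical input for data_consistency_test_handler.
msg = {
    "header": {
        "catalogue": "nap",          # assumed values, for illustration
        "collection": "peilmerken",
        "application": "Grondslag",  # optional
    }
}
result = data_consistency_test_handler(msg)  # results live in the summary
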
Example 14
def compare(msg):
    """Compare new data in msg (contents) with the current data

    :param msg: The new data, including header and summary
    :return: result message
    """
    logger.configure(msg, "COMPARE")
    header = msg.get('header', {})
    mode = header.get('mode', FULL_UPLOAD)
    logger.info(
        f"Compare (mode = {mode}) to GOB Database {GOBStorageHandler.user_name} started"
    )

    # Parse the message header
    message = ImportMessage(msg)
    metadata = message.metadata

    # Get the model for the collection to be compared
    gob_model = GOBModel()
    entity_model = gob_model.get_collection(metadata.catalogue,
                                            metadata.entity)

    # Initialize a storage handler for the collection
    storage = GOBStorageHandler(metadata)
    model = f"{metadata.source} {metadata.catalogue} {metadata.entity}"
    logger.info(f"Compare {model}")

    stats = CompareStatistics()

    tmp_table_name = None
    with storage.get_session():
        with ProgressTicker("Collect compare events", 10000) as progress:
            # Check any dependencies
            if not meets_dependencies(storage, msg):
                return {
                    "header": msg["header"],
                    "summary": logger.get_summary(),
                    "contents": None
                }

            enricher = Enricher(storage, msg)
            populator = Populator(entity_model, msg)

            # If there are no records in the database, all data are ADD events
            initial_add = not storage.has_any_entity()
            if initial_add:
                logger.info("Initial load of new collection detected")
                # Write ADD events directly, without using a temporary table
                contents_writer = ContentsWriter()
                contents_writer.open()
                # Pass a None confirms_writer because only ADD events are written
                collector = EventCollector(contents_writer,
                                           confirms_writer=None,
                                           version=entity_model['version'])
                collect = collector.collect_initial_add
            else:
                # Collect entities in a temporary table
                collector = EntityCollector(storage)
                collect = collector.collect
                tmp_table_name = collector.tmp_table_name

            for entity in msg["contents"]:
                progress.tick()
                stats.collect(entity)
                enricher.enrich(entity)
                populator.populate(entity)
                collect(entity)

            collector.close()

    if initial_add:
        filename = contents_writer.filename
        confirms = None
        contents_writer.close()
    else:
        # Compare entities from temporary table
        with storage.get_session():
            diff = storage.compare_temporary_data(tmp_table_name, mode)
            filename, confirms = _process_compare_results(
                storage, entity_model, diff, stats)

    # Build result message
    results = stats.results()

    logger.info(f"Compare {model} completed", {'data': results})

    results.update(logger.get_summary())

    message = {
        "header": msg["header"],
        "summary": results,
        "contents_ref": filename,
        "confirms": confirms
    }

    return message