def qc_occurrence(record, data_archive):
    if record["qc"] is None:
        record["qc"] = 0

    # QC for the occ. records: required fields (1, 10)
    record["qc"] |= required_fields.check_record_required(record)
    record["qc"] |= required_fields.check_record_obis_format(record)

    # QC for the occ. records : location basic (4, 5, 18)
    record["qc"] |= location.check_basic_record(record)

    # QC for the occ. records : areas (9)
    if data_archive.areas is not None:
        record["qc"] |= location.check_record_in_areas(record,
                                                       data_archive.areas)

    # QC for the occ. records : building batch for API call (6, 19)
    if record["qc"] & (qc_flags.QCFlag.GEO_LAT_LON_VALID.bitmask
                       | qc_flags.QCFlag.GEO_LAT_LON_PRESENT.bitmask):
        data_archive.records_for_lookup.append(record)
        # Execute lookup if necessary
        if len(data_archive.records_for_lookup
               ) >= data_archive.LOOKUP_BATCH_SIZE:
            location.check_xy(data_archive.records_for_lookup)
            data_archive.pyxylookup_counter += 1
            dateTimeObj = datetime.now()
            this.logger.debug(
                f"{dateTimeObj}: Lookups B: {data_archive.pyxylookup_counter}")
            # Empty the list
            data_archive.records_for_lookup = []

    # QC for the occ. records: taxonomy (2, 3)
    record["qc"] |= taxonomy.check_record(record)

    # QC for the occ. records : time (7, 11, 12, 13)
    record["qc"] |= time_qc.check_record(record, 0)

    # QC for occ. : sex (17)
    record["qc"] |= measurements.check_sex_record(record)

    # Do the measurement-or-fact QC for the occurrence record (14, 15, 16, 17)
    # This processes all eMoF records belonging to this occurrence record
    record["qc"] |= qc_emof(record, data_archive)

    # Complete dataset: (25)
    if data_archive.goodmetadata:
        record["qc"] |= qc_flags.QCFlag.GOODMETADATA.bitmask

    # Returning the mask is useful when the core record is an event
    return record["qc"]
    def test_api_call(self):
        """ Generates a number of valid/plausible geographical points and calls the pyxylookup API
            purpose is to verify capacity limit and to evaluate a viable chunck size
            note: if the values are invalid the API shall not be called, so we are forced
            to run basic lat lon checks before this one. """

        from random import uniform
        from time import time

        # Modify this to pass a different number of records to the API
        load = 1000

        rand_records = []

        for i in range(load):
            # Random points on Earth's surface at random depths
            lat = uniform(-90, 90)
            lon = uniform(-180, 180)
            depth = uniform(-300, 3000)

            rand_records.append({
                "id": i,
                "decimalLongitude": lon,
                "decimalLatitude": lat,
                "maximumDepthInMeters": depth,
                "QC": 0
            })

        # timing and calling the service
        start = time()
        results = location.check_xy(rand_records)
        this.logger.info(f"Time elapsed: {time() - start}")
        this.logger.info(results)
def qc_event(record, data_archive):
    # None means that the record has not been quality checked; 0 means that QCs have been attempted
    if record["qc"] is None:
        record["qc"] = 0

    # QC for the ev. records : location basic (4, 5, 18, 21)
    record["qc"] |= location.check_basic_record(record)
    # QC for the ev. records : areas (9)
    if data_archive.areas is not None:
        record["qc"] |= location.check_record_in_areas(record,
                                                       data_archive.areas)

    # Check the required fields (1) (expected to fail for an event record alone)
    record["qc"] |= required_fields.check_record_required(record, False)

    # QC for the ev. records : building batch for API call (6, 19)
    if record["qc"] & (qc_flags.QCFlag.GEO_LAT_LON_VALID.bitmask
                       | qc_flags.QCFlag.GEO_LAT_LON_PRESENT.bitmask):
        data_archive.records_for_lookup.append(record)
        # Execute lookup if necessary
        if len(data_archive.records_for_lookup
               ) >= data_archive.LOOKUP_BATCH_SIZE:
            location.check_xy(data_archive.records_for_lookup)
            data_archive.pyxylookup_counter += 1
            dateTimeObj = datetime.now()
            this.logger.debug(
                f"{dateTimeObj}: Lookups A: {data_archive.pyxylookup_counter}")
            # Empty the list
            data_archive.records_for_lookup = []

    # QC for the ev. records : time (7, 11, 12, 13)
    record["qc"] |= time_qc.check_record(record, 0)

    # Look at the event related eMoF records  (14, 15, 16, 17)
    # Disabled as per email 24/01/2021
    # record["qc"] |= qc_emof(record, data_archive)

    # goodmetadata applies to the full dataset (25)
    if data_archive.goodmetadata:
        record["qc"] |= qc_flags.QCFlag.GOODMETADATA.bitmask

    return record["qc"]
    def test_check_xy(self):
        """ tests calls to the lookup service through pyxylookup """

        # first get the QC done
        for record in self.records:
            record["QC"] = location.check_basic_record(record)

        # This should call pyxylookup and change the records; the resulting masks are asserted below
        results = location.check_xy(self.records)
        this.logger.info(results)

        assert (results == [
            QCFlag.GEO_LAT_LON_ON_SEA.bitmask,
            0,
            0,
            0,
            0,
            0,
            QCFlag.GEO_LAT_LON_ON_SEA.bitmask
            | QCFlag.DEPTH_MAP_VERIFIED.bitmask,
            QCFlag.GEO_LAT_LON_ON_SEA.bitmask,
            QCFlag.GEO_LAT_LON_ON_SEA.bitmask
            | QCFlag.DEPTH_MAP_VERIFIED.bitmask,
        ])
def dwca_file_qc(filename, with_logging=False):
    """ Processes a DwCA archive if it is passed as a filename,
        shall popup a file chooser dialog if this is None
        :param filename (The DwCA zip file name)
        :param with_logging (every QC passed is printed)
    """

    if filename is None:
        this.logger.warning("WARNING: Call to dwca_file_qc with no filename ")
        return None

    archive = DwCAProcessor(filename)

    # Once and for all
    geo_areas = extract_area.find_areas(archive.eml)

    archive_core_type = archive.core.type.lower()  # Can be occurrence or event

    # The core records are checked for lat lon in any case
    coord_in_occur = None

    # Determine whether occurrence records have LON LAT
    for ext_record in archive.extensions:
        if ext_record.type.lower() == "occurrence":
            if "decimalLatitude" in ext_record[
                    "fields"] and "decimalLongitude" in ext_record["fields"]:
                coord_in_occur = True
            else:
                coord_in_occur = False

    record_count = 0

    # Store in this list the records to look up, for executing QCs 6 and 19
    records_for_lookup = []

    # Batch lookup size (experimentally established, tried 100, 200, 500 and 1000)
    lookup_batch_size = 1000
    count_lookups = 0

    # to display file processing time
    time_start = time.time()
    records_by_key_for_ext_qc = {}

    # Processed cores (with their extensions), collected for the final logging pass
    dwca_cores = []

    for coreRecord in archive.core_records():
        record_count += 1

        # Attempt check for lat/lon in full record
        full_core = coreRecord["full"]
        full_core["type"] = archive_core_type

        dwca_core = DwCACore(full_core)
        dwca_cores.append(dwca_core)

        # All the extension records shall contribute to the core record QC
        records_by_key_for_ext_qc[coreRecord["pk"]] = full_core

        # Core Record (any type)
        # Check location
        qc_mask = location.check_basic_record(full_core)
        if "qc" in full_core:
            full_core["qc"] = full_core["qc"] | qc_mask
        else:
            full_core["qc"] = qc_mask

        # If locations are present and valid - QC 6 and 19 - all types
        if qc_mask & (qc_flags.QCFlag.GEO_LAT_LON_PRESENT.bitmask
                      | qc_flags.QCFlag.GEO_LAT_LON_VALID.bitmask):
            records_for_lookup.append(full_core)
            if len(records_for_lookup) >= lookup_batch_size:
                location.check_xy(records_for_lookup)
                count_lookups += 1
                # Empty the list
                records_for_lookup = []

        full_core["type"] = archive.core.type.lower()

        if full_core["type"] == "event":

            # Check location in area
            if geo_areas is not None:
                qc_mask = location.check_record_in_areas(full_core, geo_areas)
                full_core["qc"] = full_core["qc"] | qc_mask

            # Check dates (This is a repeat)
            qc_mask = time_qc.check_record(full_core, 0)
            full_core["qc"] = full_core["qc"] | qc_mask

        elif archive.core.type.lower() == "occurrence":

            # QC 10 (basis of records)
            qc_mask = required_fields.check_record_obis_format(full_core)
            if "qc" in full_core:
                full_core["qc"] = full_core["qc"] | qc_mask
            else:
                full_core["qc"] = qc_mask

            # QC 1 (required fields)
            qc_mask = required_fields.check_record_required(full_core)
            full_core["qc"] = full_core["qc"] | qc_mask

            # QC 4 and 5 (Check location)
            qc_mask = location.check_basic_record(full_core)
            full_core["qc"] = full_core["qc"] | qc_mask

            # QC 9 (Area Check)
            if geo_areas is not None:
                qc_mask = location.check_record_in_areas(full_core, geo_areas)
                full_core["qc"] = full_core["qc"] | qc_mask

            # QC 2 and 3 (Taxonomy)
            qc_mask = taxonomy.check_record(full_core)
            full_core["qc"] = full_core["qc"] | qc_mask

            # QC 7, 11, 12, 13 (Dates - times)
            qc_mask = time_qc.check_record(full_core, 0)
            full_core["qc"] = full_core["qc"] | qc_mask

            # Sex
            qc_mask = measurements.check_sex_record(full_core)
            full_core["qc"] = full_core["qc"] | qc_mask

            # Dynamic properties
            qc_mask = measurements.check_dyn_prop_record(full_core)
            full_core["qc"] = full_core["qc"] | qc_mask

        else:
            # Skip taxons and other record types
            full_core["qc"] = 0

        extensions_to_update = {}

        for e in archive.extensions:

            if e.type.lower() not in dwca_core.extensions:
                dwca_core.extensions[e.type.lower()] = []

            if e.type.lower() in [
                    "occurrence", "measurementorfact",
                    "extendedmeasurementorfact"
            ]:
                for extensionRecord in archive.extension_records(e):
                    record_count += 1
                    full_extension = extensionRecord["full"]
                    full_extension["fk"] = extensionRecord["fk"]
                    full_extension["type"] = e.type.lower()

                    if e.type.lower() == "occurrence":

                        # Redundant, the record must have an occurrenceID!
                        occurrence_key = full_extension[
                            "occurrenceID"] if "occurrenceID" in full_extension else None

                        if occurrence_key is not None:
                            if occurrence_key in extensions_to_update:
                                # QC was already accumulated under this key (e.g. from eMoF records); inherit it
                                full_extension["qc"] = extensions_to_update[
                                    occurrence_key]["qc"]
                            else:
                                extensions_to_update[
                                    occurrence_key] = full_extension

                        # QC 10 (basis of records)
                        qc_mask = required_fields.check_record_obis_format(
                            full_extension)
                        if "qc" in full_extension:
                            full_extension[
                                "qc"] = full_extension["qc"] | qc_mask
                        else:
                            full_extension["qc"] = qc_mask

                        # QC 1 (required fields)
                        qc_mask = required_fields.check_record_required(
                            full_extension)
                        full_extension["qc"] = full_extension["qc"] | qc_mask

                        # Check location if necessary
                        if coord_in_occur:
                            qc_mask = location.check_basic_record(
                                full_extension)
                            full_extension[
                                "qc"] = full_extension["qc"] | qc_mask

                            # Also add it for the lookup if OK
                            if qc_mask & (
                                    qc_flags.QCFlag.GEO_LAT_LON_PRESENT.bitmask
                                    |
                                    qc_flags.QCFlag.GEO_LAT_LON_VALID.bitmask):
                                records_for_lookup.append(full_extension)
                                if len(records_for_lookup
                                       ) >= lookup_batch_size:
                                    location.check_xy(records_for_lookup)
                                    count_lookups += 1
                                    records_for_lookup = []

                        # Check taxonomy
                        qc_mask = taxonomy.check_record(full_extension)
                        full_extension["qc"] = full_extension["qc"] | qc_mask

                        # Check dates (This is a repeat)
                        qc_mask = time_qc.check_record(full_extension, 0)
                        full_extension["qc"] = full_extension["qc"] | qc_mask

                        # Check sex
                        qc_mask = measurements.check_sex_record(full_extension)
                        full_extension["qc"] = full_extension["qc"] | qc_mask

                        # Check dynamic properties
                        qc_mask = measurements.check_dyn_prop_record(
                            full_extension)
                        full_extension["qc"] = full_extension["qc"] | qc_mask

                        # This is an extension but it is also an occurrence. Update the core event record
                        full_core["qc"] |= full_extension["qc"]

                    elif e.type.lower() in [
                            "measurementorfact", "extendedmeasurementorfact"
                    ]:

                        if archive.core.type.lower() == "event":
                            occurrence_key = full_extension[
                                "occurrenceID"] if "occurrenceID" in full_extension else None
                        else:
                            occurrence_key = None

                        # Check measurements
                        qc_mask = measurements.check_record(full_extension)

                        # Need to update the core record, and possibly the occurrence if the core is an event
                        if occurrence_key is not None:
                            # Update occurrence record
                            if occurrence_key in extensions_to_update:
                                extensions_to_update[occurrence_key][
                                    "qc"] |= qc_mask
                            else:
                                extensions_to_update[occurrence_key] = {
                                    "qc": qc_mask
                                }

                        full_core["qc"] |= qc_mask

                    else:
                        # Skip taxons and other types
                        pass

                    dwca_core.extensions[e.type.lower()].append(full_extension)

    # Do we need a last lookup?
    if len(records_for_lookup) > 0:
        # The records will be modified with the correct QC flags so we do not care about the results
        location.check_xy(records_for_lookup)

        # If occurrences were passed to the lookup, update their core event records by looking them up in the reference map
        for record in records_for_lookup:
            if "fk" in record:
                records_by_key_for_ext_qc[record["fk"]]["qc"] |= record["qc"]

        count_lookups += 1

    if with_logging:
        this.logger.info(f"Filename processed: {filename}")
        this.logger.info(f"Archive core record type: {archive_core_type}")
        this.logger.info(f"XY lookups: {count_lookups}")
        this.logger.info(
            f"Records looked up: {lookup_batch_size * (count_lookups - 1) + len(records_for_lookup)}"
        )
        this.logger.info(f"Records processed: {record_count}")
        this.logger.info(f"Total time: {time.time() - time_start}")

    # Rescan the processed core records for logging the results
    if with_logging:
        for print_record in dwca_cores:
            if print_record.core["qc"] > 0:
                this.logger.info(
                    f"Core record {print_record.core}. \nPassed quality checks: "
                    f"{qc_flags.QCFlag.decode_mask(print_record.core['qc'])}")

            for e in print_record.extensions.keys():

                for full_extension in print_record.extensions[e]:
                    this.logger.info(f"--- extension: {e}")
                    if e == "occurrence":
                        this.logger.info(
                            f"The occurrence record {full_extension} passed quality checks: "
                            f"{qc_flags.QCFlag.decode_mask(full_extension['qc'])} "
                        )
                    else:
                        this.logger.info(full_extension)
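
# A usage sketch for dwca_file_qc, assuming a DwCA zip file on disk (the
# path below is a hypothetical example, not a file shipped with the module):
def run_example_archive_qc():
    dwca_file_qc("data/sample_dwca.zip", with_logging=True)
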
def dataset_qc_labeling(dataset_id,
                        disable_index=True,
                        with_logging=True,
                        pool_no=0):
    """ Processes an eurobis dataset if it is passed as a dataset_id,
        shall popup a file chooser dialog if this is None
        :param dataset_id (The dataset identifier from the dataproviderstable)
        :param disable_index: Whether we are eventually allowed to disable the index at this level
        :param with_logging (every QC passed is printed)
        """

    if dataset_id is None:
        this.logger.warning(
            "WARNING: Call to dataset_qc_labeling with no dataset_id ")
        return None

    data_archive = eurobis_dataset.EurobisDataset()
    data_archive.load_dataset(dataset_id)

    if with_logging:
        this.logger.info(f"--------------------------------------------------")
        this.logger.info(
            f"Loaded dataset {data_archive.dataset_name}, id = {data_archive.dataprovider_id} "
        )
        this.logger.info(
            f"Number of event records: {len(data_archive.event_recs)}")
        this.logger.info(
            f"Number of occurrence records: {len(data_archive.occurrence_recs)}"
        )
        this.logger.info(
            f"Number of emof records: {len(data_archive.emof_recs)}")
        this.logger.info(f"Interesting areas: {data_archive.areas}")
        this.logger.info(f"Imis dataset ID: {data_archive.imis_das_id}")
        this.logger.info(
            f"Good metadata: {'OK' if data_archive.goodmetadata == True else 'Not OK'}"
        )
        this.logger.info(
            f"Type of core records: {'Event' if data_archive.darwin_core_type == 2 else 'Occurrence'}"
        )
        this.logger.info(f"Poolno: {pool_no}")
        this.logger.info(f"--------------------------------------------------")

    # Starting the QCs:
    # After loading, measure processing time
    time_start = time.time()

    # Proceed top-down...
    if data_archive.darwin_core_type == data_archive.EVENT:
        this.logger.info(f"1A. Event")
        # For all event records, qc event, then occurrence records
        # (which shall recurse into eMof), then own eMof and then "or" all
        for record in data_archive.event_recs:
            # qc_event shall also take care of emof for event
            # this.logger.info(f"1A. Event")
            qc_ev = qc_event(record, data_archive)
            record["qc"] |= qc_ev

            # Generate key and lookup occurrences...
            key = f"{record['dataprovider_id']}_{record['eventID']}"
            if key in data_archive.occ_indices:
                for occ_record in data_archive.occ_indices[key]:
                    # qc_occurrence shall also take care of emof for occurrence
                    qc_occ = qc_occurrence(occ_record, data_archive)
                    # Check that the combination of event and occurrence has the required fields; assign to the occurrence
                    # Consequence of email 24/01/2021
                    occ_record["qc"] |= required_fields.check_ev_occ_required(
                        record, occ_record, False)
                    occ_record[
                        "qc"] |= qc_occ  # make sure it is assigned other than just calculated
                    occ_record[
                        "qc"] |= qc_ev  # Occurrence also inherit 'father' event qc (email 24/01/2021)
                    # qc_ev |= qc_occ  # No aggregation upwards (email 24/01/2021)

                # No longer true after email 24/01/2021
                # Needs to propagate the REQUIRED FIELDS CHECK for the event and its occurrences
                # qc_req_agg = [record]
                # qc_req_agg.extend(data_archive.occ_indices[key])
                # record["qc"] |= required_fields.check_aggregate(qc_req_agg)

    else:  # Only occurrence and emof records
        this.logger.info(f"1B. Occurence and emof")
        for occ_record in data_archive.occurrence_recs:
            # The QC is either 0 or a QC mask - emof are considered inside the occurrence
            qc_occurrence(occ_record, data_archive)

    # Are there any lookups left to do (any record type)?
    if len(data_archive.records_for_lookup):
        location.check_xy(data_archive.records_for_lookup)
        data_archive.pyxylookup_counter += 1
        dateTimeObj = datetime.now()
        this.logger.debug(
            f"{dateTimeObj}: Lookups C: {data_archive.pyxylookup_counter}")

        # Must propagate the QC of these records (in case)
        if data_archive.darwin_core_type == data_archive.EVENT:
            for looked_up_record in data_archive.records_for_lookup:
                if looked_up_record[
                        "DarwinCoreType"] == data_archive.OCCURRENCE:
                    key = f"{looked_up_record['dataprovider_id']}_{looked_up_record['eventID']}"
                    if key in data_archive.event_indices:
                        data_archive.event_indices[key][0][
                            "qc"] |= looked_up_record["qc"]

        # Empty the list
        data_archive.records_for_lookup = []

    # Disable the QC index - if necessary
    if disable_index:
        if len(data_archive.event_recs) + len(
                data_archive.occurrence_recs) > data_archive.INDEX_TRESHOLD:
            eurobis_dataset.EurobisDataset.disable_qc_index()

    # RECORDS UPDATE!
    this.PROCESS_BATCH_SIZE = 1000  # Shall commit at every batch

    # EVENTS
    if len(data_archive.event_recs):
        # Getting the splits
        split_events_lists = misc.split_in_chunks(data_archive.event_recs,
                                                  this.PROCESS_BATCH_SIZE)

        for idx, process_batch in enumerate(split_events_lists):
            eurobis_dataset.EurobisDataset.update_record_qc(
                process_batch, idx, this.PROCESS_BATCH_SIZE,
                data_archive.dataprovider_id, data_archive.EVENT)

    # OCCURRENCES
    if len(data_archive.occurrence_recs):
        # Getting the splits
        split_occurrences_lists = misc.split_in_chunks(
            data_archive.occurrence_recs, this.PROCESS_BATCH_SIZE)
        for idx, process_batch in enumerate(split_occurrences_lists):
            eurobis_dataset.EurobisDataset.update_record_qc(
                process_batch, idx, this.PROCESS_BATCH_SIZE,
                data_archive.dataprovider_id, data_archive.OCCURRENCE)

    # REBUILD QC index
    if disable_index:
        if len(data_archive.event_recs) + len(
                data_archive.occurrence_recs) > data_archive.INDEX_TRESHOLD:
            eurobis_dataset.EurobisDataset.rebuild_qc_index()

    duration = time.time() - time_start
    # Dataset QC finished, taking note of the time.

    if with_logging:
        this.logger.info(
            f"Total net processing time for {data_archive.dataprovider_id} : "
            f"{data_archive.dataset_name} in: {duration} ")
def process_random_record(with_logging=True):
    """ select a random dataset, then a random core event from it and perform QC """

    # To select a specific type of record:
    # this selects one dataset with fewer than 10000 events/occurrences reported in the dataproviders table.
    # To select event-based datasets, add "where d.core = 2" between "e.dataprovider_id" and "group by".
    sql_random_dataset = f"SELECT TOP 1 d.id, count(e.dataprovider_id) FROM dataproviders d " \
                         f" inner join eurobis e on d.id = e.dataprovider_id group by d.id " \
                         f" having count(e.dataprovider_id) < 10000 ORDER BY NEWID()"

    # Go and get the id!
    dataset_id = None

    # Connect to the database to get dataset list
    if not mssql.conn:
        mssql.open_db()

    if mssql.conn is None:
        # Should find a way to exit and advise the user
        this.logger.error("No connection to DB, nothing can be done! ")
        exit(0)
    else:
        # Fetch one random dataset
        cur = mssql.conn.cursor()
        cur.execute(sql_random_dataset)
        dataset = cur.fetchone()
        dataset_id = dataset[0]

    data_archive = eurobis_dataset.EurobisDataset()
    data_archive.load_dataset(dataset_id)

    if with_logging:
        this.logger.info(f"--------------------------------------------------")
        this.logger.info(
            f"Loaded dataset {data_archive.dataset_name}, id = {data_archive.dataprovider_id} "
        )
        this.logger.info(
            f"Number of event records: {len(data_archive.event_recs)}")
        this.logger.info(
            f"Number of occurrence records: {len(data_archive.occurrence_recs)}"
        )
        this.logger.info(
            f"Number of emof records: {len(data_archive.emof_recs)}")
        this.logger.info(f"Interesting areas: {data_archive.areas}")
        this.logger.info(f"Imis dataset ID: {data_archive.imis_das_id}")
        this.logger.info(
            f"Type of core records: {'Event' if data_archive.darwin_core_type == 2 else 'Occurrence'}"
        )
        this.logger.info(f"--------------------------------------------------")

    # Now everything is in data_archive; we must select a random CORE record and its children, calculate the QC
    # and display all the records that contributed to it.
    # Proceed top-down as in the pipeline ...
    if data_archive.darwin_core_type == data_archive.EVENT:
        # select random core event:
        record_idx = randint(0, len(data_archive.event_recs) - 1)
        record = data_archive.event_recs[record_idx]

        # make sure we start at "Empty"
        record["qc"] = None

        # Perform basic QC:
        qc_ev = mssql_pipeline.qc_event(record, data_archive)
        record["qc"] |= qc_ev  # Make sure it is stamped

        # Generate key and lookup occurrences...
        key = f"{record['dataprovider_id']}_{record['eventID']}"
        if key in data_archive.occ_indices:
            for occ_record in data_archive.occ_indices[key]:
                # qc_occurrence shall also take care of emof for occurrence
                qc_occ = mssql_pipeline.qc_occurrence(occ_record, data_archive)
                qc_occ |= required_fields.check_ev_occ_required(
                    record, occ_record, False)
                occ_record['qc'] |= qc_occ  # also give to occurrence record
                occ_record[
                    'qc'] |= qc_ev  # Inherits the event QC (email 24/01/2021)

            # No longer true as per email 24/01/2021
            # Needs to propagate the REQUIRED FIELDS CHECK for the event and its occurrences
            # qc_req_agg = [record]
            # qc_req_agg.extend(data_archive.occ_indices[key])
            # record["qc"] |= required_fields.check_aggregate(qc_req_agg)
            # qc_ev |= record["qc"]

        # Are there any lookups left to do (any record type)
        if len(data_archive.records_for_lookup):
            location.check_xy(data_archive.records_for_lookup)

            # Need to propagate the (new) QC of the events down to the occurrences records
            for looked_up_record in data_archive.records_for_lookup:
                if looked_up_record["DarwinCoreType"] == data_archive.EVENT:
                    key = f"{looked_up_record['dataprovider_id']}_{looked_up_record['eventID']}"
                    if key in data_archive.occ_indices:
                        for occ_record in data_archive.occ_indices[key]:
                            occ_record["qc"] |= looked_up_record["qc"]

        this.logger.info(f"Calculated quality mask: {qc_ev}, consisting of:")
        this.logger.info(
            f"QC NUMBERS: -------------> {QCFlag.decode_numbers(record['qc'])}"
        )
        this.logger.info(
            f"QC FLAG NAMES: ----------> {QCFlag.decode_mask(record['qc'])}")
        this.logger.info(f"--------------------------------------------------")
        this.logger.info(f"Event Record: {record}")
        this.logger.info(f"--------------------------------------------------")

        if key in data_archive.occ_indices:
            for occ_record in data_archive.occ_indices[key]:
                this.logger.info(f"Occurrence Record: {occ_record}")
                this.logger.info(
                    f"Calculated quality mask: {occ_record['qc']}, consisting of:"
                )
                this.logger.info(
                    f"QC NUMBERS: -------------> {QCFlag.decode_numbers(occ_record['qc'])}"
                )
                this.logger.info(
                    f"QC FLAG NAMES: ----------> {QCFlag.decode_mask(occ_record['qc'])}"
                )
                this.logger.info(
                    f"--------------------------------------------------")
                key_o = f"{occ_record['dataprovider_id']}_" \
                        f"{'NULL' if occ_record['eventID'] is None else occ_record['eventID']}_" \
                        f"{'NULL' if occ_record['occurrenceID'] is None else occ_record['occurrenceID']}"
                if key_o in data_archive.emof_indices:
                    for emof in data_archive.emof_indices[key_o]:
                        this.logger.info(f"eMoF Record: {emof}")
                        this.logger.info(
                            f"--------------------------------------------------"
                        )

        if key in data_archive.emof_indices:
            for emof in data_archive.emof_indices[key]:
                this.logger.info(f"eMoF Record for event: {emof}")
                this.logger.info(
                    f"--------------------------------------------------")

    else:
        # The QC is either 0 or a QC mask
        record_idx = randint(0, len(data_archive.occurrence_recs) - 1)
        record = data_archive.occurrence_recs[record_idx]
        qc_occ = mssql_pipeline.qc_occurrence(record, data_archive)

        # Are there any lookups left to do (any record type)?
        if len(data_archive.records_for_lookup):
            location.check_xy(data_archive.records_for_lookup)

            for lookup_record in data_archive.records_for_lookup:
                record['qc'] |= lookup_record["qc"]
                qc_occ |= lookup_record["qc"]

            data_archive.records_for_lookup = []

        this.logger.info(f"Calculated quality mask: {qc_occ}, consisting of:")
        this.logger.info(
            f"QC NUMBERS: -------------> {QCFlag.decode_numbers(qc_occ)}")
        this.logger.info(
            f"QC FLAG NAMES: ----------> {QCFlag.decode_mask(qc_occ)}")
        this.logger.info(f"--------------------------------------------------")
        this.logger.info(f"Occurrence Record: {record}")
        this.logger.info(f"--------------------------------------------------")

        key_o = f"{record['dataprovider_id']}_NULL_" \
                f"{'NULL' if record['occurrenceID'] is None else record['occurrenceID']}"
        if key_o in data_archive.emof_indices:
            for emof in data_archive.emof_indices[key_o]:
                this.logger.info(f"eMoF Record: {emof}")
                this.logger.info(
                    f"--------------------------------------------------")