Example #1
    def get_provider_data(self, das_prov_id):
        """ Obtain the dataset info from the dataproviders table
            and stores that into the instance fields
            :param - das_prov_id - Dataset id """

        if not mssql.conn:
            mssql.open_db()

        if mssql.conn is None:
            # Should find a way to exit and advise
            pass
        else:
            # Can start querying the DB using the dataset provider ID
            sql_str = f"select * from dataproviders d where id = {das_prov_id};"

            # Fetch the provider line
            cur = mssql.conn.cursor()
            cur.execute(sql_str)
            # Column names, used to build a dict from the row
            provider_fields = [
                description[0] for description in cur.description
            ]

            provider = cur.fetchone()

            self.provider_record = dict(zip(provider_fields, provider))
            self.imis_das_id = self.provider_record['IMIS_DasID']
            self.dataprovider_id = self.provider_record['id']  # Of course we have this already
            self.darwin_core_type = self.provider_record['core']
            self.dataset_name = self.provider_record['name']
            # Should see if more is needed
            mssql.close_db()
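
A safer variant of the lookup above would bind the id instead of interpolating it into the SQL string; a minimal sketch, assuming the driver behind mssql.conn accepts pyodbc-style '?' parameter markers:

    # Hypothetical sketch: the same provider lookup with a bound parameter
    cur = mssql.conn.cursor()
    cur.execute("select * from dataproviders where id = ?;", (das_prov_id,))
    provider_fields = [description[0] for description in cur.description]
    provider_record = dict(zip(provider_fields, cur.fetchone()))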
Example #2
    def disable_qc_index(cls):
        """ Disable non-clustered index on QC
            important - this depends on the index name being kept the same in the
            variable sql_disable_index below
            """

        # Check DB connection...
        if not mssql.conn:
            mssql.open_db()

        if mssql.conn is None:
            # Should find a way to exit and advise (maybe raise an exception)
            return "Fail, no connection"
        else:
            try:
                sql_disable_index = "ALTER INDEX IX_eurobis_qc ON dbo.eurobis DISABLE;"
                cursor = mssql.conn.cursor()
                cursor.execute(sql_disable_index)
                mssql.conn.commit()
                this.logger.debug(f"Non clustered index on qc disabled")
                mssql.close_db()
                return "Success"
            except Error as e:
                mssql.close_db()
                return f"Failed to disable clustered index: {e}"
Example #3
    def test_connect(self):
        """ Demonstrates connectivity with a MS SQL Server DB and
            queries all records in a table, outputting the results
            as a Python dictionary. """

        mssql_db_functions.open_db()
        cursor = mssql_db_functions.conn.cursor()
        cursor.execute(f'SELECT * FROM {self.table};')
        columns = [column[0] for column in cursor.description]
        results = []
        for row in cursor:
            results.append(dict(zip(columns, row)))

        this.logger.info(results)
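
The column-names/zip pattern used here recurs in almost every example; a small hypothetical helper could factor it out:

    def rows_as_dicts(cursor):
        # Map each row of an already-executed cursor to a dict keyed by column name
        columns = [column[0] for column in cursor.description]
        return [dict(zip(columns, row)) for row in cursor]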
Example #4
    def get_ev_occ_records(self, das_prov_id):
        """ retrieves event and occurrence records for the dataset in das_id from SQL Server
            also builds search indexes to easily retrieve dependant records
            :param das_prov_id """
        if not mssql.conn:
            mssql.open_db()

        if mssql.conn is None:
            # Should find a way to exit and advise
            pass
        else:
            # get records
            sql_string = self.query_builder_eve_occur(das_prov_id)
            cursor = mssql.conn.cursor()
            cursor.execute(sql_string)

            columns = [column[0] for column in cursor.description]
            records = []
            for row in cursor:
                records.append(dict(zip(columns, row)))

            # We proceed differently in accordance with the type of CORE record...
            if self.darwin_core_type == self.EVENT:
                for record in records:
                    if record['DarwinCoreType'] == self.OCCURRENCE:
                        self.occurrence_recs.append(record)
                        if record['eventID'] is not None:  # To link we need an eventID
                            # Make a key - records must be linked by the eventID
                            key = f"{record['dataprovider_id']}_{record['eventID']}"
                            if key in self.occ_indices:
                                # All the records with this key shall be in a list at 'key'
                                self.occ_indices[key].append(record)
                            else:
                                self.occ_indices[key] = [record]
                    elif record['DarwinCoreType'] == self.EVENT:
                        self.event_recs.append(record)
                        # If core type is event, then eventID is the key - only used for reverse lookup.
                        key = f"{record['dataprovider_id']}_{record['eventID']}"
                        self.event_indices[key] = [record]

            else:  # Occurrence records (Datasets with core_type = OCCURRENCE do not have events - verified)
                for record in records:
                    if record['DarwinCoreType'] == self.OCCURRENCE:
                        self.occurrence_recs.append(record)
                    else:  # Should really never happen !!
                        self.event_recs.append(record)
            mssql.close_db()
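
The key-existence checks used to build occ_indices above amount to a hand-rolled multimap; a sketch of the same indexing with collections.defaultdict (occurrence_records is a hypothetical stand-in for the OCCURRENCE branch above):

    from collections import defaultdict

    occ_indices = defaultdict(list)
    for record in occurrence_records:
        if record['eventID'] is not None:  # To link we need an eventID
            key = f"{record['dataprovider_id']}_{record['eventID']}"
            occ_indices[key].append(record)  # No key-existence check needed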
Example #5
    def get_emof_records(self, das_prov_id):
        """ retrieves measurementorfact records for the dataset in das_id from SQL Server
            also building search indexes to easily retrieve dependant records
            NOTE: mof records do not exist for records that have eventID and occurrenceID NULL
            :param das_prov_id"""

        if not mssql.conn:
            mssql.open_db()

        if mssql.conn is None:
            # Should find a way to exit and advise
            pass
        else:
            # get records
            sql_string = self.query_builder_emof(das_prov_id)
            cursor = mssql.conn.cursor()
            cursor.execute(sql_string)

            columns = [column[0] for column in cursor.description]
            records = []
            for row in cursor:
                records.append(dict(zip(columns, row)))

            for record in records:
                self.emof_recs.append(record)

                if self.darwin_core_type == self.EVENT:

                    key = f"{record['dataprovider_id']}_{'NULL' if record['eventID'] is None else record['eventID']}_" \
                          f"{'NULL' if record['occurrenceID'] is None else record['occurrenceID']}"
                else:
                    # Occurrence records in datasets with core = Occurrence may have other info in
                    # eventID, which is NULL in the eMoF record.
                    key = f"{record['dataprovider_id']}_NULL_" \
                          f"{'NULL' if record['occurrenceID'] is None else record['occurrenceID']}"

                if key in self.emof_indices:
                    # All the records with this key shall be in a list at 'key'
                    self.emof_indices[key].append(record)
                else:
                    self.emof_indices[key] = [record]
            mssql.close_db()
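
The same three-part key format reappears in Example #15 when eMoF records are looked up; a hypothetical helper would make the format explicit:

    def emof_key(dataprovider_id, event_id, occurrence_id):
        # Key format: <dataprovider_id>_<eventID or NULL>_<occurrenceID or NULL>
        ev = 'NULL' if event_id is None else event_id
        occ = 'NULL' if occurrence_id is None else occurrence_id
        return f"{dataprovider_id}_{ev}_{occ}"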
Example #6
    def test_retrieve_sample_dataset(self):
        """ simple example to retrieve a dataset of only occurrences and measurementorfact records
            Note: This is not useful for the update, as there are no ids to push the QC back to DB
            for the moment it is only an experiment. (and the number of datasets does not match)
        """

        start_time = time.time()

        collection = 'BayOfPuck'

        mssql_db_functions.open_db()
        cursor1 = mssql_db_functions.conn.cursor()
        cursor1.execute(
            f"SELECT * from eurobis where collectionCode ='{collection}';")
        columns = [column[0] for column in cursor1.description]
        event_or_occurrences = []
        for row in cursor1:
            event_or_occurrences.append(dict(zip(columns, row)))
        # cursor1.close()

        # cursor2 = mssql_db_functions.conn.cursor()
        cursor1.execute(
            f"SELECT  eurobis_measurementorfact.* from eurobis join eurobis_measurementorfact on "
            f"eurobis.occurrenceID = "
            f"eurobis_measurementorfact.occurrenceID where eurobis.CollectionCode ='{collection}'"
        )

        columns = [column[0] for column in cursor1.description]
        emof = []
        for row in cursor1:
            emof.append(dict(zip(columns, row)))
        cursor1.close()

        this.logger.info(f"Duration: {time.time() - start_time}")
        this.logger.info(
            f"Number of event/occurrences: {len(event_or_occurrences)}, number of emof records: {len(emof)}"
        )
        this.logger.info(event_or_occurrences[-1])
        this.logger.info(emof[-1])
Example #7
    def rebuild_qc_index(cls):
        """ Rebuild non-clustered index on QC
            important - this depends on the index name as per disable_qc_index """

        # Check DB connection...
        if not mssql.conn:
            mssql.open_db()

        if mssql.conn is None:
            # Should find a way to exit and advise (maybe raise an exception)
            return "Fail, no connection"
        else:
            try:
                sql_disable_index = "ALTER INDEX IX_eurobis_qc ON dbo.eurobis REBUILD;"
                cursor = mssql.conn.cursor()
                cursor.execute(sql_disable_index)
                mssql.conn.commit()
                this.logger.debug(f"Non clustered index on qc rebuilt")
                mssql.close_db()
                return "Success"
            except Error as e:
                mssql.close_db()
                return f"Failed to rebuild clustered index: {e}"
Example #8
def grab_datasets(sql_string):
    """ Queries the database and returns a sorted list of formatted dataset names """
    if mssql_db_functions.conn is None:
        this.conn = mssql_db_functions.open_db()

    if this.conn is not None:
        cursor = this.conn.cursor()
        cursor.execute(sql_string)
        # Should extract something meaningful from the dataset...
        for row in cursor:
            this.names.append(f"{row[0]:05d} | {row[1]}")
            # this.dataset_ids.append(row[0])

        this.names.sort()
        return this.names
    else:
        this.logger.error("No DB connection!")
        exit(1)
Example #9
def grab_datasets(sql_string):
    """ Queries the database to retrieve a list of all datasets with records
        in eurobis IMPORTANT, datasets are sorted """
    this.dataset_ids = []
    this.dataset_names = []

    if mssql_db_functions.conn is None:
        this.conn = mssql_db_functions.open_db()

    if this.conn is not None:
        cursor = this.conn.cursor()
        cursor.execute(sql_string)
        # Should extract something meaningful from the dataset...
        for row in cursor:
            this.dataset_ids.append(row[0])
            this.dataset_names.append(row[1])
    else:
        this.logger.error("No DB connection!")
        exit(1)
Example #10
    def test_query_builder_emof(self):
        """ Retrieving eMof records in a dataset from MS SQL
            using the assembled query string """

        data_archive = eurobis_dataset.EurobisDataset()

        sql_string = data_archive.query_builder_emof(self.test_dataset)

        # Try to execute it..
        if not mssql_db_functions.conn:
            conn = mssql_db_functions.open_db()
        else:
            conn = mssql_db_functions.conn

        cursor = conn.cursor()
        cursor.execute(sql_string)

        columns = [column[0] for column in cursor.description]
        records = []
        for row in cursor:
            records.append(dict(zip(columns, row)))

        assert len(records) == self.test_dataset_emof
Example #11
def grab_datasets_optimized(sql_string):
    """ Queries the database to retrieve a list of all datasets with records
        in eurobis IMPORTANT, datasets are sorted """
    this.dataset_ids = []
    this.dataset_names = []
    this.dataset_numbers = []

    if mssql_db_functions.conn is None:
        this.conn = mssql_db_functions.open_db()

    if this.conn is not None:
        cursor = this.conn.cursor()
        cursor.execute(sql_string)
        # Should extract something meaningful from the dataset...
        for row in cursor:
            this.dataset_ids.append(row[0])
            this.dataset_names.append(row[1])
            this.dataset_numbers.append(row[2])
        #sql_trans = f"SET TRANSACTION ISOLATION LEVEL SNAPSHOT;\n"
        #cursor.execute(sql_trans)
        #this.logger.info("SET TRANSACTION ISOLATION")
    else:
        this.logger.error("No DB connection!")
        exit(1)
Example #12
def process_dataset_list(pool_no,
                         dataset_id_list,
                         from_pool=False,
                         with_logging=False):
    """ Processes a list of DwCA archives, ideally to be called in parallel
        :param pool_no - Pool number to take track of the pools
        :param dataset_id_list (The list of datasets to be processed)
        :param from_pool: If it comes from a multiprocessing pool, then disabling of the QC index is taken care of
        :param with_logging (Logging enabled or not) """
    # Record the start time and announce the pool
    start = time.time()
    if with_logging:
        this.logger.info(f"Pool {pool_no} started")

    # Connect to the database, each pool should have its own connection
    conn = None

    mssql.close_db()
    # Make sure db connection is ours
    if not mssql.conn:
        conn = mssql.open_db()

    if conn is None:
        # Should find a way to exit and advise
        this.logger.error("No connection to DB, nothing can be done!")
        return pool_no, len(dataset_id_list)  # Same shape as the normal return; all datasets count as errors

    # Disable index on QC once
    if not from_pool:
        eurobis_dataset.EurobisDataset.disable_qc_index()

    errors = 0

    for dataset_id in dataset_id_list:
        start_file = time.time()

        if with_logging:
            this.logger.info(
                f"Pool Number: {pool_no}, processing dataset {dataset_id}")

        try:
            dataset_qc_labeling(dataset_id, False, with_logging, pool_no)
        except Exception:
            errors += 1
            this.logger.error(traceback.format_exc())
            this.logger.warning(
                f"WARNING: Pool Number: {pool_no}, processing dataset {dataset_id} FAILED"
            )

        if with_logging:
            this.logger.info(
                f"Processed dataset {dataset_id} in {time.time() - start_file}"
            )

    # REBUILD index on QC once
    if not from_pool:
        eurobis_dataset.EurobisDataset.rebuild_qc_index()

    if with_logging:
        this.logger.info(f"Pool {pool_no} completed in {time.time() - start}")

    return pool_no, errors
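
When from_pool is False, process_dataset_list disables and rebuilds the QC index itself; a minimal sketch of a direct, single-process run (the dataset ids are hypothetical):

    # Direct run outside a multiprocessing pool
    pool_no, errors = process_dataset_list(0, [447, 1082], from_pool=False, with_logging=True)
    if errors:
        this.logger.warning(f"{errors} dataset(s) failed in pool {pool_no}")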
Example #13
    def update_record_qc(cls, records, batch_update_count, batch_size, ds_id,
                         record_type):
        """ Shall update a batch of records from a dataset
            update queries shall be built record by record and sent to the DB in batches for execution
            :param records : A list of the records being updated
            :param batch_update_count: The dataset's batch number
            :param batch_size : Here only used to report the status of the update
            :param ds_id: dataset being processed
            :param record_type : For query optimization, occurrence and event records may be treated differently
            also used for printing
            NOTE: Special methods are provided to DISABLE the index on QC and REBUILD it after the updates.
            These improve vastly the query run time.
            """

        # Check DB connection...
        if not mssql.conn:
            mssql.open_db()

        if mssql.conn is None:
            # Should find a way to exit and advise (maybe raise an exception)
            return "Fail, no connection"
        else:
            record_count = len(records)
            # sql_update = f"BEGIN TRAN; \n"
            sql_update = ""
            for record in records:
                # Compose update query
                # physloc = bytes.hex(record['physloc'])

                # Note The fields other than physloc and dataprovider_id are used to optimize
                # the update queries execution plans and thus to reduce browsing the records
                # and using the existing indexes on the eurobis table. Observed speed improvements
                # are between 2.5 and 5 times faster.

                # This is a temporary fix - some qc values are set to None.
                if record['qc'] is None:
                    record['qc'] = 0

                sql_update += f"{cls.sql_update_start}{record['qc']}{cls.sql_update_middle} {record['dataprovider_id']}"
                """
                Disabled - using auto_id now.
                if record['decimalLatitude'] is not None:
                    sql_update = f"{sql_update}{cls.sql_if_lat}{record['decimalLatitude']}"
                else:
                    sql_update = f"{sql_update} AND Latitude IS NULL "

                if record['decimalLongitude'] is not None:
                    sql_update = f"{sql_update} {cls.sql_if_lon}{record['decimalLongitude']}"
                else:
                    sql_update = f"{sql_update} AND Longitude IS NULL "

                if record_type == EurobisDataset.EVENT:
                    if record['eventID'] is not None and misc.is_clean_for_sql(record['eventID']):
                        sql_update = f"{sql_update} {cls.sql_if_event_id}'{record['eventID']}'"
                """

                # sql_update = f"{sql_update} {cls.sql_update_end} 0x{physloc} \n"
                sql_update = f"{sql_update} {cls.sql_update_end} {record['auto_id']} \n"

            try:
                # sql_update += f"COMMIT TRAN;\n"
                cursor = mssql.conn.cursor()
                cursor.execute(sql_update)
                mssql.conn.commit()
                rec_type = "EVENT" if record_type == EurobisDataset.EVENT else "OCCURRENCE"
                dateTimeObj = datetime.now()
                this.logger.debug(
                    f"{dateTimeObj}: {rec_type} records update count: {batch_update_count * batch_size + record_count} "
                    f"of dataset {ds_id};")
                batch_update_count += 1
                return "Success"
            except Error as e:
                return f"Fail, batch {batch_update_count} not updated, exception {e}"
            finally:
                # Close the DB in a finally clause (it runs even after the returns above)
                # to make sure that transactions are "separated".
                mssql.close_db()
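
A sketch of how a caller might feed update_record_qc in fixed-size batches; the helper name and batch size are assumptions, while the argument order matches the signature above:

    BATCH_SIZE = 500  # Assumed batch size

    def update_in_batches(records, ds_id, record_type):
        # Slice the record list into BATCH_SIZE chunks and push each batch to the DB
        for batch_no, start in enumerate(range(0, len(records), BATCH_SIZE)):
            batch = records[start:start + BATCH_SIZE]
            result = EurobisDataset.update_record_qc(batch, batch_no, BATCH_SIZE,
                                                     ds_id, record_type)
            if result != "Success":
                this.logger.error(result)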
Example #14
def do_db_multi_random_percent(percent):
    """ Example of processing multiple datasets at the same time in
        order to exploit the computing resources (cores) performs a random
        selection of 2% of available datasets having less than 4000 records """

    start_time = time.time()

    # Now set to a percent of datasets...
    # sql_random_percent_of_datasets = f"SELECT id FROM dataproviders WHERE {percent} >= CAST(CHECKSUM(NEWID(), id) " \
    #                                  "& 0x7fffffff AS float) / CAST (0x7fffffff AS int)"

    # This selects percent from SMALL datasets (less than 4000 events/occurrences)
    sql_random_percent_of_datasets = f"select a.id, a.displayname, a.rec_count from " \
                                     f"(select d.id, d.displayname, rec_count from dataproviders d " \
                                     f"inner join eurobis e on e.dataprovider_id = d.id " \
                                     f"where rec_count <= 4000 group by d.id, d.displayname, rec_count) a " \
                                     f"where {percent} >= CAST(CHECKSUM(NEWID(), id) & 0x7fffffff AS float) " \
                                     f"/ CAST (0x7fffffff AS int) order by id "

    dataset_ids = []
    dataset_names = []

    # Dedicate to the task all processors minus a reserve: 1 for the OS,
    # plus 2 more when SQL Server runs locally (it needs at least 2 cores).
    reserve_cpus = 1 + (0 if not mssql.server_local else 2)

    if mp.cpu_count() > reserve_cpus:
        n_cpus = mp.cpu_count() - reserve_cpus
    else:
        n_cpus = 1

    pool = mp.Pool(n_cpus)

    # Connect to the database to get dataset list
    if not mssql.conn:
        mssql.open_db()

    if mssql.conn is None:
        # Should find a way to exit and advise
        this.logger.error("No connection to DB, nothing can be done!")
        exit(1)
    else:
        # Fetch a random set of datasets
        cur = mssql.conn.cursor()
        cur.execute(sql_random_percent_of_datasets)
        for row in cur:
            dataset_ids.append(row[0])
            # tuples names - size
            dataset_names.append((row[0], row[1], row[2]))

    mssql.close_db()

    # Retrieved list, now need to split
    dataset_id_lists = misc.split_list(dataset_ids, n_cpus)
    dataset_names_lists = misc.split_list(dataset_names, n_cpus)

    result_pool = []
    for i, dataset_id_list in enumerate(dataset_id_lists):
        this.logger.info(f"Pool {i} will process {dataset_names_lists[i]} ")
        result_pool.append(pool.apply_async(process_dataset_list, args=(i, dataset_id_list, True, True)))

    for r in result_pool:
        res = r.get()
        if res[1] > 0:
            this.logger.warning(f"Pool {res[0]} failed to process {res[1]} datasets")

    pool.terminate()
    pool.join()

    this.logger.info(f"Started at: {start_time}. All processes have completed after {time.time() - start_time}")
Example #15
def process_random_record(with_logging=True):
    """ select a random dataset, then a random core event from it and perform QC """

    # To select a specific type of record:
    # this selects 1 dataset with less than 10000 events/occurrences reported in the dataproviders table.
    # To select event-based datasets, add this between e.dataprovider_id and group by: where d.core = 2
    sql_random_dataset = f"SELECT TOP 1  d.id, count(e.dataprovider_id) FROM  dataproviders d " \
                         f" inner join eurobis e on d.id = e.dataprovider_id  group by d.id " \
                         f" having count(e.dataprovider_id) < 10000 ORDER BY NEWID()"

    # Go and get the id!
    dataset_id = None

    # Connect to the database to get dataset list
    if not mssql.conn:
        mssql.open_db()

    if mssql.conn is None:
        # Should find a way to exit and advise
        this.logger.error("No connection to DB, nothing can be done!")
        exit(1)
    else:
        # Fetch a random set of datasets
        cur = mssql.conn.cursor()
        cur.execute(sql_random_dataset)
        dataset = cur.fetchone()
        dataset_id = dataset[0]

    data_archive = eurobis_dataset.EurobisDataset()
    data_archive.load_dataset(dataset_id)

    if with_logging:
        this.logger.info(f"--------------------------------------------------")
        this.logger.info(
            f"Loaded dataset {data_archive.dataset_name}, id = {data_archive.dataprovider_id} "
        )
        this.logger.info(
            f"Number of event records: {len(data_archive.event_recs)}")
        this.logger.info(
            f"Number of occurrence records: {len(data_archive.occurrence_recs)}"
        )
        this.logger.info(
            f"Number of emof records: {len(data_archive.emof_recs)}")
        this.logger.info(f"Interesting areas: {data_archive.areas}")
        this.logger.info(f"Imis dataset ID: {data_archive.imis_das_id}")
        this.logger.info(
            f"Type of core records: {'Event' if data_archive.darwin_core_type == 2 else 'Occurrence'}"
        )
        this.logger.info(f"--------------------------------------------------")

    # Now everything is in data_archive, we must select a random CORE record, and its children, calculate QC and
    # display all records that originate that reasoning.
    # Proceed top-down as in pipeline ...
    if data_archive.darwin_core_type == data_archive.EVENT:
        # select random core event:
        record_idx = randint(0, len(data_archive.event_recs) - 1)
        record = data_archive.event_recs[record_idx]

        # Make sure we start at "Empty" - 0 rather than None, so the |= below works
        record["qc"] = 0

        # Perform basic QC:
        qc_ev = mssql_pipeline.qc_event(record, data_archive)
        record["qc"] |= qc_ev  # Make sure it is stamped

        # Generate key and lookup occurrences...
        key = f"{record['dataprovider_id']}_{record['eventID']}"
        if key in data_archive.occ_indices:
            for occ_record in data_archive.occ_indices[key]:
                # qc_occurrence shall also take care of eMoF for occurrence
                qc_occ = mssql_pipeline.qc_occurrence(occ_record, data_archive)
                qc_occ |= required_fields.check_ev_occ_required(
                    record, occ_record, False)
                occ_record['qc'] |= qc_occ  # Also give to the occurrence record
                occ_record['qc'] |= qc_ev  # Inherits the event QC (email 24/01/2021)

            # No longer true as per email 24/01/2021
            # Needs to propagate the REQUIRED FIELDS CHECK for the event and its occurrences
            # qc_req_agg = [record]
            # qc_req_agg.extend(data_archive.occ_indices[key])
            # record["qc"] |= required_fields.check_aggregate(qc_req_agg)
            # qc_ev |= record["qc"]

        # Are there any lookups left to do (any record type)?
        if len(data_archive.records_for_lookup):
            location.check_xy(data_archive.records_for_lookup)

            # Need to propagate the (new) QC of the events down to the occurrences records
            for looked_up_record in data_archive.records_for_lookup:
                if looked_up_record["DarwinCoreType"] == data_archive.EVENT:
                    key = f"{looked_up_record['dataprovider_id']}_{looked_up_record['eventID']}"
                    if key in data_archive.occ_indices:
                        for occ_record in data_archive.occ_indices[key]:
                            occ_record["qc"] |= looked_up_record["qc"]

        this.logger.info(f"Calculated quality mask: {qc_ev}, consisting of:")
        this.logger.info(
            f"QC NUMBERS: -------------> {QCFlag.decode_numbers(record['qc'])}"
        )
        this.logger.info(
            f"QC FLAG NAMES: ----------> {QCFlag.decode_mask(record['qc'])}")
        this.logger.info(f"--------------------------------------------------")
        this.logger.info(f"Event Record: {record}")
        this.logger.info(f"--------------------------------------------------")

        if key in data_archive.occ_indices:
            for occ_record in data_archive.occ_indices[key]:
                this.logger.info(f"Occurrence Record: {occ_record}")
                this.logger.info(
                    f"Calculated quality mask: {occ_record['qc']}, consisting of:"
                )
                this.logger.info(
                    f"QC NUMBERS: -------------> {QCFlag.decode_numbers(occ_record['qc'])}"
                )
                this.logger.info(
                    f"QC FLAG NAMES: ----------> {QCFlag.decode_mask(occ_record['qc'])}"
                )
                this.logger.info(
                    f"--------------------------------------------------")
                key_o = f"{occ_record['dataprovider_id']}_" \
                        f"{'NULL' if occ_record['eventID'] is None else occ_record['eventID']}_" \
                        f"{'NULL' if occ_record['occurrenceID'] is None else occ_record['occurrenceID']}"
                if key_o in data_archive.emof_indices:
                    for emof in data_archive.emof_indices[key_o]:
                        this.logger.info(f"eMoF Record: {emof}")
                        this.logger.info(
                            f"--------------------------------------------------"
                        )

        if key in data_archive.emof_indices:
            for emof in data_archive.emof_indices[key]:
                this.logger.info(f"eMoF Record for event: {emof}")
                this.logger.info(
                    f"--------------------------------------------------")

    else:
        # The QC is either 0 or a QC mask
        record_idx = randint(0, len(data_archive.occurrence_recs) - 1)
        record = data_archive.occurrence_recs[record_idx]
        qc_occ = mssql_pipeline.qc_occurrence(record, data_archive)

        # Are there any lookups left to do (any record type)?
        if len(data_archive.records_for_lookup):
            location.check_xy(data_archive.records_for_lookup)

            for lookup_record in data_archive.records_for_lookup:
                record['qc'] |= lookup_record["qc"]
                qc_occ |= lookup_record["qc"]

            data_archive.records_for_lookup = []

        this.logger.info(f"Calculated quality mask: {qc_occ}, consisting of:")
        this.logger.info(
            f"QC NUMBERS: -------------> {QCFlag.decode_numbers(qc_occ)}")
        this.logger.info(
            f"QC FLAG NAMES: ----------> {QCFlag.decode_mask(qc_occ)}")
        this.logger.info(f"--------------------------------------------------")
        this.logger.info(f"Occurrence Record: {record}")
        this.logger.info(f"--------------------------------------------------")

        key_o = f"{record['dataprovider_id']}_NULL_" \
                f"{'NULL' if record['occurrenceID'] is None else record['occurrenceID']}"
        if key_o in data_archive.emof_indices:
            for emof in data_archive.emof_indices[key_o]:
                this.logger.info(f"eMoF Record: {emof}")
                this.logger.info(
                    f"--------------------------------------------------")