Esempio n. 1
0
    def test_update_status(self, mock_get_cosmos_client):
        dsh = DataSetHelper()

        latest_dataset_doc = {}
        latest_dataset_doc["version"] = 3
        latest_dataset_doc["builds"] = {"courses": {"status": "pending"}}
        latest_dataset_doc["updated_at"] = "dave"
        dsh.get_latest_doc = mock.MagicMock(return_value=latest_dataset_doc)

        dsh.cosmos_client.UpsertItem = mock.MagicMock()

        dsh.update_status("courses", "in progress", "dave")

        expected_connection_link = (
            "dbs/test-db-id/colls/test-dataset-collection-id")
        expected_dataset_doc = {}
        expected_dataset_doc["version"] = 3
        expected_dataset_doc["builds"] = {"courses": {"status": "in progress"}}
        expected_dataset_doc["updated_at"] = "dave"
        dsh.cosmos_client.UpsertItem.assert_called_once_with(
            expected_connection_link, expected_dataset_doc)
def main(msgin: func.QueueMessage, msgout: func.Out[str]):
    # TODO: apw: Ensure that UseLocalTestXMLFile is set to false in local.settings.json before going live.
    use_local_test_XML_file = os.environ.get('UseLocalTestXMLFile')

    msgerror = ""

    mail_helper = MailHelper()
    environment = os.environ["Environment"]

    dsh = DataSetHelper()

    try:
        logging.info(f"CreateInst message queue triggered\n")

        function_start_datetime = datetime.today().strftime(
            "%d-%m-%Y %H:%M:%S")

        logging.info(
            f"CreateInst function started on {function_start_datetime}")
        """ DECOMPRESSION - Decompress the compressed HESA XML """
        # The XML blob provided to this function will be gzip compressed.
        # This is a work around for a limitation discovered in Azure,
        # where Functions written in Python do not get triggered # correctly with large blobs. Tests showed this is not a limitation
        # with Funtions written in C#.

        blob_helper = BlobHelper()

        storage_container_name = os.environ["AzureStorageHesaContainerName"]
        storage_blob_name = os.environ["AzureStorageHesaBlobName"]

        if use_local_test_XML_file:
            mock_xml_source_file = open(os.environ["LocalTestXMLFile"], "r")
            xml_string = mock_xml_source_file.read()
        else:
            xml_string = blob_helper.get_str_file(storage_container_name,
                                                  storage_blob_name)

        version = dsh.get_latest_version_number()
        """ LOADING - extract data and load JSON Documents """

        logging.info(f"using version number: {version}")
        dsh.update_status("institutions", "in progress")

        inst_docs = InstitutionDocs(xml_string, version)
        inst_docs.create_institution_docs()
        dsh.update_status("institutions", "succeeded")

        function_end_datetime = datetime.today().strftime("%d-%m-%Y %H:%M:%S")

        logging.info(
            f"CreateInst successfully finished on {function_end_datetime}")

        msgout.set(msgin.get_body().decode("utf-8") + msgerror)

    except exceptions.StopEtlPipelineWarningException:

        # A WARNING is raised while the function is running and
        # StopEtlPipelineOnWarning=True. For example, the incoming raw XML
        # is not valid against its XSD
        error_message = (
            "A WARNING has been encountered while the function is running. "
            "The function will be stopped since StopEtlPipelineOnWarning is "
            "set to TRUE in the Application Settings.")
        logging.error(error_message)

        function_fail_datetime = datetime.today().strftime("%d-%m-%Y %H:%M:%S")
        function_fail_date = datetime.today().strftime("%d.%m.%Y")

        mail_helper.send_message(
            f"Automated data import failed on {function_fail_datetime} at CreateInst"
            + msgin.get_body().decode("utf-8") + msgerror,
            f"Data Import {environment} - {function_fail_date} - Failed")

        logging.error(f"CreateInst failed on {function_fail_datetime}")
        dsh.update_status("institutions", "failed")
        raise Exception(error_message)

    except Exception as e:
        # Unexpected exception
        dsh.update_status("institutions", "failed")

        function_fail_datetime = datetime.today().strftime("%d-%m-%Y %H:%M:%S")
        function_fail_date = datetime.today().strftime("%d.%m.%Y")

        mail_helper.send_message(
            f"Automated data import failed on {function_fail_datetime} at CreateInst"
            + msgin.get_body().decode("utf-8") + msgerror,
            f"Data Import {environment} - {function_fail_date} - Failed")

        logging.error(f"CreateInst failed on {function_fail_datetime}",
                      exc_info=True)

        # Raise to Azure
        raise e
def main(msgin: func.QueueMessage, msgout: func.Out[str]):
    """Creates the UKRLP lookup tables for later use

    This Azure Function carries out the following steps:
    * Decompresses the XML HESA DataSet

    * Parses the INSTITUTION data from the DataSet

    * Retrieves enrichment data from the UKRLP API for each institution

    * Creates a lookup item for each Institution and writes it to CosmosDB

    * Currently, once completed successfully this function triggers the Etl function by copying
      the compressed XML passed in to a Blob storage monitored by the Etl function.

    """
    # TODO: apw: Ensure that UseLocalTestXMLFile is set to false in local.settings.json before going live.
    use_local_test_XML_file = os.environ.get('UseLocalTestXMLFile')

    msgerror = ""

    mail_helper = MailHelper()
    environment = os.environ["Environment"]

    dsh = DataSetHelper()

    try:
        logging.info(f"CreateUkrlp message queue triggered")

        function_start_datetime = datetime.today().strftime(
            "%d-%m-%Y %H:%M:%S")

        logging.info(
            f"CreateUkrlp function started on {function_start_datetime}")

        blob_helper = BlobHelper()

        storage_container_name = os.environ["AzureStorageHesaContainerName"]
        storage_blob_name = os.environ["AzureStorageHesaBlobName"]

        if use_local_test_XML_file:
            mock_xml_source_file = open(os.environ["LocalTestXMLFile"], "r")
            xml_string = mock_xml_source_file.read()
        else:
            xml_string = blob_helper.get_str_file(storage_container_name,
                                                  storage_blob_name)

        version = dsh.get_latest_version_number()

        storage_container_name = os.environ[
            "AzureStorageWelshUnisContainerName"]
        storage_blob_name = os.environ["AzureStorageWelshUnisBlobName"]

        csv_string = blob_helper.get_str_file(storage_container_name,
                                              storage_blob_name)

        # Parse the xml and create the lookups

        logging.info(f"using version number: {version}")
        dsh.update_status("institutions", "in progress")
        lookup_creator = LookupCreator(xml_string, csv_string, version)
        ukrlp_no_info_list = lookup_creator.create_ukrlp_lookups()

        msgerror += f"\n\nUKRLP did not return info for the following {len(ukrlp_no_info_list)} ukprn(s):\n"

        for ukprn in ukrlp_no_info_list:
            msgerror += f"\t{ukprn}\n"

        function_end_datetime = datetime.today().strftime("%d-%m-%Y %H:%M:%S")

        logging.info(
            f"CreateUkrlp successfully finished on {function_end_datetime}")

        msgout.set(msgin.get_body().decode("utf-8") + msgerror)

    except Exception as e:
        # Unexpected exception
        function_fail_datetime = datetime.today().strftime("%d-%m-%Y %H:%M:%S")
        function_fail_date = datetime.today().strftime("%d.%m.%Y")

        mail_helper.send_message(
            f"Automated data import failed on {function_fail_datetime} at CreateUkrlp"
            + msgin.get_body().decode("utf-8") + msgerror,
            f"Data Import {environment} - {function_fail_date} - Failed")

        logging.error(f"CreateUkrlp faile on {function_fail_datetime}")
        logging.error(traceback.format_exc())

        # Raise to Azure
        raise e
def main(msgin: func.QueueMessage):
    msgerror = ""
    mail_helper = MailHelper()
    environment = os.environ["Environment"]

    dsh = DataSetHelper()

    try:

        logging.info(
            f"CourseSearchBuilder message queue triggered \n"
        )

        function_start_datetime = datetime.today().strftime("%d-%m-%Y %H:%M:%S")

        logging.info(
            f"CourseSearchBuilder function started on {function_start_datetime}"
        )

        api_key = os.environ["SearchAPIKey"]
        search_url = os.environ["SearchURL"]
        api_version = os.environ["AzureSearchAPIVersion"]

        version = dsh.get_latest_version_number()

        dsh.update_status("search", "in progress")

        search.build_synonyms(search_url, api_key, api_version)

        search.build_index(search_url, api_key, api_version, version)

        courses = utils.get_courses_by_version(version)

        number_of_courses = len(courses)

        logging.info(
            f"attempting to load courses to azure search\n\
                        number_of_courses: {number_of_courses}\n"
        )

        search.load_index(search_url, api_key, api_version, version, courses)
        dsh.update_status("search", "succeeded")
        courses = None

        if dsh.have_all_builds_succeeded():
            build_institutions_json_files()
            build_subjects_json_file()
            build_version_json_file()

            dsh.update_status("root", "succeeded")
        else:
            dsh.update_status("root", "failed")

        function_end_datetime = datetime.today().strftime("%d-%m-%Y %H:%M:%S")
        function_end_date = datetime.today().strftime("%d.%m.%Y")

        mail_helper.send_message(
            f"Automated data import completed on {function_end_datetime}" + msgin.get_body().decode("utf-8") + msgerror,
            f"Data Import {environment} - {function_end_date} - Completed"
        )

        logging.info(
            f"CourseSearchBuilder successfully finished on {function_end_datetime}"
        )

    except Exception as e:
        # Unexpected exception
        dsh.update_status("search", "failed")
        dsh.update_status("root", "failed")

        function_fail_datetime = datetime.today().strftime("%d-%m-%Y %H:%M:%S")
        function_fail_date = datetime.today().strftime("%d.%m.%Y")

        mail_helper.send_message(
            f"Automated data import failed on {function_fail_datetime} at CourseSearchBuilder" + msgin.get_body().decode("utf-8") + msgerror,
            f"Data Import {environment} - {function_fail_date} - Failed"
        )

        logging.error(f"CourseSearchBuilder failed on {function_fail_datetime}")
        logging.error(traceback.format_exc())

        # Raise to Azure
        raise e