Exemple #1
0
def build_subjects_json_file():
    """Export the latest dataset version's subjects to blob storage as JSON.

    Queries the subjects collection for every document whose ``version``
    equals the number returned by
    ``DataSetHelper().get_latest_version_number()``, converts each document
    with ``get_subject_entry``, sorts the entries by their ``english_name``
    key and uploads the UTF-8 encoded JSON (4-space indent) to the
    container/blob named by the ``AzureStorageJSONFilesContainerName`` and
    ``AzureStorageSubjectsJSONFileBlobName`` environment variables.
    """
    latest_version = DataSetHelper().get_latest_version_number()
    uploader = BlobHelper()

    client = get_cosmos_client()
    subjects_collection = get_collection_link(
        "AzureCosmosDbDatabaseId", "AzureCosmosDbSubjectsCollectionId"
    )

    # Pull every matching document into memory before converting any of them.
    documents = list(
        client.QueryItems(
            subjects_collection,
            f"SELECT * from c where c.version = {latest_version}",
            {"enableCrossPartitionQuery": True},
        )
    )

    # sorted() is stable, just like list.sort().
    subjects = sorted(
        [get_subject_entry(document) for document in documents],
        key=lambda entry: entry["english_name"],
    )

    payload = json.dumps(subjects, indent=4).encode('utf-8')

    uploader.write_stream_file(
        os.environ["AzureStorageJSONFilesContainerName"],
        os.environ["AzureStorageSubjectsJSONFileBlobName"],
        payload,
    )
Exemple #2
0
    def __init__(self, xml_string, welsh_uni_string, version):
        """Store the inputs and load the lookup data used by this object.

        Args:
            xml_string: Stored unchanged on ``self.xml_string``.
            welsh_uni_string: Optional text whose first line is a CSV header
                row. When truthy it is split into lines and, once the header
                passes ``validate_column_headers``, all lines (header
                included) are kept on ``self.welsh_uni``; otherwise
                ``self.welsh_uni`` is an empty list.
            version: Stored unchanged on ``self.version``.

        Raises:
            exceptions.StopEtlPipelineErrorException: If the header row of
                ``welsh_uni_string`` fails validation.
        """
        self.version = version
        self.cosmosdb_client = get_cosmos_client()
        self.xml_string = xml_string
        self.lookups_created = []
        self.ukrlp_no_info_list = []
        self.collection_link = get_collection_link(
            "AzureCosmosDbDatabaseId",
            "AzureCosmosDbUkRlpCollectionId",
        )

        # The whitelist file is looked up in this module's own directory.
        module_dir = os.path.realpath(
            os.path.join(os.getcwd(), os.path.dirname(__file__))
        )
        whitelist_path = os.path.join(module_dir, "institution_whitelist.txt")
        with open(whitelist_path) as whitelist_file:
            self.institutions_whitelist = [
                line.strip() for line in whitelist_file
            ]

        if not welsh_uni_string:
            self.welsh_uni = []
            return

        welsh_rows = welsh_uni_string.splitlines()

        # The first row is the csv header and must match the expected columns.
        header_row = welsh_rows[0]
        if not self.validate_column_headers(header_row):
            logging.error(
                "file headers are incorrect, expecting the following: code, english_label, level, welsh_label"
            )
            raise exceptions.StopEtlPipelineErrorException

        self.welsh_uni = welsh_rows
    def create_institution_docs(self):
        """Parse HESA XML and bulk-load JSON institution docs into Cosmos DB.

        Builds one document per ``INSTITUTION`` element under ``self.root``
        and passes them to the ``bulkImport`` stored procedure of the
        institutions collection in batches of 100, with ``str(self.version)``
        as the partition key. The method sleeps for two seconds after each
        full batch; a trailing partial batch is sent without a pause.
        """
        batch_size = 100

        client = get_cosmos_client()
        institutions_collection = get_collection_link(
            "AzureCosmosDbDatabaseId", "AzureCosmosDbInstitutionsCollectionId"
        )
        request_options = {"partitionKey": str(self.version)}
        bulk_import_sproc = institutions_collection + "/sprocs/bulkImport"

        def run_bulk_import(docs):
            # Send one batch to the stored procedure, logging before and after.
            doc_count = len(docs)
            logging.info(f"Begining execution of stored procedure for {doc_count} documents")
            client.ExecuteStoredProcedure(bulk_import_sproc, [docs], request_options)
            logging.info(f"Successfully loaded another {doc_count} documents")

        total = 0
        pending = []
        for total, institution in enumerate(
            self.root.iter("INSTITUTION"), start=1
        ):
            pending.append(self.get_institution_doc(institution))
            if len(pending) == batch_size:
                run_bulk_import(pending)
                # Start a fresh list rather than clearing the one just sent.
                pending = []
                time.sleep(2)

        # Whatever is left over after the last full batch.
        if pending:
            run_bulk_import(pending)

        logging.info(f"Processed {total} institutions")
 def __init__(self, xml_string):
     """Keep the XML payload and set up the Cosmos DB handles and work lists.

     Args:
         xml_string: Stored unchanged on ``self.xml_string``.
     """
     database_key = "AzureCosmosDbDatabaseId"
     collection_key = "AzureCosmosDbUkRlpCollectionId"

     self.cosmosdb_client = utils.get_cosmos_client()
     self.xml_string = xml_string

     # Work lists, all empty to begin with.
     self.lookups_created = []
     self.ukrlp_no_info_list = []
     self.db_entries_list = []

     self.collection_link = utils.get_collection_link(
         database_key, collection_key
     )
def test_institution_fetcher():
    """Fetch institution "10007857" at version 1 via ``InstitutionFetcher``.

    Returns:
        Whatever ``InstitutionFetcher.get_institution`` returns for that
        institution id and version.
    """
    # Resolve the collection link first, then create the client.
    institutions_link = utils.get_collection_link(
        "AzureCosmosDbDatabaseId", "AzureCosmosDbInstitutionsCollectionId"
    )
    fetcher = InstitutionFetcher(utils.get_cosmos_client(), institutions_link)
    return fetcher.get_institution("10007857", 1)
Exemple #6
0
def create_course_docs(xml_string):
    """Parse the HESA XML passed in and create JSON course docs in Cosmos DB.

    Each ``KISCOURSE`` child of every ``INSTITUTION`` element is turned into
    a document with ``get_course_doc``, enriched by ``UkRlpCourseEnricher``
    and then ``SubjectCourseEnricher``, and written to the courses collection
    with one ``CreateItem`` call per course.

    Args:
        xml_string: XML document text, parsed with ``ET.fromstring``.
    """
    # TODO Investigate writing docs to CosmosDB in bulk to speed things up.
    client = utils.get_cosmos_client()

    logging.info(
        "adding ukrlp data into memory ahead of building course documents"
    )
    ukrlp_enricher = UkRlpCourseEnricher()
    logging.info(
        "adding subject data into memory ahead of building course documents"
    )
    subject_enricher = SubjectCourseEnricher()
    # Applied to every course document in this order.
    enrichers = (ukrlp_enricher, subject_enricher)

    courses_collection = utils.get_collection_link(
        "AzureCosmosDbDatabaseId", "AzureCosmosDbCoursesCollectionId"
    )

    dataset_root = ET.fromstring(xml_string)

    # Lookup objects built once from the whole dataset.
    accreditations = Accreditations(dataset_root)
    kisaims = KisAims(dataset_root)
    locations = Locations(dataset_root)

    processed = 0
    for institution_node in dataset_root.iter("INSTITUTION"):
        institution_data = xmltodict.parse(ET.tostring(institution_node))[
            "INSTITUTION"
        ]
        ukprn = institution_data["UKPRN"]

        for course_node in institution_node.findall("KISCOURSE"):
            course_data = xmltodict.parse(ET.tostring(course_node))[
                "KISCOURSE"
            ]
            course_doc = get_course_doc(
                accreditations,
                locations,
                get_locids(course_data, ukprn),
                institution_data,
                course_data,
                kisaims,
            )

            for enricher in enrichers:
                enricher.enrich_course(course_doc)

            client.CreateItem(courses_collection, course_doc)
            processed += 1

    logging.info(f"Processed {processed} courses")
    def create_institution_docs(self):
        """Parse HESA XML and create JSON institution docs in Cosmos DB.

        One document is built with ``get_institution_doc`` for every
        ``INSTITUTION`` element under ``self.root`` and written with its own
        ``CreateItem`` call; the number processed is logged at the end.
        """
        # TODO Investigate writing docs to CosmosDB in bulk to speed things up.
        client = get_cosmos_client()
        institutions_collection = get_collection_link(
            "AzureCosmosDbDatabaseId", "AzureCosmosDbInstitutionsCollectionId"
        )

        # Stays at 0 when there are no INSTITUTION elements at all.
        processed = 0
        for processed, institution in enumerate(
            self.root.iter("INSTITUTION"), start=1
        ):
            client.CreateItem(
                institutions_collection, self.get_institution_doc(institution)
            )

        logging.info(f"Processed {processed} institutions")
Exemple #8
0
def main(req: func.HttpRequest) -> func.HttpResponse:
    """Implements the REST API endpoint for getting course documents.

    The endpoint implemented is:
        /institutions/{institution_id}/courses/{course_id}/modes/{mode}

    Responds with 200 and the course when one is found, 404 when none is
    found and 400 when the parameters fail ``valid_course_params``. Any
    exception is logged with its traceback and re-raised.

    The API is fully documented in a swagger document in the same repo
    as this module.
    """
    json_headers = {"Content-Type": "application/json"}

    try:
        logging.info("Process a request for a course.")
        logging.info(f"url: {req.url}")
        logging.info(f"params: {req.params}")
        logging.info(f"route_params: {req.route_params}")

        # Route parameters plus the "version" query parameter ("1" if absent).
        params = dict(req.route_params)
        params["version"] = req.params.get("version", "1")
        logging.info(f"Parameters: {params}")

        # The params end up in DB queries, so reject anything that does not
        # pass basic sanitisation.
        if not valid_course_params(params):
            logging.error(f"valid_course_params returned false for {params}")
            return func.HttpResponse(
                get_http_error_response_json(
                    "Bad Request", "Parameter Error", "Invalid parameter passed"
                ),
                headers=json_headers,
                status_code=400,
            )

        logging.info("The parameters look good")

        # Look the course up through a CourseFetcher.
        cosmos_client = get_cosmos_client()
        courses_collection = get_collection_link(
            "AzureCosmosDbDatabaseId", "AzureCosmosDbCoursesCollectionId"
        )
        course = CourseFetcher(cosmos_client, courses_collection).get_course(
            **params
        )

        if not course:
            return func.HttpResponse(
                get_http_error_response_json(
                    "Not Found", "course", "Course was not found."
                ),
                headers=json_headers,
                status_code=404,
            )

        logging.info(f"Found a course {course}")
        return func.HttpResponse(course, headers=json_headers, status_code=200)

    except Exception as exc:
        logging.error(traceback.format_exc())

        # Raise so Azure sends back the HTTP 500
        raise exc
def build_institutions_json_files():
    """Export English and Welsh institution name lists to blob storage.

    Queries the institutions collection for every document whose ``version``
    equals the number returned by
    ``DataSetHelper().get_latest_version_number()`` and writes two JSON files
    (4-space indent, UTF-8), each sorted by the ``order_by_name`` key of its
    entries:

    * the English file, built from ``pub_ukprn_name`` when it is a string,
      written to the blob named by
      ``AzureStorageInstitutionsENJSONFileBlobName``;
    * the Welsh file, built from ``pub_ukprn_welsh_name`` when it is a
      string and otherwise from ``pub_ukprn_name`` when that is a string,
      written to the blob named by
      ``AzureStorageInstitutionsCYJSONFileBlobName``.

    Both files go to the container named by
    ``AzureStorageJSONFilesContainerName``.
    """
    latest_version = DataSetHelper().get_latest_version_number()
    uploader = BlobHelper()

    client = get_cosmos_client()
    institutions_collection = get_collection_link(
        "AzureCosmosDbDatabaseId", "AzureCosmosDbInstitutionsCollectionId"
    )

    # Pull every matching document into memory; both files are built from it.
    documents = list(
        client.QueryItems(
            institutions_collection,
            f"SELECT * from c where c.version = {latest_version}",
            {"enableCrossPartitionQuery": True},
        )
    )

    def upload_sorted(entries, blob_name_variable):
        # Sort in place, serialise, and upload to the blob whose name is held
        # in the given environment variable.
        entries.sort(key=lambda entry: entry["order_by_name"])
        payload = json.dumps(entries, indent=4).encode('utf-8')
        container_name = os.environ["AzureStorageJSONFilesContainerName"]
        blob_name = os.environ[blob_name_variable]
        uploader.write_stream_file(container_name, blob_name, payload)

    # English file: only institutions whose English name is a string.
    english_entries = [
        get_inst_entry(details["pub_ukprn_name"])
        for details in (document["institution"] for document in documents)
        if isinstance(details["pub_ukprn_name"], str)
    ]
    upload_sorted(english_entries, "AzureStorageInstitutionsENJSONFileBlobName")

    # Welsh file: prefer the Welsh name and fall back to the English one.
    welsh_entries = []
    for document in documents:
        details = document["institution"]
        english_name = details["pub_ukprn_name"]
        welsh_name = details["pub_ukprn_welsh_name"]

        for candidate in (welsh_name, english_name):
            if isinstance(candidate, str):
                welsh_entries.append(get_inst_entry(candidate))
                break
    upload_sorted(welsh_entries, "AzureStorageInstitutionsCYJSONFileBlobName")
 def __init__(self):
     """Create the Cosmos DB client and resolve the feedback collection link."""
     database_key = "AzureCosmosDbDatabaseId"
     collection_key = "AzureCosmosDbFeedbackCollectionId"

     self.cosmosdb_client = utils.get_cosmos_client()
     self.collection_link = utils.get_collection_link(
         database_key, collection_key
     )
def main(req: func.HttpRequest) -> func.HttpResponse:
    """Handle a PostFeedback HTTP request by validating and storing feedback.

    The request body is decoded as JSON, checked with ``validate_feedback``,
    passed through ``sanitise_feedback``, given a creation stamp by
    ``add_created_at_to_feedback`` and written to the database through
    ``FeedbackCreator``.

    Returns:
        An empty 201 response on success, or a 400 JSON error response when
        the body cannot be decoded as JSON or fails validation.

    Raises:
        Exception: Any other error is logged with its traceback and re-raised.
    """

    def timestamp():
        # Timestamp in the format shared by the start/finish/fail log lines.
        return datetime.today().strftime("%d-%m-%Y %H:%M:%S")

    def log_failure():
        logging.info(f"PostFeedback function failed on {timestamp()}")

    def bad_request(error_type, detail):
        # Build the 400 response used by both validation failure paths.
        return func.HttpResponse(
            get_http_error_response_json("Bad Request", error_type, detail),
            headers={"Content-Type": "application/json"},
            status_code=400,
        )

    try:
        logging.info("PostFeedback http triggered.")
        logging.info(f"PostFeedback function started on {timestamp()}")

        try:
            feedback = req.get_json()
        except ValueError as decode_error:
            logging.error(f"JSON decode error {decode_error}")
            log_failure()
            return bad_request("json decoding error", str(decode_error))

        try:
            validate_feedback(feedback)
        except ValidationError as validation_error:
            logging.error(f"The feedback data is not valid {feedback}")
            logging.error(
                f"validate_feedback error message {validation_error.message}"
            )
            log_failure()
            return bad_request(
                "JSON Validation Error", validation_error.message
            )

        logging.info(f"received feedback: {feedback}")
        sanitise_feedback(feedback)
        logging.info(f"received feedback after sanitise: {feedback}")

        add_created_at_to_feedback(feedback)

        feedback_collection = get_collection_link(
            cosmosdb_database_id, cosmosdb_collection_id
        )
        FeedbackCreator(client, feedback_collection).write_feedback_to_db(
            feedback
        )

        logging.info(f"PostFeedback function finished on {timestamp()}")
        return func.HttpResponse(status_code=201)

    except Exception as exc:
        logging.error(traceback.format_exc())
        log_failure()

        # Raise so Azure sends back the HTTP 500
        raise exc
 def test_get_collection_link(self):
     """Check get_collection_link returns ``dbs/<db_id>/colls/<collection_id>``."""
     expected = "/".join(("dbs", self.db_id, "colls", self.collection_id))
     actual = get_collection_link(self.db_id_env, self.collection_id_env)
     self.assertEqual(actual, expected)
 def __init__(self):
     """Log initialisation, then set up the Cosmos DB client and the
     data-set collection link."""
     database_key = "AzureCosmosDbDatabaseId"
     collection_key = "AzureCosmosDbDataSetCollectionId"

     logging.info("Init for DataSetHelper")
     self.cosmos_client = get_cosmos_client()
     self.collection_link = get_collection_link(database_key, collection_key)
def main(req: func.HttpRequest) -> func.HttpResponse:
    """Implements the REST API endpoint for getting an institution document.

    The endpoint implemented is:
        /institutions/{institution_id}/

    Responds with 200 and the institution when one is found, 404 when none
    is found and 400 when the parameters fail ``valid_institution_params``.
    Any exception is logged with its traceback and re-raised.

    The API is documented in a swagger document.
    """
    json_headers = {"Content-Type": "application/json"}

    try:
        logging.info("Process a request for an institution resource.")
        logging.info(f"url: {req.url}")
        logging.info(f"params: {req.params}")
        logging.info(f"route_params: {req.route_params}")

        # Route parameters plus the "version" query parameter ("1" if absent).
        params = dict(req.route_params)
        params["version"] = req.params.get("version", "1")
        logging.info(f"Parameters: {params}")

        if not valid_institution_params(params):
            logging.error(
                f"valid_institution_params returned false for {params}")
            return func.HttpResponse(
                get_http_error_response_json(
                    "Bad Request", "Parameter Error", "Invalid parameter passed"
                ),
                headers=json_headers,
                status_code=400,
            )

        logging.info("The parameters look good")

        # Look the institution up through an InstitutionFetcher.
        cosmos_client = get_cosmos_client()
        institutions_collection = get_collection_link(
            "AzureCosmosDbDatabaseId", "AzureCosmosDbInstitutionsCollectionId"
        )
        fetcher = InstitutionFetcher(cosmos_client, institutions_collection)
        institution = fetcher.get_institution(**params)

        if not institution:
            return func.HttpResponse(
                get_http_error_response_json(
                    "Not Found", "institution", "Institution was not found."
                ),
                headers=json_headers,
                status_code=404,
            )

        logging.info(f"Found a institution {institution}")
        return func.HttpResponse(
            institution, headers=json_headers, status_code=200
        )

    except Exception as exc:
        logging.error(traceback.format_exc())

        # Raise so Azure sends back the HTTP 500
        raise exc