Example no. 1
0
def generate_empty_report(hpo_id, bucket, folder_prefix):
    """
    Generate an empty report with a "validation failed" error

    Writes processed.txt to the folder so it is skipped on future runs,
    then renders and uploads the failure report HTML.

    :param hpo_id: identifies the HPO site
    :param bucket: name of the bucket with the submission
    :param folder_prefix: folder containing the submission
    :return: report_data: dict whose keys are params in resources/templates/hpo_report.html
    """
    now_str = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
    submission_error = (
        f'Submission folder name {folder_prefix} does not follow the '
        f'naming convention {consts.FOLDER_NAMING_CONVENTION}, where vN represents '
        f'the version number for the day, starting at v1 each day. '
        f'Please resubmit the files in a new folder with the correct naming convention'
    )
    report_data = {
        report_consts.HPO_NAME_REPORT_KEY: get_hpo_name(hpo_id),
        report_consts.FOLDER_REPORT_KEY: folder_prefix,
        report_consts.TIMESTAMP_REPORT_KEY: now_str,
        report_consts.SUBMISSION_ERROR_REPORT_KEY: submission_error,
    }
    processed_path = folder_prefix + common.PROCESSED_TXT
    logging.info(
        'Processing skipped. Reason: Folder %s does not follow naming convention %s. '
        'Saving timestamp %s to `gs://%s/%s`.', folder_prefix,
        consts.FOLDER_NAMING_CONVENTION, now_str, bucket, processed_path)
    # Mark the folder processed first, then upload the rendered report
    _write_string_to_file(bucket, processed_path, now_str)
    _write_string_to_file(bucket, folder_prefix + common.RESULTS_HTML,
                          hpo_report.render(report_data))
    return report_data
Example no. 2
0
    def test_render(self):
        """Render the report under several scenarios and verify each output."""
        report_data = {
            consts.HPO_NAME_REPORT_KEY: self.hpo_name,
            consts.TIMESTAMP_REPORT_KEY: self.timestamp,
            consts.FOLDER_REPORT_KEY: self.folder,
            consts.RESULTS_REPORT_KEY: self.results,
            consts.ERRORS_REPORT_KEY: [],
            consts.WARNINGS_REPORT_KEY: [],
            consts.NONUNIQUE_KEY_METRICS_REPORT_KEY: [],
            consts.DRUG_CLASS_METRICS_REPORT_KEY: self.drug_class_metrics,
            consts.HEEL_ERRORS_REPORT_KEY: self.heel_errors,
            consts.COMPLETENESS_REPORT_KEY: self.completeness
        }

        def check():
            # Render the current report_data, save it, and verify its content
            output = hpo_report.render(report_data)
            self.save_render_output(output)
            self.assert_report_data_in_output(report_data, output)

        # report ok
        check()

        # submission error
        report_data[
            consts.SUBMISSION_ERROR_REPORT_KEY] = 'Required files are missing'
        check()

        # error occurred
        report_data[consts.ERROR_OCCURRED_REPORT_KEY] = True
        check()

        # per-file validation errors
        report_data[consts.ERRORS_REPORT_KEY] = [{
            'file_name': 'visit_occurrence.csv',
            'message': 'Fake error'
        }]
        check()
Example no. 3
0
def perform_reporting(hpo_id, report_data, folder_items, bucket,
                      folder_prefix):
    """
    Generate html report, upload to GCS and send email if possible

    :param hpo_id: identifies the hpo site
    :param report_data: dictionary containing items for populating hpo_report.html
    :param folder_items: items in the folder without folder prefix
    :param bucket: bucket containing the folder
    :param folder_prefix: submission folder
    :return: None
    """
    # Stamp the report with the processing time before rendering
    processed_time_str = get_eastern_time()
    report_data[report_consts.TIMESTAMP_REPORT_KEY] = processed_time_str
    results_html = hpo_report.render(report_data)

    # Lazy %-args defer message construction until the record is emitted
    results_html_path = folder_prefix + common.RESULTS_HTML
    logging.info("Saving file %s to gs://%s/%s.", common.RESULTS_HTML, bucket,
                 results_html_path)
    upload_string_to_gcs(bucket, results_html_path, results_html)

    # processed.txt marks the folder as handled for future runs
    processed_txt_path = folder_prefix + common.PROCESSED_TXT
    logging.info("Saving timestamp %s to gs://%s/%s.", processed_time_str,
                 bucket, processed_txt_path)
    upload_string_to_gcs(bucket, processed_txt_path, processed_time_str)

    folder_uri = f"gs://{bucket}/{folder_prefix}"
    # Email is only attempted on the first validation run of a submission
    if folder_items and is_first_validation_run(folder_items):
        logging.info("Attempting to send report via email for %s", hpo_id)
        email_msg = en.generate_email_message(hpo_id, results_html, folder_uri,
                                              report_data)
        if email_msg is None:
            logging.info(
                "Not enough info in contact list to send emails for hpo_id %s",
                hpo_id)
        else:
            result = en.send_email(email_msg)
            if result is None:
                logging.info(
                    'Mandrill error occurred. Please check logs for more details'
                )
            else:
                result_ids = ', '.join(
                    result_item['_id'] for result_item in result)
                logging.info(
                    "Sending emails for hpo_id %s with Mandrill tracking ids: %s",
                    hpo_id, result_ids)
    # Plain string: the original used an f-string with no placeholders (F541)
    logging.info("Reporting complete")
Example no. 4
0
def generate_metrics(hpo_id, bucket, folder_prefix, summary):
    """
    Generate metrics regarding a submission

    :param hpo_id: identifies the HPO site
    :param bucket: name of the bucket with the submission
    :param folder_prefix: folder containing the submission
    :param summary: file summary from validation
     {results: [(file_name, found, parsed, loaded)],
      errors: [(file_name, message)],
      warnings: [(file_name, message)]}
    :return: report_data: dict whose keys are params in resources/templates/hpo_report.html
    :raises HttpError: re-raised after logging when a cloud call fails;
        the report is still rendered and uploaded via the finally block
    """
    report_data = summary.copy()
    processed_datetime_str = datetime.datetime.now().strftime(
        '%Y-%m-%dT%H:%M:%S')
    error_occurred = False

    # TODO separate query generation, query execution, writing to GCS
    gcs_path = f'gs://{bucket}/{folder_prefix}'
    report_data[report_consts.HPO_NAME_REPORT_KEY] = get_hpo_name(hpo_id)
    report_data[report_consts.FOLDER_REPORT_KEY] = folder_prefix
    report_data[report_consts.TIMESTAMP_REPORT_KEY] = processed_datetime_str
    results = report_data['results']
    try:
        # TODO modify achilles to run successfully when tables are empty
        # achilles queries will raise exceptions (e.g. division by zero) if files not present
        if all_required_files_loaded(results):
            logging.info('Running achilles on %s.', folder_prefix)
            run_achilles(hpo_id)
            run_export(datasource_id=hpo_id, folder_prefix=folder_prefix)
            logging.info('Uploading achilles index files to `%s`.', gcs_path)
            _upload_achilles_files(hpo_id, folder_prefix)
            heel_error_query = get_heel_error_query(hpo_id)
            report_data[report_consts.HEEL_ERRORS_REPORT_KEY] = query_rows(
                heel_error_query)
        else:
            report_data[
                report_consts.
                SUBMISSION_ERROR_REPORT_KEY] = 'Required files are missing'
            logging.info(
                'Required files are missing in %s. Skipping achilles.',
                gcs_path)

        # non-unique key metrics (lazy %-args instead of eager % formatting)
        logging.info('Getting non-unique key stats for %s...', hpo_id)
        nonunique_metrics_query = get_duplicate_counts_query(hpo_id)
        report_data[
            report_consts.NONUNIQUE_KEY_METRICS_REPORT_KEY] = query_rows(
                nonunique_metrics_query)

        # drug class metrics
        logging.info('Getting drug class for %s...', hpo_id)
        drug_class_metrics_query = get_drug_class_counts_query(hpo_id)
        report_data[report_consts.DRUG_CLASS_METRICS_REPORT_KEY] = query_rows(
            drug_class_metrics_query)

        # missing PII
        logging.info('Getting missing record stats for %s...', hpo_id)
        missing_pii_query = get_hpo_missing_pii_query(hpo_id)
        missing_pii_results = query_rows(missing_pii_query)
        report_data[report_consts.MISSING_PII_KEY] = missing_pii_results

        # completeness
        logging.info('Getting completeness stats for %s...', hpo_id)
        completeness_query = completeness.get_hpo_completeness_query(hpo_id)
        report_data[report_consts.COMPLETENESS_REPORT_KEY] = query_rows(
            completeness_query)

        # lab concept metrics
        logging.info('Getting lab concepts for %s...', hpo_id)
        lab_concept_metrics_query = required_labs.get_lab_concept_summary_query(
            hpo_id)
        report_data[report_consts.LAB_CONCEPT_METRICS_REPORT_KEY] = query_rows(
            lab_concept_metrics_query)

        logging.info(
            'Processing complete. Saving timestamp %s to `gs://%s/%s`.',
            processed_datetime_str, bucket,
            folder_prefix + common.PROCESSED_TXT)
        _write_string_to_file(bucket, folder_prefix + common.PROCESSED_TXT,
                              processed_datetime_str)

    except HttpError as err:
        # cloud error occurred - log details for troubleshooting
        logging.exception(
            'Failed to generate full report due to the following cloud error:\n\n%s',
            err.content)
        error_occurred = True

        # bare raise preserves the original traceback (was `raise err`)
        raise
    finally:
        # report all results collected (attempt even if cloud error occurred)
        report_data[report_consts.ERROR_OCCURRED_REPORT_KEY] = error_occurred
        results_html = hpo_report.render(report_data)
        _write_string_to_file(bucket, folder_prefix + common.RESULTS_HTML,
                              results_html)
    return report_data