コード例 #1
0
ファイル: completeness.py プロジェクト: all-of-us/curation
def get_hpo_ids():
    """
    Collect the identifier of every HPO site

    :return: list holding the consts.HPO_ID value of each entry returned by
        bq_utils.get_hpo_info()
    """
    hpo_sites = bq_utils.get_hpo_info()
    return [site[consts.HPO_ID] for site in hpo_sites]
コード例 #2
0
ファイル: top_heel_errors.py プロジェクト: rfrancis1/curation
def get_hpo_ids():
    """
    Collect the hpo_id of every entry returned by bq_utils.get_hpo_info()

    :return: list of hpo_ids, in the order the entries are returned
    """
    hpo_ids = []
    for hpo_entry in bq_utils.get_hpo_info():
        hpo_ids.append(hpo_entry['hpo_id'])
    return hpo_ids
コード例 #3
0
ファイル: add_hpo.py プロジェクト: all-of-us/curation
def add_hpo_site_mappings_file_df(hpo_id, hpo_name, org_id, display_order):
    """
    Creates dataframe with hpo_id, hpo_name, org_id, display_order

    Reads the current hpo site mappings file, shifts the display order of
    existing rows to make room, and appends a row for the new site.

    :param hpo_id: hpo_ identifier
    :param hpo_name: name of the hpo
    :param org_id: hpo organization identifier
    :param display_order: index number in which hpo should be added in table;
        if None the site is placed after the current last entry
    :return: dataframe with the file contents plus the new row, sorted by
        Display_Order
    :raises ValueError if hpo_id or hpo_name already exists in the lookup table
    """
    hpo_table = bq_utils.get_hpo_info()
    hpo_table_df = pd.DataFrame(hpo_table)
    # NOTE: `value in series` tests the Series *index labels*, not its values,
    # so the duplicate check must compare against the column values explicitly.
    hpo_id_exists = (hpo_table_df['hpo_id'] == hpo_id).any()
    hpo_name_exists = (hpo_table_df['name'] == hpo_name).any()
    if hpo_id_exists or hpo_name_exists:
        raise ValueError(
            f"{hpo_id}/{hpo_name} already exists in site lookup table")

    hpo_file_df = pd.read_csv(resources.hpo_site_mappings_path)
    verify_hpo_mappings_up_to_date(hpo_file_df, hpo_table_df)

    if display_order is None:
        # default to the position right after the current last entry
        display_order = hpo_file_df['Display_Order'].max() + 1

    # push down every row at or below the requested position to free the slot
    hpo_file_df.loc[hpo_file_df['Display_Order'] >= display_order,
                    'Display_Order'] += 1
    # values are assigned positionally; assumes the file's column order is
    # org id, hpo id, hpo name, display order -- TODO confirm against the csv
    hpo_file_df.loc['-1'] = [org_id, hpo_id, hpo_name, display_order]
    LOGGER.info(f'Added new entry for hpo_id {hpo_id} to '
                f'config/hpo_site_mappings.csv at position {display_order}. '
                f'Please upload to curation-devops repo.')
    return hpo_file_df.sort_values(by='Display_Order')
コード例 #4
0
def validate_all_hpos():
    """
    Validation endpoint that runs process_hpo for every known hpo_id

    :return: the confirmation message 'validation done!'
    """
    # generator keeps lookup and processing interleaved, one site at a time
    site_ids = (hpo['hpo_id'] for hpo in bq_utils.get_hpo_info())
    for site_id in site_ids:
        process_hpo(site_id)
    return 'validation done!'
コード例 #5
0
 def test_convert_to_bq_string(self, mock_hpo_list):
     # The mocked hpo list drives the mappings produced by gen_ext
     mock_hpo_list.return_value = self.hpo_list
     mappings = gen_ext.get_hpo_and_rdr_mappings()

     # Build the expected string: one formatted entry per hpo, then the RDR pair
     expected_parts = [
         self.bq_string.format(hpo_name=hpo_entry["hpo_id"])
         for hpo_entry in bq_utils.get_hpo_info()
     ]
     expected_parts.append(f'("{gen_ext.RDR}", "{gen_ext.PPI_PM}")')
     expected = ', '.join(expected_parts)

     actual = gen_ext.convert_to_bq_string(mappings)
     # Only the lengths are compared, not the exact contents
     self.assertEqual(len(actual), len(expected))
コード例 #6
0
def get_hpo_site_names():
    """
    Build the list of hpo site ids.

    :return:  list with the consts.HPO_ID value of every entry returned by
        bq_utils.get_hpo_info()
    """
    return [hpo_site[consts.HPO_ID] for hpo_site in bq_utils.get_hpo_info()]
コード例 #7
0
def generate_site_mappings():
    """
    Build the lookup from site names to their masked names

    Each hpo_id is paired with a distinct random integer drawn from
    range(100, 999).

    :return: dict with key: hpo_id, value: rand int
    """
    sites = bq_utils.get_hpo_info()
    # sample without replacement so no two sites share a masked value
    masked_values = random.sample(range(100, 999), len(sites))
    return {
        site["hpo_id"]: masked
        for site, masked in zip(sites, masked_values)
    }
コード例 #8
0
def find_hpo(hpo_id, hpo_name):
    """
    Look up an HPO among the entries returned by bq_utils.get_hpo_info()

    :param hpo_id: hpo identifier
    :param hpo_name: HPO name
    :return: the first entry whose 'hpo_id' equals hpo_id or whose 'name'
        equals hpo_name; None when no entry matches
    """
    candidates = (entry for entry in bq_utils.get_hpo_info()
                  if entry['hpo_id'] == hpo_id or entry['name'] == hpo_name)
    return next(candidates, None)
コード例 #9
0
ファイル: ehr_union.py プロジェクト: all-of-us/curation
def main(input_dataset_id, output_dataset_id, project_id, hpo_ids_ex=None):
    """
    Build a new CDM as the union of all EHR datasets submitted by HPOs

    Steps, in order: create empty output tables, create the mapping tables,
    load the unioned tables, create the person mapping table, then map and
    move EHR person records into observation.

    :param input_dataset_id identifies a dataset containing multiple CDMs, one for each HPO submission
    :param output_dataset_id identifies the dataset to store the new CDM in
    :param project_id: project containing the datasets
    :param hpo_ids_ex: (optional) list that identifies HPOs not to process, by default process all
    :returns: None (the function has no return statement)
    """
    client = get_client(project_id)

    logging.info('EHR union started')
    # Start from every known hpo_id, then drop the excluded ones (if any).
    all_hpo_ids = [site['hpo_id'] for site in bq_utils.get_hpo_info()]
    hpo_ids = ([site_id for site_id in all_hpo_ids
                if site_id not in hpo_ids_ex] if hpo_ids_ex else all_hpo_ids)

    # Empty output tables are created first to ensure proper schema, clustering, etc.
    for cdm_table in resources.CDM_TABLES:
        destination = output_table_for(cdm_table)
        logging.info(f'Creating {output_dataset_id}.{destination}...')
        bq_utils.create_standard_table(
            cdm_table,
            destination,
            drop_existing=True,
            dataset_id=output_dataset_id,
        )

    # Mapping tables for each domain table that needs one
    for mapped_table in cdm.tables_to_map():
        logging.info(f'Mapping {mapped_table}...')
        mapping(mapped_table, hpo_ids, input_dataset_id, output_dataset_id,
                project_id, client)

    # Fill every table with the union of the submitted tables
    for cdm_table in resources.CDM_TABLES:
        logging.info(f'Creating union of table {cdm_table}...')
        load(cdm_table, hpo_ids, input_dataset_id, output_dataset_id)

    logging.info('Creation of Unioned EHR complete')

    # The person mapping table is created after the union has been loaded
    person_table = common.PERSON
    logging.info(f'Mapping {person_table}...')
    mapping(person_table, hpo_ids, input_dataset_id, output_dataset_id,
            project_id, client)

    logging.info('Starting process for Person to Observation')
    # EHR person records become four observation rows each:
    # race, ethnicity, dob and gender
    map_ehr_person_to_observation(output_dataset_id)
    move_ehr_person_to_observation(output_dataset_id)

    logging.info('Completed Person to Observation')
コード例 #10
0
def render():
    """
    Produce the cron file from its jinja2 template

    The template is rendered with the hpo entries from
    bq_utils.get_hpo_info() and the expression from get_yesterday_expr().

    :return: a str representation of the cron file
    """
    template_loader = jinja2.FileSystemLoader(resources.TEMPLATES_PATH)
    environment = jinja2.Environment(loader=template_loader)
    cron_template = environment.get_template(resources.CRON_TPL_YAML)
    # TODO obtain cron urls from validation.main/app_base.yaml instead of through template
    hpo_entries = bq_utils.get_hpo_info()
    yesterday_expr = get_yesterday_expr()
    return cron_template.render(hpos=hpo_entries, yesterday=yesterday_expr)
コード例 #11
0
ファイル: bq_utils_test.py プロジェクト: all-of-us/curation
 def test_get_hpo_info(self):
     # The lookup must yield at least one hpo entry
     self.assertGreater(len(bq_utils.get_hpo_info()), 0)
コード例 #12
0
def get_hpo_name(hpo_id):
    """
    Find the name of the HPO with the given id, ignoring case

    :param hpo_id: hpo identifier to look up
    :return: the 'name' value of the first entry whose 'hpo_id' matches
    :raises ValueError: if no entry has a matching hpo_id
    """
    matching_entry = next(
        (entry for entry in bq_utils.get_hpo_info()
         if entry['hpo_id'].lower() == hpo_id.lower()),
        None,
    )
    if matching_entry is None:
        raise ValueError('%s is not a valid hpo_id' % hpo_id)
    return matching_entry['name']
コード例 #13
0
def is_hpo_id(hpo_id):
    """
    Tell whether hpo_id is one of the known hpo_ids

    :param hpo_id: identifier to check
    :return: True if hpo_id matches the 'hpo_id' of any entry returned by
        bq_utils.get_hpo_info(), False otherwise
    """
    known_hpo_ids = [entry['hpo_id'] for entry in bq_utils.get_hpo_info()]
    return hpo_id in known_hpo_ids