def test_pop_and_2_obs_with_all_pv(self):
        """Fill a Pop template once, then reuse one Obs filler for two years.

        Every optional Pop variable is supplied. The same variable dict is
        then extended with observation values and passed to a single Obs
        filler twice, checking the generated MCF after each fill.
        """
        pop_filler = mcf_template_filler.Filler(POP_TEMPLATE,
                                                required_vars=['geo_id'])
        pv_map = dict(
            geo_id='geoId/06',
            naics_code='11',
            operation_type='Manufacturer',
            tax_status='ExemptFromTax',
        )
        pop_mcf = pop_filler.fill(pv_map)

        expected_pop = """
Node: Pop_payroll_est_geoId/06_11_Manufacturer_ExemptFromTax
typeOf: schema:StatisticalPopulation
populationType: dcs:USCEstablishment
location: dcid:geoId/06
payrollStatus: dcs:WithPayroll
naics: dcs:NAICS/11
operationType: dcs:Manufacturer
taxStatus: dcs:ExemptFromTax
"""
        self.assertEqual(pop_mcf, expected_pop)

        # One Obs filler serves both observations below.
        obs_filler = mcf_template_filler.Filler(
            OBS_TEMPLATE, required_vars=['year', 'mprop', 'mval'])

        # First observation: year 2000, value 0 (a falsy but valid value).
        pv_map.update(year='2000', mprop='count', mval=0)
        obs_2000_mcf = obs_filler.fill(pv_map)

        expected_obs_2000 = """
Node: Obs_on_Pop_payroll_est_geoId/06_11_Manufacturer_ExemptFromTax_2000_count
typeOf: schema:Observation
observedNode: l:Pop_payroll_est_geoId/06_11_Manufacturer_ExemptFromTax
observationDate: "2000"
measuredProperty: dcs:count
measuredValue: 0
"""
        self.assertEqual(obs_2000_mcf, expected_obs_2000)

        # Second observation: same filler, new year and value.
        pv_map.update(year='2001', mprop='count', mval=144)
        obs_2001_mcf = obs_filler.fill(pv_map)

        expected_obs_2001 = """
Node: Obs_on_Pop_payroll_est_geoId/06_11_Manufacturer_ExemptFromTax_2001_count
typeOf: schema:Observation
observedNode: l:Pop_payroll_est_geoId/06_11_Manufacturer_ExemptFromTax
observationDate: "2001"
measuredProperty: dcs:count
measuredValue: 144
"""
        self.assertEqual(obs_2001_mcf, expected_obs_2001)
    def test_require_node_name(self):
        with self.assertRaises(ValueError):
            mcf_template_filler.Filler(NAMELESS_POP_TEMPLATE)

        with self.assertRaises(ValueError):
            mcf_template_filler.Filler(NAMELESS_OBS_TEMPLATE)

        bad_node = """
typeOf: badNode
location: dcid:badPlace
"""
        with self.assertRaises(ValueError):
            mcf_template_filler.Filler(POP_TEMPLATE + bad_node)
    def test_example_usage(self):
        example_template = """
Node: People_in_geoId_{geo_id}_{race}_{gender}_{random_field}
typeOf: schema:StatisticalPopulation
populationType: schema:Person
location: geoId/{geo_id}
race: dcs:{race}
gender: dcs:{gender}
randomOptionalProperty: {random_field}
"""

        templater = mcf_template_filler.Filler(example_template,
                                               required_vars=['geo_id'])
        var_dict1 = {'geo_id': '05', 'race': 'White'}
        pop1 = templater.fill(var_dict1)
        expected = """
Node: People_in_geoId_05_White__
typeOf: schema:StatisticalPopulation
populationType: schema:Person
location: geoId/05
race: dcs:White
"""
        self.assertEqual(pop1, expected)

        var_dict2 = {'geo_id': '05', 'gender': 'Female'}
        pop2 = templater.fill(var_dict2)
        expected = """
Node: People_in_geoId_05__Female_
typeOf: schema:StatisticalPopulation
populationType: schema:Person
location: geoId/05
gender: dcs:Female
"""
        self.assertEqual(pop2, expected)
    def test_unified_pop_obs_with_missing_optional_pv(self):
        # Can combine templates, like Pop + Obs
        pop_obs_template = POP_TEMPLATE + OBS_TEMPLATE
        templater = mcf_template_filler.Filler(
            pop_obs_template,
            required_vars=['geo_id', 'year', 'mprop', 'mval'])
        template_vars = {
            'geo_id': 'geoId/06',
            'naics_code': '11',
            'tax_status': 'ExemptFromTax',
            'year': '2000',
            'mprop': 'count',
            'mval': 42,
        }
        result = templater.fill(template_vars)

        expected = """
Node: Pop_payroll_est_geoId/06_11__ExemptFromTax
typeOf: schema:StatisticalPopulation
populationType: dcs:USCEstablishment
location: dcid:geoId/06
payrollStatus: dcs:WithPayroll
naics: dcs:NAICS/11
taxStatus: dcs:ExemptFromTax

Node: Obs_on_Pop_payroll_est_geoId/06_11__ExemptFromTax_2000_count
typeOf: schema:Observation
observedNode: l:Pop_payroll_est_geoId/06_11__ExemptFromTax
observationDate: "2000"
measuredProperty: dcs:count
measuredValue: 42
"""
        self.assertEqual(result, expected)
 def test_pop_with_missing_req_pv(self):
     """Filling must raise ValueError when a required variable is absent.

     `tax_status` is declared required but is not in the variable dict.
     """
     pop_filler = mcf_template_filler.Filler(
         POP_TEMPLATE, required_vars=['geo_id', 'tax_status'])
     vars_without_tax_status = dict(
         geo_id='geoId/06',
         naics_code='11',
         operation_type='Manufacturer',
     )
     with self.assertRaises(ValueError):
         pop_filler.fill(vars_without_tax_status)
Beispiel #6
0
def zip_ingred_semi_sep(mcf_file, strength_format_map, row):
    """Zips ingredients and strengths together when the ingredients are
    separated by a semi colon in the strengths column.

    Ex: strengths: 1 mg, 2mg, 3mg; 4mg, 5mg, 6mg
        ingredients: ingred1 ; ingred2
    resulting DrugStrength Nodes where ActiveIngredientAmount nodes are comma
    separated:
      * Strength 1:  1mg - ingred1, 4mg-ingred2
      * Strength 2: 2mg - ingred1, 5mg - ingred2
      * Strength 3: 3mg-ingred1, 6mg - ingred2

    Args:
        mcf_file: object with a `write` method that receives the generated
            MCF text.
        strength_format_map: dict of template variables shared by every
            strength node; must contain 'strength_dcid'. It is read but not
            modified.
        row: mapping providing the 'CleanStrength' and
            'CleanActiveIngredient' strings.

    Returns:
        List of the strength dcids written, one per comma-separated position
        of the first strength group, each formed as
        `<base strength_dcid>_<position>`.

    Raises:
        IndexError: if a later strength group has fewer entries than the
            first one, or if there are fewer ingredients than strength groups.
    """
    # One comma-separated list of strengths per semicolon-separated group.
    strength_lists = [
        group.split(',') for group in row['CleanStrength'].split(';')
    ]
    # Ingredient names are loop-invariant, so normalise them once.
    ingred_names = [
        ingred.strip().title()
        for ingred in row['CleanActiveIngredient'].split(';')
    ]

    base_dcid = strength_format_map['strength_dcid']
    # The same filler can be reused for every strength node.
    strength_templater = mcf_template_filler.Filler(STRENGTH_TEMPLATE,
                                                    required_vars=['dcid'])

    strength_dcids = []
    for index, _ in enumerate(strength_lists[0]):
        strength_dcid = base_dcid + '_' + str(index)

        # Pair the index-th strength of every group with its ingredient.
        active_ingred_dcids = []
        for ingred_index, ingred_strengths in enumerate(strength_lists):
            ingred_dcid = write_active_ingred_node(
                mcf_file, ingred_strengths[index], ingred_names[ingred_index])
            active_ingred_dcids.append(ingred_dcid)

        # Work on a copy so the caller's map is never mutated.
        node_vars = dict(strength_format_map)
        node_vars['strength_dcid'] = strength_dcid
        node_vars['name'] = strength_dcid.replace('dcid:bio/', '')
        node_vars['active_ingred_dcids'] = ','.join(active_ingred_dcids)

        # Drop empty values before they reach the template.
        node_vars = {key: value for key, value in node_vars.items() if value}

        mcf_file.write(strength_templater.fill(node_vars))
        strength_dcids.append(strength_dcid)
    return strength_dcids
Beispiel #7
0
def write_active_ingred_node(mcf_file, amount, ingredient):
    """Write one active ingredient amount node to mcf_file and return its dcid.

    The amount may carry a second quantity in parentheses, as in
    '500mg/25ml (20mg/ml)'; both parts are then formatted and joined with a
    comma. A hyphen anywhere in the amount selects the range formatter for
    every part, unless the amount contains 'OMEGA-3' or 'SINGLE-USE'.

    Args:
        mcf_file: object with a `write` method that receives the MCF text.
        amount: quantity string for the ingredient (single value or range).
        ingredient: ingredient name; surrounding whitespace is stripped.

    Returns:
        The node's dcid, 'dcid:bio/' followed by the sanitised node name.
    """
    has_hyphen = '-' in amount
    hyphen_is_part_of_name = 'OMEGA-3' in amount or 'SINGLE-USE' in amount
    if has_hyphen and not hyphen_is_part_of_name:
        format_qty = get_qty_range_format
    else:
        format_qty = get_qty_format

    amount_parts = amount.split('(')
    amount_qty = format_qty(amount_parts[0])
    if '(' in amount:
        # Second quantity: the text after the first '(' with ')' removed.
        second_amount_qty = format_qty(amount_parts[1].replace(')', ''))
        amount_qty = amount_qty + ',' + second_amount_qty

    stripped_ingredient = ingredient.strip()
    name = (stripped_ingredient + '_' + amount_qty).strip()
    for old_text, new_text in INGREDIENT_REPLACEMENTS.items():
        name = name.replace(old_text, new_text).strip()
    # Keep only letters, digits, underscore and hyphen, then title-case.
    name = re.sub("[^0-9a-zA-Z_-]+", "", name).title()
    dcid = 'dcid:bio/' + name

    template_vars = {
        'active_ingred_dcid': dcid,
        'ingred_amount_qty': amount_qty,
        'ingred_name': stripped_ingredient,
        'name': name,
    }
    ingred_templater = mcf_template_filler.Filler(ACTIVE_INGRED_TEMPLATE,
                                                  required_vars=['dcid'])
    mcf_file.write(ingred_templater.fill(template_vars))

    return dcid
Beispiel #8
0
def parse_row(mcf_file, seen_fda_apps, row):
    """Write every MCF node derived from one input row to mcf_file.

    An FDA Application node is written the first time its application number
    is seen. The strength nodes (and any Active Ingredient Amount nodes they
    need) are written next via parse_strength_nodes. Finally one drug node is
    written for the row.

    Args:
        mcf_file: object with a `write` method that receives the MCF text.
        seen_fda_apps: list of application numbers already written; the row's
            'ApplNo' is appended when a new application node is emitted.
        row: mapping providing the columns read below.
    """
    appl_no = row['ApplNo']
    appl_name = 'FDA_Application_' + str(appl_no)
    fda_app = 'dcid:bio/' + appl_name

    if appl_no not in seen_fda_apps:
        app_candidates = {
            'fda_app_dcid': fda_app,
            'appl_num': str(appl_no),
            'name': appl_name,
            'sponsor_name': row['SponsorName'].title(),
            'appl_type_enums': row['ApplTypeEnum'],
        }
        # Empty values are dropped before filling the template.
        app_template_map = {
            key: value for key, value in app_candidates.items() if value
        }
        fda_app_templater = mcf_template_filler.Filler(FDA_APP_TEMPLATE,
                                                       required_vars=['dcid'])
        mcf_file.write(fda_app_templater.fill(app_template_map))
        seen_fda_apps.append(appl_no)

    strength_dcids = parse_strength_nodes(mcf_file, fda_app, row)

    stripped_ingreds = [
        ingred.strip() for ingred in row['CleanActiveIngredient'].split(';')
    ]
    drug_candidates = {
        'drug_ref': 'bio/' + row['DrugRef'],
        'name': row['DrugRef'],
        'synonyms': '","'.join(row['DrugName'].split(';')).title(),
        'strength_dcids': ','.join(strength_dcids),
        'ingred_names': '","'.join(stripped_ingreds),
        'dosage_form_enum': row['DosageFormEnums'],
        'admin_route_enum': row['AdminRouteEnums'],
        'additional_info': row['AdditionalInfo'],
    }

    # Each flag column becomes 'False' for 0 and 'True' for a positive value;
    # any other value leaves the key unset.
    flag_columns = (
        ('ReferenceStandard', 'is_ref_std'),
        ('ReferenceDrug', 'is_available_generically'),
    )
    for column, template_key in flag_columns:
        flag = row[column]
        if flag == 0:
            drug_candidates[template_key] = 'False'
        if flag and flag > 0:
            drug_candidates[template_key] = 'True'

    drug_format_map = {
        key: value for key, value in drug_candidates.items() if value
    }
    drug_templater = mcf_template_filler.Filler(DRUG_TEMPLATE,
                                                required_vars=['dcid'])
    mcf_file.write(drug_templater.fill(drug_format_map))
Beispiel #9
0
def parse_strength_nodes(mcf_file, fda_app, row):
    """Write the drug strength node(s) for a row and return their dcids.

    When the CleanStrength and CleanActiveIngredient columns can be zipped,
    the work is delegated to zip_ingred_semi_sep or zip_ingred_comma_sep and
    their result is returned. Otherwise a single drug strength node is written
    to mcf_file, carrying the strengths as a list of quantities and the active
    ingredients as a list of strings.

    Args:
        mcf_file: object with a `write` method that receives the MCF text.
        fda_app: dcid of the FDA Application node the strength belongs to.
        row: mapping providing the columns read below.

    Returns:
        List of strength dcids: the delegate's result, or a one-element list
        holding the single strength node's dcid.
    """
    strength_name = (row['DrugRef'] + '_Strength-' + str(row['ApplNo']) + '-' +
                     str(row['ProductNo']))
    strength_dcid = 'dcid:bio/' + strength_name
    stripped_ingreds = [
        ingred.strip() for ingred in row['CleanActiveIngredient'].split(';')
    ]
    strength_format_map = {
        'strength_dcid': strength_dcid,
        'fda_app_dcid': fda_app,
        'fda_prod_no': str(row['ProductNo']),
        'name': strength_name,
        'ingred_names': '","'.join(stripped_ingreds),
        'te_enums': row['TECodes'],
        'ms_enums': row['MarketStatus'],
        'course_qty': row['DrugCourse'],
        'is_single_dose': row['SingleDose'],
        'sponsor': row['SponsorName'].title(),
        'final_vol_qty': row['FinalVolQty'],
    }

    strengths = row['CleanStrength']
    active_ingreds = row['CleanActiveIngredient']

    if active_ingreds and strengths:
        strength_groups = strengths.split(';')
        ingred_count = len(active_ingreds.split(';'))

        # One semicolon-separated strength group per ingredient.
        if len(strength_groups) == ingred_count:
            return zip_ingred_semi_sep(mcf_file, strength_format_map, row)

        # The first group holds one comma-separated strength per ingredient.
        if strength_groups[0].count(',') == ingred_count - 1:
            return zip_ingred_comma_sep(mcf_file, strength_format_map, row)

    # Columns cannot be zipped: emit one strength node for the whole row.
    strength_format_map['strength_qty'] = get_strength_qtys(strengths)
    non_empty_vars = {
        key: value for key, value in strength_format_map.items() if value
    }
    strength_templater = mcf_template_filler.Filler(STRENGTH_TEMPLATE,
                                                    required_vars=['dcid'])
    mcf_file.write(strength_templater.fill(non_empty_vars))

    return [strength_dcid]