Python boolean_convert Exemples, biothings.utils.dataload.boolean_convert Python Exemples

Exemple #1

0

Afficher le fichier

def restructure_dict(dictionary):
    restr_dict = dict()
    _flag = 0
    for key in list(dictionary):  # this is for 1
        if key == 'molecule_chembl_id':
            restr_dict['_id'] = dictionary[key]
        if key == 'molecule_structures' and type(
                dictionary['molecule_structures']) == dict:
            restr_dict['chembl'] = dictionary
            _flag = 1
            for x, y in iter(dictionary['molecule_structures'].items()):
                if x == 'standard_inchi_key':
                    restr_dict['chembl'].update(dictionary)
                    restr_dict['chembl'].update({'inchi_key': y})
                if x == 'canonical_smiles':
                    restr_dict['chembl']['smiles'] = y
                if x == 'standard_inchi':
                    restr_dict['chembl']['inchi'] = y

    if _flag == 0:
        restr_dict['chembl'] = dictionary
    del restr_dict['chembl']['molecule_structures']
    restr_dict = unlist(restr_dict)
    restr_dict = dict_sweep(restr_dict,
                            vals=[
                                None, ".", "-", "", "NA", "None", "none", " ",
                                "Not Available", "unknown", "null"
                            ])
    restr_dict = value_convert_to_number(
        restr_dict, skipped_keys=["chebi_par_id", "first_approval"])
    restr_dict = boolean_convert(restr_dict, [
        "topical", "oral", "parenteral", "dosed_ingredient", "polymer_flag",
        "therapeutic_flag", "med_chem_friendly", "molecule_properties.ro3_pass"
    ])
    return restr_dict

Exemple #2

0

Afficher le fichier

Fichier : chembl_parser.py Projet : shunsunsun/mychem.info

    def reformat(cls, dictionary):
        ret_dict = dict()
        _flag = 0
        for key in list(dictionary):
            if key == 'molecule_chembl_id':
                ret_dict['_id'] = dictionary[key]
            if key == 'molecule_structures' and type(
                    dictionary['molecule_structures']) == dict:
                ret_dict['chembl'] = dictionary
                _flag = 1
                for x, y in iter(dictionary['molecule_structures'].items()):
                    if x == 'standard_inchi_key':
                        ret_dict['chembl'].update(dictionary)
                        ret_dict['chembl'].update({'inchi_key': y})
                    if x == 'canonical_smiles':
                        ret_dict['chembl']['smiles'] = y
                    if x == 'standard_inchi':
                        ret_dict['chembl']['inchi'] = y

        if _flag == 0:
            ret_dict['chembl'] = dictionary
        if 'cross_references' in ret_dict['chembl'] and ret_dict['chembl'][
                'cross_references']:
            ret_dict['chembl'][
                'xrefs'] = MoleculeCrossReferenceListTransformer.transform_to_dict(
                    ret_dict['chembl']['cross_references'])

        del ret_dict['chembl']['molecule_structures']
        del ret_dict['chembl']['cross_references']

        ret_dict = unlist(ret_dict)

        # Add "CHEBI:" prefix, standardize the way representing CHEBI IDs
        if 'chebi_par_id' in ret_dict['chembl'] and ret_dict['chembl'][
                'chebi_par_id']:
            ret_dict['chembl']['chebi_par_id'] = 'CHEBI:' + str(
                ret_dict['chembl']['chebi_par_id'])
        else:
            # clean, could be a None
            ret_dict['chembl'].pop("chebi_par_id", None)

        ret_dict = dict_sweep(ret_dict,
                              vals=[
                                  None, ".", "-", "", "NA", "None", "none",
                                  " ", "Not Available", "unknown", "null"
                              ])
        ret_dict = value_convert_to_number(
            ret_dict, skipped_keys=["chebi_par_id", "first_approval"])
        ret_dict = boolean_convert(ret_dict, [
            "topical", "oral", "parenteral", "dosed_ingredient",
            "polymer_flag", "therapeutic_flag", "med_chem_friendly",
            "molecule_properties.ro3_pass"
        ])
        return ret_dict

Exemple #3

0

Afficher le fichier

Fichier : chembl_parser.py Projet : SuLab/biothings.drugs

def restructure_dict(dictionary):
    restr_dict = dict()
    _flag = 0
    for key in list(dictionary): # this is for 1
        if key == 'molecule_chembl_id':
            restr_dict['_id']=dictionary[key]
        if key == 'molecule_structures' and type(dictionary['molecule_structures'])==dict:
            restr_dict['chembl'] = dictionary
            _flag=1
            for x,y in iter(dictionary['molecule_structures'].items()):
                if x == 'standard_inchi_key':
                    restr_dict['chembl'].update(dictionary)
                    restr_dict['chembl'].update({'inchi_key':y})
                if x == 'canonical_smiles':
                    restr_dict['chembl']['smiles'] = y
                if x == 'standard_inchi':
                    restr_dict['chembl']['inchi'] = y

    if _flag == 0:
        restr_dict['chembl'] = dictionary
    if 'cross_references' in restr_dict['chembl'] and restr_dict['chembl']['cross_references']:
        restr_dict['chembl']['xrefs'] = restructure_xref(restr_dict['chembl']['cross_references'])

    del restr_dict['chembl']['molecule_structures']
    del restr_dict['chembl']['cross_references']
    restr_dict = unlist(restr_dict)
    # Add "CHEBI:" prefix, standardize the way representing CHEBI IDs
    if 'chebi_par_id' in restr_dict['chembl'] and restr_dict['chembl']['chebi_par_id']:
        restr_dict['chembl']['chebi_par_id'] = 'CHEBI:' + str(restr_dict['chembl']['chebi_par_id'])
    else:
        # clean, could be a None
        restr_dict['chembl'].pop("chebi_par_id",None)

    restr_dict = dict_sweep(restr_dict, vals=[None,".", "-", "", "NA", "None","none", " ", "Not Available", "unknown","null"])
    restr_dict = value_convert_to_number(restr_dict, skipped_keys=["chebi_par_id","first_approval"])
    restr_dict = boolean_convert(restr_dict, ["topical","oral","parenteral","dosed_ingredient","polymer_flag",
        "therapeutic_flag","med_chem_friendly","molecule_properties.ro3_pass"])
    return restr_dict

Exemple #4

0

Afficher le fichier

def restructure_dict(dictionary):
    restr_dict = dict()
    d1 = dict()
    pred_properties_dict = {}
    products_list = []
    categories_list = []
    enzymes_list = []
    targets_list = []
    carriers_list = []
    transporters_list = []
    atccode_list = []

    for key, value in iter(dictionary.items()):
        if key == 'name' and value:
            d1[key] = value

        elif key == 'drugbank-id' and value:
            id_list = []
            if isinstance(value, list):
                for ele in value:
                    if isinstance(ele, collections.OrderedDict):
                        assert "@primary" in ele
                        for x, y in iter(ele.items()):
                            if x == '#text':
                                # make sure we always have DB ID as drugbank_id
                                d1.update({'drugbank_id': y})
                                restr_dict['_id'] = y

                    if isinstance(ele, str):
                        key = key.replace('-', '_')
                        id_list.append(ele)
                        d1.update({'accession_number': id_list})

            elif isinstance(value, dict) or isinstance(
                    value, collections.OrderedDict):
                for x, y in iter(value.items()):
                    if x == '#text':
                        key = key.replace('-', '_')
                        id_list.append(y)
                        d1.update({key: id_list})
                        restr_dict['_id'] = y

        elif key == 'description':
            d1.update({'pharmacology': {key: value}})

        elif key == 'groups':
            for i, j in iter(value.items()):
                d1[key] = j

        elif key == 'indication':
            d1['pharmacology'].update({key: value})

        elif key == 'pharmacodynamics':
            d1['pharmacology'].update({key: value})

        elif key == 'mechanism-of-action':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'toxicity':
            d1['pharmacology'].update({key: value})

        elif key == 'metabolism':
            d1['pharmacology'].update({key: value})

        elif key == 'absorption':
            d1['pharmacology'].update({key: value})

        elif key == 'half-life':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'protein-binding':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'route-of-elimination':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'volume-of-distribution':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'clearance':
            d1['pharmacology'].update({key: value})

        elif key == 'classification' and value:
            for m, n in iter(value.items()):
                m = m.lower().replace('-', '_')
                d1.update({'taxonomy': value})

        elif key == 'salts' and value:
            salts_list = []

            for m, n in iter(value.items()):
                if isinstance(n, list):
                    for ele in n:
                        for k in ele:
                            if k == 'name':
                                salts_list.append(ele[k])
                                d1.update({key: salts_list})

                elif isinstance(n, dict) or isinstance(
                        n, collections.OrderedDict):
                    d1.update({key: n['name']})

        elif key == 'synonyms' and value:
            synonym_list = []
            if isinstance(value, collections.OrderedDict):
                for x, y in iter(value.items()):
                    for ele in y:
                        for name in ele:
                            if name == '#text':
                                synonym_list.append(ele[name])
                                d1.update({key: synonym_list})

        elif key == 'products' and value:

            def restr_product_dict(dictionary):
                products_dict = {}
                for x in dictionary:
                    if x == 'name':
                        products_dict[x] = dictionary[x]
                    elif x == 'dosage-form':
                        products_dict['dosage_form'] = dictionary[x]
                    elif x == 'strength':
                        products_dict[x] = dictionary[x]
                    elif x == 'route':
                        products_dict[x] = dictionary[x]
                    elif x == 'over-the-counter':
                        products_dict['otc'] = dictionary[x]
                    elif x == 'generic':
                        products_dict[x] = dictionary[x]
                    elif x == 'ndc-id':
                        products_dict['ndc_id'] = dictionary[x]
                    elif x == 'ndc-product-code':
                        products_dict['ndc_product_code'] = dictionary[x]
                    elif x == 'dpd-id':
                        products_dict['dpd'] = dictionary[x]
                    elif x == 'started-marketing-on':
                        products_dict[x.replace('-', '_')] = dictionary[x]
                    elif x == 'ended-marketing-on':
                        products_dict[x.replace('-', '_')] = dictionary[x]
                    elif x == 'fda-application-number':
                        products_dict[x.replace('-', '_')] = dictionary[x]
                    elif x == 'approved':
                        products_dict[x] = dictionary[x]
                    elif x == 'country':
                        products_dict[x] = dictionary[x]
                    elif x == 'source':
                        products_dict[x] = dictionary[x]
                return products_dict

            for x, y in iter(value.items()):
                if isinstance(y, dict) or isinstance(y,
                                                     collections.OrderedDict):
                    _d = restr_product_dict(y)
                    products_list.append(_d)

                elif isinstance(y, list):
                    for _d in y:
                        products_list.append(restr_product_dict(_d))

        elif key == 'packagers' and value:
            pack_list = []
            for pack in value:
                for pack1 in value[pack]:
                    for s in pack1:
                        if s == 'name' and pack1[s]:
                            pack_list.append(pack1[s])
                            d1.update({key: pack_list})

        elif key == 'manufacturers' and value:
            manuf_list = []
            for x, y in iter(value.items()):
                if isinstance(y, dict) or isinstance(y,
                                                     collections.OrderedDict):
                    for i in y:
                        if i == '#text':
                            manuf_list.append(y[i])
                            d1.update({key: manuf_list})

                if isinstance(y, list):
                    for i in y:
                        for m, n in iter(i.items()):
                            if m == '#text':
                                manuf_list.append(n)
                                d1.update({key: manuf_list})

        elif key == 'categories' and value:
            for x, y in iter(value.items()):
                d1.update({key: y})

        elif key == "snp-effects" and value:
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == "snp-adverse-drug-reactions" and value:
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'affected-organisms' and value:
            for x, y in iter(value.items()):
                key = key.replace('-', '_')
                d1['pharmacology'].update({key: value["affected-organism"]})

        elif key == 'ahfs-codes' and value:
            for x in value:
                key = key.replace('-', '_')
                d1.update({key: value[x]})

        elif key == 'food-interactions' and value:
            food_interaction_list = []
            for x, y in iter(value.items()):
                if isinstance(y, list):
                    key = key.replace('-', '_')
                    for i in y:
                        food_interaction_list.append(i)
                        d1.update({key: food_interaction_list})
                else:
                    d1.update({key: y})

        elif key == 'drug-interactions' and value:
            key = key.replace('-', '_')
            for x, y in iter(value.items()):
                d1.update({key: y})

        elif key == 'sequences' and value:
            for x, y in iter(value.items()):
                for i in y:
                    if i == '@format':
                        str1 = y[i] + '_sequences'
                        d1[str1] = y['#text'].replace('\n', ' ')

        elif key == 'experimental-properties' and value:
            d1_exp_properties = {}

            def restr_properties_dict(dictionary):
                for x, y in iter(dictionary.items()):
                    k1 = dictionary['kind']
                    k1 = k1.lower().replace(' ', '_').replace('-', '_')
                    if k1 == "isoelectric_point":
                        # make sure value are floats, if intervals, then list(float)
                        try:
                            d1_exp_properties[k1] = float(dictionary['value'])
                        except ValueError:
                            # not a float, maybe a range ? "5.6 - 7.6"
                            vals = dictionary['value'].split("-")
                            try:
                                for i, val in enumerate([v for v in vals]):
                                    vals[i] = float(val)
                                logging.info("Document ID '%s' has a range " % restr_dict["_id"] + \
                                             "as isoelectric_point: %s" % vals)
                                d1_exp_properties[k1] = vals
                            except ValueError as e:
                                # not something we can handle, skip it
                                logging.warning("Document ID '%s' has non-convertible " % restr_dict["_id"] + \
                                                " value for isoelectric_point, field ignored: %s" % dictionary['value'])
                                continue
                    else:
                        d1_exp_properties[k1] = dictionary['value']
                return d1_exp_properties

            for ele in value:
                key = key.replace('-', '_')
                if isinstance(value[ele], list):
                    for _d in value[ele]:
                        _d = restr_properties_dict(_d)
                        d1.update({key: _d})

                if isinstance(value[ele], dict) or isinstance(
                        value[ele], collections.OrderedDict):
                    _d = restr_properties_dict(value[ele])
                    d1.update({key: _d})

        elif key == 'calculated-properties' and value:

            def restr_properties_dict(dictionary):
                for x in dictionary:
                    k = dictionary['kind']
                    k = k.lower().replace(' ', '_').replace('-', '_')
                    pred_properties_dict[k] = dictionary['value']

                    if dictionary['kind'] == "IUPAC Name":
                        d1.update({'iupac': dictionary['value']})
                    elif dictionary['kind'] == "SMILES":
                        d1.update({'smiles': dictionary['value']})
                    elif dictionary['kind'] == "Molecular Formula":
                        d1.update({'formula': dictionary['value']})
                    elif dictionary['kind'] == "InChI":
                        d1.update({'inchi': dictionary['value']})
                    elif dictionary['kind'] == "InChIKey":
                        if dictionary['value'][0:9] == 'InChIKey=':
                            d1.update({'inchi_key': dictionary['value'][9:]})
                        else:
                            d1.update({'inchi_key': dictionary['value']})
                    elif dictionary['kind'] == "Molecular Weight":
                        d1.update({'weight': {'average': dictionary['value']}})
                    elif dictionary['kind'] == "Monoisotopic Weight":
                        d1['weight'].update(
                            {'monoisotopic': dictionary['value']})

            for x, y in iter(value.items()):
                if isinstance(y, list):
                    for _d in y:
                        _d = restr_properties_dict(_d)

                if isinstance(y, dict) or isinstance(y,
                                                     collections.OrderedDict):
                    _d = restr_properties_dict(y)

        elif key == 'external-identifiers' and value:
            for ele in value['external-identifier']:
                for x in ele:
                    if x == 'resource':
                        if ele[x] == "Drugs Product Database (DPD)":
                            d1['dpd'] = ele['identifier']
                        elif ele[x] == "KEGG Drug":
                            d1['kegg_drug'] = ele['identifier']
                        elif ele[x] == "KEGG Compound":
                            d1['kegg_compound'] = ele['identifier']
                        elif ele[x] == "National Drug Code Directory":
                            d1['ndc_directory'] = ele['identifier']
                        elif ele[x] == "PharmGKB":
                            d1['pharmgkb'] = ele['identifier']
                        elif ele[x] == "UniProtKB":
                            d1['uniprotkb'] = ele['identifier']
                        elif ele[x] == "Wikipedia":
                            d1['wikipedia'] = ele['identifier']
                        elif ele[x] == "ChemSpider":
                            d1['chemspider'] = ele['identifier']
                        elif ele[x] == "ChEBI":
                            d1['chebi'] = ele['identifier']
                        elif ele[x] == "PubChem Compound":
                            d1['pubchem_compound'] = ele['identifier']
                        elif ele[x] == "PubChem Substance":
                            d1['pubchem_substance'] = ele['identifier']
                        elif ele[x] == "UniProtKB":
                            d1['uniprotkb'] = ele['identifier']
                        elif ele[x] == "GenBank":
                            d1['genbank'] = ele['identifier']
                        else:
                            source = ele[x].lower().replace('-', '_').replace(
                                ' ', '_')
                            d1[source] = ele['identifier']

        elif key == 'external-links' and value:
            if isinstance(value['external-link'], list):
                for ele in value['external-link']:
                    for x in ele:
                        try:
                            resource = ele['resource']
                            d1[resource.lower().replace('.', '_')] = ele['url']
                        except:
                            pass
            else:
                try:
                    resource = ele['resource']
                    d1[resource.lower().replace('.', '_')] = ele['url']
                except:
                    pass

        elif key == 'patents' and value:
            if isinstance(value, dict):
                for x in value:
                    d1.update({key: value[x]})

        elif key == 'international-brands' and value:
            key = key.lower().replace('-', '_')
            d1.update({key: value['international-brand']})

        elif key == 'mixtures' and value:
            d1.update({key: value['mixture']})

        elif key == 'pathways' and value:
            _li = []

            def restr_pathway_dict(dictionary):
                _dict = {}
                for x, y in iter(dictionary.items()):
                    if x == 'smpdb-id':
                        _dict.update({'smpdb_id': y})
                    elif x == 'name':
                        _dict.update({x: y})
                    elif x == 'drugs':
                        _dict.update({x: y['drug']})
                    elif x == 'enzymes':
                        _dict.update({x: y})
                return _dict

            if isinstance(value['pathway'], list):
                for ele in value['pathway']:
                    _dict = restr_pathway_dict(ele)
                    _li.append(_dict)
                    d1.update({key: _li})

            elif isinstance(value['pathway'], dict) or isinstance(
                    value['pathway'], OrderedDict):
                _dict = restr_pathway_dict(value['pathway'])
                d1.update({key: _dict})

        elif key == 'targets' and value:
            if isinstance(value['target'], list):
                for dictionary in value['target']:
                    _dict = restr_protein_dict(dictionary)
                    targets_list.append(_dict)

            elif isinstance(value['target'], dict) or isinstance(
                    value['target'], OrderedDict):
                _dict = restr_protein_dict(value['target'])
                targets_list.append(_dict)

        elif key == 'enzymes' and value:
            if isinstance(value['enzyme'], list):
                for dictionary in value['enzyme']:
                    _dict = restr_protein_dict(dictionary)
                    enzymes_list.append(_dict)

            elif isinstance(value['enzyme'], dict) or isinstance(
                    value['enzyme'], OrderedDict):
                _dict = restr_protein_dict(value['enzyme'])
                enzymes_list.append(_dict)

        elif key == 'transporters' and value:
            if isinstance(value['transporter'], list):
                for dictionary in value['transporter']:
                    _dict = restr_protein_dict(dictionary)
                    transporters_list.append(_dict)

            elif isinstance(value['transporter'], dict) or isinstance(
                    value['transporter'], OrderedDict):
                _dict = restr_protein_dict(value['transporter'])
                transporters_list.append(_dict)

        elif key == 'carriers' and value:
            if isinstance(value['carrier'], list):
                for dictionary in value['carrier']:
                    _dict = restr_protein_dict(dictionary)
                    carriers_list.append(_dict)

            elif isinstance(value['carrier'], dict) or isinstance(
                    value['carrier'], OrderedDict):
                _dict = restr_protein_dict(value['carrier'])
                carriers_list.append(_dict)

        elif key == 'atc-codes' and value:

            def restr_atccode_dict(dictionary):
                for x in dictionary:
                    if x == '@code':
                        atccode_list.append(dictionary[x])
                return atccode_list

            if isinstance(value['atc-code'], list):
                for _d in value['atc-code']:
                    restr_atccode_dict(_d)

            elif isinstance(value['atc-code'], dict) or isinstance(
                    value['atc-code'], OrderedDict):
                restr_atccode_dict(value['atc-code'])

    d1['atc_codes'] = atccode_list
    d1['targets'] = targets_list
    d1['carriers'] = carriers_list
    d1['enzymes'] = enzymes_list
    d1['transporters'] = transporters_list
    d1['predicted_properties'] = pred_properties_dict
    d1['products'] = products_list
    restr_dict['drugbank'] = d1
    restr_dict = unlist(restr_dict)
    restr_dict = dict_sweep(restr_dict,
                            vals=[
                                None, math.inf, "INF", ".", "-", "", "NA",
                                "none", " ", "Not Available", "unknown",
                                "null", "None"
                            ])
    if restr_dict["drugbank"].get(
            'inchi_key') == "IOFPEOPOAMOMBE-MRVPVSSYSA-N":
        print(repr(restr_dict["drugbank"].get("pdb")))
    restr_dict = boolean_convert(restr_dict, [
        "predicted_properties.mddr_like_rule",
        "predicted_properties.bioavailability",
        "predicted_properties.ghose_filter",
        "predicted_properties.rule_of_five", "products.generic",
        "products.otc", "products.approved", "products.pediatric-extension"
    ])
    restr_dict = value_convert_to_number(restr_dict,
                                         skipped_keys=[
                                             "dpd", "chemspider", "chebi",
                                             "pubchem_compound",
                                             "pubchem_substance", "bindingdb"
                                         ])
    return restr_dict

Exemple #5

0

Afficher le fichier

def restructure_dict(dictionary):
    restr_dict = dict()
    d1 = dict()
    pred_properties_dict = {}
    exp_prop_dict = {}
    products_list = []
    categories_list = []
    enzymes_list = []
    targets_list = []
    carriers_list = []
    transporters_list = []
    atccode_list = []
    xrefs_dict = {}
    xrefs_pubchem_dict = {}

    for key, value in iter(dictionary.items()):
        if key == 'name' and value:
            d1[key] = value

        elif key == 'drugbank-id' and value:
            # NOTE: 'drugbank.drugbank_id' has been moved to 'drugbank.id'
            key = 'id'
            id_list = []
            if isinstance(value, list):
                for ele in value:
                    if isinstance(ele, collections.OrderedDict):
                        assert "@primary" in ele
                        for x, y in iter(ele.items()):
                            if x == '#text':
                                # make sure we always have DB ID as drugbank_id (now 'id')
                                d1.update({key: y})
                                restr_dict['_id'] = y

                    if isinstance(ele, str):
                        id_list.append(ele)
                        d1.update({'accession_number': id_list})

            elif isinstance(value, dict) or isinstance(
                    value, collections.OrderedDict):
                for x, y in iter(value.items()):
                    if x == '#text':
                        id_list.append(y)
                        d1.update({key: id_list})
                        restr_dict['_id'] = y

        elif key == 'description':
            d1.update({'pharmacology': {key: value}})

        elif key == 'groups':
            for i, j in iter(value.items()):
                d1[key] = j

        elif key == 'indication':
            d1['pharmacology'].update({key: value})

        elif key == 'pharmacodynamics':
            d1['pharmacology'].update({key: value})

        elif key == 'mechanism-of-action':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'toxicity':
            d1['pharmacology'].update({key: value})

        elif key == 'metabolism':
            d1['pharmacology'].update({key: value})

        elif key == 'absorption':
            d1['pharmacology'].update({key: value})

        elif key == 'half-life':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'protein-binding':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'route-of-elimination':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'volume-of-distribution':
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'clearance':
            d1['pharmacology'].update({key: value})

        elif key == 'classification' and value:
            for m, n in iter(value.items()):
                m = m.lower().replace('-', '_')
                d1.update({'taxonomy': value})

        elif key == 'salts' and value:
            salts_list = []

            for m, n in iter(value.items()):
                if isinstance(n, list):
                    for ele in n:
                        for k in ele:
                            if k == 'name':
                                salts_list.append(ele[k])
                                d1.update({key: salts_list})

                elif isinstance(n, dict) or isinstance(
                        n, collections.OrderedDict):
                    d1.update({key: n['name']})

        elif key == 'synonyms' and value:
            synonym_list = []
            if isinstance(value, collections.OrderedDict):
                for x, y in iter(value.items()):
                    for ele in y:
                        for name in ele:
                            if name == '#text':
                                synonym_list.append(ele[name])
                                d1.update({key: synonym_list})

        elif key == 'products' and value:

            def restr_product_dict(dictionary):
                products_dict = {}
                for x in dictionary:
                    if x == 'name':
                        products_dict[x] = dictionary[x]
                    elif x == 'dosage-form':
                        products_dict['dosage_form'] = dictionary[x]
                    elif x == 'strength':
                        products_dict[x] = dictionary[x]
                    elif x == 'route':
                        products_dict[x] = dictionary[x]
                    elif x == 'over-the-counter':
                        products_dict['otc'] = dictionary[x]
                    elif x == 'generic':
                        products_dict[x] = dictionary[x]
                    elif x == 'ndc-id':
                        products_dict['ndc_id'] = dictionary[x]
                    elif x == 'ndc-product-code':
                        products_dict['ndc_product_code'] = dictionary[x]
                    elif x == 'dpd-id':
                        products_dict['dpd'] = dictionary[x]
                    elif x == 'started-marketing-on':
                        products_dict[x.replace('-', '_')] = dictionary[x]
                    elif x == 'ended-marketing-on':
                        products_dict[x.replace('-', '_')] = dictionary[x]
                    elif x == 'fda-application-number':
                        products_dict[x.replace('-', '_')] = dictionary[x]
                    elif x == 'approved':
                        products_dict[x] = dictionary[x]
                    elif x == 'country':
                        products_dict[x] = dictionary[x]
                    elif x == 'source':
                        products_dict[x] = dictionary[x]
                return products_dict

            for x, y in iter(value.items()):
                if isinstance(y, dict) or isinstance(y,
                                                     collections.OrderedDict):
                    _d = restr_product_dict(y)
                    products_list.append(_d)

                elif isinstance(y, list):
                    for _d in y:
                        products_list.append(restr_product_dict(_d))

        elif key == 'packagers' and value:
            pack_list = []
            for pack in value:
                for pack1 in value[pack]:
                    for s in pack1:
                        if s == 'name' and pack1[s]:
                            pack_list.append(pack1[s])
                            d1.update({key: pack_list})

        elif key == 'manufacturers' and value:
            manuf_list = []
            for x, y in iter(value.items()):
                if isinstance(y, dict) or isinstance(y,
                                                     collections.OrderedDict):
                    for i in y:
                        if i == '#text':
                            manuf_list.append(y[i])
                            d1.update({key: manuf_list})

                if isinstance(y, list):
                    for i in y:
                        for m, n in iter(i.items()):
                            if m == '#text':
                                manuf_list.append(n)
                                d1.update({key: manuf_list})

        elif key == 'categories' and value:
            for x, y in iter(value.items()):
                d1.update({key: y})

        elif key == "snp-effects" and value:
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})
        elif key == "snp-adverse-drug-reactions" and value:
            key = key.replace('-', '_')
            d1['pharmacology'].update({key: value})

        elif key == 'affected-organisms' and value:
            for x, y in iter(value.items()):
                key = key.replace('-', '_')
                d1['pharmacology'].update({key: value["affected-organism"]})

        elif key == 'ahfs-codes' and value:
            for x in value:
                key = key.replace('-', '_')
                xrefs_dict.update({key: value[x]})

        elif key == 'food-interactions' and value:
            food_interaction_list = []
            for x, y in iter(value.items()):
                if isinstance(y, list):
                    key = key.replace('-', '_')
                    for i in y:
                        food_interaction_list.append(i)
                        d1.update({key: food_interaction_list})
                else:
                    d1.update({key: y})

        elif key == 'drug-interactions' and value:
            key = key.replace('-', '_')
            for x, y in iter(value.items()):
                d1.update({key: y})

        elif key == 'sequences' and value:
            for x, y in iter(value.items()):
                for i in y:
                    if i == '@format':
                        str1 = y[i] + '_sequences'
                        d1[str1] = y['#text'].replace('\n', ' ')

        elif key == 'experimental-properties' and value:
            key = 'experimental_properties'

            def restr_properties_dict(dictionary):
                # Note: the side effect of this function sets a global variable
                for x, y in iter(dictionary.items()):
                    k1 = dictionary['kind']
                    k1 = k1.lower().replace(' ', '_').replace('-', '_')
                    if k1 == "isoelectric_point":
                        # make sure value are floats, if intervals, then list(float)
                        try:
                            exp_prop_dict[k1] = float(dictionary['value'])
                        except ValueError:
                            # not a float, maybe a range ? "5.6 - 7.6"
                            vals = dictionary['value'].split("-")
                            try:
                                for i, val in enumerate([v for v in vals]):
                                    vals[i] = float(val)
                                logging.info("Document ID '%s' has a range " % restr_dict["_id"] + \
                                             "as isoelectric_point: %s" % vals)
                                exp_prop_dict[k1] = vals
                            except ValueError as e:
                                # not something we can handle, skip it
                                logging.warning("Document ID '%s' has non-convertible " % restr_dict["_id"] + \
                                                " value for isoelectric_point, field ignored: %s" % dictionary['value'])
                                continue
                    else:
                        exp_prop_dict[k1] = dictionary['value']

            for ele in value:
                if isinstance(value[ele], list):
                    for _d in value[ele]:
                        restr_properties_dict(_d)

                elif isinstance(value[ele], dict) or isinstance(
                        value[ele], collections.OrderedDict):
                    restr_properties_dict(value[ele])

                else:
                    raise ValueError(
                        "Unexpted type for 'experimental-properties'")

        elif key == 'calculated-properties' and value:

            def restr_properties_dict(dictionary):
                for x in dictionary:
                    k = dictionary['kind']
                    k = k.lower().replace(' ', '_').replace('-', '_')
                    if dictionary['kind'] == "IUPAC Name":
                        d1.update({'iupac': dictionary['value']})
                    elif dictionary['kind'] == "Traditional IUPAC Name":
                        d1.update(
                            {'traditional_iupac_name': dictionary['value']})
                    elif dictionary['kind'] == "SMILES":
                        d1.update({'smiles': dictionary['value']})
                    elif dictionary['kind'] == "Molecular Formula":
                        d1.update({'formula': dictionary['value']})
                    elif dictionary['kind'] == "InChI":
                        d1.update({'inchi': dictionary['value']})
                    elif dictionary['kind'] == "InChIKey":
                        if dictionary['value'][0:9] == 'InChIKey=':
                            d1.update({'inchi_key': dictionary['value'][9:]})
                        else:
                            d1.update({'inchi_key': dictionary['value']})
                    elif dictionary['kind'] == "Molecular Weight":
                        d1.update({'weight': {'average': dictionary['value']}})
                    elif dictionary['kind'] == "Monoisotopic Weight":
                        d1['weight'].update(
                            {'monoisotopic': dictionary['value']})
                    else:
                        pred_properties_dict[k] = dictionary['value']

            for x, y in iter(value.items()):
                if isinstance(y, list):
                    for _d in y:
                        _d = restr_properties_dict(_d)

                if isinstance(y, dict) or isinstance(y,
                                                     collections.OrderedDict):
                    _d = restr_properties_dict(y)

        elif key == 'external-identifiers' and value:
            kegg_dict = {}
            for ele in value['external-identifier']:
                for x in ele:
                    if x == 'resource':
                        if ele[x] == "Drugs Product Database (DPD)":
                            xrefs_dict['dpd'] = ele['identifier']
                        elif ele[x] == "KEGG Drug":
                            kegg_dict['did'] = ele['identifier']
                            xrefs_dict['kegg'] = kegg_dict
                        elif ele[x] == "KEGG Compound":
                            kegg_dict['cid'] = ele['identifier']
                            xrefs_dict['kegg'] = kegg_dict
                        elif ele[x] == "PharmGKB":
                            xrefs_dict['pharmgkb'] = ele['identifier']
                        elif ele[x] == "Wikipedia":
                            wiki_dict = {'url_stub': ele['identifier']}
                            xrefs_dict['wikipedia'] = wiki_dict
                        elif ele[x] == "ChemSpider":
                            xrefs_dict['chemspider'] = ele['identifier']
                        elif ele[x] == "ChEBI":
                            xrefs_dict['chebi'] = 'CHEBI:' + str(
                                ele['identifier'])
                        elif ele[x] == "PubChem Compound":
                            xrefs_pubchem_dict['cid'] = ele['identifier']
                        elif ele[x] == "PubChem Substance":
                            xrefs_pubchem_dict['sid'] = ele['identifier']
                        else:
                            source = ele[x].lower().replace('-', '_').replace(
                                ' ', '_')
                            xrefs_dict[source] = ele['identifier']

        elif key == 'external-links' and value:
            if isinstance(value['external-link'], list):
                for ele in value['external-link']:
                    for x in ele:
                        try:
                            resource = ele['resource']
                            xrefs_dict[resource.lower().replace(
                                '.', '_')] = ele['url']
                        except:
                            pass
            else:
                try:
                    resource = ele['resource']
                    d1[resource.lower().replace('.', '_')] = ele['url']
                except:
                    pass

        elif key == 'patents' and value:
            if isinstance(value, dict):
                for x in value:
                    d1.update({key: value[x]})

        elif key == 'international-brands' and value:
            key = key.lower().replace('-', '_')
            d1.update({key: value['international-brand']})

        elif key == 'mixtures' and value:
            # only accept mixture entries that have more than 1 ingredient
            mixture_lst = []
            for mixture in value['mixture']:
                if isinstance(mixture, collections.OrderedDict):
                    if '+' in mixture['ingredients']:
                        ingredient_lst = mixture['ingredients'].split(" + ")
                        mixture['ingredients'] = ingredient_lst
                        mixture_lst.append(mixture)
            if mixture_lst:
                ### remove duplicates from the mixture_lst
                # convert 'ingredients' to tuples
                for m in mixture_lst:
                    m['ingredients'] = tuple(m['ingredients'])
                # remove duplicates by converting each dict to key-value tuple
                unique_mix_lst = [
                    dict(t) for t in {tuple(d.items())
                                      for d in mixture_lst}
                ]
                # convert 'ingredients' back to list
                for m in unique_mix_lst:
                    m['ingredients'] = list(m['ingredients'])

                # save results
                d1.update({key: unique_mix_lst})

        elif key == 'pathways' and value:
            _li = []

            def restr_pathway_dict(dictionary):
                _dict = {}
                for x, y in iter(dictionary.items()):
                    if x == 'smpdb-id':
                        _dict.update({'smpdb_id': y})
                    elif x == 'name':
                        _dict.update({x: y})
                    elif x == 'drugs':
                        pass
                    elif x == 'enzymes':
                        pass
                return _dict

            if isinstance(value['pathway'], list):
                for ele in value['pathway']:
                    _dict = restr_pathway_dict(ele)
                    _li.append(_dict)
                    d1.update({key: _li})

            elif isinstance(value['pathway'], dict) or isinstance(
                    value['pathway'], OrderedDict):
                _dict = restr_pathway_dict(value['pathway'])
                _li.append(_dict)
                d1.update({key: _li})

        elif key == 'targets' and value:
            if isinstance(value['target'], list):
                for dictionary in value['target']:
                    _dict = restr_protein_dict(dictionary)
                    targets_list.append(_dict)

            elif isinstance(value['target'], dict) or isinstance(
                    value['target'], OrderedDict):
                _dict = restr_protein_dict(value['target'])
                targets_list.append(_dict)

        elif key == 'enzymes' and value:
            if isinstance(value['enzyme'], list):
                for dictionary in value['enzyme']:
                    _dict = restr_protein_dict(dictionary)
                    enzymes_list.append(_dict)

            elif isinstance(value['enzyme'], dict) or isinstance(
                    value['enzyme'], OrderedDict):
                _dict = restr_protein_dict(value['enzyme'])
                enzymes_list.append(_dict)

        elif key == 'transporters' and value:
            if isinstance(value['transporter'], list):
                for dictionary in value['transporter']:
                    _dict = restr_protein_dict(dictionary)
                    transporters_list.append(_dict)

            elif isinstance(value['transporter'], dict) or isinstance(
                    value['transporter'], OrderedDict):
                _dict = restr_protein_dict(value['transporter'])
                transporters_list.append(_dict)

        elif key == 'carriers' and value:
            if isinstance(value['carrier'], list):
                for dictionary in value['carrier']:
                    _dict = restr_protein_dict(dictionary)
                    carriers_list.append(_dict)

            elif isinstance(value['carrier'], dict) or isinstance(
                    value['carrier'], OrderedDict):
                _dict = restr_protein_dict(value['carrier'])
                carriers_list.append(_dict)

        elif key == 'atc-codes' and value:

            def restr_atccode_dict(dictionary):
                for x in dictionary:
                    if x == '@code':
                        atccode_list.append(dictionary[x])
                return atccode_list

            if isinstance(value['atc-code'], list):
                for _d in value['atc-code']:
                    restr_atccode_dict(_d)

            elif isinstance(value['atc-code'], dict) or isinstance(
                    value['atc-code'], OrderedDict):
                restr_atccode_dict(value['atc-code'])

    xrefs_dict['atc_codes'] = atccode_list
    d1['targets'] = targets_list
    d1['carriers'] = carriers_list
    d1['enzymes'] = enzymes_list
    d1['transporters'] = transporters_list
    d1['predicted_properties'] = pred_properties_dict
    d1['experimental_properties'] = exp_prop_dict
    d1['products'] = products_list
    if xrefs_pubchem_dict:
        xrefs_dict['pubchem'] = xrefs_pubchem_dict
    d1['xrefs'] = xrefs_dict
    restr_dict['drugbank'] = d1
    restr_dict = unlist(restr_dict, [
        "drugbank.accession_number", "drugbank.id", "drugbank.chebi",
        "drugbank.inchi"
    ], [])
    restr_dict = boolean_convert(restr_dict, [
        "patents.pediatric-extension", "predicted_properties.mddr_like_rule",
        "predicted_properties.bioavailability",
        "predicted_properties.ghose_filter",
        "predicted_properties.rule_of_five", "products.generic",
        "products.otc", "products.approved", "products.pediatric-extension"
    ])
    # 'int' types
    restr_dict = int_convert(
        restr_dict,
        include_keys=[
            "drugbank.pharmacology.snp_adverse_drug_reactions.reaction.pubmed-id",
            "drugbank.pharmacology.snp_effects.effect.pubmed-id",
            "drugbank.predicted_properties.physiological_charge",
            "drugbank.predicted_properties.rotatable_bond_count",
            "drugbank.predicted_properties.h_bond_acceptor_count",
            "drugbank.predicted_properties.h_bond_donor_count",
            "drugbank.predicted_properties.number_of_rings",
            "drugbank.xrefs.guide_to_pharmacology", "drugbank.xrefs.iuphar"
        ])
    # 'float' types
    restr_dict = float_convert(
        restr_dict,
        include_keys=[
            "drugbank.experimental_properties.caco2_permeability",
            "drugbank.experimental_properties.molecular_weight",
            "drugbank.experimental_properties.hydrophobicity",
            "drugbank.weight.monoisotopic", "drugbank.weight.average",
            "drugbank.predicted_properties.molecular_weight",
            "drugbank.predicted_properties.monoisotopic_weight"
        ])
    # Mixed types coerced to floats
    restr_dict = float_convert(
        restr_dict,
        include_keys=[
            "drugbank.experimental_properties.logp",
            "drugbank.experimental_properties.logs",
            "drugbank.predicted_properties.logp",
            "drugbank.predicted_properties.logs",
            "drugbank.predicted_properties.pka_(strongest_basic)",
            "drugbank.predicted_properties.pka_(strongest_acidic)",
            "drugbank.predicted_properties.refractivity",
            "drugbank.predicted_properties.polarizability",
            "drugbank.predicted_properties.polar_surface_area_(psa)"
        ])
    restr_dict = dict_sweep(restr_dict,
                            vals=[
                                None, math.inf, "INF", ".", "-", "", "NA",
                                "none", " ", "Not Available", "unknown",
                                "null", "None"
                            ])
    return restr_dict