class DrugBankUploader(BaseDrugUploader):
    """
    DrugBankUploader - biothings uploader class for DrugBank

    Loads the single DrugBank XML dump found in the data folder (unzipping
    the archives in that folder first when no XML file is present), passes
    the documents through the key lookup and the field-exclusion wrappers,
    and creates MongoDB indexes once the upload is finished.
    """

    name = "drugbank"
    storage_class = storage.IgnoreDuplicatedStorage
    __metadata__ = {"src_meta": SRC_META}
    # See the comment on the ExcludeFieldsById for use of this class.
    exclude_fields = ExcludeFieldsById(exclusion_ids, [
        "drugbank.drug_interactions", "drugbank.products", "drugbank.mixtures"
    ])
    keylookup = MyChemKeyLookup(
        [
            ("inchikey", "drugbank.inchi_key"),
            ("drugbank", "drugbank.id"),
            # the following keys could possibly be used to look up
            # 'inchikey' or 'unii'
            ("chebi", "drugbank.xrefs.chebi"),
            ("chembl", "drugbank.xrefs.chembl"),
            ("pubchem", "drugbank.xrefs.pubchem.cid"),
            ("inchi", "drugbank.inchi"),
            # can be used to look up unii.
            # NOTE(review): the original comment said "disabled for now",
            # yet this entry is active -- confirm which one is intended.
            ("drugname", "drugbank.name"),
        ],
        copy_from_doc=True)

    def load_data(self, data_folder):
        """load_data from data source

        Args:
            data_folder: folder holding the DrugBank XML file, or the
                archive it is extracted from.

        Returns:
            Whatever the module-level ``load_data`` callable, wrapped by
            ``keylookup`` and ``exclude_fields``, returns for the XML file.

        Raises:
            AssertionError: if the folder does not contain exactly one XML
                file (after unzipping), or if that file does not exist.
        """
        xml_pattern = os.path.join(data_folder, "*.xml")
        xmlfiles = glob.glob(xml_pattern)
        if not xmlfiles:
            self.logger.info("Unzipping drugbank archive")
            unzipall(data_folder)
            self.logger.info("Load data from '%s'", data_folder)
            xmlfiles = glob.glob(xml_pattern)
        # Explicit raises instead of ``assert`` so that the checks survive
        # ``python -O``; AssertionError is kept for backward compatibility.
        if len(xmlfiles) != 1:
            raise AssertionError(
                "Expecting one xml file, got %s" % repr(xmlfiles))
        input_file = xmlfiles.pop()
        if not os.path.exists(input_file):
            raise AssertionError("Can't find input file '%s'" % input_file)
        # ``load_data`` here is the module-level callable, not this method.
        wrapped_loader = self.exclude_fields(
            self.keylookup(load_data, debug=True))
        return wrapped_loader(input_file)

    def post_update_data(self, *args, **kwargs):
        # pylint: disable=W0613
        """create indexes following upload"""
        # NOTE(review): the key lookup reads the ChEBI id from
        # 'drugbank.xrefs.chebi' while 'drugbank.chebi' is indexed here --
        # confirm the indexed path is the intended one.
        for idxname in ["drugbank.id", "drugbank.chebi", "drugbank.inchi"]:
            self.logger.info("Indexing '%s'", idxname)
            # background=true or it'll lock the whole database...
            self.collection.create_index([(idxname, pymongo.HASHED)],
                                         background=True)
        # A hashed index won't support arrays; the values are small enough
        # for a standard index. Built in the background as well, for the
        # same locking reason as above.
        self.collection.create_index("drugbank.products.ndc_product_code",
                                     background=True)

    @classmethod
    def get_mapping(cls):
        """return mapping information for drugbank"""
        return drugbank_mapping
Example #2
0
class NDCUploader(BaseDrugUploader):
    """
    NDCUploader - Biothings Uploader class for NDC
    """
    name = "ndc"
    storage_class = (storage.RootKeyMergerStorage, storage.CheckSizeStorage)
    __metadata__ = {"src_meta": SRC_META}
    keylookup = MyChemKeyLookup([
        ("ndc", "ndc.productndc"),
        ("drugname", "ndc.nonproprietaryname"),
    ])
    # See the comment on the ExcludeFieldsById for use of this class.
    exclude_fields = ExcludeFieldsById(exclusion_ids, ["ndc"])

    def load_data(self, data_folder):
        """Load the NDC documents found in ``data_folder``.

        The module-level ``load_data`` callable (not this method) is wrapped
        by ``keylookup`` first and by ``exclude_fields`` second; the wrapped
        callable is then invoked with ``data_folder`` and its result returned.
        """
        with_lookup = self.keylookup(load_data)
        with_exclusions = self.exclude_fields(with_lookup)
        return with_exclusions(data_folder)

    @classmethod
    def get_mapping(cls):
        """Return the mapping of the ``ndc`` fields.

        A brand-new dictionary, sharing no sub-dictionary with previous
        results, is assembled on every call.
        """

        def keyword(**extra):
            # Keyword field using the lowercase normalizer; ``extra`` entries
            # (e.g. copy_to) are appended after the two base keys.
            return dict(normalizer="keyword_lowercase_normalizer",
                        type="keyword", **extra)

        def text():
            # Plain text field.
            return {"type": "text"}

        package_fields = {
            "packagedescription": keyword(),
            "ndcpackagecode": text(),
        }
        ndc_fields = {
            "product_id": keyword(),
            "productndc": text(),
            "producttypename": keyword(),
            "proprietaryname": keyword(),
            "proprietarynamesuffix": keyword(),
            "nonproprietaryname": keyword(),
            "dosageformname": keyword(),
            "routename": keyword(),
            "startmarketingdate": text(),
            "endmarketingdate": text(),
            "marketingcategoryname": keyword(),
            "applicationnumber": keyword(),
            "labelername": keyword(),
            "substancename": keyword(copy_to=["all"]),
            "active_numerator_strength": text(),
            "active_ingred_unit": keyword(),
            "pharm_classes": keyword(),
            "deaschedule": keyword(),
            "package": {"properties": package_fields},
        }
        return {"ndc": {"properties": ndc_fields}}
Example #3
0
class ChebiUploader(BaseDrugUploader):
    """
    ChebiUploader - biothings uploader class for ChEBI

    Loads ``ChEBI_complete.sdf`` from the data folder once the 'drugbank'
    and 'chembl' source collections are populated, and indexes ``chebi.id``
    after the upload.
    """

    name = "chebi"
    #storage_class = storage.IgnoreDuplicatedStorage
    storage_class = storage.RootKeyMergerStorage
    __metadata__ = {"src_meta": SRC_META}
    # Not applied in load_data (see the comment there); kept as declared.
    keylookup = MyChemKeyLookup(
        [
            ('inchikey', 'chebi.inchikey'),
            ('drugbank', 'chebi.xrefs.drugbank'),
            ('chebi', 'chebi.id'),
        ],
        copy_from_doc=True)
    # See the comment on the ExcludeFieldsById for use of this class.
    exclude_fields = ExcludeFieldsById(exclusion_ids, [
        "chebi.xrefs.intenz",
        "chebi.xrefs.rhea",
        "chebi.xrefs.uniprot",
        "chebi.xrefs.sabio_rk",
        "chebi.xrefs.patent",
    ])

    def load_data(self, data_folder):
        """Load the ChEBI documents from ``ChEBI_complete.sdf``.

        Args:
            data_folder: folder containing ``ChEBI_complete.sdf``.

        Returns:
            Whatever the module-level ``load_data`` callable, wrapped by
            ``exclude_fields``, returns for the SDF file.

        Raises:
            AssertionError: if the 'drugbank' or 'chembl' source collection
                is empty, or if the SDF file does not exist.
        """
        self.logger.info("Load data from '%s'", data_folder)
        input_file = os.path.join(data_folder, "ChEBI_complete.sdf")
        # The other source collections are required for inchi key
        # conversion. Emptiness is probed with find_one() because
        # Collection.count() is deprecated and removed in PyMongo 4; only
        # ``_id`` is projected so no full document gets transferred.
        # Explicit raises (same exception type and messages as the former
        # asserts) keep the checks active under ``python -O``.
        src_db = get_src_db()
        for required in ("drugbank", "chembl"):
            if src_db[required].find_one({}, {"_id": 1}) is None:
                raise AssertionError(
                    "'%s' collection is empty (required for inchikey "
                    "conversion). Please run '%s' uploader first"
                    % (required, required))
        if not os.path.exists(input_file):
            raise AssertionError("Can't find input file '%s'" % input_file)
        # KeyLookup is disabled due to duplicate key errors
        # return self.exclude_fields(self.keylookup(load_data, debug=True))(input_file)
        # ``load_data`` here is the module-level callable, not this method.
        return self.exclude_fields(load_data)(input_file)

    def post_update_data(self, *args, **kwargs):
        """Create the ``chebi.id`` index following an upload."""
        for idxname in ["chebi.id"]:
            self.logger.info("Indexing '%s'", idxname)
            # background=true or it'll lock the whole database...
            self.collection.create_index([(idxname, pymongo.ASCENDING)],
                                         background=True)

    @classmethod
    def get_mapping(klass):
        """Return the mapping of the ``chebi`` fields.

        A brand-new dictionary is assembled on every call.
        """

        def keyword(**extra):
            # Keyword field using the lowercase normalizer; ``extra`` entries
            # (e.g. copy_to) are appended after the two base keys.
            return dict(normalizer="keyword_lowercase_normalizer",
                        type="keyword", **extra)

        def field(kind, **extra):
            # Field of the given type, optionally followed by extra options.
            return dict(type=kind, **extra)

        xrefs = {
            "molbase": keyword(),
            "resid": keyword(),
            "come": keyword(),
            "pubchem": {
                "properties": {
                    "sid": field("integer"),
                    "cid": field("integer"),
                }
            },
            "beilstein": keyword(),
            "wikipedia": {"properties": {"url_stub": keyword()}},
            "metacyc": keyword(),
            "biomodels": keyword(),
            "reactome": keyword(),
            "um_bbd_compid": keyword(),
            "lincs": keyword(),
            "uniprot": keyword(),
            "sabio_rk": keyword(),
            "patent": keyword(),
            "pdbechem": keyword(),
            "arrayexpress": keyword(),
            "cas": keyword(),
            "lipid_maps_class": keyword(),
            "kegg_drug": keyword(),
            "knapsack": keyword(),
            "lipid_maps_instance": keyword(),
            "intenz": field("text"),
            "kegg_glycan": keyword(),
            "ecmdb": keyword(),
            "hmdb": keyword(),
            "kegg_compound": keyword(),
            "ymdb": keyword(),
            "drugbank": keyword(),
            "rhea": keyword(),
            "gmelin": keyword(),
            "intact": keyword(),
        }
        citation = {
            "pubmed": keyword(),
            "agricola": keyword(),
            "pmc": keyword(),
            "chinese_abstracts": field("integer"),
            "citexplore": keyword(),
        }
        mapping = {
            "chebi": {
                "properties": {
                    "brand_names": field("text", copy_to=['all']),
                    "id": keyword(copy_to=['all']),
                    "iupac": field("text"),
                    "inchi": keyword(),
                    "definition": field("text"),
                    "star": field("integer"),
                    "smiles": keyword(),
                    "last_modified": field("text"),
                    "inn": field("text"),
                    "xrefs": {"properties": xrefs},
                    "monoisotopic_mass": field("float"),
                    "mass": field("float"),
                    "secondary_chebi_id": keyword(copy_to=['all']),
                    "formulae": keyword(),
                    "inchikey": keyword(),
                    "name": field("text", copy_to=['all']),
                    "charge": field("integer"),
                    "synonyms": field("text"),
                    "citation": {"properties": citation},
                }
            }
        }
        return mapping
Example #4
0
class ChemblUploader(BaseDrugUploader, ParallelizedSourceUploader):
    """
    ChemblUploader - upload the Chembl data source
    """

    name = "chembl"
    storage_class = storage.RootKeyMergerStorage
    __metadata__ = {"src_meta": SRC_META}

    MOLECULE_PATTERN = "molecule.*.json"
    keylookup = MyChemKeyLookup(
        [
            ("inchikey", "chembl.inchi_key"),
            ("inchi", "chembl.inchi"),
            ("chembl", "chembl.molecule_chembl_id"),
            ("chebi", "chembl.chebi_par_id"),
            ("drugcentral", "chembl.xrefs.drugcentral.id"),
            ("drugname", "chembl.pref_name"),
        ],
        # TODO:  handle duplicate keys from pubchem
        # - we use RootKeyMergerStorage, but the num. duplicates
        # - is too high (>10000)
        # ("pubchem", "chembl.xrefs.pubchem.sid"),
        copy_from_doc=True)

    def jobs(self):
        """
        Build one argument tuple per molecule JSON file of the data folder;
        each tuple feeds one self.load_data() call, allowing parallelization.
        """
        pattern = os.path.join(self.data_folder,
                               self.__class__.MOLECULE_PATTERN)
        return [(json_file,) for json_file in glob.glob(pattern)]

    def load_data(self, input_file):
        """Load the documents of a single input file.

        The module-level ``load_data`` callable (not this method) is wrapped
        by ``keylookup`` with ``debug=True`` and invoked on ``input_file``.
        """
        self.logger.info("Load data from file '%s'" % input_file)
        wrapped_loader = self.keylookup(load_data, debug=True)
        return wrapped_loader(input_file)

    def post_update_data(self, *args, **kwargs):
        """create indexes following an update"""
        # pylint: disable=W0613
        # A previous version of this loop also indexed "chembl.inchi"; that
        # index is no longer created.
        indexed_fields = ("chembl.chebi_par_id", "chembl.molecule_chembl_id")
        for idxname in indexed_fields:
            self.logger.info("Indexing '%s'" % idxname)
            # background=true or it'll lock the whole database...
            self.collection.create_index(idxname, background=True)

    @classmethod
    def get_mapping(cls):
        """Return the mapping of the ``chembl`` fields.

        A brand-new dictionary is assembled on every call.
        """

        def keyword(**extra):
            # Keyword field using the lowercase normalizer; ``extra`` entries
            # (e.g. copy_to) are appended after the two base keys.
            return dict(normalizer="keyword_lowercase_normalizer",
                        type="keyword", **extra)

        def field(kind, **extra):
            # Field of the given type, optionally followed by extra options.
            return dict(type=kind, **extra)

        def id_and_name():
            # Sub-document made of an integer id and a text name.
            return {
                "properties": {
                    "id": field("integer"),
                    "name": field("text"),
                }
            }

        biotherapeutic = {
            "helm_notation": keyword(),
            "description": field("text"),
            "biocomponents": {
                "properties": {
                    "organism": field("text"),
                    "tax_id": field("integer"),
                    "sequence": field("text"),
                    "component_id": field("integer"),
                    "description": field("text"),
                    "component_type": keyword(),
                }
            },
            "molecule_chembl_id": keyword(copy_to=['all']),
        }
        molecule_properties = {
            "heavy_atoms": field("integer"),
            "acd_most_bpka": field("float"),
            "mw_freebase": field("float"),
            "num_ro5_violations": field("integer"),
            "molecular_species": keyword(),
            "qed_weighted": field("float"),
            "ro3_pass": field("boolean"),
            "full_mwt": field("float"),
            "num_lipinski_ro5_violations": field("integer"),
            "rtb": field("integer"),
            "psa": field("float"),
            "alogp": field("float"),
            "hbd": field("integer"),
            "acd_most_apka": field("float"),
            "hbd_lipinski": field("integer"),
            "acd_logp": field("float"),
            "full_molformula": keyword(),
            "aromatic_rings": field("integer"),
            "hba_lipinski": field("integer"),
            "mw_monoisotopic": field("float"),
            "hba": field("integer"),
            "acd_logd": field("float"),
        }
        molecule_hierarchy = {
            "parent_chembl_id": keyword(),
            "molecule_chembl_id": keyword(),
        }
        molecule_synonyms = {
            "molecule_synonym": field("text"),
            "synonyms": field("text"),
            "syn_type": keyword(),
        }
        xrefs = {
            "drugcentral": id_and_name(),
            "tg-gates": id_and_name(),
            "wikipedia": {"properties": {"url_stub": keyword()}},
            "dailymed": {"properties": {"name": field("text")}},
            "pubchem": {"properties": {"sid": field("integer")}},
        }
        chembl_fields = {
            "biotherapeutic": {"properties": biotherapeutic},
            "therapeutic_flag": field("boolean"),
            "usan_stem": field("text"),
            "molecule_chembl_id": keyword(),
            "molecule_properties": {"properties": molecule_properties},
            "helm_notation": keyword(),
            "max_phase": field("integer"),
            "inorganic_flag": field("integer"),
            "usan_stem_definition": field("text"),
            "dosed_ingredient": field("boolean"),
            "chebi_par_id": keyword(),
            "withdrawn_reason": field("text"),
            "molecule_hierarchy": {"properties": molecule_hierarchy},
            "prodrug": field("integer"),
            "withdrawn_flag": field("boolean"),
            "usan_year": field("integer"),
            "parenteral": field("boolean"),
            "black_box_warning": field("integer"),
            "polymer_flag": field("boolean"),
            "molecule_synonyms": {"properties": molecule_synonyms},
            "atc_classifications": keyword(),
            "molecule_type": field("text"),
            "first_in_class": field("integer"),
            "inchi": keyword(),
            "structure_type": keyword(),
            "withdrawn_class": field("text"),
            "inchi_key": keyword(),
            "topical": field("boolean"),
            "oral": field("boolean"),
            "xrefs": {"properties": xrefs},
            "chirality": field("integer"),
            "usan_substem": field("text"),
            "indication_class": field("text"),
            "withdrawn_country": field("text"),
            "withdrawn_year": field("integer"),
            "availability_type": field("integer"),
            "smiles": keyword(),
            "natural_product": field("integer"),
            "pref_name": field("text", copy_to=["all"]),
            "first_approval": field("integer"),
        }
        return {"chembl": {"properties": chembl_fields}}
Example #5
0
class NDCUploader(BaseDrugUploader):
    """Uploader for the "ndc" data source.

    Documents produced by the parser are regrouped so that every ``_id``
    is emitted once, carrying all the ``ndc`` values collected for it.
    """

    name = "ndc"
    storage_class = (storage.BasicStorage, storage.CheckSizeStorage)
    __metadata__ = {"src_meta": SRC_META}
    keylookup = MyChemKeyLookup([("ndc", "ndc.productndc")])
    # See the comment on the ExcludeFieldsById for use of this class.
    exclude_fields = ExcludeFieldsById(exclusion_ids, ["ndc"])

    def load_data(self, data_folder):
        """Yield one document per distinct ``_id`` with its grouped ``ndc`` data.

        The module-level ``load_data`` parser is wrapped with ``keylookup``
        and then ``exclude_fields`` and called with *data_folder*. Documents
        coming out of that pipeline that share an ``_id`` are merged.

        Args:
            data_folder: passed unchanged to the wrapped parser.

        Yields:
            dict: ``{"_id": <id>, "ndc": <value>}`` where ``<value>`` is the
            single collected entry when exactly one was collected, otherwise
            the list of collected entries.
        """
        docs = self.exclude_fields(self.keylookup(load_data))(data_folder)
        grouped = {}
        for doc in docs:
            ndc = doc["ndc"]
            # IK found, but other productndc could also match the same
            # IK so we keep them in a list
            if isinstance(ndc, list):
                # NOTE(review): if this _id was already seen, setdefault keeps
                # the existing entries and this list is ignored - confirm
                # that is intended.
                grouped.setdefault(doc["_id"], ndc)
            else:
                records = grouped.setdefault(doc["_id"], [])
                if ndc not in records:
                    records.append(ndc)
        for ik, ndc in grouped.items():
            if len(ndc) == 1:
                # Unwrap single-entry lists without mutating the stored list.
                ndc = ndc[0]
            yield {"_id": ik, "ndc": ndc}

    @classmethod
    def get_mapping(klass):
        """Return the field-type mapping for data stored under the "ndc" key."""

        def keyword_field(**extra):
            # Build a fresh definition on every call so fields never share
            # a dict; key order matches the hand-written literals.
            field = {
                "normalizer": "keyword_lowercase_normalizer",
                "type": "keyword",
            }
            field.update(extra)
            return field

        mapping = {
            "ndc": {
                "properties": {
                    "product_id": keyword_field(),
                    "productndc": {"type": "text"},
                    "producttypename": keyword_field(),
                    "proprietaryname": keyword_field(),
                    "proprietarynamesuffix": keyword_field(),
                    "nonproprietaryname": keyword_field(),
                    "dosageformname": keyword_field(),
                    "routename": keyword_field(),
                    "startmarketingdate": {"type": "text"},
                    "endmarketingdate": {"type": "text"},
                    "marketingcategoryname": keyword_field(),
                    "applicationnumber": keyword_field(),
                    "labelername": keyword_field(),
                    "substancename": keyword_field(copy_to=["all"]),
                    "active_numerator_strength": {"type": "text"},
                    "active_ingred_unit": keyword_field(),
                    "pharm_classes": keyword_field(),
                    "deaschedule": keyword_field(),
                    "package": {
                        "properties": {
                            "packagedescription": keyword_field(),
                            "ndcpackagecode": {"type": "text"},
                        }
                    },
                }
            }
        }

        return mapping
Example #6
0
class UniiUploader(BaseDrugUploader):
    """Uploader for the "unii" data source, read from one ``*Records*.txt`` file."""

    name = "unii"
    storage_class = storage.IgnoreDuplicatedStorage
    __metadata__ = {"src_meta": SRC_META}

    keylookup = MyChemKeyLookup(
        [
            ('inchikey', 'unii.inchikey'),
            ('pubchem', 'unii.pubchem'),
            ('unii', 'unii.unii'),
        ],
        copy_from_doc=True,
    )

    def load_data(self, data_folder):
        """Locate the single records file in *data_folder* and parse it.

        Args:
            data_folder: directory searched for ``*Records*.txt``.

        Returns:
            Whatever the module-level ``load_data`` parser returns for the
            located file.

        Raises:
            AssertionError: if the number of matching files is not exactly
                one, or the matched path does not exist.
        """
        self.logger.info("Load data from '%s'" % data_folder)
        pattern = os.path.join(data_folder, "*Records*.txt")
        candidates = glob.glob(pattern)
        assert len(candidates) == 1, \
            "Expecting one record.txt file, got %s" % repr(candidates)
        input_file = candidates[-1]
        assert os.path.exists(input_file), \
            "Can't find input file '%s'" % input_file
        # keylookup is intentionally bypassed here: unii is a base collection
        # used for drugname lookup and should be loaded first, so the parser
        # is called directly instead of self.keylookup(load_data)(input_file).
        return load_data(input_file)

    def post_update_data(self, *args, **kwargs):
        """Create background indexes on ``self.collection`` after the upload."""
        indexed_fields = ("unii.unii", "unii.preferred_term")
        for path in indexed_fields:
            self.logger.info("Indexing '%s'" % path)
            self.collection.create_index(path, background=True)

    @classmethod
    def get_mapping(klass):
        """Return the field-type mapping for data stored under the "unii" key."""

        def keyword_field(**extra):
            # New dict on each call; same key order as a hand-written literal.
            field = {
                "normalizer": "keyword_lowercase_normalizer",
                "type": "keyword",
            }
            field.update(extra)
            return field

        properties = {
            "unii": keyword_field(copy_to=['all']),
            "preferred_term": keyword_field(),
            "registry_number": keyword_field(),
            "ec": keyword_field(),
            "ncit": keyword_field(),
            "rxcui": keyword_field(),
            "itis": keyword_field(),
            "ncbi": keyword_field(),
            "plants": keyword_field(),
            "grin": keyword_field(),
            "inn_id": keyword_field(),
            "molecular_formula": keyword_field(),
            "inchikey": keyword_field(),
            "smiles": keyword_field(),
            "unii_type": {"type": "text"},
        }

        return {"unii": {"properties": properties}}
Example #7
0
class AeolusUploader(BaseDrugUploader):
    """Uploader for the "aeolus" data source, fed from its source collection."""

    storage_class = storage.RootKeyMergerStorage
    name = "aeolus"
    __metadata__ = {
        "src_meta": {
            "url": "http://www.nature.com/articles/sdata201626",
            "license_url": "http://datadryad.org/resource/doi:10.5061/dryad.8q0s4",
            "license_url_short": "http://bit.ly/2DIxWwF",
            "license": "CC0 1.0",
        }
    }

    keylookup = MyChemKeyLookup(
        [
            ('inchikey', 'aeolus.inchikey'),
            ('unii', 'aeolus.unii'),
            ('drugname', 'aeolus.drug_name'),
        ],
        copy_from_doc=True,
    )

    def load_data(self, data_folder):
        """Stream documents from the source collection through ``keylookup``.

        Args:
            data_folder: accepted for interface compatibility; not used here.

        Returns:
            The result of calling the keylookup-wrapped generator (with
            ``debug=True``) over ``self.db[self.src_col_name].find()``.
        """
        # Documents come from the source collection rather than from files.
        source = self.db[self.src_col_name]

        def iter_source_docs():
            yield from source.find()

        # Apply keylookup on top of the collection reader and start it.
        wrapped_reader = self.keylookup(iter_source_docs, debug=True)
        return wrapped_reader()

    @classmethod
    def get_mapping(klass):
        """Return the field-type mapping for data stored under the "aeolus" key."""

        def keyword_field():
            # New dict on each call so no two fields share a definition.
            return {
                "normalizer": "keyword_lowercase_normalizer",
                "type": "keyword",
            }

        relationships = {
            "properties": {
                "relatedSubstance": {
                    "properties": {
                        "approvalID": keyword_field(),
                        "refPname": keyword_field(),
                    }
                },
                "type": keyword_field(),
            }
        }

        outcomes = {
            "properties": {
                "meddra_code": keyword_field(),
                "case_count": {"type": "long"},
                "id": keyword_field(),
                "name": {"type": "text"},
                "prr": {"type": "float"},
                "prr_95_ci": {"type": "float"},
                "ror": {"type": "float"},
                "ror_95_ci": {"type": "float"},
            }
        }

        return {
            "aeolus": {
                "properties": {
                    "drug_id": keyword_field(),
                    "drug_name": {"type": "text", "copy_to": ["all"]},
                    "inchikey": keyword_field(),
                    "no_of_outcomes": {"type": "integer"},
                    "pt": {"type": "text"},
                    "unii": keyword_field(),
                    "drug_vocab": {"type": "text"},
                    "drug_code": keyword_field(),
                    "rxcui": keyword_field(),
                    "relationships": relationships,
                    "outcomes": outcomes,
                }
            }
        }
class PharmGkbUploader(BaseDrugUploader):
    """
    Uploader for the "pharmgkb" data source, parsed from ``drugs.tsv``.
    """

    name = "pharmgkb"
    storage_class = storage.RootKeyMergerStorage
    __metadata__ = {"src_meta": SRC_META}
    keylookup = MyChemKeyLookup(
        [
            ('inchi', 'pharmgkb.inchi'),
            ('pubchem', 'pharmgkb.xrefs.pubchem.cid'),
            ('drugbank', 'pharmgkb.xrefs.drugbank'),
            ('chebi', 'pharmgkb.xrefs.chebi'),
        ]
    )

    def load_data(self, data_folder):
        """Parse ``drugs.tsv`` from *data_folder* through ``keylookup``.

        Args:
            data_folder: directory expected to contain ``drugs.tsv``.

        Returns:
            The result of the keylookup-wrapped module-level ``load_data``
            parser applied to the file path.

        Raises:
            AssertionError: if ``drugs.tsv`` is not present in *data_folder*.
        """
        self.logger.info("Load data from '%s'" % data_folder)
        input_file = os.path.join(data_folder, "drugs.tsv")
        assert os.path.exists(input_file), \
            "Can't find input file '%s'" % input_file
        lookup_parser = self.keylookup(load_data)
        return lookup_parser(input_file)

    def post_update_data(self, *args, **kwargs):
        """Create a background index on ``pharmgkb.id`` after the upload."""
        indexed_field = "pharmgkb.id"
        self.logger.info("Indexing '%s'" % indexed_field)
        self.collection.create_index(indexed_field, background=True)

    @classmethod
    def get_mapping(cls):
        """Return the field-type mapping for data stored under "pharmgkb"."""

        def keyword_field(**extra):
            # New dict on each call; same key order as a hand-written literal.
            field = {
                "normalizer": "keyword_lowercase_normalizer",
                "type": "keyword",
            }
            field.update(extra)
            return field

        def nested(**fields):
            # Wrap sub-fields in the "properties" envelope used for objects.
            return {"properties": fields}

        xrefs = {
            "properties": {
                "web_resource": keyword_field(),
                "uniprotkb": keyword_field(),
                "pubchem": nested(sid=keyword_field(), cid=keyword_field()),
                "het": keyword_field(),
                "wikipedia": nested(url_stub=keyword_field()),
                "iuphar_ligand": keyword_field(),
                "meddra": keyword_field(),
                "atc": keyword_field(),
                "kegg_compound": keyword_field(),
                "umls": keyword_field(),
                "clinicaltrials": nested(gov=keyword_field()),
                "genbank": keyword_field(),
                "rxnorm": keyword_field(),
                "chebi": keyword_field(),
                "cas": keyword_field(),
                "ttd": keyword_field(),
                "kegg_drug": keyword_field(),
                "mesh": keyword_field(),
                "ndc": keyword_field(),
                "chemspider": keyword_field(),
                "hmdb": keyword_field(),
                "dailymed": nested(setid=keyword_field()),
                "ndfrt": keyword_field(),
                "bindingdb": keyword_field(),
                "drugbank": keyword_field(),
                "pdb": keyword_field(),
                "dpd": keyword_field(),
            }
        }

        return {
            "pharmgkb": {
                "properties": {
                    "id": keyword_field(copy_to=['all']),
                    "dosing_guideline": keyword_field(),
                    "inchi": keyword_field(),
                    "name": {"type": "text", 'copy_to': ['all']},
                    "smiles": keyword_field(),
                    "generic_names": {"type": "text", 'copy_to': ['all']},
                    "brand_mixtures": {"type": "text"},
                    "trade_names": {"type": "text"},
                    "type": {"type": "text"},
                    "xrefs": xrefs,
                }
            }
        }
Example #9
0
class SiderUploader(BaseDrugUploader):
    """Uploader for the "sider" data source.

    Each document's ``sider`` list is sorted and capped at
    ``max_lst_size`` entries before being yielded.
    """

    name = "sider"
    #storage_class = storage.IgnoreDuplicatedStorage
    __metadata__ = {"src_meta": SRC_META}
    keylookup = MyChemKeyLookup([("pubchem", "_id")],
                                idstruct_class=SiderIDStruct)
    # Upper bound on the number of entries kept in a document's 'sider' list.
    max_lst_size = 2000

    def load_data(self, data_folder):
        """Yield parsed documents with a sorted, size-capped ``sider`` list.

        Args:
            data_folder: directory containing
                ``merged_freq_all_se_indications.tsv``.

        Yields:
            dict: each document from the keylookup-wrapped module-level
            ``load_data`` parser, with ``doc['sider']`` replaced by a sorted
            copy holding at most ``self.max_lst_size`` entries.
        """
        input_file = os.path.join(data_folder,
                                  "merged_freq_all_se_indications.tsv")
        self.logger.info("Load data from file '%s'" % input_file)
        docs = self.keylookup(load_data)(input_file)
        for doc in docs:
            # Sort the 'sider' list with sort_key (defined elsewhere; intended
            # to order by "sider.side_effect.frequency" and
            # "sider.side_effect.name"), then keep at most max_lst_size
            # elements. See the 'truncated_docs.tsv' file for a list of ids
            # that are affected by the truncation.
            doc['sider'] = sorted(doc['sider'],
                                  key=sort_key)[:self.max_lst_size]
            yield doc

    @classmethod
    def get_mapping(klass):
        """Return the field-type mapping for data stored under the "sider" key."""

        def keyword_field():
            # New dict on each call so no two fields share a definition.
            return {
                "normalizer": "keyword_lowercase_normalizer",
                "type": "keyword",
            }

        mapping = {
            "sider": {
                "properties": {
                    "stitch": {
                        "properties": {
                            "flat": keyword_field(),
                            "stereo": keyword_field(),
                        }
                    },
                    "indication": {
                        "properties": {
                            "method_of_detection": keyword_field(),
                            "name": {"type": "text"},
                        }
                    },
                    "meddra": {
                        "properties": {
                            "type": keyword_field(),
                            "umls_id": keyword_field(),
                        }
                    },
                    "side_effect": {
                        "properties": {
                            "frequency": keyword_field(),
                            "placebo": {"type": "boolean"},
                            "name": {"type": "text"},
                        }
                    },
                }
            }
        }
        return mapping
class DrugCentralUploader(BaseDrugUploader):
    """Uploader for the ``drugcentral`` data source.

    Documents are read from this uploader's own source collection
    (``self.db[self.src_col_name]``), passed through ``keylookup`` and
    described for indexing by :meth:`get_mapping`.
    """

    name = "drugcentral"

    __metadata__ = {
        "src_meta": {
            "url": "http://drugcentral.org/",
            "license_url": "http://drugcentral.org/privacy",
            "license_url_short": "http://bit.ly/2SeEhUy",
            "license": "CC BY-SA 4.0",
        }
    }

    # (input type, dotted path of the field inside the document) pairs.
    keylookup = MyChemKeyLookup(
        [
            ('inchikey', 'drugcentral.structures.inchikey'),
            # The cross-reference object is named "xrefs" (plural), as in
            # the mapping returned by get_mapping() and in every other
            # entry of this list; the singular "xref" never matches a field.
            ('unii', 'drugcentral.xrefs.unii'),
            # other keys are present but not currently used by keylookup
            ('inchi', 'drugcentral.structures.inchi'),
            ('drugbank', 'drugcentral.xrefs.drugbank_id'),
            ('chebi', 'drugcentral.xrefs.chebi'),
            ('chembl', 'drugcentral.xrefs.chembl_id'),
            ('pubchem', 'drugcentral.xrefs.pubchem_cid')
        ],
        # ('drugname', 'drugcentral.synonyms') is left out on purpose: the
        # field holds a list, which fails with "unhashable type - list".
        copy_from_doc=True,
    )

    def load_data(self, data_folder):
        """Yield the source-collection documents after key lookup.

        Args:
            data_folder: unused by this uploader; documents are read from
                ``self.db[self.src_col_name]`` instead of from files.

        Returns:
            Whatever the ``keylookup``-wrapped generator function returns
            when called (the converted documents).
        """
        # read data from the source collection
        src_col = self.db[self.src_col_name]

        def load_data():
            yield from src_col.find()

        # perform keylookup on source collection
        return self.keylookup(load_data)()

    @classmethod
    def get_mapping(klass):
        """Return the index mapping for the ``drugcentral`` field.

        The mapping is assembled from small local factories. Each factory
        returns a brand-new dict on every call, so no two fields share the
        same dict object and callers may mutate the result freely.

        Returns:
            dict: ``{"drugcentral": {"properties": {...}}}``.
        """

        def keyword():
            # Exact-match field, lower-cased by the named normalizer.
            return {
                "normalizer": "keyword_lowercase_normalizer",
                "type": "keyword",
            }

        def of_type(field_type):
            # Plain field of the given type ("text", "float", ...).
            return {"type": field_type}

        def text_copied_to_all():
            # Text field whose content is also copied to the "all" field.
            return {"type": "text", "copy_to": ["all"]}

        def nested(properties):
            # Object field wrapping the given sub-field definitions.
            return {"properties": properties}

        def coded_description():
            # Shape shared by every pharmacology_class sub-object.
            return nested({
                "description": of_type("text"),
                "code": keyword(),
            })

        def snomed_concept():
            # Shape shared by every drug_use sub-object.
            return nested({
                "snomed_full_name": of_type("text"),
                "cui_semantic_type": keyword(),
                "concept_name": of_type("text"),
                "umls_cui": keyword(),
                "snomed_concept_id": of_type("long"),
            })

        pharmacology_classes = (
            "chebi",
            "fda_epc",
            "fda_pe",
            "fda_chemical/ingredient",
            "fda_moa",
            "mesh_pa",
        )
        drug_use_relations = (
            "reduce risk",
            "indication",
            "contraindication",
            "symptomatic treatment",
            "off-label use",
            "diagnosis",
        )
        xref_fields = (
            "pubchem_cid",
            "nui",
            "nddf",
            "pdb_chem_id",
            "kegg_drug",
            "secondary_cas_rn",
            "vandf",
            "ndfrt",
            "chembl_id",
            "drugbank_id",
            "inn_id",
            "mmsl",
            "snomedct_us",
            "mesh_supplemental_record_ui",
            "unii",
            "umlscui",
            "chebi",
            "mesh_descriptor_ui",
            "vuid",
            "iuphar_ligand_id",
            "rxnorm",
        )

        mapping = {
            "drugcentral": nested({
                "structures": nested({
                    "smiles": keyword(),
                    "cas_rn": keyword(),
                    "inn": text_copied_to_all(),
                    "inchi": keyword(),
                    "inchikey": keyword(),
                }),
                "fda_adverse_event": nested({
                    "llr": of_type("float"),
                    "meddra_term": of_type("text"),
                    "llr_threshold": of_type("float"),
                    "drug_ae": of_type("integer"),
                    "level": keyword(),
                    "drug_no_ae": of_type("integer"),
                    # NOTE(review): "no_drug_no_ar" looks like a typo for
                    # "no_drug_no_ae"; kept as-is because the key must match
                    # what the parser emits -- confirm against the parser.
                    "no_drug_no_ar": of_type("integer"),
                    "meddra_code": of_type("integer"),
                    "no_drug_ae": of_type("integer"),
                }),
                "drug_dosage": nested({
                    "unit": keyword(),
                    "route": of_type("text"),
                    "dosage": of_type("float"),
                }),
                "pharmacology_class": nested({
                    class_name: coded_description()
                    for class_name in pharmacology_classes
                }),
                "approval": nested({
                    "agency": of_type("text"),
                    "date": keyword(),
                    "company": of_type("text"),
                    "orphan": keyword(),
                }),
                "drug_use": nested({
                    relation: snomed_concept()
                    for relation in drug_use_relations
                }),
                "bioactivity": nested({
                    "organism": of_type("text"),
                    "target_class": of_type("text"),
                    "action_type": of_type("text"),
                    "moa": of_type("float"),
                    "target_name": of_type("text"),
                    "act_type": of_type("text"),
                    "moa_source": of_type("text"),
                    "uniprot": nested({
                        "uniprot_id": keyword(),
                        "swissprot_entry": keyword(),
                        "gene_symbol": of_type("text"),
                    }),
                    "act_source": of_type("text"),
                    "act_value": of_type("float"),
                }),
                "synonyms": text_copied_to_all(),
                "xrefs": nested({
                    xref_name: keyword() for xref_name in xref_fields
                }),
            })
        }

        return mapping