Ejemplo n.º 1
0
def find_predecessor(id, classes=None):
    """
    Search the ``is_a`` ancestors of a ChEBI entity for a known class.

    The primary ID of ``id`` is checked first; after that the ``is_a``
    relations are followed breadth-first until a target whose primary ID is
    contained in ``classes`` is found.

    Parameters:
        id: ChEBI ID to classify.
        classes: container of ChEBI IDs that count as a classification.
            When omitted (``None``) nothing can match.

    Returns:
        ``(name, chebi_id)`` of the first matching class, or ``(None, None)``
        when no match exists.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    category = (None, None)

    # With no candidate classes a match is impossible; the original code
    # raised TypeError on ``primary in None`` here.
    if classes is None:
        return category

    primary = get_primary(id)

    if primary in classes:
        return (get_substrate_name(primary), primary)

    # Entities reachable through several paths are expanded only once. A
    # repeat visit can never yield a new match, so the first hit is unchanged.
    seen = {primary}
    pending = deque(libchebipy.ChebiEntity(primary).get_outgoings())

    while pending:
        relation = pending.popleft()

        if relation.get_type() != 'is_a':
            continue

        target_id = get_primary(relation.get_target_chebi_id())

        if target_id in seen:
            continue
        seen.add(target_id)

        if target_id in classes:
            return (get_substrate_name(target_id), target_id)

        pending.extend(libchebipy.ChebiEntity(target_id).get_outgoings())

    return category
Ejemplo n.º 2
0
    def getCommonNameForChebi(self):
        '''
        Look up a ChEBI name for every metabolite and append it to the
        metabolite's synonym list.

        For each key of self.metaboliteIDDictionary the IDs stored under
        "chebi_id" are queried through libchebipy. The name of the last ID
        that could be resolved is kept; "NA" is used when none could be.
        The result is appended to self.metabolitesWithSynonymsDictionary[key],
        creating the list if the key is new.

        This function is called inside self.getEverything
        '''
        for key, value in self.metaboliteIDDictionary.items():
            name = "NA"
            for each in value["chebi_id"]:
                try:
                    name = libchebipy.ChebiEntity(str(each)).get_name()
                except Exception:
                    # Best-effort lookup: skip IDs that cannot be resolved,
                    # but let KeyboardInterrupt/SystemExit propagate.
                    continue
            self.metabolitesWithSynonymsDictionary.setdefault(key, []).append(name)
          

                
Ejemplo n.º 3
0
 def getCommonNameForChebi(self):
     '''
     Resolve ChEBI names for every metabolite and record them.

     For each key of self.metaboliteIDDictionary the IDs stored under
     "chebi_id" are queried through libchebipy. Every ID that resolves is
     stored in self.metaboliteCommonName under that ID. Afterwards the
     last resolved name is stored under the metabolite key in both
     self.metabolitesWithSynonymsDictionary (as a one-element list) and
     self.metaboliteCommonName. "NA" stands in for a missing name.
     '''
     for key, value in self.metaboliteIDDictionary.items():
         name = "NA"
         commonName = None
         for each in value["chebi_id"]:
             try:
                 name = libchebipy.ChebiEntity(str(each)).get_name()
                 commonName = name
                 self.metaboliteCommonName[each] = (
                     "NA" if commonName is None else commonName)
             except Exception:
                 # Best-effort lookup: skip IDs that cannot be resolved,
                 # but let KeyboardInterrupt/SystemExit propagate.
                 continue
         self.metabolitesWithSynonymsDictionary[key] = [name]
         self.metaboliteCommonName[key] = (
             "NA" if commonName is None else commonName)
Ejemplo n.º 4
0
def get_substrate_name(id):
    """
    Return the name that libchebipy reports for the given ChEBI ID.
    """
    return libchebipy.ChebiEntity(id).get_name()
Ejemplo n.º 5
0
def find_role(id, classes=None):
    """
    Classify a ChEBI entity through its ``has_role`` relations.

    Works like find_predecessor, but starts from the roles of the entity
    rather than from its chemical-entity parents. Each role is checked
    directly against ``classes`` and, failing that, through
    ``find_predecessor``.

    Parameters:
        id: ChEBI ID to classify.
        classes: container of ChEBI IDs that count as a classification.
            When omitted (``None``) nothing can match.

    Returns:
        ``(name, chebi_id)`` of the matching class, or ``(None, None)`` when
        no role leads to one.
    """
    role = (None, None)

    # With no candidate classes a match is impossible; the original code
    # raised TypeError on ``primary in None`` here.
    if classes is None:
        return role

    primary = get_primary(id)

    if primary in classes:
        return (get_substrate_name(primary), primary)

    # NOTE(review): the entity is built from ``id`` rather than ``primary``,
    # unlike find_predecessor -- confirm this is intended.
    primary_chebi = libchebipy.ChebiEntity(id)

    for pre in primary_chebi.get_outgoings():

        if pre.get_type() != 'has_role':
            continue

        target_id = get_primary(pre.get_target_chebi_id())

        if target_id in classes:
            return (get_substrate_name(target_id), target_id)

        found = find_predecessor(target_id, classes=classes)

        # Only keep real hits, so a later role without a classification
        # cannot overwrite a result found through an earlier role.
        if found != (None, None):
            role = found

    return role
Ejemplo n.º 6
0
def chebi2smiles(chebi):
    """
    Fetch the InChI of a ChEBI entity and pass it to ``inchi2smiles``.

    Parameters:
        chebi: ChEBI ID accepted by ``libchebipy.ChebiEntity``.

    Returns:
        ``False`` when the entity has no InChI, otherwise the result of
        ``inchi2smiles(inchi)`` (defined elsewhere; presumably a SMILES
        string).
    """
    inchi = libchebipy.ChebiEntity(chebi).get_inchi()
    if inchi is None:
        # A single pre-formatted argument prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('No InChI: %s' % (inchi,))
        return False
    print('Found InChI: %s' % (inchi,))
    return inchi2smiles(inchi)
Ejemplo n.º 7
0
def get_primary(id):
    """
    Get the primary ChEBI ID, since the primary ID is not guaranteed to be
    the ID that is reported.

    Returns the parent ID reported by libchebipy for ``id``, or ``id``
    itself when no parent ID is reported (``None``).
    """
    parent_id = libchebipy.ChebiEntity(id).get_parent_id()

    # Identity check: ``== None`` can be fooled by a custom ``__eq__``.
    return id if parent_id is None else parent_id
Ejemplo n.º 8
0
    def getCommonNameForChebi(self):
        '''
        Look up a ChEBI name for every metabolite and store it.

        For each key of self.metaboliteIDDictionary the IDs stored under
        "chebi_id" are queried through libchebipy, printing each resolved
        ID and name. The name of the last ID that could be resolved ("NA"
        when none could be) is stored as a one-element list in
        self.metabolitesWithSynonymsDictionary[key].
        '''
        for key, value in self.metaboliteIDDictionary.items():
            name = "NA"
            for each in value["chebi_id"]:
                try:
                    chebiToSearch = str(each)
                    name = libchebipy.ChebiEntity(chebiToSearch).get_name()
                    # The prints stay inside the try so that an output
                    # error remains non-fatal, as before.
                    print(chebiToSearch)
                    print(name)
                except Exception:
                    # Best-effort lookup: skip IDs that cannot be resolved,
                    # but let KeyboardInterrupt/SystemExit propagate.
                    continue
            self.metabolitesWithSynonymsDictionary[key] = [name]
Ejemplo n.º 9
0
def _write_tsv(path, mapping):
    """Write ``mapping`` to ``path`` as one ``key<TAB>value`` line per item."""
    with open(path, "w") as outf:
        for k, v in mapping.items():
            outf.write(str(k) + '\t' + str(v) + '\n')


def map_all_metabs_to_chebi_ids(metabs, signif_no_chebi_ids, output_data_dir):
    """
    Maps a list of metabolites to their ChEBI IDs, if available.
    See: https://github.com/libChEBI/libChEBIpy/blob/master/libchebipy/_chebi_entity.py

    Parameters:
        metabs: iterable of metabolite names to look up.
        signif_no_chebi_ids: container of names regarded as significant;
            close matches for these are additionally written to
            signif_close_matches.tsv.
        output_data_dir: prefix that the output file names are appended to
            by plain string concatenation, so it has to end with a path
            separator.

    Files written:
        exact_matches.tsv:
            2 col .tsv with provided metabolite name and matched CHEBI ID
                i.e.
                    malate	CHEBI:25115

        all_close_matches.tsv:
            2 col .tsv with provided metabolite name and a list of
            {CHEBI name: CHEBI ID} dictionaries for all metabolites
            without exact matches
                i.e.
                    GSH	[{'Gsh-prostaglandin A1': 'CHEBI:5548'}, {'S-Decyl GSH': 'CHEBI:8955'}]

        signif_close_matches.tsv: 2 col .tsv
            same format as all_close_matches.tsv but includes only
            significant metabolites that have at least one close match

    Returns:
        The path of signif_close_matches.tsv.
    """
    names_map = {}
    no_exact_match = {}
    signif_no_exact_match = {}
    for name in metabs:
        exact_hits = lc.search(name, exact=True)
        if exact_hits:
            names_map[name] = 'CHEBI:' + str(exact_hits[0]._ChebiEntity__chebi_id)
            # TODO: search used-to-produce sif for the matched ChEBI ID
            continue

        # No exact match: fall back to a non-exact search and collect every
        # candidate as {candidate name: candidate ID}.
        close_matches = []
        for hit in lc.search(name):
            alt_id = str(hit._ChebiEntity__chebi_id)
            chebi_entity = lc.ChebiEntity(alt_id)
            close_matches.append({chebi_entity.get_name(): 'CHEBI:' + alt_id})
        no_exact_match[name] = close_matches

        # Significant names are recorded only when there is at least one
        # candidate, as before. The list is built once rather than being
        # rebuilt (with fresh lookups) for every candidate.
        if close_matches and name in signif_no_chebi_ids:
            signif_no_exact_match[name] = list(close_matches)

    _write_tsv(output_data_dir + "exact_matches.tsv", names_map)
    _write_tsv(output_data_dir + "all_close_matches.tsv", no_exact_match)

    signif_close_matches_path = output_data_dir + "signif_close_matches.tsv"
    _write_tsv(signif_close_matches_path, signif_no_exact_match)

    return signif_close_matches_path
Ejemplo n.º 10
0
            query_json = json.loads(query.text)
            chebi_id = query_json["database_links"]["CHEBI"][0]["id"]
    except (ConnectionError, KeyError) as errors:
        pass

    chebi_id_dict[bigg_id] = chebi_id


# Collect one InChI entry per key of chebi_id_dict, in iteration order.
inchi_str_list = []
fails_counter = 0

print("Fetching InChI strings from ChEBI...")
for key in tqdm.tqdm(chebi_id_dict.keys()):
    if chebi_id_dict[key] == "":
        # No ChEBI ID recorded for this key: store a placeholder and count it.
        fails_counter += 1
        inchi_str_list.append("NaN")
        continue
    chebi_entity = libchebipy.ChebiEntity(chebi_id_dict[key])
    inchi_str = chebi_entity.get_inchi()
    inchi_str_list.append(inchi_str)
# NOTE(review): only empty ChEBI IDs are counted here; whether get_inchi()
# can return an empty result for a valid ID is not visible -- confirm.
print(f"Number of entries without found InChI strings: {fails_counter}.")


# Attach the collected strings as a new column and write the CSV.
print(f"Saving to {OUTPUT_CSV_PATH}...")
input_csv_df["InChI_string"] = inchi_str_list
input_csv_df.to_csv(OUTPUT_CSV_PATH, index=False)


if __name__ == "__main__":
Ejemplo n.º 11
0
def update_chebi(chebi_list):
    """
    Return the SMILES reported by libchebipy for each ChEBI code, in the
    same order as ``chebi_list``.
    """
    return [libchebipy.ChebiEntity(code).get_smiles() for code in chebi_list]
Ejemplo n.º 12
0
 def test_get_refs_reported_problem(self):
     '''Regression check for the problem reported at
     https://github.com/libChEBI/libChEBIpy/issues/9:
     building the entity for ID 4700 must yield a truthy object.'''
     entity = libchebipy.ChebiEntity('4700')
     self.assertTrue(entity)