def find_predecessor(id, classes=None):
    """
    Walk up the ChEBI ontology along 'is_a' relations, nearest levels first.

    The primary ID of ``id`` is tested first. After that, every entity
    reachable through outgoing 'is_a' relations is tested in FIFO order,
    so closer ancestors are examined before more distant ones.

    Args:
        id: ChEBI ID to start from; it is resolved with ``get_primary``.
        classes: Container of primary ChEBI IDs that count as a
            classification. When None, nothing can match.

    Returns:
        ``(name, chebi_id)`` for the first entity found in ``classes``,
        or ``(None, None)`` when no match exists.
    """
    from collections import deque

    category = (None, None)
    if classes is None:
        # Without candidate classes no entity can match. Previously this
        # path raised TypeError on ``primary in None``.
        return category

    primary = get_primary(id)
    if primary in classes:
        return (get_substrate_name(primary), primary)

    # deque gives O(1) pops from the left; list.pop(0) shifts every element.
    pending = deque(libchebipy.ChebiEntity(primary).get_outgoings())
    # Ancestors shared by several paths only need to be expanded once: a
    # repeated visit could neither match nor contribute new relations.
    expanded = {primary}
    while pending:
        relation = pending.popleft()
        if relation.get_type() != 'is_a':
            continue
        target_id = get_primary(relation.get_target_chebi_id())
        if target_id in expanded:
            continue
        expanded.add(target_id)
        if target_id in classes:
            return (get_substrate_name(target_id), target_id)
        pending.extend(libchebipy.ChebiEntity(target_id).get_outgoings())
    return category
def getCommonNameForChebi(self):
    '''
    Get common name from chebi API,
    This function is called inside self.getEverything

    For every key of self.metaboliteIDDictionary, each entry of its
    "chebi_id" list is looked up through libchebipy. The name from the
    last successful lookup ("NA" when none succeeded or the list is empty)
    is appended to self.metabolitesWithSynonymsDictionary[key]; the list
    is created when the key is not present yet.
    '''
    for key, value in self.metaboliteIDDictionary.items():
        name = "NA"
        for each in value["chebi_id"]:
            try:
                chebi_id = str(each)
                name = libchebipy.ChebiEntity(chebi_id).get_name()
            except Exception:
                # Best effort: an ID that cannot be resolved keeps the
                # previous name. Only Exception is caught so that Ctrl-C
                # and SystemExit still stop a long-running batch.
                pass
        self.metabolitesWithSynonymsDictionary.setdefault(key, []).append(name)
def getCommonNameForChebi(self):
    """
    Record ChEBI names per ChEBI ID and per metabolite key.

    For every key of self.metaboliteIDDictionary, each entry of its
    "chebi_id" list is looked up through libchebipy. Each successful
    lookup stores the name (or "NA" when the name is None) in
    self.metaboliteCommonName under that ChEBI ID. After all IDs of a key
    are processed:

    * self.metabolitesWithSynonymsDictionary[key] is replaced by a
      one-element list holding the last name obtained ("NA" if none);
    * self.metaboliteCommonName[key] is set to the last name obtained,
      or "NA" when there was none or it was None.
    """
    for key, value in self.metaboliteIDDictionary.items():
        name = "NA"
        commonName = None
        for each in value["chebi_id"]:
            try:
                chebi_id = str(each)
                name = libchebipy.ChebiEntity(chebi_id).get_name()
                commonName = name
                self.metaboliteCommonName[each] = (
                    "NA" if commonName is None else commonName)
            except Exception:
                # Best effort: skip IDs that cannot be resolved. Only
                # Exception is caught so that Ctrl-C and SystemExit still
                # stop a long-running batch.
                pass
        self.metabolitesWithSynonymsDictionary[key] = [name]
        self.metaboliteCommonName[key] = (
            "NA" if commonName is None else commonName)
def get_substrate_name(id):
    """
    Return the name libchebipy reports for the given ChEBI ID.
    """
    entity = libchebipy.ChebiEntity(id)
    return entity.get_name()
def find_role(id, classes=None):
    """
    Classify a ChEBI entity through its roles.

    Counterpart of find_predecessor for the role ontology: the entity's
    outgoing 'has_role' relations are followed, and each role is then
    classified with find_predecessor.

    Args:
        id: ChEBI ID to classify; it is resolved with ``get_primary``.
        classes: Container of primary ChEBI IDs that count as a
            classification. When None, nothing can match.

    Returns:
        ``(name, chebi_id)`` of the matching class, or ``(None, None)``.
        A role that is itself in ``classes`` is returned immediately.
        Otherwise the result comes from the last role for which
        find_predecessor found a classification.
    """
    role = (None, None)
    if classes is None:
        # Without candidate classes no entity can match. Previously this
        # path raised TypeError on ``primary in None``.
        return role

    primary = get_primary(id)
    if primary in classes:
        return (get_substrate_name(primary), primary)

    # Relations are read from the primary entity, as find_predecessor does.
    # NOTE(review): assumes a secondary ID carries no relations that its
    # primary lacks - confirm against libchebipy.
    for relation in libchebipy.ChebiEntity(primary).get_outgoings():
        if relation.get_type() != 'has_role':
            continue
        target_id = get_primary(relation.get_target_chebi_id())
        if target_id in classes:
            return (get_substrate_name(target_id), target_id)
        found = find_predecessor(target_id, classes=classes)
        # Keep an earlier hit when a later role yields no classification;
        # unconditional assignment used to discard it.
        if found != (None, None):
            role = found
    return role
def chebi2smiles(chebi):
    """
    Convert a ChEBI ID to SMILES via its InChI string.

    The InChI is fetched through libchebipy and passed to inchi2smiles.
    A progress line is printed in either case.

    Returns:
        The result of ``inchi2smiles(inchi)``, or ``False`` when the entity
        has no InChI.
    """
    inchi = libchebipy.ChebiEntity(chebi).get_inchi()
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    if inchi is None:
        print('No InChI: %s' % (inchi,))
        return False
    print('Found InChI: %s' % (inchi,))
    return inchi2smiles(inchi)
def get_primary(id):
    """
    Get the primary ChEBI ID, since the primary ID is not guaranteed to be
    the ID that is reported.

    Returns the parent ID libchebipy reports for ``id``, or ``id`` itself
    when no parent is reported (``get_parent_id()`` returns None).
    """
    primary = libchebipy.ChebiEntity(id).get_parent_id()
    # Identity test: None is a singleton, and ``==`` could be intercepted
    # by a custom __eq__.
    return id if primary is None else primary
def getCommonNameForChebi(self):
    """
    Store the ChEBI name of every metabolite as a one-element list.

    For every key of self.metaboliteIDDictionary, each entry of its
    "chebi_id" list is looked up through libchebipy; the ID and the
    resolved name are printed for each successful lookup. The name from
    the last successful lookup ("NA" when none succeeded or the list is
    empty) replaces self.metabolitesWithSynonymsDictionary[key].
    """
    for key, value in self.metaboliteIDDictionary.items():
        name = "NA"
        for each in value["chebi_id"]:
            try:
                chebi_id = str(each)
                name = libchebipy.ChebiEntity(chebi_id).get_name()
                print(chebi_id)
                print(name)
            except Exception:
                # Best effort: an ID that cannot be resolved keeps the
                # previous name. Only Exception is caught so that Ctrl-C
                # and SystemExit still stop a long-running batch.
                pass
        self.metabolitesWithSynonymsDictionary[key] = [name]
def map_all_metabs_to_chebi_ids(metabs, signif_no_chebi_ids, output_data_dir):
    """
    Maps a list of metabolites to their ChEBI IDs, if available.

    See: https://github.com/libChEBI/libChEBIpy/blob/master/libchebipy/_chebi_entity.py

    Args:
        metabs: iterable of metabolite names to look up.
        signif_no_chebi_ids: container of the metabolite names considered
            significant; used to select the rows of
            signif_close_matches.tsv.
        output_data_dir: prefix of the output files. It is prepended to
            the file names verbatim, so it must end with a path separator.

    Files written:
        exact_matches.tsv: 2 col .tsv with provided metabolite name and
            matched CHEBI ID
            i.e. malate    CHEBI:25115
        all_close_matches.tsv: 2 col .tsv with provided metabolite name and
            dictionaries of possible matching CHEBI names and IDs for all
            metabolites without exact matches
            i.e. GSH    [{'Gsh-prostaglandin A1': '5548'}, {'S-Decyl GSH': '8955'}]
        signif_close_matches.tsv: same format as all_close_matches.tsv but
            includes only significant metabolites with no exact CHEBI id
            matches

    Returns:
        Path of the signif_close_matches.tsv file that was written.
    """
    names_map = {}
    no_exact_match = {}
    signif_no_exact_match = {}

    for name in metabs:
        exact_hits = lc.search(name, exact=True)
        if exact_hits:
            # Only the first exact hit is used.
            # TODO: search used-to-produce sif for this ChEBI id
            names_map[name] = 'CHEBI:' + str(
                exact_hits[0]._ChebiEntity__chebi_id)
            continue

        # No exact match: fall back to a non-exact search and record every
        # candidate as {candidate_name: candidate_id}.
        close_matches = []
        for hit in lc.search(name):
            alt_id = str(hit._ChebiEntity__chebi_id)
            close_matches.append(
                {lc.ChebiEntity(alt_id).get_name(): 'CHEBI:' + alt_id})
        no_exact_match[name] = close_matches
        if name in signif_no_chebi_ids:
            # Reuse the candidates instead of resolving each one again.
            signif_no_exact_match[name] = list(close_matches)

    _write_two_col_tsv(output_data_dir + "exact_matches.tsv", names_map)
    _write_two_col_tsv(output_data_dir + "all_close_matches.tsv",
                       no_exact_match)
    signif_close_matches_path = output_data_dir + "signif_close_matches.tsv"
    _write_two_col_tsv(signif_close_matches_path, signif_no_exact_match)
    return signif_close_matches_path


def _write_two_col_tsv(path, mapping):
    """Write one ``key<TAB>value`` line per item of ``mapping`` to ``path``."""
    # The context manager closes the file even when a write fails.
    with open(path, "w") as out_file:
        out_file.writelines(
            str(key) + '\t' + str(value) + '\n'
            for key, value in mapping.items())
query_json = json.loads(query.text) chebi_id = query_json["database_links"]["CHEBI"][0]["id"] except (ConnectionError, KeyError) as errors: pass chebi_id_dict[bigg_id] = chebi_id # Query ChEBI database to collect InChI string.. inchi_str_list = [] fails_counter = 0 print("Fetching InChI strings from ChEBI...") for key in tqdm.tqdm(chebi_id_dict.keys()): if chebi_id_dict[key] != "": chebi_entity = libchebipy.ChebiEntity(chebi_id_dict[key]) inchi_str = chebi_entity.get_inchi() inchi_str_list.append(inchi_str) else: inchi_str_list.append("NaN") fails_counter += 1 print(f"Number of entries without found InChI strings: {fails_counter}.") # Add data to df and save to new csv file: print(f"Saving to {OUTPUT_CSV_PATH}...") input_csv_df["InChI_string"] = inchi_str_list input_csv_df.to_csv(OUTPUT_CSV_PATH, index=False) if __name__ == "__main__":
def update_chebi(chebi_list):
    """Return the SMILES libchebipy reports for each ChEBI code, in input order."""
    return [libchebipy.ChebiEntity(code).get_smiles() for code in chebi_list]
def test_get_refs_reported_problem(self):
    '''Regression check for the problem reported in
    https://github.com/libChEBI/libChEBIpy/issues/9: building the entity
    for ID '4700' must succeed and give a truthy object.'''
    entity = libchebipy.ChebiEntity('4700')
    self.assertTrue(entity)