def describe_entity(entity: ChebiEntity) -> None:
    """
    Log a short report on a ChEBI entity and its relationships.

    The report holds the entity name, one bullet per outgoing relationship
    and one bullet per incoming relationship. It is written with
    ``log.info`` under the entity's ID.

    Args:
        entity:
            a :class:`ChebiEntity`
    """
    name = entity.get_name()

    outgoing = []  # type: List[str]
    for rel in entity.get_outgoings():
        neighbour = ChebiEntity(rel.get_target_chebi_id())
        outgoing.append(
            f" • {name} {rel.get_type()} {brief_description(neighbour)}")

    incoming = []  # type: List[str]
    for rel in entity.get_incomings():
        neighbour = ChebiEntity(rel.get_target_chebi_id())
        incoming.append(
            f" • {brief_description(neighbour)} {rel.get_type()} {name}")

    report = "\n".join([
        entity.get_name(),
        f" ► OUTGOING ({len(outgoing)})",
        *outgoing,
        f" ► INCOMING ({len(incoming)})",
        *incoming,
    ])
    log.info(f"{entity.get_id()}:\n{report}")
def convert_entity_to_parent(entity, ID, CID):
    """Follow the parent chain of a ChEBI entity up to its top-most parent.

    Arguments:
        entity - entity whose parent chain is followed
        ID - ID used as the starting value; replaced by each parent ID
            (with the "CHEBI:" prefix removed) while walking up the chain
        CID - ID the caller currently associates with `entity`

    Returns:
        (entity, CID) unchanged when the final ID equals CID, otherwise a
        new ChebiEntity built from the final ID together with that ID.

    """
    ancestor = entity.get_parent_id()
    while ancestor is not None:
        ID = ancestor.replace("CHEBI:", '')
        ancestor = ChebiEntity(ID).get_parent_id()

    # nothing to swap if the walk ended on the ID the caller already holds
    if ID == CID:
        return entity, CID
    return ChebiEntity(ID), ID
def has_roles(self):
    """Collect the 'has_role' targets of this object's ChEBI entity.

    Returns a dict mapping each role entity's name to the first run of
    digits in its ChEBI ID (as an int). Returns an empty string when
    ``self.chebi_id`` is falsy.
    """
    if not self.chebi_id:
        return ''

    entity = ChebiEntity('CHEBI:' + str(self.chebi_id))
    roles = {}
    for rel in entity.get_outgoings():
        if rel.get_type() != 'has_role':
            continue
        target_id = rel.get_target_chebi_id()
        role_entity = ChebiEntity(target_id)
        digits = re.findall(r'\d+', target_id)
        roles[role_entity.get_name()] = int(digits[0])
    return roles
def test_get_references_secondary(self):
    '''Check that a known patent reference is listed for entity '22182'.'''
    this_chebi_entity = ChebiEntity('22182')
    this_reference = Reference(
        'WO2006008754', 'Patent', '',
        'NOVEL INTERMEDIATES FOR LINEZOLID '
        'AND RELATED COMPOUNDS')
    # assertIn reports both operands on failure, unlike assertTrue(x in y)
    self.assertIn(this_reference, this_chebi_entity.get_references())
def clean_up_ID(ID):
    """Resolve a Chebi ID obtained from search to its top-most parent ID.

    Prints the incoming ID, then keeps replacing it with its parent ID
    until an entity without a parent is reached, and returns that ID.

    """
    print('>> CHEBI ID', ID)

    # NOTE: a carboxylate check using check_entity_for_carboxylate() used to
    # run at this point; it is currently disabled.

    # check for parent ID
    while True:
        parent_ID = ChebiEntity(ID).get_parent_id()
        if parent_ID is None:
            return ID
        ID = parent_ID
def test_get_definition_secondary(self):
    '''Check the definition text returned for entity '41140'.'''
    expected = ('D-Glucopyranose with beta configuration at the '
                'anomeric centre.')
    definition = ChebiEntity('41140').get_definition()
    self.assertEqual(definition, expected)
def parse(self):
    '''
    Parse the tmchem file (sentence.pubtator.tmchem) given by ``self.file``.

    Only tab-separated lines with exactly six columns are used. For each
    such line the document ID is the first column without its last dotted
    suffix (ex: TEES.d0.s1 -> TEES.d0), prefixed with ``self.tmpr`` and a
    dot. The compound name is the fourth column and the identifier is the
    sixth column.

    When the identifier contains a ``CHEBI:`` ID whose entity has a parent,
    the parent ID is stored instead (ex: CHEBI:22982 has for parent
    CHEBI:27732). Any other identifier is stored unchanged.

    Returns ``self.dictmchem``: {document ID: {name in text: identifier}}.
    '''
    with open(self.file) as f:
        for raw_line in f:
            fields = raw_line.rstrip().split('\t')
            if len(fields) != 6:
                continue

            # id of the sentence in TEES (ex TEES.d0.s1); keep only the
            # ID of the doc (ex TEES.d0)
            iddoc = self.tmpr + '.' + os.path.splitext(fields[0])[0]

            # Some chebi are parent and some are children (duplicates);
            # in that case take the parent.
            chebimesh = fields[5]
            chebi = re.search(r'(?<=CHEBI:)\w+', chebimesh)
            if chebi:
                chebientity = ChebiEntity("CHEBI:" + chebi.group(0))
                parent = chebientity.get_parent_id()
                if parent:
                    chebimesh = parent

            # name as found in the text -> chebi ID or mesh ID
            self.dictmchem.setdefault(iddoc, {})[fields[3]] = chebimesh
    return self.dictmchem
def __get_mol_file(self, read_id, retrieved_id):
    '''Assert that the mol file of an entity matches a reference mol file.

    read_id is passed to _read_mol_file to obtain the reference text.
    retrieved_id identifies the entity whose mol file (located through
    get_mol_filename) is read and compared with it.
    '''
    mol_read = _read_mol_file(read_id)
    this_chebi_entity = ChebiEntity(str(retrieved_id))

    # the context manager closes the file even if read() raises
    with open(this_chebi_entity.get_mol_filename(), 'r') as textfile:
        mol_retrieved = textfile.read()

    # assertEquals is a deprecated alias of assertEqual
    self.assertEqual(mol_read, mol_retrieved)
def test_get_comments_secondary(self):
    '''Check that a known general comment is listed for entity '11505'.'''
    this_chebi_entity = ChebiEntity('11505')
    # NOTE(review): '%M' is the strptime directive for minutes, not months
    # ('%m'). It is left unchanged because the expected Comment may have to
    # match the way the library itself parses dates - confirm before fixing.
    this_comment = Comment(
        '29044', 'General',
        'The substituent name \'3-oxoprop-2-enyl\' is '
        'incorrect but is used in various databases.',
        datetime.datetime.strptime('2005-03-18', '%Y-%M-%d'))
    # assertIn reports both operands on failure, unlike assertTrue(x in y)
    self.assertIn(this_comment, this_chebi_entity.get_comments())
def __get_mol_file(self, read_id, retrieved_id):
    '''Assert that the mol file of an entity matches a reference mol file.

    read_id is passed to _read_mol_file to obtain the reference text.
    retrieved_id identifies the entity whose mol file is read from disk.
    '''
    expected = _read_mol_file(read_id)
    mol_path = ChebiEntity(str(retrieved_id)).get_mol_filename()
    with open(mol_path, 'r') as handle:
        actual = handle.read()
    self.assertEqual(expected, actual)
def save_model(self, request, obj, form, change):
    """Fill in the ChEBI and PubChem names on `obj`, then save it.

    If the form carries a ChEBI ID, the entity is looked up; when it has a
    parent, the parent's numeric ID and entity are used instead. Without a
    ChEBI ID the ChEBI name is cleared. The PubChem name is the first
    synonym of the compound with the given CID, or None when no PubChem ID
    is supplied.
    """
    cleaned = form.cleaned_data

    chebi_id = cleaned['chebi_id']
    if not chebi_id:
        obj.chebi_name = None
    else:
        chebi_comb = ChebiEntity('CHEBI:' + str(chebi_id))
        parent_id = chebi_comb.get_parent_id()
        if parent_id:
            # switch to the parent: keep only the digits of its ID
            chebi_id = int(re.findall(r'\d+', parent_id)[0])
            chebi_comb = ChebiEntity('CHEBI:' + str(chebi_id))
        obj.chebi_id = chebi_id
        obj.chebi_name = chebi_comb.get_name()

    pubchem_id = cleaned['pubchem_id']
    if not pubchem_id:
        obj.pubchem_name = None
    else:
        obj.pubchem_name = Compound.from_cid(pubchem_id).synonyms[0]

    obj.save()
def get_Mol_Weight(dataframe, column):
    """Take ChEBI IDs from `dataframe[column]` and return their masses.

    Each value is converted to str and looked up with ChebiEntity. The
    result is a list in the same order holding ``get_mass()`` for each ID,
    or ``np.nan`` for any ID whose lookup raised an exception.
    """
    from libchebipy import ChebiEntity

    mol_weight = []
    for chebi_id in dataframe[column]:
        try:
            mol_weight.append(ChebiEntity(str(chebi_id)).get_mass())
        except Exception:
            # Best effort: a failed lookup becomes NaN. Unlike a bare
            # ``except:``, KeyboardInterrupt and SystemExit still propagate.
            mol_weight.append(np.nan)
    return mol_weight
def find_synonym(results, target):
    """Look for 'target' among the names of results from a non-exact Chebi
    search.

    The comparison ignores case. Returns the ID (without the "CHEBI:"
    prefix) of the first result with a matching name, or None if there is
    no match.

    """
    for hit in results:
        synonyms = ChebiEntity(hit.get_id()).get_names()
        if any(syn.get_name().lower() == target.lower()
               for syn in synonyms):
            print('found synonym - give ID')
            return hit.get_id().replace("CHEBI:", '')
    return None
def get_entity(chebi_id: Union[int, str]) -> ChebiEntity:
    """
    Look up a ChEBI entity from its ID.

    Args:
        chebi_id:
            integer ChEBI ID like ``15903``, or string ID like ``'15903'``,
            or string ID like ``'CHEBI:15903'``.
    """
    # Always hand over a string: per the original author's note, an int is
    # not OK here despite what demo code suggests.
    id_text = str(chebi_id)
    log.debug(f"Looking up ChEBI ID: {id_text}")
    return ChebiEntity(id_text)
def check_line_for_carboxylate(line):
    """Check a line from the CHEBI compounds file for carboxylic acid.

    `line` must unpack into exactly ten fields; the first is used as the
    ChEBI ID and the sixth as the compound name.

    Returns name of the protonated form: the name of the first
    'is_conjugate_base_of' target whose own name (letters only) ends in
    'ate' but not in 'phate'. Returns None if there is no such relation.

    """
    # An earlier variant worked OFFLINE on the "Conjugate base of" text in
    # the notes column; it was NOT COMPLETE, so libchebipy (online) is used.
    ID, _, _, _, _, name, _, _, _, _ = line

    for relation in ChebiEntity(ID).get_outgoings():
        relation_type = relation.get_type()
        target_id = relation.get_target_chebi_id()
        if relation_type != 'is_conjugate_base_of':
            continue
        print(relation)
        # use a regular expression to remove any non alphabetic characters
        # that may follow the name
        only_alph = sub('[^A-Za-z]', '', name)
        print(only_alph)
        if only_alph[-3:] != 'ate' or only_alph[-5:] == 'phate':
            continue
        acid_ID = target_id.replace("CHEBI:", "")
        print('acid ID:', acid_ID)
        acid_name = ChebiEntity(acid_ID).get_name()
        print('Conjugate base of:', acid_name, '<<<<<<<<<<')
        return acid_name
    return None
def get_chebi_entity(compound, user=None):
    """Return the ChEBI entity objects attached to `compound`.

    Objects already stored on the compound with an 'object_type' of
    'chebi entity', 'chebi' or 'chebi object' (case-insensitive) are
    returned directly. If there are none, a ChebiEntity is created for
    every ID yielded by get_chebi_id(), registered on the compound as a
    "ChEBI Entity" object, and the new objects are returned.
    """
    accepted_types = ['chebi entity', 'chebi', 'chebi object']
    chebi_array = [
        entry['object']
        for entry in compound.compound_objects
        if 'object_type' in entry
        and entry['object_type'].lower() in accepted_types
    ]
    if chebi_array:
        return chebi_array

    for chebi_id in get_chebi_id(compound, user=user):
        chebi_object = ChebiEntity(chebi_id.upper())
        gnomics.objects.compound.Compound.add_object(
            compound, obj=chebi_object, object_type="ChEBI Entity")
        chebi_array.append(chebi_object)
    return chebi_array
def check_entity_for_carboxylate(entity):
    """Check an entity from libchebipy for carboxylic acid.

    Returns (name, entity) of the protonated form, or (None, None) when no
    suitable acid is found.

    The 'ate' name test would also accept phosphates, so names ending in
    'phate' are rejected.

    """
    # Using libchebipy -- Online:
    name = entity.get_name()
    outgoings = entity.get_outgoings()
    smiles = entity.get_smiles()
    if smiles is not None and has_carboxylate(smiles) is False:
        return None, None

    for relation in outgoings:
        relation_type = relation.get_type()
        target_id = relation.get_target_chebi_id()
        if relation_type != 'is_conjugate_base_of':
            continue
        # use a regular expression to remove any non alphabetic characters
        # that may follow the name
        only_alph = sub('[^A-Za-z]', '', name)
        if only_alph[-3:] != 'ate' or only_alph[-5:] == 'phate':
            continue

        acid_ID = target_id.replace("CHEBI:", "")
        acid_entity = ChebiEntity(acid_ID)
        acid_name = acid_entity.get_name()
        acid_smiles = acid_entity.get_smiles()
        if acid_smiles is None:
            continue
        # confirm acid has a carboxylate based on its SMILES string
        if has_carboxylate(acid_smiles) is False:
            continue
        print('>>> new SMILES:', acid_smiles)
        # do not switch to a charged acid SMILES unless it is an accepted
        # exception
        if check_charge_on_SMILES(acid_smiles):
            if charge_except(acid_smiles) is False:
                continue
        print(acid_name, acid_entity)
        print('---- Conjugate base of:', acid_name, '<<<<<<<<<<')
        return acid_name, acid_entity

    return None, None
def chebi_from_dictio(self, dicofile):
    '''
    Cecile Pereira: 14 August
    #TODO TEST
    Read the dictionary file and save the name, id and chebi information
    in the dictionaries self.idname, self.nameid and self.idchebi.

    Lines are handled by prefix:
      'ID '      : starts a new entry; the second token becomes the
                   current id.
      'NA '      : cleaned second token is a name of the current id.
      'TM '      : second token is split on '@'; its cleaned first part is
                   registered as a name of the current id.
      'DB CHEB_' : the last non-empty token (after removing the 'CHEB_' /
                   'CHEBI:' prefixes and 'Not available') is looked up and,
                   when the entity has a parent, the parent ID is stored
                   under the current id.
    '''
    tmpid = ""
    # the context manager makes sure the dictionary file is closed
    with open(dicofile, 'r') as f:
        for line in f:
            if line.startswith('ID '):
                tmpid = split(' ', line)[1]
                self.idname[tmpid] = {}
            elif line.startswith('NA '):
                tmpname = clean(split(' ', line)[1])
                self.nameid[tmpname] = tmpid
                self.idname[tmpid][tmpname] = ''
            elif line.startswith('TM '):
                tmp = split(' ', line)
                # synonyms
                tmp = split(r'\@', tmp[1])
                tmp[0] = clean(tmp[0])
                self.nameid[tmp[0]] = tmpid
                self.idname[tmpid][tmp[0]] = ''
            elif line.startswith('DB CHEB_'):
                # Reset per line so a line without a usable token cannot
                # reuse the ID of a previous entry (or hit an unbound name).
                chebi = ''
                for t in split(' ', line):
                    t = re.sub('^CHEB_', '', t)
                    t = re.sub('^CHEBI:', '', t)
                    t = re.sub('Not available', '', t)
                    if not t == '':
                        chebi = t
                if chebi:
                    chebientity = ChebiEntity("CHEBI:" + chebi)
                    # ex: CHEBI:22982 has for parent CHEBI:27732
                    parent = chebientity.get_parent_id()
                    if parent:
                        # take the CHEBI parent...
                        # NOTE(review): assumes self.idchebi[tmpid] already
                        # exists (it is not created here) - confirm.
                        self.idchebi[tmpid][parent] = ''
def conditionclass(request, class_id):
    """Render the page for a ChEBI class and the condition types under it.

    The numeric IDs of all incoming 'has_role' relations of the class
    entity are collected and used to select the matching ConditionType
    rows, plus the distinct datasets using those condition types whose
    paper's latest data status is not 'not relevant'.
    """
    class_entity = ChebiEntity('CHEBI:' + str(class_id))
    class_name = class_entity.get_name()

    children = []
    for relation in class_entity.get_incomings():
        if relation.get_type() == 'has_role':
            tid = relation.get_target_chebi_id()
            # raw string: '\d' in a normal literal is an invalid escape
            tid = re.search(r'(?<=CHEBI:)(\d)*', tid)
            tid = int(tid.group(0))
            children.append(tid)

    conditiontypes = ConditionType.objects.filter(chebi_id__in=children)
    datasets = Dataset.objects.filter(conditionset__conditions__type__in=conditiontypes)\
        .exclude(paper__latest_data_status__status__name='not relevant').distinct()

    # NOTE(review): is_authenticated is called as a method here; newer
    # Django versions expose it as a property - confirm the version in use.
    return render(
        request, 'conditions/class.html', {
            'id': class_id,
            'class_name': class_name,
            'conditiontypes': conditiontypes,
            'papers': datasets,
            'DOWNLOAD_PREFIX': settings.DOWNLOAD_PREFIX,
            'USER_AUTH': request.user.is_authenticated()
        })
def test_search_hexenal_inexact(self):
    '''An inexact search for (E)-2-Hexenal must include CHEBI:28913.'''
    hits = libchebipy.search('(E)-2-Hexenal', False)
    expected = ChebiEntity('CHEBI:28913')
    self.assertIn(expected, hits)
def convert_Chebi2formula(ChebiID):
    """Return the formula of the ChEBI entity with the given ID."""
    return ChebiEntity(ChebiID).get_formula()
def data(request, domain, id):
    """Return a tab-separated text download of the data for one object.

    `domain` selects how `id` is interpreted and which datasets are
    exported: 'papers', 'datasets', 'conditions', 'chebi' or 'phenotypes'.
    The response body is a '#'-prefixed header, a row of dataset names,
    and one row per ORF with its value in each dataset ('None' where no
    value exists). It is served as a text/plain attachment.

    NOTE(review): `datasets` is never assigned for any other `domain`
    value, so the function fails on the first use - confirm the URL
    configuration restricts `domain`.
    """
    file_header = ''
    if domain == 'papers':
        paper = get_object_or_404(Paper, pk=id)
        datasets = paper.dataset_set
        file_header = u'# Paper: %s (PMID %s)\n' % (paper, paper.pmid)
    elif domain == 'datasets':
        datasets = Dataset.objects.filter(pk=id)
        # NOTE(review): first() is None when no dataset has this pk, which
        # makes the next line fail - confirm whether a 404 is wanted.
        dataset = datasets.first()
        file_header = u'# Paper: %s (PMID %s)\n# Dataset: %s\n' % (
            dataset.paper, dataset.paper.pmid, dataset)
    elif domain == 'conditions':
        conditiontype = get_object_or_404(ConditionType, pk=id)
        datasets = conditiontype.datasets()
        file_header = u'# Condition: %s (ID %s)\n' % (conditiontype,
                                                      conditiontype.id)
    elif domain == 'chebi':
        chebi_entity = ChebiEntity('CHEBI:' + str(id))
        children = []
        for relation in chebi_entity.get_incomings():
            if relation.get_type() == 'has_role':
                tid = relation.get_target_chebi_id()
                # filter() returns an iterator on Python 3, which int()
                # rejects; join the digit characters first.
                tid = int(''.join(filter(str.isdigit, tid)))
                children.append(tid)
        datasets = Dataset.objects.filter(
            conditionset__conditions__type__chebi_id__in=children)
        file_header = u'# Data for conditions annotated as %s (ChEBI:%s)\n' % (
            chebi_entity.get_name(), id)
    elif domain == 'phenotypes':
        phenotype = get_object_or_404(Observable, pk=id)
        datasets = phenotype.datasets()
        file_header = u'# Phenotype: %s (ID %s)\n' % (phenotype, phenotype.id)

    data = Data.objects.filter(dataset_id__in=datasets.values('id')).all()
    orfs = list(data.values_list('orf', flat=True).distinct())
    datasets_ids = list(
        data.values_list('dataset_id',
                         flat=True).order_by('dataset__paper').distinct())

    # Position of the first occurrence of every ORF / dataset id, giving
    # the same result as list.index() without a scan per data point.
    row_of = {}
    for row, orf in enumerate(orfs):
        row_of.setdefault(orf, row)
    col_of = {}
    for col, dataset_id in enumerate(datasets_ids):
        col_of.setdefault(dataset_id, col)

    matrix = [[None] * len(datasets_ids) for _ in orfs]
    for datapoint in data:
        matrix[row_of[datapoint.orf]][col_of[datapoint.dataset_id]] = \
            datapoint.value

    column_headers = '\t' + '\t'.join([
        u'%s' % get_object_or_404(Dataset, pk=dataset_id)
        for dataset_id in datasets_ids
    ]) + '\n'

    data_row = []
    for i, orf in enumerate(orfs):
        new_row = orf + '\t' + '\t'.join([str(val) for val in matrix[i]])
        print(new_row)
        data_row.append(new_row)
    txt3 = '\n'.join(data_row)

    response = HttpResponse(file_header + column_headers + txt3,
                            content_type='text/plain')
    response[
        'Content-Disposition'] = 'attachment; filename="%s_%s_%s_data.txt"' % (
            settings.DOWNLOAD_PREFIX, domain, id)
    return response
def test_search_aspirin(self):
    '''An inexact search for aspirin must include CHEBI:15365.'''
    hits = libchebipy.search('aspirin', False)
    expected = ChebiEntity('CHEBI:15365')
    self.assertIn(expected, hits)
def test_search_hexenal_exact(self):
    '''An exact search for (E)-2-Hexenal must return CHEBI:28913 first.'''
    results = libchebipy.search('(E)-2-Hexenal', True)
    # assertTrue(a, b) treats b as the failure message and only checks the
    # truthiness of a, so it could never fail; compare the values instead.
    self.assertEqual(ChebiEntity('CHEBI:28913'),
                     results[0] if results else None)
# Output file name: first five characters of the first value in column 1
# of `organism`, followed by "_format_KEGG.gmt".
kegggmt = Path(organism[1].iloc[0][:5] + "_format_KEGG.gmt")
chebilines = []
hmdblines = []
kegglines = []
for i in pathways:
    # rows of this pathway; column 0 holds the compound (ChEBI) numbers
    path = organism[organism[3] == i]
    cpds = path[0].values.tolist()
    chebiids = list(map(str, cpds))
    chebiids = ['CHEBI:' + chebiid for chebiid in chebiids]
    hmdbids = []
    keggids = []
    for chebiid in chebiids:
        try:
            chebi_entity = ChebiEntity(chebiid)
        except ChebiException:
            # chebiid already carries the 'CHEBI:' prefix
            print(chebiid + ' invalid')
            # Skip this ID: without this the loop below would run on an
            # unbound name or on the entity of the previous ID.
            continue
        for db in chebi_entity.get_database_accessions():
            if db.get_type() == 'HMDB accession':
                hmdbids.append(db.get_accession_number().replace(
                    "HMDB00", "HMDB"))
            if db.get_type() == 'KEGG COMPOUND accession':
                keggids.append(db.get_accession_number())
    # chebiids.insert(0, i)
    # chebiids.insert(0, path[1].unique()[0])
    # chebilines.append("\t".join(chebiids))
    if len(set(hmdbids)) > threshold:
        # de-duplicate while keeping first-seen order
        hmdbids = sorted(set(hmdbids), key=hmdbids.index)
def get_cmpd_information(molec):
    """Get information from CHEBI Database of a compound from CHEBI ID.

    Online using libChEBIpy (https://github.com/libChEBI/libChEBIpy)

    `molec` is updated in place; the function always returns None. Each ID
    in `molec.chebiID` is tried in order, and the first one that yields a
    SMILES string RDKit can parse sets the structure, ID, InChI/InChIKey
    and (if requested via `molec.change_name`) the name.

    """
    if molec.chebiID is None and molec.iupac_name is not None:
        # try one more time for chebi ID
        chebiID = get_chebiID(mol_name=molec.name,
                              iupac_name=molec.iupac_name)
        if chebiID is None:
            print('cannot get structure from chebi')
            return None
        molec.chebiID = [chebiID]

    # at this point, molec.chebiID will be a list - iterate over it
    # the iteration stops if any CHEBI ID produces a structure
    for CID in molec.chebiID:
        if CID == '' or ' ' in CID or 'null' in CID:
            print(CID, '- not a real CHEBI ID')
            continue

        # get entity with chebiID, then move to its parent if it has one
        entity = ChebiEntity(CID)
        entity, CID = convert_entity_to_parent(entity, ID=CID, CID=CID)

        # attempt to get structure as SMILES
        smile = entity.get_smiles()
        print('libchebipy result:', smile)
        if smile is None:
            print('molecule does not have recorded structure in CHEBI DB')
            print('probably a generic structure - skipping.')
            molec.SMILES = smile
            molec.mol = None
            continue

        rdkitmol = Chem.MolFromSmiles(smile)
        if rdkitmol is None:
            print('structure could not be deciphered')
            molec.SMILES = smile
            molec.mol = None
            continue

        rdkitmol.Compute2DCoords()
        molec.SMILES = smile
        # remove molecules with generalised atoms
        molec.mol = None if '*' in smile else rdkitmol

        # This chebi ID produced a structure: set molecule properties.
        # If no CID gets this far, molec.chebiID remains a list.
        molec.chebiID = CID
        molec.DB_ID = CID
        # save InChiKey
        iKEY = entity.get_inchi_key()
        if iKEY is not None:
            molec.InChiKey = iKEY
        # save inchi
        inchi = entity.get_inchi()
        if inchi is not None:
            molec.InChi = inchi
        # set name if name is only a code at this point
        try:
            if molec.change_name is True:
                molec.name = entity.get_name()
                molec.change_name = False
        except AttributeError:
            molec.change_name = False
        return None

    return None
def test_get_charge_secondary2(self):
    '''Check that the charge reported for entity '43474' is -2.'''
    # assertEquals is a deprecated alias of assertEqual
    self.assertEqual(-2, ChebiEntity('43474').get_charge())
def get_cmpd_information_offline(molec):
    """Get information from CHEBI Database of a compound from CHEBI ID.

    Done Offline unless necessary.

    molec must have attribute 'chebiID' as integer.

    `molec` is updated in place (name, change_name, chebiID, SMILES, mol
    and, on the online fallback path, InChiKey). The process exits via
    sys.exit() if the ID cannot be found in the compounds file or its
    parent cannot be resolved. Nothing is returned.

    """
    # locations of the offline CHEBI files, built from the DB properties
    DB_prop = DB_functions.get_DB_prop('CHEBI')
    compounds_file = DB_prop[0] + DB_prop[1]['cmpds_file']
    names_file = DB_prop[0] + DB_prop[1]['names_file']
    structures_file = DB_prop[0] + DB_prop[1]['strct_file']

    # set name by searching compound file
    res = search_for_compound_by_id(compounds_file, molec.chebiID)
    if res is None:
        print('chebiID not found:', molec.chebiID)
        print('no match in DB - '
              'this should not happen for CHEBI ID search')
        print('check this!')
        print('Exitting....')
        import sys
        sys.exit()
    else:
        ID, parent_id, name, star = res
        molec.name = name
        molec.change_name = False

    # make sure is parent compound
    if parent_id != 'null':
        res = convert_nameID_to_parent(compounds_file, nameID=ID)
        if res is None:
            print("this should not happen - error with cross reference")
            print('check this!')
            print('Exitting....')
            import sys
            sys.exit()
        ID, parent_id, name, star = res
        molec.name = name
        molec.change_name = False
        molec.chebiID = int(ID)

    # get structure using CHEBI ID from the structures file; s_type tells
    # how the returned structure is encoded
    structure, s_type = get_structure(structures_file, molec.chebiID)
    print(structure, s_type)
    if structure is not None:
        # is structure a MolBlock or Smiles
        if s_type == 'mol':
            # convert structure to SMILEs
            rdkitmol = Chem.MolFromMolBlock(structure)
            if rdkitmol is None:
                print('structure could not be deciphered')
                smile = None
                molec.SMILES = smile
                molec.mol = None
                print('probably a polymeric structure - skipping.')
            else:
                rdkitmol.Compute2DCoords()
                smile = Chem.MolToSmiles(rdkitmol)
                molec.SMILES = smile
                # remove molecules with generalised atoms
                if '*' in smile:
                    molec.mol = None
                else:
                    molec.mol = rdkitmol
        elif s_type == 'SMILES':
            smile = structure
            rdkitmol = Chem.MolFromSmiles(smile)
            if rdkitmol is None:
                print('structure could not be deciphered')
                molec.SMILES = smile
                molec.mol = None
            else:
                rdkitmol.Compute2DCoords()
                molec.SMILES = smile
                # remove molecules with generalised atoms
                if '*' in smile:
                    molec.mol = None
                else:
                    molec.mol = rdkitmol
        elif s_type == 'InChI':
            # NOTE(review): unlike the 'mol' and 'SMILES' branches there is
            # no None check on rdkitmol before it is used - confirm.
            rdkitmol = Chem.MolFromInchi(structure)
            rdkitmol.Compute2DCoords()
            smile = Chem.MolToSmiles(rdkitmol)
            molec.SMILES = smile
            # remove molecules with generalised atoms
            if '*' in smile:
                molec.mol = None
            else:
                molec.mol = rdkitmol
        elif s_type == 'InChIKey':
            # NOTE(review): the parsed molecule is not used afterwards and
            # is not checked for None before Compute2DCoords - confirm
            # these two lines are needed.
            rdkitmol = Chem.MolFromInchi(structure)
            rdkitmol.Compute2DCoords()
            smile = None
            molec.SMILES = smile
            molec.mol = None
            print('molecule given as InChIKey - ambiguous')
            print('probably a generic structure - skipping.')
    else:
        # no offline structure: try using the CHEBI API
        # libChEBIpy (https://github.com/libChEBI/libChEBIpy)
        print('testing libchebipy...')
        entity = ChebiEntity(molec.chebiID)
        smile = entity.get_smiles()
        print('libchebipy result:', smile)
        if smile is not None:
            rdkitmol = Chem.MolFromSmiles(smile)
            if rdkitmol is None:
                print('structure could not be deciphered')
                molec.SMILES = smile
                molec.mol = None
            else:
                rdkitmol.Compute2DCoords()
                molec.SMILES = smile
                # remove molecules with generalised atoms
                if '*' in smile:
                    molec.mol = None
                else:
                    molec.mol = rdkitmol
        elif smile is None:
            molec.SMILES = smile
            molec.mol = None
            print('molecule does not have recorded structure in CHEBI DB')
            print('probably a generic structure - skipping.')
        # save InChiKey (only on this online path, where `entity` exists)
        iKEY = entity.get_inchi_key()
        if iKEY is not None:
            molec.InChiKey = iKEY
def setUp(self):
    '''Create the two entities shared by the tests of this case.'''
    existing = ChebiEntity('4167')
    secondary = ChebiEntity('CHEBI:5585')
    self.__existing = existing
    self.__secondary = secondary
def test_get_mol_existing(self):
    '''Check get_mol() for entity 73938 against the reference mol file.'''
    chebi_id = 73938
    actual = ChebiEntity(str(chebi_id)).get_mol()
    self.assertEqual(actual, _read_mol_file(chebi_id))