def findReaction_fromRxn2Putative(lReactants_ids, lProducts_ids, lLeft, lRight):
    """Tell whether both sides of a reaction overlap a candidate reaction.

    The first argument is compared with the third, and the second with the
    fourth: each entry of ``lReactants_ids`` is intersected (via
    ``gL.intersect``) with ``list(lLeft)``, and each entry of
    ``lProducts_ids`` with ``list(lRight)``.

    Returns:
        bool: True when every reactant entry yields a non-empty intersection
        with the left side and every product entry yields a non-empty
        intersection with the right side. An empty ``lReactants_ids`` or
        ``lProducts_ids`` counts as satisfied for that side.
    """
    # Both sides are evaluated in full before the verdict is computed, so
    # every entry is always passed through gL.intersect.
    left_matches = [
        len(gL.intersect(identifiers, list(lLeft))) != 0
        for identifiers in lReactants_ids
    ]
    right_matches = [
        len(gL.intersect(identifiers, list(lRight))) != 0
        for identifiers in lProducts_ids
    ]
    return all(left_matches) and all(right_matches)
def findTC_transports(lR, r):
    """Report whether any entry of ``lR`` overlaps ``r``.

    Each element of ``lR`` is intersected with ``r`` through
    ``gL.intersect``.

    Returns:
        bool: True if at least one intersection is non-empty, False
        otherwise (including when ``lR`` is empty).
    """
    overlaps = []
    for candidate in lR:
        shared = gL.intersect(candidate, r)
        overlaps.append(len(shared) != 0)
    return any(overlaps)
def findReaction_rhea(lReactants_ids, lProducts_ids, lLeft, lRight):
    """Tell whether both sides of a reaction match a candidate reaction.

    The first argument is compared with the third, and the second with the
    fourth. An entry of ``lReactants_ids`` matches when its intersection
    (via ``gL.intersect``) with at least one element of ``lLeft`` is
    non-empty; entries of ``lProducts_ids`` are checked against the elements
    of ``lRight`` in the same way.

    Returns:
        bool: True when every reactant entry and every product entry has a
        match. A side with no entries counts as satisfied; an entry compared
        against an empty ``lLeft``/``lRight`` never matches.
    """
    # Inner lists (not generators) are used on purpose: every pair is
    # intersected, nothing is short-circuited.
    left_matches = [
        any([len(gL.intersect(identifiers, candidate)) != 0
             for candidate in lLeft])
        for identifiers in lReactants_ids
    ]
    right_matches = [
        any([len(gL.intersect(identifiers, candidate)) != 0
             for candidate in lRight])
        for identifiers in lProducts_ids
    ]
    return all(left_matches) and all(right_matches)
dfMetsFromModel_products = dfMetsFromModel_products.drop_duplicates( subset=['Id']) lProducts_ids_original = list( dfMetsFromModel_products['lIdentifiers'].dropna()) lReactants_ids_original_tuple = map(tuple, lReactants_ids_original) lProducts_ids_original_tuple = map(tuple, lProducts_ids_original) ltransportedMets = list( set(lReactants_ids_original_tuple).intersection( lProducts_ids_original_tuple)) if len(ltransportedMets) != 0: lReactants_ids = [] for l in ltransportedMets: l = [el for el in l if el.isdigit() == True] chebiIdentifiers = gL.intersect(l, list(dfChebiCompounds['Id'])) dfChebiExplodedFilter = dfChebiCompounds_exploded[ dfChebiCompounds_exploded['ParentalChebiIds_exploded']. isin(chebiIdentifiers)] dfChebiFilter = dfChebiCompounds[dfChebiCompounds['Id'].isin( chebiIdentifiers)] lAllChebi2Search = [] if dfChebiExplodedFilter.empty is False: for riga in list( dfChebiExplodedFilter['ParentalChebiIds']): lAllChebi2Search += riga for riga in list(dfChebiExplodedFilter['AllChebiIds']): lAllChebi2Search += riga
dfSearch = dfSearch.reset_index(drop=True) for lec in list(dfSearch['ec_number'].dropna()): for ec in lec: lEc.append(ec[4:-1]) allmetacycEnzymes = list(dfSearch['enzymes_of_reaction'].dropna()) for l in allmetacycEnzymes: for el in l: lAnd = [] dfProt1 = dfMetacyc_proteins[dfMetacyc_proteins['MetaCycId'] == el] dfProt2 = dfMetacyc_proteins[dfMetacyc_proteins['Component_of'] == el] dfProt = pd.concat([dfProt1, dfProt2]) if dfProt.empty is False: for rowdfProt in dfProt.itertuples(): if len(gL.intersect(rowdfProt.lSpecies, taxId)) != 0: if pd.isna(rowdfProt.Uniprot) is False: dfCorrespondingGenes = dfuniprot2Org[ dfuniprot2Org['uniprot'] == 'up:' + rowdfProt.Uniprot] for foundGene in list( dfCorrespondingGenes['keggGeneId']): if [foundGene.split(':')[1]] not in lMetaEnzOR: geneId2search = foundGene.split(':')[1] lMetaEnzOR, dGenesFromKegg = rxnL.checkNadNadpDependencies_or( nadp, nad, geneId2search, orgCode + ':' + geneId2search, lMetaEnzOR, dGenesFromKegg) lgenes = rowdfProt.lGenes lComponents = rowdfProt.Components foundGenes = dfMetacyc_genes[
lCompartments.append(unipLoc['#text'].lower()) elif '#text' in unipLoc and '@evidence' not in unipLoc: lCompartments.append(unipLoc['#text'].lower()) lCompartments_sistemati = [] for comp in lCompartments: if comp in dCompartments: comp = dCompartments[comp] elif 'integral component of' in comp: comp = comp.replace('integral component of', '').strip() if comp in dCompartments: comp = dCompartments[comp] lCompartments_sistemati.append(comp) lCompartments_sistemati = gL.unique(lCompartments_sistemati) knownComps = gL.intersect(lCompartments_sistemati, lPossibleCompartments) unknown = gL.difference(lCompartments_sistemati, lPossibleCompartments) if len(unknown) != 0: for unk in unknown: if unk not in dname2GO: idGo = genesL.getGOsearch(unk) if idGo != '': lAncestors = genesL.getGOAncestors(idGo) lnewComps = [] for anc in lAncestors: if anc in dAnc2Name and dAnc2Name[anc] in lPossibleCompartments: lnewComps.append(dAnc2Name[anc]) else: name = genesL.getGOName(anc) if name != '' and name in lPossibleCompartments:
dRxn2Compartments[r.id] = lRxnComps # Filter genes associated to each reaction according to the associated compartment dfGenes2Compartment = pd.read_csv(os.path.join(OUTDIR, dfGenes2Comp + '.csv'), sep = '\t', dtype = {'Gene': str}) dfGenes2Compartment['lCompartments'] = dfGenes2Compartment['lCompartments'].apply(literal_eval) dGenes2Compartment = dfGenes2Compartment.set_index('Gene')['lCompartments'].to_dict() dfModelRxns2Genes = pd.read_csv(os.path.join(OUTDIR, dfrxns2Genes + '.csv'), sep = '\t', dtype = {'Rxn': str, 'KeggId': str, 'GPR': str, 'Name': str, 'IsTransport': bool, 'IsExchange': bool, 'GPRrule': str}) dfModelRxns2Genes['lGenes'] = dfModelRxns2Genes['lGenes'].apply(literal_eval) lGenesFiltered_all = [] for row in dfModelRxns2Genes.itertuples(): lCompRxnModel = gL.unique(dRxn2Compartments[row.Rxn_conv]) if len(row.lGenes) != 0 and len(lCompRxnModel) != 0: lAllGene_currentRxn = [] for l in row.lGenes: lAllGene_currentRxn += l lAllGene_currentRxn = gL.unique(lAllGene_currentRxn) lGenes2Remove = [] for g in lAllGene_currentRxn: if len(dGenes2Compartment[g]) != 0 and len(gL.intersect(gL.unique(dGenes2Compartment[g]), lCompRxnModel)) == 0: lGenes2Remove.append(g) lGenesFiltered = [[el for el in l if el not in lGenes2Remove] for l in row.lGenes] lGenesFiltered = [subL for subL in lGenesFiltered if subL != []] lGenesFiltered_all.append(lGenesFiltered) else: lGenesFiltered_all.append(row.lGenes) dfModelRxns2Genes['lGenes_filtered'] = lGenesFiltered_all dfModelRxns2Genes.to_csv(os.path.join(OUTDIR, outputFileName + '.csv'), sep = '\t', index = False)
def jaccard(list1, list2):
    """Return the Jaccard index of two collections.

    The index is the size of ``gL.intersect(list1, list2)`` divided by the
    size of ``gL.union(list1, list2)``.

    Args:
        list1: First sized collection of items.
        list2: Second sized collection of items.

    Returns:
        float: ``len(intersection) / len(union)``. When both inputs are
        empty the ratio is undefined; 0.0 is returned in that case (no
        evidence of similarity) instead of raising ``ZeroDivisionError``.
    """
    # Two empty inputs give an empty union; bail out before dividing by zero.
    if len(list1) == 0 and len(list2) == 0:
        return 0.0
    intersection = len(gL.intersect(list1, list2))
    union = len(gL.union(list1, list2))
    return intersection / union
if '||' not in row.description: lDescription.append([row.description]) else: lDescription.append(row.description.split('||')) dfchem_xref['description_splt'] = lDescription dfchem_merge = pd.merge(dfchem_prop, dfchem_xref, on = 'ID') dfchem_merge['name'] = dfchem_merge['name'].str.lower() ## Load list of metabolites from model dfmets = pd.read_csv(os.path.join(OUTDIR, inputFuzzy), sep = '\t', dtype=str) dfmets['Name'] = dfmets['Name'].str.lower() ## get identifiers associated to common metabolites inMetaNetX = gL.intersect(dfmets['Name'].tolist(), dfchem_merge['name'].tolist()) dfchem_merge_group = dfchem_merge[dfchem_merge['name'].isin(dfmets['Name'].tolist())] dfFinal = dfchem_merge_group.groupby('ID')[['source', 'name']].agg(list).reset_index() ## test how many metabolites of the input model are not included in the column 'name' of dfchem_merge dataframe notInMetaNetX = gL.difference(dfmets['Name'].tolist(), dfchem_merge['name'].tolist()) dfFinal_p2 = pd.DataFrame({'ID':[], 'source':[], 'description_splt':[]}) dfFilter = dfchem_merge[dfchem_merge.apply(lambda x: notInMetaNetX[0] in [el.lower() for el in x['description_splt']] , axis=1)].reset_index() if dfFilter.empty == True: print('NOT FOUND:\t', notInMetaNetX[0]) else: dfFinal_p2 = dfFilter[['ID', 'source', 'description_splt']] dfFinal_p2 = dfFinal_p2.groupby('ID')[['source', 'description_splt']].agg(list).reset_index() print(dfFinal_p2)
for i0, row0 in resAllInfo.iterrows(): lisoformsFromUniprot0 += row0[ 'otherIsoforms'] # the "otherIsoforms" column includes the isoforms retrieved from Uniprot lIsoformsFromKegg0 += row0[ 'isoform'] # the "isoforms" column includes the isoforms retrieved from KEGG luniprot0_append += row0[ 'id_uniprot'] # also used the Uniprot identifiers included in id_uniprot column nomi0 = row0['proteinNames'] + row0[ 'geneNames'] + row0['subunitsFromName'] + row0[ 'geneName_fromKEGG'] # use all the names retrieved for the protein lnames0 += nomi0 lIsoformsIndications0.append(row0['isoformIndication']) lRedundancy0 += row0['redundancy'] comparison0 = [ len(gL.intersect(rule, luniprot0_append)) != 0 for rule in dfGenesRelationships['uniprotId'] ] res = dfGenesRelationships.loc[comparison0] for i0, r0 in res.iterrows(): lnames0 += r0['gene'] lcomplex0 += r0['AND'] luniprot0 = gL.unique(luniprot0 + luniprot0_append) lcomplex0 = gL.unique(lcomplex0) lnames0 = gL.unique(lnames0) lisoformsFromUniprot0 = gL.unique(lisoformsFromUniprot0) lIsoformsIndications0 = gL.unique(lIsoformsIndications0) lIsoformsFromKegg0 = gL.unique(lIsoformsFromKegg0) for u1 in luniprot1: resAllInfo = dfCompleteGenesInfo[
rfind('[')].strip() else: dReactants[reactant.id] = reactant.name lReactants = list(dReactants.values()) lReactants.sort() dProducts = {} for product in rxn.products: if testModel == 'y7' or testModel == 'y8': dProducts[ product.id] = product.name[:product.name.rfind('[')].strip() else: dProducts[product.id] = product.name lProducts = list(dProducts.values()) lProducts.sort() if len(gL.intersect(lReactants, lProducts)) != 0: lTransport.append(True) metTrasportati = gL.intersect(lReactants, lProducts) lMetsTrasportati.append(metTrasportati) else: lTransport.append(False) lMetsTrasportati.append([]) dfRxns['Name'] = lNames dfRxns['IsTransport'] = lTransport dfRxns['trasportedMets'] = lMetsTrasportati initialLetter = all(el.startswith('M_') for el in list(dfMetsFromModel['Id'])) # Check which reactions are exchange reactions lExchange = []
def detectEqualSubsProds(lLeft, lRight):
    """Return the elements shared by the two sides of a reaction.

    Args:
        lLeft: Items on the left side.
        lRight: Items on the right side.

    Returns:
        The result of ``gL.intersect(lLeft, lRight)`` when it is non-empty,
        otherwise a new empty list.
    """
    # Compute the intersection once and reuse it for both the emptiness
    # check and the return value.
    common = gL.intersect(lLeft, lRight)
    if len(common) != 0:
        return common
    return []
def checkEqualSubsProds(lLeft, lRight):
    """Report whether the two sides of a reaction share any element.

    Returns:
        bool: True when ``gL.intersect(lLeft, lRight)`` is non-empty,
        False otherwise.
    """
    shared = gL.intersect(lLeft, lRight)
    return len(shared) != 0