Python intersect 예제들, genericLib.intersect Python 예제들

예제 #1

0

파일 보기

def findReaction_fromRxn2Putative(lReactants_ids, lProducts_ids, lLeft,
                                  lRight):
    """Check that every reactant and every product matches its candidate side.

    The first argument is compared with the third, and the second with the
    fourth: each item of ``lReactants_ids`` must share at least one element
    with ``lLeft``, and each item of ``lProducts_ids`` must share at least
    one element with ``lRight`` (overlap is computed by ``gL.intersect``).

    Args:
        lReactants_ids: iterable of identifier collections, one per reactant.
        lProducts_ids: iterable of identifier collections, one per product.
        lLeft: iterable of identifiers to compare the reactants against.
        lRight: iterable of identifiers to compare the products against.

    Returns:
        bool: True when every item on both sides has a non-empty
        intersection with its counterpart. An empty ``lReactants_ids`` or
        ``lProducts_ids`` is vacuously satisfied.
    """
    # Materialise each side exactly once. Rebuilding the list inside the loop
    # is wasted work and, for a one-shot iterator, would leave every item
    # after the first comparing against an empty list.
    left = list(lLeft)
    right = list(lRight)

    # Stop at the first reactant without a match; the products are only
    # examined when the reactant side is fully satisfied.
    if not all(len(gL.intersect(item, left)) != 0 for item in lReactants_ids):
        return False
    return all(len(gL.intersect(item, right)) != 0 for item in lProducts_ids)

예제 #2

0

파일 보기

def findTC_transports(lR, r):
    """Tell whether ``r`` overlaps with at least one entry of ``lR``.

    Args:
        lR: iterable of collections, each compared against ``r``.
        r: collection passed as second argument to ``gL.intersect``.

    Returns:
        bool: True if any entry of ``lR`` has a non-empty intersection
        with ``r``, False otherwise (including when ``lR`` is empty).
    """
    overlaps = []
    for candidate in lR:
        shared = gL.intersect(candidate, r)
        overlaps.append(len(shared) != 0)
    return any(overlaps)

예제 #3

0

파일 보기

def findReaction_rhea(lReactants_ids, lProducts_ids, lLeft, lRight):
    """Check that each reactant/product matches some candidate compound.

    The first argument is compared with the third, and the second with the
    fourth. An item is considered matched when it has a non-empty
    ``gL.intersect`` with at least one element of the corresponding side.

    Args:
        lReactants_ids: iterable of identifier collections, one per reactant.
        lProducts_ids: iterable of identifier collections, one per product.
        lLeft: collection of candidate identifier collections for reactants.
        lRight: collection of candidate identifier collections for products.

    Returns:
        bool: True when every reactant is matched in ``lLeft`` and every
        product is matched in ``lRight``.
    """
    # Inner list comprehensions (rather than generators) keep the evaluation
    # eager, so every pairwise comparison is performed.
    reactant_hits = [
        any([len(gL.intersect(item, candidate)) != 0 for candidate in lLeft])
        for item in lReactants_ids
    ]
    product_hits = [
        any([len(gL.intersect(item, candidate)) != 0 for candidate in lRight])
        for item in lProducts_ids
    ]
    return all(reactant_hits) and all(product_hits)

예제 #4

0

파일 보기

            dfMetsFromModel_products = dfMetsFromModel_products.drop_duplicates(
                subset=['Id'])
            lProducts_ids_original = list(
                dfMetsFromModel_products['lIdentifiers'].dropna())

        lReactants_ids_original_tuple = map(tuple, lReactants_ids_original)
        lProducts_ids_original_tuple = map(tuple, lProducts_ids_original)
        ltransportedMets = list(
            set(lReactants_ids_original_tuple).intersection(
                lProducts_ids_original_tuple))

        if len(ltransportedMets) != 0:
            lReactants_ids = []
            for l in ltransportedMets:
                l = [el for el in l if el.isdigit() == True]
                chebiIdentifiers = gL.intersect(l,
                                                list(dfChebiCompounds['Id']))

                dfChebiExplodedFilter = dfChebiCompounds_exploded[
                    dfChebiCompounds_exploded['ParentalChebiIds_exploded'].
                    isin(chebiIdentifiers)]
                dfChebiFilter = dfChebiCompounds[dfChebiCompounds['Id'].isin(
                    chebiIdentifiers)]

                lAllChebi2Search = []
                if dfChebiExplodedFilter.empty is False:
                    for riga in list(
                            dfChebiExplodedFilter['ParentalChebiIds']):
                        lAllChebi2Search += riga
                    for riga in list(dfChebiExplodedFilter['AllChebiIds']):
                        lAllChebi2Search += riga

예제 #5

0

파일 보기

파일: fromReactions2Genes_fromOrgName.py 프로젝트: qLSLab/GPRuler

    dfSearch = dfSearch.reset_index(drop=True)
    for lec in list(dfSearch['ec_number'].dropna()):
        for ec in lec:
            lEc.append(ec[4:-1])
    allmetacycEnzymes = list(dfSearch['enzymes_of_reaction'].dropna())
    for l in allmetacycEnzymes:
        for el in l:
            lAnd = []
            dfProt1 = dfMetacyc_proteins[dfMetacyc_proteins['MetaCycId'] == el]
            dfProt2 = dfMetacyc_proteins[dfMetacyc_proteins['Component_of'] ==
                                         el]
            dfProt = pd.concat([dfProt1, dfProt2])
            if dfProt.empty is False:
                for rowdfProt in dfProt.itertuples():
                    if len(gL.intersect(rowdfProt.lSpecies, taxId)) != 0:
                        if pd.isna(rowdfProt.Uniprot) is False:
                            dfCorrespondingGenes = dfuniprot2Org[
                                dfuniprot2Org['uniprot'] == 'up:' +
                                rowdfProt.Uniprot]
                            for foundGene in list(
                                    dfCorrespondingGenes['keggGeneId']):
                                if [foundGene.split(':')[1]] not in lMetaEnzOR:
                                    geneId2search = foundGene.split(':')[1]
                                    lMetaEnzOR, dGenesFromKegg = rxnL.checkNadNadpDependencies_or(
                                        nadp, nad, geneId2search,
                                        orgCode + ':' + geneId2search,
                                        lMetaEnzOR, dGenesFromKegg)
                        lgenes = rowdfProt.lGenes
                        lComponents = rowdfProt.Components
                        foundGenes = dfMetacyc_genes[

예제 #6

0

파일 보기

                                            lCompartments.append(unipLoc['#text'].lower())

                                    elif '#text' in unipLoc and '@evidence' not in unipLoc:
                                        lCompartments.append(unipLoc['#text'].lower())
        lCompartments_sistemati = []
        for comp in lCompartments:
            if comp in dCompartments:
                comp = dCompartments[comp]
            elif 'integral component of' in comp:
                comp = comp.replace('integral component of', '').strip()
                if comp in dCompartments:
                    comp = dCompartments[comp]
            lCompartments_sistemati.append(comp)
        lCompartments_sistemati = gL.unique(lCompartments_sistemati)

        knownComps = gL.intersect(lCompartments_sistemati, lPossibleCompartments)
        unknown = gL.difference(lCompartments_sistemati, lPossibleCompartments)

        if len(unknown) != 0:
            for unk in unknown:
                if unk not in dname2GO:
                    idGo = genesL.getGOsearch(unk)
                    if idGo != '':
                        lAncestors = genesL.getGOAncestors(idGo)
                        lnewComps = []
                        for anc in lAncestors:
                            if anc in dAnc2Name and dAnc2Name[anc] in lPossibleCompartments:
                                lnewComps.append(dAnc2Name[anc])
                            else:
                                name = genesL.getGOName(anc)
                                if name != '' and name in lPossibleCompartments:

예제 #7

0

파일 보기

파일: fromReactions2Genes_wFilteredData.py 프로젝트: qLSLab/GPRuler

    dRxn2Compartments[r.id] = lRxnComps

# Filter the genes associated with each reaction according to the reaction's compartments.
# Load the gene -> compartments table (tab-separated; the 'Gene' column is read as str).
dfGenes2Compartment =  pd.read_csv(os.path.join(OUTDIR, dfGenes2Comp + '.csv'), sep = '\t', dtype = {'Gene': str})
# Each 'lCompartments' cell is parsed with literal_eval (presumably ast.literal_eval) into a Python object.
dfGenes2Compartment['lCompartments'] = dfGenes2Compartment['lCompartments'].apply(literal_eval)
# Lookup table: gene identifier -> parsed 'lCompartments' value.
dGenes2Compartment = dfGenes2Compartment.set_index('Gene')['lCompartments'].to_dict()

# Load the reaction -> genes table (tab-separated) and parse its 'lGenes' column the same way.
dfModelRxns2Genes = pd.read_csv(os.path.join(OUTDIR, dfrxns2Genes + '.csv'), sep = '\t', dtype = {'Rxn': str, 'KeggId': str, 'GPR': str, 'Name': str, 'IsTransport': bool, 'IsExchange': bool, 'GPRrule': str})
dfModelRxns2Genes['lGenes'] = dfModelRxns2Genes['lGenes'].apply(literal_eval)

# One entry per row of dfModelRxns2Genes, in row order; becomes the 'lGenes_filtered' column below.
lGenesFiltered_all = []
for row in dfModelRxns2Genes.itertuples():
    # Compartments recorded for this reaction, keyed by the row's 'Rxn_conv' value.
    lCompRxnModel = gL.unique(dRxn2Compartments[row.Rxn_conv])
    # Filtering only applies when the reaction has both genes and compartments.
    if len(row.lGenes) != 0 and len(lCompRxnModel) != 0:
        # row.lGenes is iterated as a list of sub-lists; flatten it to get every gene of the reaction.
        lAllGene_currentRxn = []
        for l in row.lGenes:
            lAllGene_currentRxn += l
        lAllGene_currentRxn = gL.unique(lAllGene_currentRxn)
        lGenes2Remove = []
        for g in lAllGene_currentRxn:
            # A gene is dropped only if it has at least one compartment and shares none with the reaction;
            # genes with an empty compartment list are kept.
            # NOTE(review): dGenes2Compartment[g] raises KeyError for a gene missing from the table.
            if len(dGenes2Compartment[g]) != 0 and len(gL.intersect(gL.unique(dGenes2Compartment[g]), lCompRxnModel)) == 0:
                lGenes2Remove.append(g)
        # Remove the dropped genes from every sub-list, then discard sub-lists left empty.
        lGenesFiltered = [[el for el in l if el not in lGenes2Remove] for l in row.lGenes]
        lGenesFiltered = [subL for subL in lGenesFiltered if subL != []]
        lGenesFiltered_all.append(lGenesFiltered)
    else:
        # Nothing to filter against: keep the original gene lists unchanged.
        lGenesFiltered_all.append(row.lGenes)

# Attach the filtered gene lists and write the table out as a tab-separated file.
dfModelRxns2Genes['lGenes_filtered'] = lGenesFiltered_all
dfModelRxns2Genes.to_csv(os.path.join(OUTDIR, outputFileName + '.csv'), sep = '\t', index = False)

예제 #8

0

파일 보기

def jaccard(list1, list2):
    """Return the Jaccard index of two collections.

    The index is the size of ``gL.intersect(list1, list2)`` divided by the
    size of ``gL.union(list1, list2)``.

    Args:
        list1: first collection of elements.
        list2: second collection of elements.

    Returns:
        float: the ratio intersection / union. When the union is empty
        (both inputs empty) the ratio is undefined; 0.0 is returned instead
        of raising ``ZeroDivisionError``.
    """
    intersection = len(gL.intersect(list1, list2))
    union = len(gL.union(list1, list2))
    if union == 0:
        # Two empty inputs give no evidence of similarity: report 0.0
        # rather than dividing by zero.
        return 0.0
    return intersection / union

예제 #9

0

파일 보기

파일: MetaNetX_compounds.py 프로젝트: qLSLab/GPRuler

    if '||' not in row.description:
        lDescription.append([row.description])
    else:
        lDescription.append(row.description.split('||'))

## Store the split descriptions built above as a new column of dfchem_xref
dfchem_xref['description_splt'] = lDescription

## Join properties and cross-references on 'ID'; lower-case 'name' so it can be compared with the model names
dfchem_merge = pd.merge(dfchem_prop, dfchem_xref, on = 'ID')
dfchem_merge['name'] = dfchem_merge['name'].str.lower()

## Load list of metabolites from model (tab-separated, every column read as str) and lower-case their names
dfmets =  pd.read_csv(os.path.join(OUTDIR, inputFuzzy), sep = '\t', dtype=str)
dfmets['Name'] = dfmets['Name'].str.lower()

## Get identifiers associated to common metabolites: names present both in the model and in dfchem_merge.
## inMetaNetX is not used again in the visible part of the script.
inMetaNetX = gL.intersect(dfmets['Name'].tolist(), dfchem_merge['name'].tolist())
dfchem_merge_group = dfchem_merge[dfchem_merge['name'].isin(dfmets['Name'].tolist())]
## One row per 'ID', with the matching 'source' and 'name' values collected into lists
dfFinal = dfchem_merge_group.groupby('ID')[['source', 'name']].agg(list).reset_index()

## Model metabolite names that are not included in the column 'name' of the dfchem_merge dataframe
notInMetaNetX = gL.difference(dfmets['Name'].tolist(), dfchem_merge['name'].tolist())

## Look for the first unmatched name among the lower-cased entries of 'description_splt'.
## NOTE(review): only notInMetaNetX[0] is checked, and indexing it raises IndexError when
## notInMetaNetX is empty -- confirm whether a loop over all unmatched names was intended.
dfFinal_p2 = pd.DataFrame({'ID':[], 'source':[], 'description_splt':[]})
dfFilter = dfchem_merge[dfchem_merge.apply(lambda x: notInMetaNetX[0] in [el.lower() for el in x['description_splt']] , axis=1)].reset_index()
if dfFilter.empty == True:
    print('NOT FOUND:\t', notInMetaNetX[0])
else:
    ## Keep the identifying columns and group them by 'ID', as done for dfFinal
    dfFinal_p2 = dfFilter[['ID', 'source', 'description_splt']]
    dfFinal_p2 = dfFinal_p2.groupby('ID')[['source', 'description_splt']].agg(list).reset_index()
    print(dfFinal_p2)

예제 #10

0

파일 보기

                    for i0, row0 in resAllInfo.iterrows():
                        lisoformsFromUniprot0 += row0[
                            'otherIsoforms']  # the "otherIsoforms" column includes the isoforms retrieved from Uniprot
                        lIsoformsFromKegg0 += row0[
                            'isoform']  # the "isoforms" column includes the isoforms retrieved from KEGG
                        luniprot0_append += row0[
                            'id_uniprot']  # also used the Uniprot identifiers included in id_uniprot column
                        nomi0 = row0['proteinNames'] + row0[
                            'geneNames'] + row0['subunitsFromName'] + row0[
                                'geneName_fromKEGG']  # use all the names retrieved for the protein
                        lnames0 += nomi0
                        lIsoformsIndications0.append(row0['isoformIndication'])
                        lRedundancy0 += row0['redundancy']

                    comparison0 = [
                        len(gL.intersect(rule, luniprot0_append)) != 0
                        for rule in dfGenesRelationships['uniprotId']
                    ]
                    res = dfGenesRelationships.loc[comparison0]
                    for i0, r0 in res.iterrows():
                        lnames0 += r0['gene']
                        lcomplex0 += r0['AND']
                luniprot0 = gL.unique(luniprot0 + luniprot0_append)
                lcomplex0 = gL.unique(lcomplex0)
                lnames0 = gL.unique(lnames0)
                lisoformsFromUniprot0 = gL.unique(lisoformsFromUniprot0)
                lIsoformsIndications0 = gL.unique(lIsoformsIndications0)
                lIsoformsFromKegg0 = gL.unique(lIsoformsFromKegg0)

                for u1 in luniprot1:
                    resAllInfo = dfCompleteGenesInfo[

예제 #11

0

파일 보기

파일: reactionsIdentification.py 프로젝트: qLSLab/GPRuler

                                                    rfind('[')].strip()
        else:
            dReactants[reactant.id] = reactant.name

    lReactants = list(dReactants.values())
    lReactants.sort()
    dProducts = {}
    for product in rxn.products:
        if testModel == 'y7' or testModel == 'y8':
            dProducts[
                product.id] = product.name[:product.name.rfind('[')].strip()
        else:
            dProducts[product.id] = product.name
    lProducts = list(dProducts.values())
    lProducts.sort()
    if len(gL.intersect(lReactants, lProducts)) != 0:
        lTransport.append(True)
        metTrasportati = gL.intersect(lReactants, lProducts)
        lMetsTrasportati.append(metTrasportati)
    else:
        lTransport.append(False)
        lMetsTrasportati.append([])

dfRxns['Name'] = lNames
dfRxns['IsTransport'] = lTransport
dfRxns['trasportedMets'] = lMetsTrasportati

initialLetter = all(el.startswith('M_') for el in list(dfMetsFromModel['Id']))

# Check which reactions are exchange reactions
lExchange = []

예제 #12

0

파일 보기

def detectEqualSubsProds(lLeft, lRight):
    """Return the elements shared by both sides of a reaction.

    Args:
        lLeft: collection of elements for the left-hand side.
        lRight: collection of elements for the right-hand side.

    Returns:
        The result of ``gL.intersect(lLeft, lRight)`` when it is non-empty,
        otherwise an empty list.
    """
    # Compute the intersection once instead of once for the check and once
    # more for the return value.
    common = gL.intersect(lLeft, lRight)
    if len(common) != 0:
        return common
    return []

예제 #13

0

파일 보기

def checkEqualSubsProds(lLeft, lRight):
    """Tell whether the two sides have at least one element in common.

    Args:
        lLeft: collection of elements for the left-hand side.
        lRight: collection of elements for the right-hand side.

    Returns:
        bool: True if ``gL.intersect(lLeft, lRight)`` is non-empty,
        False otherwise.
    """
    shared = gL.intersect(lLeft, lRight)
    return len(shared) != 0