Exemple #1
0
def kegg2UniprotGenesId(organismCode, model, dirPath):
    """Write the KEGG -> UniProt gene identifier mapping of an organism.

    The mapping is retrieved with ``kegg_conv('uniprot', organismCode)`` and
    written, one ``keggId``/``uniprotId`` pair per line, to
    ``<dirPath>/<model>_Kegg2UniprotGenes.csv``. Fields are tab-separated
    even though the file carries a ``.csv`` extension.

    Args:
        organismCode: organism code forwarded to ``kegg_conv``.
        model: prefix used to build the output file name.
        dirPath: directory in which the output file is created.

    Returns:
        None. The result is the file written to ``dirPath``.
    """
    ## Conversion of kegg identifiers to uniprot identifiers
    kegg2uniprot = kegg_conv('uniprot', organismCode)
    dOrgKegg2Uniprot = {}
    for el in kegg2uniprot.readlines():
        elSplt = el.strip().split('\t')
        if len(elSplt) < 2:
            # Blank line or line without a second field: there is no pair to
            # record, and indexing elSplt[1] would raise IndexError.
            continue
        # Both fields look like "<prefix>:<identifier>"; keep the identifier.
        keggId = elSplt[0].split(':')[1]
        uniprotId = elSplt[1].split(':')[1]
        # One KEGG gene may map to several UniProt entries.
        dOrgKegg2Uniprot.setdefault(keggId, []).append(uniprotId)

    # The context manager closes the file even if a write fails.
    outPath = os.path.join(dirPath, model + '_Kegg2UniprotGenes.csv')
    with open(outPath, mode='w') as outFile:
        gL.writeLineByLineToFile(outFile, ['keggId', 'uniprotId'], '\t')
        for k, v in dOrgKegg2Uniprot.items():
            for vv in v:
                gL.writeLineByLineToFile(outFile, [k, vv], '\t')
Exemple #2
0
                'mcm complex':'cytoplasm', 'mcm core complex':'cytoplasm'}

# Evidence-code identifiers ('ECO:...') grouped under two keys: 'manual' and
# 'automatic'.
dEvidenceCodes = {'manual': ['ECO:0000269', 'ECO:0000303', 'ECO:0000305', 'ECO:0000250', 'ECO:0000255','ECO:0000256', 'ECO:0000259',
                'ECO:0000312', 'ECO:0007005', 'ECO:0000315','ECO:0000244', 'ECO:0000314', 'ECO:0000304', 'ECO:0000318', 'ECO:0000501'],
                'automatic': ['ECO:0000313', 'ECO:0000213']}

# Compartment names considered by the script. Matching against this list is
# presumably case-sensitive, since both 'Golgi membrane' and 'golgi membrane'
# are listed -- TODO confirm against the code that consumes it.
lPossibleCompartments = ['nucleus', 'nuclear lumen', 'nucleoplasm', 'nuclear envelope', 'mitochondrion inner membrane', 'mitochondrial inner membrane', 'mitochondrial membrane',
                        'mitochondrion', 'cytoplasm', 'vacuole', 'vacuolar membrane', 'endoplasmic reticulum', 'lipid droplet', 'plasma membrane', 'mitochondrial matrix',
                        'preribosome', 'cell periphery', 'mitochondrial intermembrane space', 'Golgi apparatus', 'peroxisome', 'endoplasmic reticulum membrane',
                        'cytoskeleton', 'mating projection', 'replication compartment', 'peroxisomal membrane', 'mitochondrial outer membrane', 'golgi membrane',
                        'golgi apparatus membrane', 'Golgi membrane', 'lysosomal membrane', 'lysosome']

# Both dictionaries start empty; they are presumably filled further down the
# script (not visible in this excerpt) -- TODO confirm.
dname2GO = {}
dAnc2Name = {}
# Output file <OUTDIR>/<dfGenesLoc>.csv, written tab-separated; the header row
# is emitted immediately. NOTE(review): the matching close() is not visible in
# this excerpt.
outFile = open(os.path.join(OUTDIR, dfGenesLoc + '.csv'), mode='w')
gL.writeLineByLineToFile(outFile, ['Gene', 'lCompartments'], '\t')

# UniProt client created with verbose output disabled (presumably the
# bioservices class -- confirm against the file's imports).
u = UniProt(verbose=False)

for gene in lAllGenes:
    lDefinitiveCompartment = []
    if gene in dGene2Uniprot:
        lCompartments = []
        lUniprotIds = dGene2Uniprot[gene]
        for unip in lUniprotIds:
            uniprotSearch = u.search("%s" % unip, frmt="xml")
            if uniprotSearch != '':
                dUniprotSearch = xmltodict.parse(uniprotSearch)
                dEcoEvidence = {}
                # Construct dictionary of the Evidence codes
                if 'evidence' in dUniprotSearch['uniprot']['entry']:
                      sep='\t')
dfMatchesEmpty.to_csv(os.path.join(
    OUTDIR, prefix_modelName + '_mappingMetaCyc_empty.tsv'),
                      sep='\t')

## Automatic interaction with the user to curate the list of putative matches with score between 91 and 99
dfMatches91_99 = pd.read_csv(os.path.join(
    OUTDIR, prefix_modelName + '_mappingMetaCyc_91_99.tsv'),
                             sep="\t",
                             index_col=0)
# The 'Matches' column is stored as the text of a Python literal; turn it back
# into the actual object so it can be iterated.
dfMatches91_99['Matches'] = dfMatches91_99['Matches'].apply(ast.literal_eval)

# Collect every answer BEFORE touching the file on disk: the curated output
# overwrites the very file that was just read, so opening it for writing first
# would destroy the uncurated data if the session were interrupted midway.
lCuratedRows = []
for row in dfMatches91_99.itertuples():
    lMatches = []
    for mat in row.Matches:
        print('Target metabolite:\t', row.Name)
        print('Proposed match:\t', mat, '\n')
        choice = input(
            'Do you want to keep this proposed metabolite name inside the final list: yes (y) or no (n)? '
        )
        # The prompt advertises both "yes" and "y": accept either, ignoring
        # case and surrounding blanks. Any other answer discards the match.
        if choice.strip().lower() in ('y', 'yes'):
            lMatches.append(mat)

    lCuratedRows.append([row.Name, lMatches])

# Write the curated list in one go; the context manager closes the file even
# if a write fails.
with open(os.path.join(
        OUTDIR, prefix_modelName + '_mappingMetaCyc_91_99.tsv'),
          mode='w') as curatedFWFile:
    gL.writeLineByLineToFile(curatedFWFile, ['Name', 'Matches'], '\t')
    for lCuratedRow in lCuratedRows:
        gL.writeLineByLineToFile(curatedFWFile, lCuratedRow, '\t')
Exemple #4
0
# Generate the first output: reaction --> list of catalysing genes
# Every column is read as text (dtype=str) so identifiers are not coerced.
dfRxnswGenes = pd.read_csv(os.path.join(OUTDIR, rxnswGenesFileName + '.csv'),
                           sep='\t',
                           dtype=str)
dfRxnswGenes_filter = dfRxnswGenes[['Rxn', 'lGenes_filtered']]
dfRxnswGenes_filter = dfRxnswGenes_filter.rename(
    columns={'lGenes_filtered': 'Genes'})
dfRxnswGenes_filter.to_csv(os.path.join(OUTDIR,
                                        outputFileName + '_Rxns2Genes.csv'),
                           sep='\t',
                           index=False)

# Generate the second output: KEGG gene identifier --> corresponding Uniprot identifier
kegg2uniprot = RESTmod.kegg_conv('uniprot', organismCode)
dOrgKegg2Uniprot = {}
for el in kegg2uniprot.readlines():
    elSplt = el.strip().split('\t')
    if len(elSplt) < 2:
        # Blank line or line without a second field: there is no pair to
        # record, and indexing elSplt[1] would raise IndexError.
        continue
    # Both fields look like "<prefix>:<identifier>"; keep the identifier.
    # One KEGG gene may map to several UniProt entries, hence the list.
    dOrgKegg2Uniprot.setdefault(elSplt[0].split(':')[1],
                                []).append(elSplt[1].split(':')[1])

# Tab-separated despite the .csv extension. The context manager closes the
# file even if a write fails.
with open(os.path.join(OUTDIR, outputFileName + '_Kegg2UniprotGenes.csv'),
          mode='w') as outFile:
    gL.writeLineByLineToFile(outFile, ['keggId', 'uniprotId'], '\t')
    for k, v in dOrgKegg2Uniprot.items():
        for vv in v:
            gL.writeLineByLineToFile(outFile, [k, vv], '\t')
Exemple #5
0
        'Type the correct KEGG code among the returned ones: ')
elif organismChoice == '2':
    organismCode = input('Insert the KEGG organism code: ')

# Retrieve from KEGG the full gene list of the target organism
k = kegg.KEGG()
keggGenes = k.list(organismCode)
keggGenesSplt = keggGenes.strip().split("\n")
# From the first tab-separated field of every line, keep the text that follows
# "<organismCode>:".
lOrganismGenes = [
    gene.split('\t')[0].split(organismCode + ':')[1] for gene in keggGenesSplt
]

dGene2RxnsList = {}
cont = 1

# Three tab-separated output files; each one gets its header row right away.
# They are left open on purpose: the code that follows keeps writing to them.
geneFile = open(os.path.join(OUTDIR, model + '_GeneId2Rxns.csv'), mode='w')
gL.writeLineByLineToFile(geneFile, ['GeneId', 'Rxns'], '\t')

rxnFile = open(os.path.join(OUTDIR, model + '_RxnId2Equation.csv'), mode='w')
gL.writeLineByLineToFile(rxnFile, ['RxnId', 'Equation', 'Definition'], '\t')

rxn2EcFile = open(os.path.join(OUTDIR, model + '_RxnId2ECs.csv'), mode='w')
gL.writeLineByLineToFile(rxn2EcFile, ['RxnId', 'EC number'], '\t')

# Containers used while extracting, for each gene, the list of catalysed reactions
dRxn2EcNumber = {}
# All the candidate reactions for the model, without duplicates
# (equation field of the KEGG database)
dCompleteRxns_equation = {}
# All the candidate reactions for the model, without duplicates
# (definition field of the KEGG database)
dCompleteRxns_definition = {}
for gene in lOrganismGenes:
    ## get for each gene its BRITE information in order to select only metabolic genes
    '16908', 'C00004', '|NADH|', '57945', '25805', '|OXYGEN-MOLECULE|',
    '15379', 'C00007', '|NADP|', '58349', '|B-HEP-1:5|', 'C00667', '18009',
    'C00006', '16474', '77312', '|NADPH|', '|CPD-16005|', 'C20745', '77177',
    '57783', 'C00005', 'CPD-16005', '15996', 'C00044', '37565', '|GTP|',
    'C00010', '57287', '|CO-A|', '15346', 'CARBON-DIOXIDE', 'C00011',
    '|CARBON-DIOXIDE|', '16526', 'AMMONIUM', 'C01342', '|AMMONIUM|',
    '|AMMONIA|', 'C00014', 'AMMONIA', '16134', '28938', 'C00002', '15422',
    '22258', '|ATP|', '30616', '456216', '73342', '|ADP|', '22251', 'C00008',
    'G11113', '22252', '16761'
]

# Only for the 'y7' and 'y8' test models: strip leading/trailing whitespace
# from the metabolite names.
if testModel in ('y7', 'y8'):
    dfMetsFromModel['Name'] = dfMetsFromModel['Name'].str.strip()

# Tab-separated output file; the header row is written immediately and the
# handle stays open for the code that follows.
outFile = open(os.path.join(OUTDIR, dfrxnsInfo + '_wIds.csv'), mode='w')
gL.writeLineByLineToFile(outFile, ['Rxn', 'PutativeIdentifiers'], '\t')

# Pass the cofactor list through gL.unique before it is used below.
lcofactors = gL.unique(lcofactors)
for rowRxn in dfRxns.itertuples():
    isTransport = rowRxn.IsTransport
    isExchange = rowRxn.IsExchange
    if rowRxn.Rxn.startswith('R_'):
        rxnId = rowRxn.Rxn[2:]
    else:
        rxnId = rowRxn.Rxn
    rxn = model.reactions.get_by_id(rxnId)
    lIdentifiersRxn = [
    ]  # list where all the retrieved identifiers of the current reactions are saved
    lReactants = []
    if isTransport == True:
        lReactants = rowRxn.trasportedMets