def createSequence(modrefseq, filtered_ids=None, description=None):
    """
    Build protein sequence records from the 'gbank_seq_*' reaction annotations of an SBML model

     - *modrefseq* the SBML model file, read with cbmpy.readSBML3FBC
     - *filtered_ids* [default=None] reaction ids to consider, None means all reactions in the model
     - *description* [default=None] description stored on every record, None means the
       file name component of *modrefseq*

    Returns a dictionary mapping the annotation key (with 'gbank_seq_' removed) to a
    Bio.SeqRecord.SeqRecord. Only the first occurrence of each key is kept.
    """
    emod = cbmpy.readSBML3FBC(modrefseq)
    if len(emod.genes) == 0:
        emod.createGeneAssociationsFromAnnotations()

    # identity checks: an array-like argument must not be compared element-wise to None
    if description is None:
        secDescr = os.path.split(modrefseq)[-1]
    else:
        secDescr = description
    if filtered_ids is None:
        filtered_ids = emod.getReactionIds()

    geneseq = {}
    for r_ in emod.reactions:
        if r_.getId() not in filtered_ids:
            continue
        for a in r_.annotation:
            if not a.startswith('gbank_seq_'):
                continue
            newName = a.replace('gbank_seq_', '')
            if newName in geneseq:
                continue
            # older Biopython releases expect an explicit alphabet on the Seq object
            if oldbiopython:
                seq = Bio.Seq.Seq(r_.getAnnotation(a), Bio.Alphabet.ProteinAlphabet())
            else:
                seq = Bio.Seq.Seq(r_.getAnnotation(a))
            geneseq[newName] = Bio.SeqRecord.SeqRecord(
                seq,
                id=newName,
                name=newName,
                description=secDescr,
                annotations={"molecule_type": "protein"},
            )
    print('{} genseqs added'.format(len(geneseq)))
    return geneseq
def roundTripModelV1(self, model):
    """
    Write *model* to 'rtv1.xml' in self.cDir, read that file back with validation
    enabled and return the re-read model.
    """
    # NOTE(review): the method is named V1 but the writer called is writeSBML3FBCV2 - confirm intended
    out_name = 'rtv1.xml'
    cbmpy.writeSBML3FBCV2(model, out_name, directory=self.cDir)
    # release the local reference to the input model before loading the copy from disk
    del model
    reloaded = cbmpy.readSBML3FBC(
        os.path.join(self.cDir, out_name),
        xoptions={'validate': True},
    )
    return reloaded
def setup_class(klass):
    """Runs once per class before any test: loads every 'BIGG2_' model listed in DATA into klass.m"""
    loaded = {}
    klass.m = loaded
    for fname in DATA:
        if not fname.startswith('BIGG2_'):
            continue
        loaded[fname] = cbmpy.readSBML3FBC(
            os.path.join(MDIR, fname),
            xoptions={'validate': True},
        )
def test_run_fba_cobra(self):
    """
    Load iJR904 once from the COBRA L2 file and once from the L3 FBC V1 file,
    run FBA on both and check that the two results are almost equal.
    """
    print('\n\nThis test fails under Linux.\n\n')
    cobra_model = cbmpy.readCOBRASBML(
        'L2CBR_iJR904.glc.xml',
        work_dir=MDIR,
        output_dir=self.CDIR,
        delete_intermediate=True,
    )
    fbc_model = cbmpy.readSBML3FBC(
        'L3FBCV1_iJR904.glc.xml',
        work_dir=MDIR,
        xoptions={'validate': True},
    )
    cobra_result = cbmpy.doFBA(cobra_model)
    fbc_result = cbmpy.doFBA(fbc_model)
    # the models are no longer needed once both FBA results are available
    del cobra_model, fbc_model
    assert_almost_equal(cobra_result, fbc_result)
def createSeqplusModelMetaIdx(fmod, oid, oclass, metadraft_lib_model):
    """
    Create a MetaDraft template model index file from a seqplus model file::

     - *fmod* the seqplus model file e.g. 'EstherDB.xml'
     - *oid* the model unique shortname e.g. 'edb'
     - *oclass* the model category e.g. 'vu'
     - *metadraft_lib_model* the target MetaDraft lib_model directory

    'edb', 'vu', "<path>"

    Two files are written next to the input model: a '-link.json' index and a
    re-exported '.xml' model, both named '(oclass-oid)-(input name).seqplus'.
    Any '-' inside *oid* or *oclass* is removed before they are joined.
    """
    import cbmpy

    dmod = cbmpy.readSBML3FBC(fmod)
    fgb = os.path.abspath(fmod)
    input_path, fmod = os.path.split(fgb)

    # '-' is the separator between class and id, so strip it from both parts first
    oclass = oclass.replace('-', '')
    oid = oid.replace('-', '')
    oid = '{}-{}'.format(oclass, oid)
    if fmod.endswith('.xml'):
        fmod = fmod[:-4]
    fmod = '({})-({}).seqplus'.format(oid, fmod)
    print(fmod)

    gene2reaction = dmod.getAllProteinGeneAssociations()
    reaction2gene = dmod.getAllGeneProteinAssociations()
    sbml_out = os.path.join(metadraft_lib_model, "{}.xml".format(fmod))

    # key order is kept as is so the JSON layout does not change
    linkDict = {
        oid: {
            'genbank_in': fgb,
            'sbml_in': fgb,
            'data_path': input_path,
            'gene2reaction': gene2reaction,
            'reaction2gene': reaction2gene,
            'taxon_id': "unknown",
            'sbml_out': sbml_out,
            'sbml_out_generic': sbml_out,
            'fasta_out': None,
        },
        "__idx__": {g_: oid for g_ in gene2reaction},
    }

    # context manager closes the index file even if serialisation fails
    with open(os.path.join(input_path, '{}-link.json'.format(fmod)), 'w') as Fj:
        json.dump(linkDict, Fj, indent=1, separators=(',', ': '))

    cbmpy.writeSBML3FBC(
        dmod,
        os.path.join(input_path, fmod + '.xml'),
        add_cbmpy_annot=True,
        add_cobra_annot=False,
        add_groups=False,
    )
def setup_class(klass):
    """
    This method is run once for each class before any tests are run

    Loads every model file listed in DATA with the reader matching its file name
    prefix ('L2CBR_', 'L2FBA_', 'L3FBCV1_' / 'L3FBCV2_') and stores the result in
    klass.m keyed by file name. Files with any other prefix are ignored.
    """
    # Set the output directory first: the COBRA reader needs it and this
    # class-level hook has no ``self`` to read it from.
    klass.CDIR = cDir
    klass.m = {}
    for m in DATA:
        cmod = None
        if m.startswith('L2CBR_'):
            cmod = cbmpy.readCOBRASBML(
                m, work_dir=MDIR, output_dir=klass.CDIR, delete_intermediate=True
            )
        elif m.startswith('L2FBA_'):
            cmod = cbmpy.readSBML2FBA(os.path.join(MDIR, m))
        elif m.startswith(('L3FBCV1_', 'L3FBCV2_')):
            cmod = cbmpy.readSBML3FBC(os.path.join(MDIR, m), xoptions={'validate': True})
        if cmod is not None:
            klass.m[m] = cmod
# Script: run FBA and multi-core FVA on a model, export the results, then split
# every 'R_EX_' exchange reaction of a clone into two reactions and repeat the
# analysis and export on the split model.
import os
import sys
import time
import numpy

# directory containing the running script; models are read from its 'models' subdirectory
cDir = os.path.dirname(os.path.abspath(sys.argv[0]))

import cbmpy as cbm

model_name = 'iAF692.xml'
base_name = model_name.replace('.xml', '')
split_name = model_name.replace('.xml', '.split')

cmod = cbm.readSBML3FBC(os.path.join(cDir, 'models', model_name))
# clone before any analysis so the split model starts from the model as loaded
cmodS = cmod.clone()

cbm.doFBA(cmod)
fva, fvan = cbm.CBMultiCore.runMultiCoreFVA(cmod, procs=4)
cbm.CBWrite.writeFVAtoCSV(fva, fvan, base_name, fbaObj=cmod)
cbm.writeModelToExcel97(cmod, base_name)

exch_reactions = cmod.getReactionIds('R_EX_')

dump = []
# reactions are defined back to front so fwd is "o" and backward is "i"
for xr_ in exch_reactions:
    dump.append(
        cbm.CBTools.splitSingleReversibleReaction(
            cmodS,
            xr_,
            xr_.replace('R_EX_', 'R_EXo_'),
            xr_.replace('R_EX_', 'R_EXi_'),
        )
    )

cbm.doFBA(cmodS)
fva, fvan = cbm.CBMultiCore.runMultiCoreFVA(cmodS, procs=4)
cbm.CBWrite.writeFVAtoCSV(fva, fvan, split_name, fbaObj=cmodS)
cbm.writeSBML3FBC(cmodS, model_name.replace('.xml', '.split.xml'))
cbm.CBWrite.writeModelToExcel97(cmodS, split_name)
import cbmpy as cbm from examples_and_results.helpers_ECM_calc import * """CONSTANTS""" model_name = "iND750" # For a bigger computation, try: # input_file_name = "bacteroid_ECMinputAll.csv" # Define directories for finding models model_dir = os.path.join(os.getcwd(), "models") model_path = os.path.join(model_dir, model_name + ".xml") mod = cbm.readSBML3FBC(model_path) # pairs = cbm.CBTools.findDeadEndReactions(mod) external_metabolites, external_reactions = list(zip( *pairs)) if len(pairs) else (list( zip(*cbm.CBTools.findDeadEndMetabolites(mod)))[0], []) # External according to Urbanczik ext_urbanczik = [ 'ac', 'acald', 'ala__L', 'co2', 'csn', 'ergst', 'etoh', 'gam6p', 'glc__D', 'hdcea', 'ocdcea', 'ocdcya', 'so4', 'xylt', 'zymst', 'nh4', 'asp__L', 'ser__L', 'fum', 'gly', 'thr__L' ] force_feed = ['ac'] ext_urbanczik_inds = [ ind for ind, metab in enumerate(external_metabolites) if metab[2:-2] in ext_urbanczik
def test_load_L3FBCV1_iJR904(self):
    """Read 'L3FBCV1_iJR904.glc.xml' from MDIR with validation on and check the result is not None"""
    model_path = os.path.join(MDIR, 'L3FBCV1_iJR904.glc.xml')
    loaded = cbmpy.readSBML3FBC(model_path, xoptions={'validate': True})
    assert_not_equal(loaded, None)
def checkModelLocusTags(sbml, genbank, allow_gene_names=False):
    """
    Checks the gene identifiers (assuming they are locus tags) against a genbank file of the same organism

     - *sbml* the model SBML (*.xml) file
     - *genbank* the associated GenBank (*.gbk) full file(s) (including CDS annotations and sequences),
       either a single file name or a list of file names
     - *allow_gene_names* allow gene names, non-unique as gene identifiers if locus tags are not present. USE WITH CAUTION!!!

    Side effects: GenBank annotations are copied onto the loaded model and a report is
    written to the SBML file name with '.xml' replaced by '.seqcheck.csv', containing
    one UPDATED, NOSEQ or UNKNOWN row per model gene label not found among the CDS names.

    Returns the tuple (good, updated, noseq, unknown, cmod, gprMap, gprMapAnnot, no_locus_tag).
    """
    cmod = cbmpy.readSBML3FBC(sbml)
    cntr = 0
    if isinstance(genbank, str):
        genbank = [genbank]
    gprMap = {}
    gprMapAnnot = {}
    gnoprMap = {}
    fileNum = 0
    no_locus_tag = []
    # Defined before the loop so it exists for an empty file list and, like
    # no_locus_tag, reflects every file processed rather than only the last one.
    USING_GENE_NAME = False
    for G in genbank:
        print('CheckModelLocusTags is processing: {}'.format(G))
        for seq_record in SeqIO.parse(G, "genbank"):
            cntr += 1
            if cntr > 1:
                print("INFO: Multiple sequences encountered in file: {}".format(G))
            # add all the annotations from the genbank record(s) into the model? Use first record
            if fileNum == 0:
                gprMapAnnot[seq_record.id] = seq_record.annotations.copy()
                try:
                    for r_ in gprMapAnnot[seq_record.id]['references']:
                        if r_.pubmed_id != '':
                            cmod.addMIRIAMannotation('isDescribedBy', 'PubMed', r_.pubmed_id)
                    gprMapAnnot[seq_record.id].pop('references')
                    cmod.setAnnotation('genbank_id', seq_record.id)
                    cmod.setAnnotation('genbank_name', seq_record.name)
                except KeyError:
                    print('checkModelLocusTags: no references')
                # global features
                features = [f.qualifiers for f in seq_record.features if f.type == 'source']
                if len(features) > 0:
                    features = features[0]
                    for f_ in features:
                        if f_ == 'db_xref':
                            for ff_ in features[f_]:
                                if ff_.startswith('taxon:'):
                                    cmod.setAnnotation('genbank_taxon_id', ff_.replace('taxon:', ''))
                                    break
                        cmod.setAnnotation('genbank_{}'.format(f_), features[f_])
                for r_ in gprMapAnnot[seq_record.id]:
                    cmod.setAnnotation('genbank_{}'.format(r_), gprMapAnnot[seq_record.id][r_])
        fileNum += 1

        # context manager guarantees the GenBank file is closed if the CDS parser raises
        with open(G, 'r') as GBFile:
            for cds in Bio.SeqIO.InsdcIO.GenBankCdsFeatureIterator(GBFile):
                if cds.seq is not None:
                    if cds.name != '<unknown name>':
                        gprMap[cds.name] = cds
                    # this is dangerous as there is no defined 1:1 mapping
                    elif 'gene' in cds.annotations and allow_gene_names:
                        gprMap[cds.annotations['gene']] = cds
                        USING_GENE_NAME = True
                        no_locus_tag.append(cds.annotations['gene'])
                    else:
                        gnoprMap[cds.name] = cds
                else:
                    gnoprMap[cds.name] = cds

    # NOTE: collected but neither used below nor returned
    oldLoTags = []
    for g_ in cmod.getGeneLabels():
        if g_ not in gprMap:
            if g_ != 'None':
                oldLoTags.append(g_)

    # each entry is [old tag, ..., current CDS name]; the last element is the new tag
    oldtags = [
        gprMap[a].annotations['old_locus_tag'].split(' ') + [a]
        for a in gprMap
        if 'old_locus_tag' in gprMap[a].annotations
    ]
    oldtags2 = [
        gnoprMap[a].annotations['old_locus_tag'].split(' ') + [a]
        for a in gnoprMap
        if 'old_locus_tag' in gnoprMap[a].annotations
    ]
    old2new = {}
    old2new2 = {}
    for x in oldtags:
        for y in x:
            if y != x[-1]:
                old2new[y] = x[-1]
    for x in oldtags2:
        for y in x:
            if y != x[-1]:
                old2new2[y] = x[-1]

    print('\n\nChecking locus tags\n===================')
    updated = {}
    unknown = []
    noseq = []
    good = []
    geneIDs = cmod.getGeneLabels()
    # context manager guarantees the report is flushed and closed on any exit path
    with open(sbml.replace('.xml', '.seqcheck.csv'), 'w') as F:
        for g_ in geneIDs:
            if g_ not in gprMap:
                if g_ in old2new:
                    print('old {} --> new {}'.format(g_, old2new[g_]))
                    updated[g_] = old2new[g_]
                    F.write('UPDATED,{},{}\n'.format(g_, old2new[g_]))
                elif g_ in old2new2:
                    print('NoSEQ: old {} --> new {}'.format(g_, old2new2[g_]))
                    noseq.append((g_, old2new2[g_]))
                    F.write('NOSEQ,{},{}\n'.format(g_, old2new2[g_]))
                else:
                    print('UNKNOWN gene: {}'.format(g_))
                    unknown.append(g_)
                    F.write('UNKNOWN,{}\n'.format(g_))
            else:
                good.append(g_)

    if USING_GENE_NAME:
        print(
            '\nINFO: model contained {} genes without locus tags. For these genes \
the /gene name was used instead. This gene name may not be unique, please check model!!'.format(
                len(no_locus_tag)
            )
        )
    return good, updated, noseq, unknown, cmod, gprMap, gprMapAnnot, no_locus_tag
def setup_class(klass):
    """Runs once per class before any test: loads 'cbmpy_test_core' and sets the shared class attributes"""
    core_model = cbmpy.readSBML3FBC('cbmpy_test_core')
    klass.mcore = core_model
    klass.sboterm1 = 'SBO:1234567'
    klass.cDir = cDir