def Main(): options, _ = MakeOpts().parse_args(sys.argv) estimators = thermodynamic_estimators.LoadAllEstimators() input_filename = path.abspath(options.input_filename) if not path.exists(input_filename): logging.fatal('Input filename %s doesn\'t exist' % input_filename) print 'Will read pathway definitions from %s' % input_filename thermo = estimators[options.thermodynamics_source] print "Using the thermodynamic estimations of: " + thermo.name thermo_data = thermodynamic_data.WrapperThermoData(thermo) # Create a bounds instance kegg_instance = kegg.Kegg.getInstance() # Create output directories out_dir = options.output_dir if not path.exists(out_dir): util._mkdir(out_dir) pathgraph_dir = path.join(out_dir, 'pathway_graphs/') util._mkdir(pathgraph_dir) print 'Executing MTDF analysis' pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename) results = [] for pathway_data in pathway_iterator: if pathway_data.skip: print 'Skipping pathway', pathway_data.name continue print 'Analyzing pathway', pathway_data.name model = pathway_data.GetStoichiometricModel(kegg_instance) model_bounds = pathway_data.GetBounds() mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data) result = mtdf_opt.FindMTDF(model_bounds) print 'Optimization status', result.status result.WriteAllGraphs(pathgraph_dir) results.append(result) mtdf = result.opt_val print '\tMTDF for', pathway_data.name, '= %.2g' % mtdf output_filename = path.join(out_dir, 'results.html') print 'Writing output to', output_filename template_data = {'analysis_type': 'MTDF', 'results':results} templates.render_to_file('pathway_optimization_results.html', template_data, output_filename)
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Scan a pH range and record every pathway's OBD at each pH.

    Writes a summary CSV ('<prefix>.csv'), a per-pathway shadow-price CSV
    under the '<prefix>/' directory, and an HTML report with a summary
    figure of OBD versus pH.

    Args:
        pathway_file: KEGG-style pathway definition file.
        output_prefix: prefix for all generated .html/.csv outputs.
        thermo: a thermodynamics estimator passed through to GetAllOBDs.
        conc_range: string parsed by ParseConcentrationRange into pH values.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=None, section_prefix="test", balance_water=True,
                      override_bounds={})

    csv_output = csv.writer(open('%s.csv' % output_prefix, 'w'))
    csv_output.writerow(['pH'] + pathway_names)

    util._mkdir(output_prefix)
    shadow_csvs = {}
    for d in data:
        # Renamed from 'path' to avoid shadowing the module-level
        # os.path import used elsewhere in this file.
        csv_path = '%s/%s.csv' % (output_prefix, d['entry'])
        shadow_csvs[d['entry']] = csv.writer(open(csv_path, 'w'))
        shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

    pH_vec = ParseConcentrationRange(conc_range)
    obd_mat = []
    for pH in pH_vec.flat:
        logging.info("pH = %.1f" % (pH))
        data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                          pH=pH, section_prefix="", balance_water=True,
                          override_bounds={})
        obds = [d['OBD'] for d in data]
        obd_mat.append(obds)
        csv_output.writerow([data[0]['pH']] + obds)

        for d in data:
            # A scalar value means there are no per-reaction shadow prices
            # to record for this pathway at this pH.
            if type(d['reaction prices']) != types.FloatType:
                prices = list(d['reaction prices'].flat)
                shadow_csvs[d['entry']].writerow([pH] + prices)

    # rows are pH values and columns are pathways
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # BUGFIX: the closing tag was '</h1>' while the opening tag is '<h2>'.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def Main(): options, _ = MakeOpts().parse_args(sys.argv) estimators = thermodynamic_estimators.LoadAllEstimators() input_filename = path.abspath(options.input_filename) if not path.exists(input_filename): logging.fatal('Input filename %s doesn\'t exist' % input_filename) print 'Will read pathway definitions from %s' % input_filename thermo = estimators[options.thermodynamics_source] print "Using the thermodynamic estimations of: " + thermo.name thermo_data = thermodynamic_data.WrapperThermoData(thermo) # Create a bounds instance kegg_instance = kegg.Kegg.getInstance() # Create output directories out_dir = options.output_dir if not path.exists(out_dir): util._mkdir(out_dir) pathgraph_dir = path.join(out_dir, 'pathway_graphs/') util._mkdir(pathgraph_dir) print 'Executing MTDF analysis' pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename) results = [] for pathway_data in pathway_iterator: if pathway_data.skip: print 'Skipping pathway', pathway_data.name continue print 'Analyzing pathway', pathway_data.name model = pathway_data.GetStoichiometricModel(kegg_instance) model_bounds = pathway_data.GetBounds() mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data) result = mtdf_opt.FindMTDF(model_bounds) print 'Optimization status', result.status result.WriteAllGraphs(pathgraph_dir) results.append(result) mtdf = result.opt_val print '\tMTDF for', pathway_data.name, '= %.2g' % mtdf output_filename = path.join(out_dir, 'results.html') print 'Writing output to', output_filename template_data = {'analysis_type': 'MTDF', 'results': results} templates.render_to_file('pathway_optimization_results.html', template_data, output_filename)
def PlotHistogram(self, filename):
    """Plot a histogram of the per-group counts and save it as a PNG.

    Args:
        filename: path of the output image; missing parent directories
            are created on demand.
    """
    dirname = os.path.dirname(filename)
    # BUGFIX: dirname is '' for a bare filename (no directory part);
    # os.path.exists('') is False, so the original code would call
    # util._mkdir('') and fail. Only create a directory when one is named.
    if dirname and not os.path.exists(dirname):
        util._mkdir(dirname)

    fig = pylab.figure()
    c = list(self.counts.values())
    pylab.title('Count Per Group Histogram')
    pylab.xlabel('Count')
    pylab.ylabel('Number of Groups')
    pylab.hist(c, pylab.arange(0, max(c), 1))
    fig.savefig(filename, format='png')
def render_to_file(template_name, data, output_filename):
    """Renders a template to a given file.

    Will create the parent directory of the output file if not present.

    Args:
        template_name: the name of a template file in pygibbs/templates.
        data: a dictionary of template data.
        output_filename: the name/path of the file to write to.
    """
    # Renamed from 'dir' to avoid shadowing the builtin.
    out_dir = os.path.abspath(os.path.dirname(output_filename))
    if not os.path.exists(out_dir):
        util._mkdir(out_dir)
    # Use a context manager so the file handle is closed deterministically
    # (the original relied on refcounting to close the file).
    with open(output_filename, 'w') as out_file:
        out_file.write(render_to_string(template_name, data))
def __init__(self, filename, force_path_creation=True, flush_always=True):
    """Open an HTML file for writing, creating its directory if needed.

    Args:
        filename: path of the HTML file to create.
        force_path_creation: if True, a missing parent directory is
            created; if False, a missing directory raises an Exception.
        flush_always: whether every write should be flushed immediately.

    Raises:
        Exception: if the parent directory does not exist and
            force_path_creation is False.
    """
    from toolbox.util import _mkdir
    BaseHtmlWriter.__init__(self)
    self.filename = filename
    self.filepath = os.path.dirname(filename)
    self.flush_always = flush_always
    # BUGFIX: an empty dirname means the current directory, which always
    # exists — the original would try _mkdir('') for a bare filename.
    # Also removed the redundant second os.path.exists() check.
    if self.filepath and not os.path.exists(self.filepath):
        if force_path_creation:
            _mkdir(self.filepath)
        else:
            raise Exception("cannot write to HTML file %s since the directory doesn't exist" % filename)
    self.file = open(self.filename, "w")
    self.write_header()
    self.write_js(self.filepath)
def main():
    """Export Alberty formation energies and KEGG reaction data to CSV.

    At fixed conditions (pH 7, pMg 3, I = 0.1 M, T = 298.15 K) this writes,
    under ../res/arren: a stoichiometric matrix, per-reaction dG0 values,
    EC numbers, and per-compound formation energies.
    """
    pH, pMg, I, T = (7.0, 3, 0.1, 298.15)
    db = SqliteDatabase('../res/gibbs.sqlite')  # NOTE(review): unused here
    kegg = Kegg.getInstance()
    alberty = PsuedoisomerTableThermodynamics(
        '../data/thermodynamics/alberty_pseudoisomers.csv')
    cids = alberty.get_all_cids()
    # Column vector of transformed formation energies, one row per compound.
    dG0_f = pylab.zeros((len(cids), 1))
    for i, cid in enumerate(cids):
        dG0_f[i, 0] = alberty.cid2dG0_tag(cid, pH=pH, pMg=pMg, I=I, T=T)

    S = pylab.zeros((0, len(cids)))
    rids = []
    ec_numbers = []
    for rid in kegg.get_all_rids():
        sparse = kegg.rid2sparse_reaction(rid)
        # Skip reactions that involve any compound without an Alberty energy.
        if not set(cids).issuperset(sparse.keys()):
            continue
        rids.append(rid)
        ec_numbers.append(kegg.rid2ec_list(rid))
        S_row = pylab.zeros((1, len(cids)))
        for cid, coeff in sparse.iteritems():
            S_row[0, cids.index(cid)] = coeff
        S = pylab.vstack([S, S_row])

    # Reaction energies follow directly from stoichiometry: dG0_r = S * dG0_f.
    dG0_r = pylab.dot(S, dG0_f)

    util._mkdir('../res/arren')
    s_writer = csv.writer(open('../res/arren/stoichiomety.csv', 'w'))
    r_writer = csv.writer(open('../res/arren/reactions.csv', 'w'))
    e_writer = csv.writer(open('../res/arren/ec_numbers.csv', 'w'))
    r_writer.writerow(['rid', 'dG0_r'])
    e_writer.writerow(['rid', 'ec0', 'ec1', 'ec2', 'ec3'])
    for i in xrange(S.shape[0]):
        s_writer.writerow(["%d" % x for x in S[i, :]])
        # One row per EC number; EC strings are ';'-separated lists of
        # dotted numbers (e.g. '1.1.1.1').
        for ec in ec_numbers[i].split(';'):
            e_writer.writerow(['%d' % rids[i]] + ec.split('.'))
        r_writer.writerow(["%d" % rids[i], '%.1f' % dG0_r[i, 0]])

    c_writer = csv.writer(open('../res/arren/compounds.csv', 'w'))
    c_writer.writerow(['cid', 'dG0_f'])
    for j in xrange(len(cids)):
        c_writer.writerow(['%d' % cids[j], '%.1f' % dG0_f[j, 0]])
def render_to_file(template_name, data, output_filename):
    """Renders a template to a given file.

    Will create the parent directory of the output file if not present.

    Args:
        template_name: the name of a template file in pygibbs/templates.
        data: a dictionary of template data.
        output_filename: the name/path of the file to write to.
    """
    # Renamed from 'dir' to avoid shadowing the builtin.
    out_dir = os.path.abspath(os.path.dirname(output_filename))
    if not os.path.exists(out_dir):
        util._mkdir(out_dir)
    # Use a context manager so the file handle is closed deterministically
    # (the original relied on refcounting to close the file).
    with open(output_filename, 'w') as out_file:
        out_file.write(render_to_string(template_name, data))
def main():
    """Export Alberty formation energies and KEGG reaction data to CSV.

    At fixed conditions (pH 7, pMg 3, I = 0.1 M, T = 298.15 K) this writes,
    under ../res/arren: a stoichiometric matrix, per-reaction dG0 values,
    EC numbers, and per-compound formation energies.
    """
    pH, pMg, I, T = (7.0, 3, 0.1, 298.15)
    db = SqliteDatabase('../res/gibbs.sqlite')  # NOTE(review): unused here
    kegg = Kegg.getInstance()
    alberty = PsuedoisomerTableThermodynamics('../data/thermodynamics/alberty_pseudoisomers.csv')
    cids = alberty.get_all_cids()
    # Column vector of transformed formation energies, one row per compound.
    dG0_f = pylab.zeros((len(cids), 1))
    for i, cid in enumerate(cids):
        dG0_f[i, 0] = alberty.cid2dG0_tag(cid, pH=pH, pMg=pMg, I=I, T=T)

    S = pylab.zeros((0, len(cids)))
    rids = []
    ec_numbers = []
    for rid in kegg.get_all_rids():
        sparse = kegg.rid2sparse_reaction(rid)
        # Skip reactions that involve any compound without an Alberty energy.
        if not set(cids).issuperset(sparse.keys()):
            continue
        rids.append(rid)
        ec_numbers.append(kegg.rid2ec_list(rid))
        S_row = pylab.zeros((1, len(cids)))
        for cid, coeff in sparse.iteritems():
            S_row[0, cids.index(cid)] = coeff
        S = pylab.vstack([S, S_row])

    # Reaction energies follow directly from stoichiometry: dG0_r = S * dG0_f.
    dG0_r = pylab.dot(S, dG0_f)

    util._mkdir('../res/arren')
    s_writer = csv.writer(open('../res/arren/stoichiomety.csv', 'w'))
    r_writer = csv.writer(open('../res/arren/reactions.csv', 'w'))
    e_writer = csv.writer(open('../res/arren/ec_numbers.csv', 'w'))
    r_writer.writerow(['rid', 'dG0_r'])
    e_writer.writerow(['rid', 'ec0', 'ec1', 'ec2', 'ec3'])
    for i in xrange(S.shape[0]):
        s_writer.writerow(["%d" % x for x in S[i,:]])
        # One row per EC number; EC strings are ';'-separated lists of
        # dotted numbers (e.g. '1.1.1.1').
        for ec in ec_numbers[i].split(';'):
            e_writer.writerow(['%d' % rids[i]] + ec.split('.'))
        r_writer.writerow(["%d" % rids[i], '%.1f' % dG0_r[i,0]])

    c_writer = csv.writer(open('../res/arren/compounds.csv', 'w'))
    c_writer.writerow(['cid', 'dG0_f'])
    for j in xrange(len(cids)):
        c_writer.writerow(['%d' % cids[j], '%.1f' % dG0_f[j, 0]])
def __init__(self, filename, force_path_creation=True, flush_always=True):
    """Open an HTML file for writing, creating its directory if needed.

    Args:
        filename: path of the HTML file to create.
        force_path_creation: if True, a missing parent directory is
            created; if False, a missing directory raises an Exception.
        flush_always: whether every write should be flushed immediately.

    Raises:
        Exception: if the parent directory does not exist and
            force_path_creation is False.
    """
    from toolbox.util import _mkdir
    BaseHtmlWriter.__init__(self)
    self.filename = filename
    self.filepath = os.path.dirname(filename)
    self.flush_always = flush_always
    # BUGFIX: an empty dirname means the current directory, which always
    # exists — the original would try _mkdir('') for a bare filename.
    # Also removed the redundant second os.path.exists() check.
    if self.filepath and not os.path.exists(self.filepath):
        if force_path_creation:
            _mkdir(self.filepath)
        else:
            raise Exception(
                "cannot write to HTML file %s since the directory doesn't exist" % filename)
    self.file = open(self.filename, "w")
    self.write_header()
    self.write_js(self.filepath)
def __init__(self, org='ecoli', db=None):
    """Set up MetaCyc flat-file access for one organism.

    Parses the MetaCyc flat files on first use. When a database handle is
    supplied, the parsed data is cached in tables prefixed
    'metacyc_<org>_' and loaded from there on subsequent runs.

    Args:
        org: the MetaCyc organism identifier (e.g. 'ecoli').
        db: optional database handle used as a cache.
    """
    self.db = db
    self.org = org
    self.base_dir = '../MetaCyc/' + org
    util._mkdir(self.base_dir)

    self.TAR_URL = ('http://brg.ai.sri.com/ecocyc/dist/flatfiles-52983746/'
                    + org + '.tar.gz')
    self.TAR_FILE = self.base_dir + '/' + org + '.tar.gz'

    data_dir = self.base_dir + '/14.6/data'
    self.COMPOUND_FILE = data_dir + '/compounds.dat'
    self.REACTION_FILE = data_dir + '/reactions.dat'
    self.PATHWAY_FILE = data_dir + '/pathways.dat'
    self.REGULATION_FILE = data_dir + '/regulation.dat'

    cache_table = 'metacyc_' + org + '_compound'
    if not self.db:
        # No DB given: always parse the flat files.
        self.FromFiles()
    elif not self.db.DoesTableExist(cache_table):
        # DB given but empty: parse the files once and cache the result.
        self.FromFiles()
        self.ToDatabase()
    else:
        # Cached copy available: load it directly.
        self.FromDatabase()
def __init__(self, db, public_db, html_writer, thermo=None,
             thermodynamic_method='global', max_reactions=None,
             max_solutions=100, maximal_dG=0.0, update_file=None,
             output_kegg_file=None):
    """Initialize the Pathologic object.

    Args:
        db: the DB to read group contribution data from.
        public_db: the public DB handle (stored as self.db_public).
        html_writer: an HtmlWriter for writing output.
        thermo: optional thermodynamics estimator.
        thermodynamic_method: the analysis method; must be one of
            OptimizationMethods.ALLOWED_METHODS (e.g. "none", "pCr",
            "MTDF", "global" or "localized").
        max_reactions: the maximum number of reactions to find in a
            solution (use None for unlimited).
        max_solutions: the maximum number of solutions to find
            (use None for unlimited).
        maximal_dG: the maximum dG allowed; changes the thermodynamic
            constraints to use a different MTDF. When 0, this is the
            usual feasibility measure.
        update_file: the file to read for KEGG updates.
        output_kegg_file: accepted for interface compatibility.
    """
    assert thermodynamic_method in OptimizationMethods.ALLOWED_METHODS
    util._mkdir('../res/pathologic')

    # Data sources and output sinks.
    self.db = db
    self.db_public = public_db
    self.html_writer = html_writer
    self.thermo = thermo

    # Search/optimization parameters.
    self.thermodynamic_method = thermodynamic_method
    self.max_reactions = max_reactions
    self.max_solutions = max_solutions
    self.maximal_dG = maximal_dG

    self.kegg_pathologic = KeggPathologic()
    if update_file is not None:
        self.kegg_pathologic.update_database(update_file, self.html_writer)
def Main():
    """Compare promoter activities between two growth conditions.

    Loads plate data for two sets of plates from the 'tecan' database,
    computes per-strain reporter activities under each condition, writes
    per-strain and summary figures, and renders an HTML comparison report.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    assert options.experiment_id
    assert options.first_plate_ids and options.second_plate_ids
    assert options.culture_label and options.reporter_label
    assert options.output_dir

    if not path.exists(options.output_dir):
        util._mkdir(options.output_dir)
    imgs_path = path.join(options.output_dir, 'imgs/')
    if not path.exists(imgs_path):
        util._mkdir(imgs_path)

    # Plate IDs are given as comma-separated lists on the command line.
    first_plate_ids = map(str.strip, options.first_plate_ids.split(','))
    second_plate_ids = map(str.strip, options.second_plate_ids.split(','))

    labels_to_ignore = set()
    for l in options.labels_to_ignore.split(','):
        labels_to_ignore.add(l.strip())

    print 'Reading plates from experiment %s' % (options.experiment_id)
    # SECURITY NOTE(review): hard-coded database credentials; consider
    # moving them to a config file or environment variables.
    db = MySQLDatabase(host='hldbv02', user='******',
                       passwd='a1a1a1', db='tecan')

    # Pipeline components: filter raw wells, subtract reporter background,
    # align culture curves, then compute reporter activity in a window.
    filterer = promoter_activity.CultureReporterFilterer(
        options.min_culture_level, options.min_reporter_level)
    reporter_bg_sub = promoter_activity.ReporterBackgroundSubtracter(
        options.background_label)
    culture_shifter = promoter_activity.CultureShifter()
    activity_calc = promoter_activity.ReporterActivityCalculator(
        options.lower_culture_bound, options.upper_culture_bound,
        min_reporter_level=options.lower_reporter_bound,
        window_size=options.window_size)

    first_plate_runners = []
    second_plate_runners = []

    print 'Calculating promoter activities for first condition'
    runner1 = PlateActivityRunner(
        options.culture_label, options.reporter_label,
        filterer, culture_shifter, reporter_bg_sub, activity_calc)
    for plate_id in first_plate_ids:
        plate = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        runner1.AddPlate(plate)
    runner1.Run()
    first_plate_runners.append(runner1)

    print 'Calculating promoter activities for second condition'
    runner2 = PlateActivityRunner(
        options.culture_label, options.reporter_label,
        filterer, culture_shifter, reporter_bg_sub, activity_calc)
    for plate_id in second_plate_ids:
        plate = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        runner2.AddPlate(plate)
    runner2.Run()
    second_plate_runners.append(runner2)

    # Unify strain data.
    # NOTE(review): condition names 'Glucose'/'Gluconate' are hard-coded.
    print 'Saving figures'
    strains_data = StrainConditionsData(options.background_label)
    for plate_data in first_plate_runners:
        strains_data.AddPlateData('Glucose', plate_data,
                                  ignore_labels=labels_to_ignore)
    for plate_data in second_plate_runners:
        strains_data.AddPlateData('Gluconate', plate_data,
                                  ignore_labels=labels_to_ignore)
    strains_data.MakeStrainFigures(imgs_path)
    summary_fignames = strains_data.MakeSummaryFigures(
        imgs_path, 'Glucose', 'Gluconate')
    plate_fignames = strains_data.MakePerPlateFigures(imgs_path)

    labels = strains_data.GetStrainLabels()
    condition1_activities, condition1_errs = strains_data.GetMeanMaxActivities(
        labels, 'Glucose')
    condition2_activities, condition2_errs = strains_data.GetMeanMaxActivities(
        labels, 'Gluconate')
    # Log2 fold-change of condition 2 relative to condition 1.
    log_1 = np.log2(condition1_activities)
    log_2 = np.log2(condition2_activities)
    diffs = log_2 - log_1
    sorted_diffs = list(np.argsort(diffs))
    sorted_diffs.reverse()  # largest fold-change first

    diffs_data = []
    for i in sorted_diffs:
        logfold = diffs[i]
        fold = np.exp2(logfold)
        # NaN fold-changes (missing activity) are rendered as blanks.
        if np.isnan(logfold):
            logfold = None
            fold = None
        diffs_data.append({'label': labels[i],
                           'fold_change': fold,
                           'log_fold': logfold})

    # Render the template.
    print 'Writing HTML output'
    template_data = {'experiment_id': options.experiment_id,
                     'first_plate_ids': first_plate_ids,
                     'second_plate_ids': second_plate_ids,
                     'culture_label': options.culture_label,
                     'reporter_label': options.reporter_label,
                     'first_plates': first_plate_runners,
                     'second_plates': second_plate_runners,
                     'strains_data': strains_data,
                     'diffs_data': diffs_data,
                     'summary_figure_fnames': summary_fignames,
                     'per_plate_figure_fnames': plate_fignames}
    template_fname = path.join(options.output_dir, 'results.html')
    templates.render_to_file(
        'compare_promoter_activities.html', template_data, template_fname)
    return
When the experimental data is broken into more than one XLS sheet, this method concatenates the data into one series and returns it as if it was from one source. """ time_array = array([]) value_array = array([]) last_t = 0 for vp in vp_vec: times, values = vp.get_data(index, row, col) time_array = hstack([time_array, times + last_t]) value_array = hstack([value_array, values]) if len(time_array) > 0: last_t = time_array.max() return time_array, value_array _mkdir('../res/victor') vp_vec = [] for name in ["OD600 20110302_lycopene1"]: vp = VictorParser() vp.parse_excel("../data/victor/%s.xls" % (name)) vp_vec.append(vp) pp = PdfPages('../res/victor/2011-02-28_lycopene1.pdf') #rcParams['text.usetex'] = True rcParams['legend.fontsize'] = 12 #rcParams['font.family'] = 'sans-serif' #rcParams['font.size'] = 8 #rcParams['lines.linewidth'] = 0.3 #rcParams['lines.markersize'] = 2
#_U, s, _V = np.linalg.svd(augmented_matrix, full_matrices=False) #print sorted(s) contributions, _K = LinearRegression.LeastSquares(full_matrix, dG_vector) for i, group_name in enumerate(all_group_names): print "%s,%.3f" % (group_name, contributions[i, 0]) pyplot.plot(dG_vector, dG_vector-np.dot(full_matrix, contributions), '.') pyplot.show() if __name__ == "__main__": if len(sys.argv) > 1 and sys.argv[1] == 'test': TestGroupMatrix() sys.exit(0) _mkdir('../res') db = SqliteDatabase('../res/gibbs.sqlite', 'w') H_nopka = Hatzi(use_pKa=False) H_withpka = Hatzi(use_pKa=True) H_withpka.ToDatabase(db, 'hatzi_thermodynamics') #H.ToDatabase(db, 'hatzi_gc') #H.I = 0.25 #H.T = 300; #sparse_reaction = {13:-1, 1:-1, 9:2} #sparse_reaction = {36:-1, 3981:1} #sparse_reaction = {6:-1, 143:-1, 234:1, 5:1} #sparse_reaction = {1:-1, 499:-1, 603:1, 86:1} #sparse_reaction = {1:-1, 6:-1, 311:-1, 288:1, 5:1, 80:2, 26:1} #sparse_reaction = {408:-1, 6:-1, 4092:1, 5:1} #sparse_reaction = {588:-1, 1:-1, 114:1, 9:1}
def read_rbs_calc_results(fname):
    """Read RBS-calculator output and return the predicted expression levels.

    Args:
        fname: path to the RBS calculator CSV (first row is a header).

    Returns:
        An array of the predicted expression levels (floats), one per row.
    """
    csv_reader = csv.reader(open(fname, 'r'))
    csv_reader.next()  # skip the header row
    R_calc = []
    for row in csv_reader:
        # Columns: name, start position, expression level, kinetic score,
        # sequence — only the expression level is kept.
        (Name,Start_Position,Expression_Level,Kinetic_Score,Sequence) = row
        R_calc.append(float(Expression_Level))
    return array(R_calc)

################################################################################
################################################################################
################################################################################
################################################################################

#(rbs, promoters, data_facs) = read_data('data_FACS.csv')
#(rbs, promoters, data_single) = read_data('data_flu-OD_single.csv')
#(rbs, promoters, data_multi) = read_data('data_flu-OD_multi.csv')

# Script entry: load precomputed RBS scores if available, then fit.
util._mkdir('../res/pro_rbs')
rbs_score_fname = '../res/pro_rbs/rbs_2010-08-18_17-50-19_133.csv'
if (os.path.exists(rbs_score_fname)):
    R_calc = read_rbs_calc_results(rbs_score_fname)
else:
    # The scores cannot be generated locally; warn and continue without them.
    sys.stderr.write("The RBS calculator score file could not be found, you must " + \
        "generate it using the 'rbs-calc' website and put it here: %s" % rbs_score_fname)
    R_calc = None

html = HtmlWriter('../res/pro_rbs/fit.html')
(b, R, P) = fit_params('data_flu-OD_multi', html, R_calc)
def main():
    """Verify thermodynamic estimators against the NIST database.

    Only the first branch below is live (`if True:`); the `elif False:`
    branches are alternative gradient-ascent experiments kept for
    reference and toggled by hand.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()

    if True:
        # Live branch: compare Alberty, Hatzimanikatis and the local GC
        # ("Milo") estimators against the NIST measurements.
        grad.load_nist_data(nist, alberty, skip_missing_reactions=False,
                            T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)
        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is
        # the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should
        # not be changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data(
            "../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient1")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is
        # Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is
        # Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True,
                            T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")
    elif False:
        # Run the gradient ascent algorithm, where the starting point
        # arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")
    elif False:
        # Use Alberty's table from (Mathematica 2006) to calculate the dG0
        # of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                # Reactions with unknown compounds are simply skipped.
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter
    elif False:
        # Dump all pseudoisomers appearing in the NIST reactions, with
        # structure drawings where KEGG can supply them.
        util._mkdir("../res/nist/fig")
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))
        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())
        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write(" <tr><td>C%05d</td><td>%s</td><td>"
                              % (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s"
                                  % (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s"
                                  % (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                # Known pseudoisomers: one line per (nH, charge) pair.
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                # Fall back to KEGG's formula-derived values.
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))
            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    # NOTE(review): placement reconstructed — the close() appears to end
    # the function, after whichever branch ran; confirm against original.
    html_writer.close()
default=False, help="A flag for running the TEST only (without TRAIN)") parser.add_argument( "-d", "--from_database", action="store_true", default=False, help="A flag for loading the data from the DB instead of " "the CSV files (saves time but no debug information)") return parser if __name__ == '__main__': parser = MakeOpts() args = parser.parse_args() util._mkdir('../res') db = SqliteDatabase('../res/gibbs.sqlite', 'w') if args.transformed: prefix = 'bgc' else: prefix = 'pgc' if args.test_only: html_writer = HtmlWriter('../res/%s_test.html' % prefix) elif args.train_only: html_writer = HtmlWriter('../res/%s_train.html' % prefix) else: html_writer = HtmlWriter('../res/%s.html' % prefix) G = GroupContribution(db=db,
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Scan a pH range and record every pathway's OBD at each pH.

    Writes a summary CSV ('<prefix>.csv'), a per-pathway shadow-price CSV
    under the '<prefix>/' directory, and an HTML report with a summary
    figure of OBD versus pH.

    Args:
        pathway_file: KEGG-style pathway definition file.
        output_prefix: prefix for all generated .html/.csv outputs.
        thermo: a thermodynamics estimator passed through to GetAllOBDs.
        conc_range: string parsed by ParseConcentrationRange into pH values.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=None, section_prefix="test", balance_water=True,
                      override_bounds={})

    csv_output = csv.writer(open('%s.csv' % output_prefix, 'w'))
    csv_output.writerow(['pH'] + pathway_names)

    util._mkdir(output_prefix)
    shadow_csvs = {}
    for d in data:
        # Renamed from 'path' to avoid shadowing the module-level
        # os.path import used elsewhere in this file.
        csv_path = '%s/%s.csv' % (output_prefix, d['entry'])
        shadow_csvs[d['entry']] = csv.writer(open(csv_path, 'w'))
        shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

    pH_vec = ParseConcentrationRange(conc_range)
    obd_mat = []
    for pH in pH_vec.flat:
        logging.info("pH = %.1f" % (pH))
        data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                          pH=pH, section_prefix="", balance_water=True,
                          override_bounds={})
        obds = [d['OBD'] for d in data]
        obd_mat.append(obds)
        csv_output.writerow([data[0]['pH']] + obds)

        for d in data:
            # A scalar value means there are no per-reaction shadow prices
            # to record for this pathway at this pH.
            if type(d['reaction prices']) != types.FloatType:
                prices = list(d['reaction prices'].flat)
                shadow_csvs[d['entry']].writerow([pH] + prices)

    # rows are pH values and columns are pathways
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # BUGFIX: the closing tag was '</h1>' while the opening tag is '<h2>'.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def Main():
    """Run Protein Cost analysis and compare it with MTDF optimization.

    For each pathway, solves both the MTDF and the protein-cost
    optimization, plots cumulative dG profiles per pathway, and finishes
    with summary scatter plots (protein cost vs. net ATP and vs. ODB).
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()

    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        # NOTE(review): logging.fatal does not exit; execution continues
        # even when the input file is missing — confirm this is intended.
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)
    print 'Will read pathway definitions from %s' % input_filename

    # Make thermodynamic and kinetic data containers
    thermo = estimators[options.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)
    # Uniform kinetic data
    kin_data = kinetic_data.UniformKineticData(kcat=100, km=1e-4)

    # Create a kegg instance
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)

    print 'Executing Protein Cost analysis'
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    mtdfs = []
    protein_scores = []
    names = []
    num_atp = []
    path_lengths = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print 'Skipping pathway', pathway_data.name
            continue

        print 'Analyzing pathway', pathway_data.name
        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()
        protein_opt = protein_optimizer.ProteinOptimizer(
            model, thermo_data, kin_data)
        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)

        # Solve MTDF.
        mtdf_res = mtdf_opt.FindMTDF(model_bounds)
        mtdf_status = mtdf_res.status
        if mtdf_status.IsFailure() or mtdf_status.IsInfeasible():
            print '\tFailed to optimize', pathway_data.name
            continue

        # Solve protein.
        protein_res = protein_opt.FindOptimum(model_bounds)
        protein_status = protein_res.status
        if protein_status.IsFailure() or protein_status.IsInfeasible():
            print '\tFailed to optimize', pathway_data.name
            continue

        mtdfs.append(mtdf_res.opt_val)
        protein_scores.append(protein_res.opt_val)
        names.append(model.name)

        net_reaction = mtdf_res.net_reaction.sparse
        # Compound ID 2 is ATP in KEGG numbering — presumably net ATP yield;
        # confirm against the net_reaction representation.
        atp_produced = net_reaction.get(2, 0)
        num_atp.append(atp_produced)
        path_lengths.append(len(mtdf_res.reaction_ids))

        # Per-pathway figure: cumulative dG profiles under four conditions.
        pylab.figure()
        pylab.title(model.name)
        dGr0_tag = mtdf_res.dGr0_tag.flatten().tolist()
        dgmtdf = mtdf_res.dGr_tag.flatten().tolist()
        dgprotein = protein_res.dGr_tag.flatten().tolist()
        dgbio = mtdf_res.dGr_bio.flatten().tolist()
        dg0_profile = np.cumsum([0] + dGr0_tag)
        dgmtdf_profile = np.cumsum([0] + dgmtdf)
        dgprotein_profile = np.cumsum([0] + dgprotein)
        dgbio_profile = np.cumsum([0] + dgbio)
        rxn_range = pylab.arange(len(mtdf_res.reaction_ids) + 1)
        pylab.plot(rxn_range, dg0_profile, 'b--', linewidth=2,
                   label='Standard Conditions')
        pylab.plot(rxn_range, dgbio_profile, 'c--', linewidth=2,
                   label='Biological Conditions')
        mtdf_label = 'MTDF Optimized (MTDF = %.2g kJ/mol)' % mtdf_res.opt_val
        pylab.plot(rxn_range, dgmtdf_profile, 'r-', linewidth=2,
                   label=mtdf_label)
        pc_label = 'Protein Optimized (Cost = %.2g)' % protein_res.opt_val
        pylab.plot(rxn_range, dgprotein_profile, 'g-', linewidth=2,
                   label=pc_label)
        pylab.xticks(rxn_range[:-1] + 0.5, mtdf_res.reaction_ids)
        pylab.xlabel('Reaction step')
        pylab.ylabel('Cumulative dG (kJ/mol)')
        pylab.legend(loc='upper right', prop=LEGEND_FONT)

    # Summary scatter: protein cost as a function of net ATP production.
    pylab.figure()
    pylab.plot(num_atp, protein_scores, 'b.')
    #pylab.xlabel('MTDF (kJ/mol)')
    pylab.xlabel('Net ATP Production')
    pylab.ylabel('Protein Cost')
    for x, y, s in zip(num_atp, protein_scores, names):
        pylab.text(x, y, s, fontsize=10)
    max_protein = np.max(protein_scores)
    pylab.plot([0, 0], [0, max_protein], 'r--', label='0 ATP Produced')
    pylab.plot([1, 1], [0, max_protein], 'g--', label='1 ATP Produced')
    pylab.plot([2, 2], [0, max_protein], 'b--', label='2 ATP Produced')
    #pylab.yscale('log')
    pylab.xticks([])
    pylab.xlim((-1, 3))
    pylab.legend()

    # ODB (unitless) derived from the MTDF values.
    odbs = np.tanh(np.array(mtdfs) / (2 * RT))
    pylab.figure()
    pylab.plot(protein_scores, odbs, 'b.')
    pylab.xlabel('Protein Cost')
    pylab.ylabel('ODB (unitless)')
    #for x,y,s in zip(protein_scores, length_scaled_cost, names):
    #    pylab.text(x, y, s, fontsize=10)
    pylab.show()
def main():
    """Driver for the NIST gradient-ascent regression experiments.

    Loads the group-contribution model and the NIST reaction database, then
    runs exactly one of several hard-coded experiment branches (selected by
    the literal ``if True`` / ``elif False`` switches below — only the first
    branch is currently active).  Results are written to
    ``../res/nist/report.html``.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()
    if True:
        # Active branch: verify Alberty, Hatzimanikatis and group-contribution
        # ("Milo") predictions against the NIST data, restricted to T in 298-314K.
        grad.load_nist_data(nist, alberty, skip_missing_reactions=False,
                            T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)
        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same
        # file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be
        # changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data("../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient1")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's
        # table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's
        # table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True,
                            T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")
    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary
        # (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")
    elif False:
        # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all
        # possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                # Reactions involving a compound without a formation energy are
                # silently skipped (not counted).
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter
    elif False:
        # Dump all pseudoisomers appearing in NIST reactions to CSV and render
        # each compound's structure into an HTML table.
        util._mkdir("../res/nist/fig")
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))
        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())
        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" % (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                # Known pseudoisomers: one CSV row per (nH, charge) pair.
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                # Fall back on KEGG's hydrogen count / charge for this compound.
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))
            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
value_array = hstack([value_array, value]) if len(time_array) > 0: last_t = time_array.max() return (time_array, value_array) name_list = ["2010-12-16 rubisco prk"] vp_vec = [] for name in name_list: vp = TecanParser() vp.parse_excel("../data/tecan/%s.xls" % (name)) vp_vec.append(vp) util._mkdir("../res/tecan") pp = PdfPages("../res/tecan/%s.pdf" % name_list[0]) # rcParams['text.usetex'] = True rcParams["legend.fontsize"] = 12 # rcParams['font.family'] = 'sans-serif' # rcParams['font.size'] = 8 # rcParams['lines.linewidth'] = 0.3 # rcParams['lines.markersize'] = 2 # rcParams['figure.figsize'] = [5, 10] # rcParams['figure.subplot.hspace'] = 0.3 # figure() plot_growth_rate = True fit_window_size = 1.5 # hours fit_start_threshold = 0.01
#print sorted(s) contributions, _K = LinearRegression.LeastSquares(full_matrix, dG_vector) for i, group_name in enumerate(all_group_names): print "%s,%.3f" % (group_name, contributions[i, 0]) pyplot.plot(dG_vector, dG_vector - np.dot(full_matrix, contributions), '.') pyplot.show() if __name__ == "__main__": if len(sys.argv) > 1 and sys.argv[1] == 'test': TestGroupMatrix() sys.exit(0) _mkdir('../res') db = SqliteDatabase('../res/gibbs.sqlite', 'w') H_nopka = Hatzi(use_pKa=False) H_withpka = Hatzi(use_pKa=True) H_withpka.ToDatabase(db, 'hatzi_thermodynamics') #H.ToDatabase(db, 'hatzi_gc') #H.I = 0.25 #H.T = 300; #sparse_reaction = {13:-1, 1:-1, 9:2} #sparse_reaction = {36:-1, 3981:1} #sparse_reaction = {6:-1, 143:-1, 234:1, 5:1} #sparse_reaction = {1:-1, 499:-1, 603:1, 86:1} #sparse_reaction = {1:-1, 6:-1, 311:-1, 288:1, 5:1, 80:2, 26:1} #sparse_reaction = {408:-1, 6:-1, 4092:1, 5:1} #sparse_reaction = {588:-1, 1:-1, 114:1, 9:1}
def Main():
    """Run MTDF and protein-cost optimization on each KEGG pathway in the input file.

    For every non-skipped pathway this solves the MTDF problem and the protein
    optimization problem, draws a cumulative-dG profile figure per pathway, and
    finishes with two summary scatter plots (protein cost vs. ATP yield, and
    ODB vs. protein cost).  Figures are shown interactively via pylab.show().
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()
    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        # NOTE(review): logging.fatal only logs — execution continues and will
        # fail later when the file is opened; consider exiting here.
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)
    print 'Will read pathway definitions from %s' % input_filename

    # Make thermodynamic and kinetic data containers
    thermo = estimators[options.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)

    # Uniform kinetic data
    kin_data = kinetic_data.UniformKineticData(kcat=100, km=1e-4)

    # Create a kegg instance
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)

    print 'Executing Protein Cost analysis'
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    # Per-pathway accumulators for the summary plots below.
    mtdfs = []
    protein_scores = []
    names = []
    num_atp = []
    path_lengths = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print 'Skipping pathway', pathway_data.name
            continue

        print 'Analyzing pathway', pathway_data.name
        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()
        protein_opt = protein_optimizer.ProteinOptimizer(model, thermo_data, kin_data)
        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)

        # Solve MTDF.
        mtdf_res = mtdf_opt.FindMTDF(model_bounds)
        mtdf_status = mtdf_res.status
        if mtdf_status.IsFailure() or mtdf_status.IsInfeasible():
            print '\tFailed to optimize', pathway_data.name
            continue

        # Solve protein.
        protein_res = protein_opt.FindOptimum(model_bounds)
        protein_status = protein_res.status
        if protein_status.IsFailure() or protein_status.IsInfeasible():
            print '\tFailed to optimize', pathway_data.name
            continue

        mtdfs.append(mtdf_res.opt_val)
        protein_scores.append(protein_res.opt_val)
        names.append(model.name)
        net_reaction = mtdf_res.net_reaction.sparse
        # Coefficient of compound 2 in the net reaction (presumably KEGG C00002,
        # ATP — TODO confirm) gives the pathway's net ATP yield.
        atp_produced = net_reaction.get(2, 0)
        num_atp.append(atp_produced)
        path_lengths.append(len(mtdf_res.reaction_ids))

        # One figure per pathway: cumulative dG profiles under four conditions.
        pylab.figure()
        pylab.title(model.name)
        dGr0_tag = mtdf_res.dGr0_tag.flatten().tolist()
        dgmtdf = mtdf_res.dGr_tag.flatten().tolist()
        dgprotein = protein_res.dGr_tag.flatten().tolist()
        dgbio = mtdf_res.dGr_bio.flatten().tolist()
        dg0_profile = np.cumsum([0] + dGr0_tag)
        dgmtdf_profile = np.cumsum([0] + dgmtdf)
        dgprotein_profile = np.cumsum([0] + dgprotein)
        dgbio_profile = np.cumsum([0] + dgbio)
        rxn_range = pylab.arange(len(mtdf_res.reaction_ids) + 1)
        pylab.plot(rxn_range, dg0_profile, 'b--',
                   linewidth=2, label='Standard Conditions')
        pylab.plot(rxn_range, dgbio_profile, 'c--',
                   linewidth=2, label='Biological Conditions')
        mtdf_label = 'MTDF Optimized (MTDF = %.2g kJ/mol)' % mtdf_res.opt_val
        pylab.plot(rxn_range, dgmtdf_profile, 'r-',
                   linewidth=2, label=mtdf_label)
        pc_label = 'Protein Optimized (Cost = %.2g)' % protein_res.opt_val
        pylab.plot(rxn_range, dgprotein_profile, 'g-',
                   linewidth=2, label=pc_label)
        # Center each reaction-id tick between its two profile points.
        pylab.xticks(rxn_range[:-1] + 0.5, mtdf_res.reaction_ids)
        pylab.xlabel('Reaction step')
        pylab.ylabel('Cumulative dG (kJ/mol)')
        pylab.legend(loc='upper right', prop=LEGEND_FONT)

    # Summary scatter: protein cost vs. net ATP production, one label per pathway.
    pylab.figure()
    pylab.plot(num_atp, protein_scores, 'b.')
    #pylab.xlabel('MTDF (kJ/mol)')
    pylab.xlabel('Net ATP Production')
    pylab.ylabel('Protein Cost')
    for x, y, s in zip(num_atp, protein_scores, names):
        pylab.text(x, y, s, fontsize=10)
    max_protein = np.max(protein_scores)
    # Vertical guide lines at 0, 1 and 2 ATP produced.
    pylab.plot([0, 0], [0, max_protein], 'r--', label='0 ATP Produced')
    pylab.plot([1, 1], [0, max_protein], 'g--', label='1 ATP Produced')
    pylab.plot([2, 2], [0, max_protein], 'b--', label='2 ATP Produced')
    #pylab.yscale('log')
    pylab.xticks([])
    pylab.xlim((-1, 3))
    pylab.legend()

    # ODB (unitless) derived from the MTDF values; plotted against protein cost.
    odbs = np.tanh(np.array(mtdfs) / (2 * RT))
    pylab.figure()
    pylab.plot(protein_scores, odbs, 'b.')
    pylab.xlabel('Protein Cost')
    pylab.ylabel('ODB (unitless)')
    #for x,y,s in zip(protein_scores, length_scaled_cost, names):
    #    pylab.text(x, y, s, fontsize=10)

    pylab.show()
def find_path(self, experiment_name, net_reaction):
    """Find a pathway from the source to the target.

    Writes an HTML report (and per-solution LP dumps) under
    ``../res/pathologic/<experiment_name>``.  First solves a plain LP for the
    minimal total flux; if feasible, repeatedly solves a MILP, banning each
    found solution, until no solution remains or ``self.max_solutions`` is hit.

    Args:
        experiment_name: a name given to this experiment.
        net_reaction: a Reaction describing the net reaction for the desired paths
    """
    dirname = os.path.join('../res/pathologic/', experiment_name)
    logging.info('Writing output to: %s' % dirname)
    util._mkdir(dirname)

    # Link the experiment report from the main HTML page.
    self.html_writer.write('<a href="pathologic/' + experiment_name + '.html">' +
                           experiment_name + '</a><br>\n')
    exp_html = HtmlWriter('../res/pathologic/' + experiment_name + '.html')
    exp_html.write("<p><h1>%s</h1>\n" % experiment_name)

    exp_html.insert_toggle(div_id="__parameters__", start_here=True,
                           label='Show Parameters')

    f, S, compounds, reactions = self.kegg_pathologic.get_unique_cids_and_reactions()

    # Record all run parameters in a collapsible section of the report.
    exp_html.write('<h2>Conditions:</h2>\n')
    exp_html.write_ul(['Optimization method: %s' % self.thermodynamic_method,
                       'Concentration range: %g M < C < %g M' % (self.thermo.c_range[0], self.thermo.c_range[1]),
                       "Max &Delta;<sub>r</sub>G' = %.1f" % self.maximal_dG,
                       'pH = %g' % self.thermo.pH,
                       'I = %g' % self.thermo.I,
                       'T = %g' % self.thermo.T,
                       'Max no. reactions: %d' % (self.max_reactions or -1),
                       'Max no. solutions: %d' % (self.max_solutions or -1),
                       'Overall Reaction: %s' % net_reaction.to_hypertext(),
                       '%d reactions' % len(reactions),
                       '%d unique compounds' % len(compounds)])
    exp_html.div_end()
    exp_html.write('</br>\n')

    logging.debug("All compounds:")
    for i, compound in enumerate(compounds):
        logging.debug("%05d) C%05d = %s" % (i, compound.cid, compound.name))
    logging.debug("All reactions:")
    for i, reaction in enumerate(reactions):
        logging.debug("%05d) R%05d = %s" % (i, reaction.rid, str(reaction)))

    output_kegg_file = open(dirname + '/kegg_pathway.txt', 'w')
    exp_html.write('<a href="%s/kegg_pathway.txt">All solutions in KEGG format</a></br>\n'
                   % experiment_name)

    # Find a solution with a minimal total flux
    logging.info("Preparing LP solver for the minimal total flux problem")
    exp_html.write('<b>Minimum flux</b>')
    slip = Stoichiometric_LP("Pathologic")
    slip.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
    slip.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, 0))
    exp_html.write(' (<a href="%s/%03d_lp.txt">LP file</a>): ' % (experiment_name, 0))
    logging.info("Solving")
    if not slip.solve():
        # Infeasible LP means no pathway can satisfy the net reaction at all.
        exp_html.write("<b>There are no solutions!</b>")
        logging.warning("There are no solutions. Quitting!")
        return
    logging.info("writing solution")
    self.write_current_solution(exp_html, slip, experiment_name)

    # Switch to a MILP so previously found solutions can be banned one by one.
    logging.info("Preparing MILP solver")
    milp = Stoichiometric_LP("Pathologic")
    milp.solution_index = 1
    milp.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
    milp.add_milp_variables()
    if self.max_reactions is not None:
        milp.add_reaction_num_constraint(self.max_reactions)

    if self.thermodynamic_method == OptimizationMethods.LOCALIZED:
        milp.add_localized_dGf_constraints(self.thermo)
    else:
        milp.add_dGr_constraints(self.thermo,
                                 optimization=self.thermodynamic_method,
                                 maximal_dG=self.maximal_dG)

    index = 0
    while (self.max_solutions is None) or (index < self.max_solutions):
        index += 1
        # create the MILP problem to constrain the previous solutions not to reappear again.
        logging.info("Round %03d, solving using MILP" % (milp.solution_index))
        milp.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, milp.solution_index))
        exp_html.write('<b>Solution #%d</b> (<a href="%s/%03d_lp.txt">LP file</a>): '
                       % (index, experiment_name, index))
        if not milp.solve():
            exp_html.write("<b>No solution found</b>")
            logging.info("No more solutions. Quitting!")
            break
        logging.info("writing solution")
        self.write_current_solution(exp_html, milp, experiment_name,
                                    output_kegg_file)
        milp.ban_current_solution()

    output_kegg_file.close()
    exp_html.close()
def Main():
    """Compare promoter activities between two plate conditions ('Glucose' vs 'Gluconate').

    Reads plate data from the tecan MySQL database, computes reporter activities
    for both conditions, saves per-strain and summary figures, and renders an
    HTML report with per-strain log2 fold changes into ``options.output_dir``.
    """
    # NOTE(review): assert is stripped under `python -O`; these input checks
    # would be safer as explicit errors.
    options, _ = MakeOpts().parse_args(sys.argv)
    assert options.experiment_id
    assert options.first_plate_ids and options.second_plate_ids
    assert options.culture_label and options.reporter_label
    assert options.output_dir

    if not path.exists(options.output_dir):
        util._mkdir(options.output_dir)
    imgs_path = path.join(options.output_dir, 'imgs/')
    if not path.exists(imgs_path):
        util._mkdir(imgs_path)

    # Plate ids come as comma-separated option strings.
    first_plate_ids = map(str.strip, options.first_plate_ids.split(','))
    second_plate_ids = map(str.strip, options.second_plate_ids.split(','))

    labels_to_ignore = set()
    for l in options.labels_to_ignore.split(','):
        labels_to_ignore.add(l.strip())

    print 'Reading plates from experiment %s' % (options.experiment_id)
    # NOTE(review): credentials are hardcoded in source; consider moving them
    # to a config file or environment variables.
    db = MySQLDatabase(host='hldbv02', user='ronm', passwd='a1a1a1', db='tecan')

    filterer = promoter_activity.CultureReporterFilterer(
        options.min_culture_level, options.min_reporter_level)
    reporter_bg_sub = promoter_activity.ReporterBackgroundSubtracter(
        options.background_label)
    culture_shifter = promoter_activity.CultureShifter()
    activity_calc = promoter_activity.ReporterActivityCalculator(
        options.lower_culture_bound, options.upper_culture_bound,
        min_reporter_level=options.lower_reporter_bound,
        window_size=options.window_size)

    first_plate_runners = []
    second_plate_runners = []

    # One runner per condition; all plates of a condition share the runner.
    print 'Calculating promoter activities for first condition'
    runner1 = PlateActivityRunner(
        options.culture_label, options.reporter_label,
        filterer, culture_shifter, reporter_bg_sub, activity_calc)
    for plate_id in first_plate_ids:
        plate = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        runner1.AddPlate(plate)
    runner1.Run()
    first_plate_runners.append(runner1)

    print 'Calculating promoter activities for second condition'
    runner2 = PlateActivityRunner(
        options.culture_label, options.reporter_label,
        filterer, culture_shifter, reporter_bg_sub, activity_calc)
    for plate_id in second_plate_ids:
        plate = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        runner2.AddPlate(plate)
    runner2.Run()
    second_plate_runners.append(runner2)

    # Unify strain data.
    print 'Saving figures'
    strains_data = StrainConditionsData(options.background_label)
    for plate_data in first_plate_runners:
        strains_data.AddPlateData('Glucose', plate_data,
                                  ignore_labels=labels_to_ignore)
    for plate_data in second_plate_runners:
        strains_data.AddPlateData('Gluconate', plate_data,
                                  ignore_labels=labels_to_ignore)
    strains_data.MakeStrainFigures(imgs_path)
    summary_fignames = strains_data.MakeSummaryFigures(
        imgs_path, 'Glucose', 'Gluconate')
    plate_fignames = strains_data.MakePerPlateFigures(imgs_path)

    # log2 fold change of mean max activity, condition 2 relative to condition 1.
    labels = strains_data.GetStrainLabels()
    condition1_activities, condition1_errs = strains_data.GetMeanMaxActivities(
        labels, 'Glucose')
    condition2_activities, condition2_errs = strains_data.GetMeanMaxActivities(
        labels, 'Gluconate')
    log_1 = np.log2(condition1_activities)
    log_2 = np.log2(condition2_activities)
    diffs = log_2 - log_1
    # Report strains in descending order of fold change.
    sorted_diffs = list(np.argsort(diffs))
    sorted_diffs.reverse()

    diffs_data = []
    for i in sorted_diffs:
        logfold = diffs[i]
        fold = np.exp2(logfold)
        if np.isnan(logfold):
            # Missing activity in either condition: report empty cells.
            logfold = None
            fold = None
        diffs_data.append({'label': labels[i],
                           'fold_change': fold,
                           'log_fold': logfold})

    # Render the template.
    print 'Writing HTML output'
    template_data = {
        'experiment_id': options.experiment_id,
        'first_plate_ids': first_plate_ids,
        'second_plate_ids': second_plate_ids,
        'culture_label': options.culture_label,
        'reporter_label': options.reporter_label,
        'first_plates': first_plate_runners,
        'second_plates': second_plate_runners,
        'strains_data': strains_data,
        'diffs_data': diffs_data,
        'summary_figure_fnames': summary_fignames,
        'per_plate_figure_fnames': plate_fignames}
    template_fname = path.join(options.output_dir, 'results.html')
    templates.render_to_file('compare_promoter_activities.html',
                             template_data, template_fname)

    return
help='Dump all training data to text files') parser.add_argument('-e', '--test', action='store_true', default=False, help='A flag for running the TEST') parser.add_argument('-l', '--leave_one_out', action='store_true', default=False, help='A flag for running the Leave One Out analysis') return parser if __name__ == "__main__": logger = logging.getLogger('') logger.setLevel(logging.DEBUG) parser = MakeOpts() args = parser.parse_args() util._mkdir('../res') db = SqliteDatabase('../res/gibbs.sqlite', 'w') html_writer = HtmlWriter('../res/ugc.html') ugc = UnifiedGroupContribution(db, html_writer, anchor_all=args.anchor_all_formations) ugc.LoadGroups(FromDatabase=(not args.recalc_groups)) ugc.LoadObservations(FromDatabase=(not args.recalc_observations)) ugc.LoadGroupVectors(FromDatabase=(not args.recalc_groupvectors)) ugc.LoadData(FromDatabase=(not args.recalc_matrices)) if args.dump: ugc.SaveDataToMatfile() sys.exit(0) if args.train: ugc.EstimateKeggCids()
continue if self.override_pMg or self.override_I or self.override_T: nist_row_copy = nist_row_data.Clone() if self.override_pMg: nist_row_copy.pMg = self.override_pMg if self.override_I: nist_row_copy.I = self.override_I if self.override_T: nist_row_copy.T = self.override_T rows.append(nist_row_copy) else: rows.append(nist_row_data) return rows def GetUniqueReactionSet(self): return set([row.reaction for row in self.data]) if __name__ == '__main__': #logging.getLogger('').setLevel(logging.DEBUG) _mkdir("../res/nist") html_writer = HtmlWriter("../res/nist/statistics.html") nist = Nist() fp = open('../res/nist_kegg_ids.txt', 'w') for cid in nist.GetAllCids(): fp.write("C%05d\n" % cid) fp.close() nist.AnalyzeStats(html_writer) nist.AnalyzeConnectivity(html_writer) html_writer.close()
value_array = hstack([value_array, value]) if (len(time_array) > 0): last_t = time_array.max() return (time_array, value_array) name_list = ["2010-12-16 rubisco prk"] vp_vec = [] for name in name_list: vp = TecanParser() vp.parse_excel("../data/tecan/%s.xls" % (name)) vp_vec.append(vp) util._mkdir('../res/tecan') pp = PdfPages('../res/tecan/%s.pdf' % name_list[0]) #rcParams['text.usetex'] = True rcParams['legend.fontsize'] = 12 #rcParams['font.family'] = 'sans-serif' #rcParams['font.size'] = 8 #rcParams['lines.linewidth'] = 0.3 #rcParams['lines.markersize'] = 2 #rcParams['figure.figsize'] = [5, 10] #rcParams['figure.subplot.hspace'] = 0.3 #figure() plot_growth_rate = True fit_window_size = 1.5 # hours fit_start_threshold = 0.01
args, _ = MakeOpts(estimators).parse_args(sys.argv) input_filename = os.path.abspath(args.input_filename) output_filename = os.path.abspath(args.output_filename) if not os.path.exists(input_filename): logging.fatal('Input filename %s doesn\'t exist' % input_filename) print 'Will read pathway definitions from %s' % input_filename print 'Will write output to %s' % output_filename db_loc = args.db_filename print 'Reading from DB %s' % db_loc db = SqliteDatabase(db_loc) thermo = estimators[args.thermodynamics_source] print "Using the thermodynamic estimations of: " + thermo.name kegg = Kegg.getInstance() thermo.bounds = deepcopy(kegg.cid2bounds) dirname = os.path.dirname(output_filename) if not os.path.exists(dirname): print 'Making output directory %s' % dirname _mkdir(dirname) print 'Executing thermodynamic pathway analysis' html_writer = HtmlWriter(output_filename) thermo_analyze = ThermodynamicAnalysis(db, html_writer, thermodynamics=thermo) thermo_analyze.analyze_pathway(input_filename)
concatenates the data into one series and returns it as if it was from one source. """ time_array = array([]) value_array = array([]) last_t = 0 for vp in vp_vec: (time, value) = vp.get_data(index, row, col) time_array = hstack([time_array, time + last_t]) value_array = hstack([value_array, value]) if (len(time_array) > 0): last_t = time_array.max() return (time_array, value_array) _mkdir('../res/victor') vp_vec = [] for name in ["peter_glugly"]: vp = VictorParser() vp.parse_excel("../data/victor/%s.xls" % (name)) vp_vec.append(vp) pp = PdfPages('../res/victor/2011-02-22_peter_glugly.pdf') #rcParams['text.usetex'] = True rcParams['legend.fontsize'] = 12 #rcParams['font.family'] = 'sans-serif' #rcParams['font.size'] = 8 #rcParams['lines.linewidth'] = 0.3 #rcParams['lines.markersize'] = 2
csv_reader = csv.reader(open(fname, 'r')) csv_reader.next() R_calc = [] for row in csv_reader: (Name, Start_Position, Expression_Level, Kinetic_Score, Sequence) = row R_calc.append(float(Expression_Level)) return array(R_calc) ################################################################################ ################################################################################ ################################################################################ ################################################################################ #(rbs, promoters, data_facs) = read_data('data_FACS.csv') #(rbs, promoters, data_single) = read_data('data_flu-OD_single.csv') #(rbs, promoters, data_multi) = read_data('data_flu-OD_multi.csv') util._mkdir('../res/pro_rbs') rbs_score_fname = '../res/pro_rbs/rbs_2010-08-18_17-50-19_133.csv' if (os.path.exists(rbs_score_fname)): R_calc = read_rbs_calc_results(rbs_score_fname) else: sys.stderr.write("The RBS calculator score file could not be found, you must " + \ "generate it using the 'rbs-calc' website and put it here: %s" % rbs_score_fname) R_calc = None html = HtmlWriter('../res/pro_rbs/fit.html') (b, R, P) = fit_params('data_flu-OD_multi', html, R_calc)
def WriteUniqueReactionReport(self, unique_sparse_reactions,
                              unique_nist_row_representatives,
                              unique_data_mat, full_data_mat,
                              cid2nH_nMg=None):
    """Report per-unique-reaction statistics to HTML and CSV.

    Plots std(dG0) vs std(dG'0) for the unique reactions, then writes one row
    per reaction (estimates, standard deviations, observation counts, and a
    NAD(P)/NAD(P)H redox flag) to ``self.html_writer`` and to
    ``../res/nist_regression_unique.csv``.

    Args:
        unique_sparse_reactions: list of unique Reaction objects.
        unique_nist_row_representatives: one NIST row per unique reaction
            (source of ref_id / EC number).
        unique_data_mat: matrix indexed [statistic, reaction]; rows 0-4 are
            E(dG0), E(dG'0), std(dG0), std(dG'0), #observations.
        full_data_mat: matrix whose rows 2-4 hold the dG0/dG'0 values used
            for the overall std computation.
        cid2nH_nMg: optional pseudoisomer map passed to the reverse transform.
    """
    # Overall spread of the chemical vs. transformed reaction energies.
    total_std = full_data_mat[2:4, :].std(1)

    fig = plt.figure()
    plt.plot(unique_data_mat[2, :].T, unique_data_mat[3, :].T, '.')
    plt.xlabel("$\sigma(\Delta_r G^\circ)$")
    plt.ylabel("$\sigma(\Delta_r G^{\'\circ})$")
    plt.title('$\sigma_{total}(\Delta_r G^\circ) = %.1f$ kJ/mol, '
              '$\sigma_{total}(\Delta_r G^{\'\circ}) = %.1f$ kJ/mol' %
              (total_std[0, 0], total_std[1, 0]))
    self.html_writer.embed_matplotlib_figure(fig, width=640, height=480)
    logging.info('std(dG0_r) = %.1f' % total_std[0, 0])
    logging.info('std(dG\'0_r) = %.1f' % total_std[1, 0])

    rowdicts = []
    for i, reaction in enumerate(unique_sparse_reactions):
        logging.debug('Analyzing unique reaction: ' +
                      str(unique_sparse_reactions[i]))
        # Reverse-transform correction from dG'0 back to dG0 at standard
        # aqueous conditions (pH 7, I 0.1, pMg 10, T 298.15K).
        ddG0 = self.GetDissociation().ReverseTransformReaction(
            reaction, pH=7, I=0.1, pMg=10, T=298.15, cid2nH_nMg=cid2nH_nMg)

        d = {}
        d["_reaction"] = reaction.to_hypertext(show_cids=False)
        d["reaction"] = reaction.FullReactionString(show_cids=False)  # no hypertext for the CSV output
        d["Reference ID"] = unique_nist_row_representatives[i].ref_id
        d["EC"] = unique_nist_row_representatives[i].ec
        d["E(" + symbol_dr_G0 + ")"] = unique_data_mat[0, i]
        d["E(" + symbol_dr_G0_prime + ")"] = unique_data_mat[1, i]
        d["E(" + symbol_dr_G0 + ")'"] = unique_data_mat[0, i] + ddG0
        d["std(" + symbol_dr_G0 + ")"] = unique_data_mat[2, i]
        d["std(" + symbol_dr_G0_prime + ")"] = unique_data_mat[3, i]
        d["diff"] = unique_data_mat[2, i] - unique_data_mat[3, i]
        d["#observations"] = "%d" % unique_data_mat[4, i]

        # Classify NAD(P)/NAD(P)H redox direction; per the variable names the
        # sparse keys are KEGG CIDs (3=NAD, 4=NADH, 5=NADPH, 6=NADP).
        flag = 0
        c_nad = reaction.sparse.get(3, 0)
        c_nadh = reaction.sparse.get(4, 0)
        c_nadp = reaction.sparse.get(6, 0)
        c_nadph = reaction.sparse.get(5, 0)
        if c_nad == 1 and c_nadh == -1:
            flag = 1
        elif c_nad == -1 and c_nadh == 1:
            flag = -1
        elif c_nadp == 1 and c_nadph == -1:
            flag = 2
        elif c_nadp == -1 and c_nadph == 1:
            flag = -2
        d["Arren Flag"] = flag

        # Reactions whose chemical std exceeds the transformed std by more than
        # the threshold get a dedicated per-reaction analysis page.
        if d["diff"] > self.std_diff_threshold:
            _mkdir('../res/prc_reactions')
            link = "prc_reactions/%s.html" % reaction.name
            d["analysis"] = '<a href="%s">link</a>' % link
            reaction_html_writer = HtmlWriter(os.path.join('../res', link))
            self.AnalyzeSingleReaction(reaction,
                                       html_writer=reaction_html_writer)
        rowdicts.append(d)

    result_headers = ["E(" + symbol_dr_G0 + ")",
                      "E(" + symbol_dr_G0_prime + ")",
                      "E(" + symbol_dr_G0 + ")'",
                      "std(" + symbol_dr_G0 + ")",
                      "std(" + symbol_dr_G0_prime + ")"]
    # Largest std discrepancies first.
    rowdicts.sort(key=lambda x: x["diff"], reverse=True)
    self.html_writer.write_table(
        rowdicts,
        ["reaction", "Reference ID"] + result_headers +
        ["EC", "#observations", "analysis"],
        decimal=1)

    csv_writer = csv.DictWriter(
        open('../res/nist_regression_unique.csv', 'w'),
        ["_reaction", "Reference ID", "EC", "#observations"] +
        result_headers + ['Arren Flag'],
        extrasaction='ignore')
    csv_writer.writeheader()
    csv_writer.writerows(rowdicts)
""" well = (row, col) time_list = [] value_list = [] for time, value in sorted(MES[reading_label][plate_id][well].iteritems()): time_list.append(time) value_list.append(value) time_array = array(time_list) if len(time_list): time_array = (time_array - time_list[0]) / 3600 return time_array, array(value_list) MES = CollectData("../data/tecan/PL6-96.tar.gz", number_of_plates=4) _mkdir('../res/tecan') pp = PdfPages('../res/tecan/2011-02-06_PL6-96.pdf') #rcParams['text.usetex'] = True rcParams['legend.fontsize'] = 12 #rcParams['font.family'] = 'sans-serif' #rcParams['font.size'] = 8 #rcParams['lines.linewidth'] = 0.3 #rcParams['lines.markersize'] = 2 #rcParams['figure.figsize'] = [5, 10] #rcParams['figure.subplot.hspace'] = 0.3 #figure() plot_growth_rate = False fit_window_size = 1.5 # hours fit_start_threshold = 0.01
(time, value) = vp.get_data(index, row, col) time_array = hstack([time_array, time + last_t]) value_array = hstack([value_array, value]) if len(time_array) > 0: last_t = time_array.max() return (time_array, value_array) name = "2010-10-07_K12-acetate" vp_vec = [] vp = VictorParser() vp.parse_excel("../data/victor/%s.xls" % (name)) vp_vec.append(vp) util._mkdir("../res/victor") pp = PdfPages("../res/victor/%s.pdf" % name) # rcParams['text.usetex'] = True rcParams["legend.fontsize"] = 12 # rcParams['font.family'] = 'sans-serif' # rcParams['font.size'] = 8 # rcParams['lines.linewidth'] = 0.3 # rcParams['lines.markersize'] = 2 # rcParams['figure.figsize'] = [5, 10] # rcParams['figure.subplot.hspace'] = 0.3 # figure() fit_window_size = 5 # hours fit_start_threshold = 0.002
# NOTE(review): `Main()` of a protein-cost analysis script, with all newlines
# collapsed and the function TRUNCATED at the end — it is cut inside the
# pathway loop right after `results.append(result_dict)`, before any final
# report writing (the analogous MTDF `Main` earlier in this file renders
# results to an HTML template afterwards). Kept byte-identical.
# Visible flow: enable numpy FP-error raising; parse args; load thermodynamic
# estimators and wrap the chosen one; build kinetic data (uniform defaults
# kcat=200, km=2e-4, mass=40, overridden by an "Arren" kinetics file when
# given — the triple-quoted block is dead code kept as a string expression);
# create output dirs; then for each non-skipped pathway: build the
# stoichiometric model and bounds, construct a ProteinOptimizer, and iterate
# feasible initial concentration vectors, keeping the best (lowest opt_val)
# successful result.
def Main(): np.seterr('raise') parser = MakeOpts() args = parser.parse_args() estimators = thermodynamic_estimators.LoadAllEstimators() input_filename = path.abspath(args.input_filename) if not path.exists(input_filename): logging.fatal('Input filename %s doesn\'t exist' % input_filename) print 'Will read pathway definitions from %s' % input_filename # Make thermodynamic and kinetic data containers thermo = estimators[args.thermodynamics_source] print "Using the thermodynamic estimations of: " + thermo.name thermo_data = thermodynamic_data.WrapperThermoData(thermo) # Fetch kinetic data. kin_data = kinetic_data.UniformKineticData(kcat=200, km=2e-4, mass=40) if args.kinetics_filename is not None: print 'Parsing kinetic data from', args.kinetics_filename kin_data = kinetic_data.KineticDataWithDefault.FromArrenFile( args.kinetics_filename) """ kin_data = kinetic_data.KineticDataWithDefault.FromFiles( '../data/enzymatics/glycolytic_pathway_enzymes_kcat.csv', '../data/enzymatics/glycolytic_pathway_enzymes_km.csv') kin_data.SetDefaultKcat(100) kin_data.SetDefaultKM(1e-4) kin_data.SetDefaultMass(35) """ # Create a kegg instance kegg_instance = kegg.Kegg.getInstance() # Create output directories out_dir = args.output_dir if not path.exists(out_dir): util._mkdir(out_dir) pathgraph_dir = path.join(out_dir, 'pathway_graphs/') util._mkdir(pathgraph_dir) print 'Executing Protein Cost analysis' pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename) results = [] for pathway_data in pathway_iterator: if pathway_data.skip: print 'Skipping pathway', pathway_data.name continue print 'Analyzing pathway', pathway_data.name model = pathway_data.GetStoichiometricModel(kegg_instance) model_bounds = pathway_data.GetBounds() opt = protein_optimizer.ProteinOptimizer(model, thermo_data, kin_data) it = feasible_concentrations_iterator.FeasibleConcentrationsIterator( model, thermo_data, model_bounds) # Now solve with the default initial conditions. 
# NOTE(review): per-pathway optimization body (still inside the `for` loop
# above). Tracks the best successful result across all feasible starting
# points; computes a mean/confidence-interval over the near-optima when
# possible (Python 2 `except Exception, e` syntax, and `e` is unused); on
# total failure re-runs once with default initial conditions just to record
# the failing result. The enclosing function continues past this chunk.
success = None result = None optima = [] for i, x0 in enumerate(it): result = opt.FindOptimum(model_bounds, initial_concentrations=x0) status = result.status print '\t%s optimization %d' % (pathway_data.name, i) if status.failure: print '\tFailed to optimize', pathway_data.name print '\t%s' % status elif status.infeasible: print '\t', pathway_data.name, 'is infeasible!' print '\t%s' % status else: print '\t*Optimization successful' optima.append(result.opt_val) if not success: success = result elif result.opt_val < success.opt_val: success = result mean, error = None, None if optima: try: mean, error = stats.MeanWithConfidenceInterval(optima) except Exception, e: mean, error = None, None print optima result_dict = {'result': None, 'num_optima': len(optima), 'mean_opt': mean, 'error': error} if success is not None: success.WriteAllGraphs(pathgraph_dir) result_dict['result'] = success cost = success.opt_val if cost is not None: print '\t*Protein Cost for', pathway_data.name, '= %.2g' % cost if optima: print 'Found', len(optima), 'near-optima for', pathway_data.name optima = np.array(optima) mean_opt = np.mean(optima) mean_diff = np.mean(np.abs(optima - mean_opt)) print 'Mean optimum', mean_opt print 'Mean diff from mean', mean_diff print 'Percent diff %s%%' % (100*mean_diff / mean_opt) print 'StdDev opt', np.std(optima) else: # Use default conditions to show the failure res = opt.FindOptimum(model_bounds) result_dict['result'] = res results.append(result_dict)
# NOTE(review): truncated fragment with collapsed newlines — near-duplicate of
# an earlier Victor-parser fragment in this chunk (only quoting/spacing
# differ); the enclosing function header is cut off before this line, so the
# code is kept byte-identical. Helper tail: concatenates successive runs by
# offsetting times with the running maximum (`time + last_t`) and returns
# (time_array, value_array). Then module-level script setup: parse one Victor
# Excel export, create ../res/victor, open a PdfPages report, set matplotlib
# rcParams (most commented out), and define fit parameters.
(time, value) = vp.get_data(index, row, col) time_array = hstack([time_array, time + last_t]) value_array = hstack([value_array, value]) if (len(time_array) > 0): last_t = time_array.max() return (time_array, value_array) name = "2010-10-07_K12-acetate" vp_vec = [] vp = VictorParser() vp.parse_excel("../data/victor/%s.xls" % (name)) vp_vec.append(vp) util._mkdir('../res/victor') pp = PdfPages('../res/victor/%s.pdf' % name) #rcParams['text.usetex'] = True rcParams['legend.fontsize'] = 12 #rcParams['font.family'] = 'sans-serif' #rcParams['font.size'] = 8 #rcParams['lines.linewidth'] = 0.3 #rcParams['lines.markersize'] = 2 #rcParams['figure.figsize'] = [5, 10] #rcParams['figure.subplot.hspace'] = 0.3 #figure() fit_window_size = 5 # hours fit_start_threshold = 0.002
# NOTE(review): truncated fragment with collapsed newlines — near-duplicate of
# an earlier Tecan fragment in this chunk; the `def` line and the start of the
# docstring are cut off before this line, so the code is kept byte-identical.
# Helper tail: gathers sorted (time, value) pairs for one well from
# MES[reading_label][plate_id][well] (Python 2 `.iteritems()`), rebases times
# to hours from the first reading, returns (time_array, value_array). Then
# module-level script setup: load Tecan plate data from a tar.gz, create
# ../res/tecan, open a PdfPages report, set matplotlib rcParams (most
# commented out), and define fit parameters.
concatenates the data into one series and returns it as if it was from one source. """ well = (row, col) time_list = [] value_list = [] for time, value in sorted(MES[reading_label][plate_id][well].iteritems()): time_list.append(time) value_list.append(value) time_array = array(time_list) if len(time_list): time_array = (time_array - time_list[0])/3600 return time_array, array(value_list) MES = CollectData("../data/tecan/PL6-96.tar.gz", number_of_plates=4) _mkdir('../res/tecan') pp = PdfPages('../res/tecan/2011-02-06_PL6-96.pdf') #rcParams['text.usetex'] = True rcParams['legend.fontsize'] = 12 #rcParams['font.family'] = 'sans-serif' #rcParams['font.size'] = 8 #rcParams['lines.linewidth'] = 0.3 #rcParams['lines.markersize'] = 2 #rcParams['figure.figsize'] = [5, 10] #rcParams['figure.subplot.hspace'] = 0.3 #figure() plot_growth_rate = False fit_window_size = 1.5 # hours fit_start_threshold = 0.01