Example #1
def Main():
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()
    
    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)
        
    print 'Will read pathway definitions from %s' % input_filename
    
    thermo = estimators[options.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)
    
    # Create a kegg instance
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)
    
    print 'Executing MTDF analysis'
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    results = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print 'Skipping pathway', pathway_data.name
            continue
        
        print 'Analyzing pathway', pathway_data.name
                
        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()
        
        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)
        result = mtdf_opt.FindMTDF(model_bounds)
        
        print 'Optimization status', result.status
        
        result.WriteAllGraphs(pathgraph_dir)
        results.append(result)
        
        mtdf = result.opt_val
        print '\tMTDF for', pathway_data.name, '= %.2g' % mtdf
    
    
    output_filename = path.join(out_dir, 'results.html')
    print 'Writing output to', output_filename
    template_data = {'analysis_type': 'MTDF',
                     'results':results}
    templates.render_to_file('pathway_optimization_results.html',
                             template_data,
                             output_filename)
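Every example on this page funnels through util._mkdir from the milo-lab toolbox. The toolbox implementation itself is not shown here; a minimal stand-in, assuming the helper simply creates the directory plus any missing parents and tolerates one that already exists, would be:

import os

def _mkdir(newdir):
    # Hypothetical sketch of toolbox.util._mkdir: create newdir and any
    # missing parent directories; do nothing if it already exists.
    if not os.path.isdir(newdir):
        os.makedirs(newdir)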
Example #2
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)
    
    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                  pH=None, section_prefix="test", balance_water=True,
                  override_bounds={})
    
    csv_output = csv.writer(open('%s.csv' % output_prefix, 'w'))
    csv_output.writerow(['pH'] + pathway_names)
    
    util._mkdir(output_prefix)
    shadow_csvs = {}
    for d in data:
        path = '%s/%s.csv' % (output_prefix, d['entry'])
        shadow_csvs[d['entry']] = csv.writer(open(path, 'w'))
        shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

    pH_vec = ParseConcentrationRange(conc_range)
    obd_mat = []
    for pH in pH_vec.flat:
        logging.info("pH = %.1f" % (pH))
        data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                      pH=pH, section_prefix="", balance_water=True,
                      override_bounds={})
        obds = [d['OBD'] for d in data]
        obd_mat.append(obds)
        csv_output.writerow([data[0]['pH']] + obds)
        
        for d in data:
            if type(d['reaction prices']) != types.FloatType:
                prices = list(d['reaction prices'].flat)
                shadow_csvs[d['entry']].writerow([pH] + prices)
            
    obd_mat = np.matrix(obd_mat) # rows are pathways and columns are concentrations

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name], 
                 figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    
    html_writer.close()
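This example leans on ParseConcentrationRange to turn the conc_range string into the vector of pH values it sweeps. The real parser is not shown on this page; a plausible stand-in, assuming a colon-separated 'start:step:end' format (the format itself is an assumption), could be:

import numpy as np

def ParseConcentrationRange(conc_range):
    # Hypothetical parser: "5.0:0.5:9.0" -> pH values from 5.0 to 9.0
    # in steps of 0.5. The string format is assumed for illustration.
    start, step, end = [float(x) for x in conc_range.split(':')]
    return np.arange(start, end + step / 2.0, step)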
Example #3
def Main():
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()

    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)

    print 'Will read pathway definitions from %s' % input_filename

    thermo = estimators[options.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)

    # Create a kegg instance
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)

    print 'Executing MTDF analysis'
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    results = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print 'Skipping pathway', pathway_data.name
            continue

        print 'Analyzing pathway', pathway_data.name

        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()

        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)
        result = mtdf_opt.FindMTDF(model_bounds)

        print 'Optimization status', result.status

        result.WriteAllGraphs(pathgraph_dir)
        results.append(result)

        mtdf = result.opt_val
        print '\tMTDF for', pathway_data.name, '= %.2g' % mtdf

    output_filename = path.join(out_dir, 'results.html')
    print 'Writing output to', output_filename
    template_data = {'analysis_type': 'MTDF', 'results': results}
    templates.render_to_file('pathway_optimization_results.html',
                             template_data, output_filename)
Example #4
    def PlotHistogram(self, filename):
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            util._mkdir(dirname)

        fig = pylab.figure()

        c = list(self.counts.values())
        pylab.title('Count Per Group Histogram')
        pylab.xlabel('Count')
        pylab.ylabel('Number of Groups')
        pylab.hist(c, pylab.arange(0, max(c), 1))
        fig.savefig(filename, format='png')
Example #5
    def PlotHistogram(self, filename):
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            util._mkdir(dirname)

        fig = pylab.figure()

        c = list(self.counts.values())
        pylab.title('Count Per Group Histogram')
        pylab.xlabel('Count')
        pylab.ylabel('Number of Groups')
        pylab.hist(c, pylab.arange(0, max(c), 1))
        fig.savefig(filename, format='png')
Example #6
def render_to_file(template_name, data, output_filename):
    """Renders a template to a given file.
    
    Will create the parent directory of the output file if not present.
    
    Args:
        template_name: the name of a template file in pygibbs/templates.
        data: a dictionary of template data.
        output_filename: the name/path of the file to write to.
    """
    dir = os.path.abspath(os.path.dirname(output_filename))
    if not os.path.exists(dir):
        util._mkdir(dir)

    open(output_filename, 'w').write(render_to_string(template_name, data))
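The final line leaves closing the file to CPython's reference counting. A slightly more defensive variant (a sketch, not the repository's code) would replace it with an explicit context manager:

    with open(output_filename, 'w') as f:
        f.write(render_to_string(template_name, data))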
Example #7
    def __init__(self, filename, force_path_creation=True, flush_always=True):
        from toolbox.util import _mkdir
        BaseHtmlWriter.__init__(self)
        self.filename = filename
        self.filepath = os.path.dirname(filename)
        self.flush_always = flush_always
        if not os.path.exists(self.filepath):
            if force_path_creation and not os.path.exists(self.filepath):
                _mkdir(self.filepath)
            else:
                raise Exception("cannot write to HTML file %s since the directory doesn't exist" % filename)

        self.file = open(self.filename, "w")
        self.write_header()
        self.write_js(self.filepath)
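Note that the inner not os.path.exists(self.filepath) test is redundant: the outer if has already established that the directory is missing. An equivalent, tighter version of that block (a sketch) would be:

        if not os.path.exists(self.filepath):
            if force_path_creation:
                _mkdir(self.filepath)
            else:
                raise Exception("cannot write to HTML file %s since the "
                                "directory doesn't exist" % filename)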
Example #8
def main():
    pH, pMg, I, T = (7.0, 3, 0.1, 298.15)

    db = SqliteDatabase('../res/gibbs.sqlite')
    kegg = Kegg.getInstance()
    alberty = PsuedoisomerTableThermodynamics(
        '../data/thermodynamics/alberty_pseudoisomers.csv')

    cids = alberty.get_all_cids()
    dG0_f = pylab.zeros((len(cids), 1))

    for i, cid in enumerate(cids):
        dG0_f[i, 0] = alberty.cid2dG0_tag(cid, pH=pH, pMg=pMg, I=I, T=T)

    S = pylab.zeros((0, len(cids)))
    rids = []
    ec_numbers = []

    for rid in kegg.get_all_rids():
        sparse = kegg.rid2sparse_reaction(rid)
        if not set(cids).issuperset(sparse.keys()):
            continue

        rids.append(rid)
        ec_numbers.append(kegg.rid2ec_list(rid))
        S_row = pylab.zeros((1, len(cids)))
        for cid, coeff in sparse.iteritems():
            S_row[0, cids.index(cid)] = coeff
        S = pylab.vstack([S, S_row])

    dG0_r = pylab.dot(S, dG0_f)

    util._mkdir('../res/arren')
    s_writer = csv.writer(open('../res/arren/stoichiometry.csv', 'w'))
    r_writer = csv.writer(open('../res/arren/reactions.csv', 'w'))
    e_writer = csv.writer(open('../res/arren/ec_numbers.csv', 'w'))
    r_writer.writerow(['rid', 'dG0_r'])
    e_writer.writerow(['rid', 'ec0', 'ec1', 'ec2', 'ec3'])
    for i in xrange(S.shape[0]):
        s_writer.writerow(["%d" % x for x in S[i, :]])
        for ec in ec_numbers[i].split(';'):
            e_writer.writerow(['%d' % rids[i]] + ec.split('.'))
        r_writer.writerow(["%d" % rids[i], '%.1f' % dG0_r[i, 0]])

    c_writer = csv.writer(open('../res/arren/compounds.csv', 'w'))
    c_writer.writerow(['cid', 'dG0_f'])
    for j in xrange(len(cids)):
        c_writer.writerow(['%d' % cids[j], '%.1f' % dG0_f[j, 0]])
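One portability note on the csv.writer calls here: this is Python 2 code (print statements, iteritems, xrange), and the Python 2 csv documentation recommends opening output files in binary mode so that no spurious blank rows appear on Windows, e.g.:

    r_writer = csv.writer(open('../res/arren/reactions.csv', 'wb'))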
Example #9
def render_to_file(template_name, data, output_filename):
    """Renders a template to a given file.
    
    Will create the parent directory of the output file if not present.
    
    Args:
        template_name: the name of a template file in pygibbs/templates.
        data: a dictionary of template data.
        output_filename: the name/path of the file to write to.
    """
    dir = os.path.abspath(os.path.dirname(output_filename))
    if not os.path.exists(dir):
        util._mkdir(dir)
        
    open(output_filename, 'w').write(render_to_string(
        template_name, data))
Example #10
def main():
    pH, pMg, I, T = (7.0, 3, 0.1, 298.15)
    
    db = SqliteDatabase('../res/gibbs.sqlite')
    kegg = Kegg.getInstance()
    alberty = PsuedoisomerTableThermodynamics('../data/thermodynamics/alberty_pseudoisomers.csv')
    
    cids = alberty.get_all_cids()
    dG0_f = pylab.zeros((len(cids), 1))

    for i, cid in enumerate(cids):
        dG0_f[i, 0] = alberty.cid2dG0_tag(cid, pH=pH, pMg=pMg, I=I, T=T)
    
    S = pylab.zeros((0, len(cids)))
    rids = []
    ec_numbers = []
    
    for rid in kegg.get_all_rids():
        sparse = kegg.rid2sparse_reaction(rid)
        if not set(cids).issuperset(sparse.keys()):
            continue
        
        rids.append(rid)
        ec_numbers.append(kegg.rid2ec_list(rid))
        S_row = pylab.zeros((1, len(cids)))
        for cid, coeff in sparse.iteritems():
            S_row[0, cids.index(cid)] = coeff
        S = pylab.vstack([S, S_row])
    
    dG0_r = pylab.dot(S, dG0_f)

    util._mkdir('../res/arren')
    s_writer = csv.writer(open('../res/arren/stoichiometry.csv', 'w'))
    r_writer = csv.writer(open('../res/arren/reactions.csv', 'w'))
    e_writer = csv.writer(open('../res/arren/ec_numbers.csv', 'w'))
    r_writer.writerow(['rid', 'dG0_r'])
    e_writer.writerow(['rid', 'ec0', 'ec1', 'ec2', 'ec3'])
    for i in xrange(S.shape[0]):
        s_writer.writerow(["%d" % x for x in S[i,:]])
        for ec in ec_numbers[i].split(';'):
            e_writer.writerow(['%d' % rids[i]] + ec.split('.'))
        r_writer.writerow(["%d" % rids[i], '%.1f' % dG0_r[i,0]])
    
    c_writer = csv.writer(open('../res/arren/compounds.csv', 'w'))
    c_writer.writerow(['cid', 'dG0_f'])
    for j in xrange(len(cids)):
        c_writer.writerow(['%d' % cids[j], '%.1f' % dG0_f[j, 0]])
Example #11
    def __init__(self, filename, force_path_creation=True, flush_always=True):
        from toolbox.util import _mkdir
        BaseHtmlWriter.__init__(self)
        self.filename = filename
        self.filepath = os.path.dirname(filename)
        self.flush_always = flush_always
        if (not os.path.exists(self.filepath)):
            if (force_path_creation and not os.path.exists(self.filepath)):
                _mkdir(self.filepath)
            else:
                raise Exception(
                    "cannot write to HTML file %s since the directory doesn't exist"
                    % filename)

        self.file = open(self.filename, "w")
        self.write_header()
        self.write_js(self.filepath)
Example #12
    def __init__(self, org='ecoli', db=None):
        self.db = db
        self.org = org
        self.base_dir = '../MetaCyc/' + org
        util._mkdir(self.base_dir)
 
        self.TAR_URL = 'http://brg.ai.sri.com/ecocyc/dist/flatfiles-52983746/' + org + '.tar.gz'
        self.TAR_FILE = self.base_dir + '/' + org + '.tar.gz'
       
        self.COMPOUND_FILE = self.base_dir + '/14.6/data/compounds.dat'
        self.REACTION_FILE = self.base_dir + '/14.6/data/reactions.dat'
        self.PATHWAY_FILE = self.base_dir + '/14.6/data/pathways.dat'
        self.REGULATION_FILE = self.base_dir + '/14.6/data/regulation.dat'
        
        if not self.db:
            self.FromFiles()
        elif not self.db.DoesTableExist('metacyc_' + org + '_compound'):
            self.FromFiles()
            self.ToDatabase()
        else:
            self.FromDatabase()
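The constructor builds every path by string concatenation. An equivalent construction with os.path.join (a sketch; the behavior is the same on POSIX) keeps the separators out of the literals:

import os

org = 'ecoli'  # as in the constructor's default
base_dir = os.path.join('..', 'MetaCyc', org)
tar_file = os.path.join(base_dir, org + '.tar.gz')
compound_file = os.path.join(base_dir, '14.6', 'data', 'compounds.dat')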
Example #13
    def __init__(self, db, public_db, html_writer,
                 thermo=None,
                 thermodynamic_method='global',
                 max_reactions=None,
                 max_solutions=100,
                 maximal_dG=0.0,
                 update_file=None,
                 output_kegg_file=None):
        """Initialize the Pathologic object.

        Args:
            db: the DB to read group contribution data from.
            html_writer: an HtmlWriter for writing output.
            thermodynamic_method: the analysis method.
                Options are: "none", "pCr", "MTDF", "global" or "localized"
            max_reactions: the maximum number of reactions to find in a solution (use None for unlimited)
            max_solutions: the maximum number of solutions to find (use None for unlimited)
            maximal_dG: the maximum dG allowed.
                Use this to change the thermodynamic constraints to have a different
                MTDF. When set to 0, it is the usual feasibility measure.
            update_file: the file to read for KEGG updates.
        """
        assert thermodynamic_method in OptimizationMethods.ALLOWED_METHODS

        util._mkdir('../res/pathologic')

        self.html_writer = html_writer
        self.thermodynamic_method = thermodynamic_method
        self.max_reactions = max_reactions
        self.max_solutions = max_solutions
        self.maximal_dG = maximal_dG

        self.db_public = public_db
        self.db = db
        self.thermo = thermo

        self.kegg_pathologic = KeggPathologic()
        if update_file is not None:
            self.kegg_pathologic.update_database(update_file, self.html_writer)
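Example #14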
def Main():
    options, _ = MakeOpts().parse_args(sys.argv)
    assert options.experiment_id
    assert options.first_plate_ids and options.second_plate_ids
    assert options.culture_label and options.reporter_label
    assert options.output_dir
    
    if not path.exists(options.output_dir):
        util._mkdir(options.output_dir)
    
    imgs_path = path.join(options.output_dir, 'imgs/')
    if not path.exists(imgs_path):
        util._mkdir(imgs_path)

    first_plate_ids = map(str.strip, options.first_plate_ids.split(','))
    second_plate_ids = map(str.strip, options.second_plate_ids.split(','))
    
    labels_to_ignore = set()
    for l in options.labels_to_ignore.split(','):
        labels_to_ignore.add(l.strip())
    
    print 'Reading plates from experiment %s' % (options.experiment_id)
    db = MySQLDatabase(host='hldbv02', user='******', 
                       passwd='a1a1a1', db='tecan')

    filterer = promoter_activity.CultureReporterFilterer(options.min_culture_level,
                                                         options.min_reporter_level)
    reporter_bg_sub = promoter_activity.ReporterBackgroundSubtracter(
        options.background_label)
    culture_shifter = promoter_activity.CultureShifter()
    activity_calc = promoter_activity.ReporterActivityCalculator(
        options.lower_culture_bound, options.upper_culture_bound,
        min_reporter_level=options.lower_reporter_bound,
        window_size=options.window_size)

    first_plate_runners = []
    second_plate_runners = []
    print 'Calculating promoter activities for first condition'
    runner1 = PlateActivityRunner(
        options.culture_label, options.reporter_label,
        filterer, culture_shifter, reporter_bg_sub, activity_calc)
    
    for plate_id in first_plate_ids:
        plate = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        runner1.AddPlate(plate)
    
    runner1.Run()
    first_plate_runners.append(runner1)

    print 'Calculating promoter activities for second condition'
    runner2 = PlateActivityRunner(
        options.culture_label, options.reporter_label,
        filterer, culture_shifter, reporter_bg_sub, activity_calc)
    
    for plate_id in second_plate_ids:
        plate = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        runner2.AddPlate(plate)
    
    runner2.Run()
    second_plate_runners.append(runner2)
    
    # Unify strain data.
    print 'Saving figures'
    strains_data = StrainConditionsData(options.background_label)
    for plate_data in first_plate_runners:
        strains_data.AddPlateData('Glucose', plate_data,
                                  ignore_labels=labels_to_ignore)
    for plate_data in second_plate_runners:
        strains_data.AddPlateData('Gluconate', plate_data,
                                  ignore_labels=labels_to_ignore)
    strains_data.MakeStrainFigures(imgs_path)
    summary_fignames = strains_data.MakeSummaryFigures(
        imgs_path, 'Glucose', 'Gluconate')
    plate_fignames = strains_data.MakePerPlateFigures(imgs_path)
    
    
    labels = strains_data.GetStrainLabels()
    condition1_activities, condition1_errs = strains_data.GetMeanMaxActivities(
        labels, 'Glucose')
    condition2_activities, condition2_errs = strains_data.GetMeanMaxActivities(
        labels, 'Gluconate')
    log_1 = np.log2(condition1_activities)
    log_2 = np.log2(condition2_activities)
    diffs = log_2 - log_1
    sorted_diffs = list(np.argsort(diffs))
    sorted_diffs.reverse()
    diffs_data = []
    for i in sorted_diffs:
        logfold = diffs[i]
        fold = np.exp2(logfold)
        if np.isnan(logfold):
            logfold = None
            fold = None
        
        diffs_data.append({'label': labels[i],
                           'fold_change': fold,
                           'log_fold': logfold})        
    
    # Render the template.
    print 'Writing HTML output'
    template_data = {'experiment_id': options.experiment_id,
                     'first_plate_ids': first_plate_ids,
                     'second_plate_ids': second_plate_ids,
                     'culture_label': options.culture_label,
                     'reporter_label': options.reporter_label,
                     'first_plates': first_plate_runners,
                     'second_plates': second_plate_runners,
                     'strains_data': strains_data,
                     'diffs_data': diffs_data,
                     'summary_figure_fnames': summary_fignames,
                     'per_plate_figure_fnames': plate_fignames}
    template_fname = path.join(options.output_dir, 'results.html')
    templates.render_to_file(
        'compare_promoter_activities.html', template_data, template_fname)
    
    return
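As a quick sanity check of the fold-change bookkeeping above: diffs holds log2 ratios of the second condition over the first, and np.exp2 maps them back to plain fold changes. With hypothetical activities of 100 on glucose and 400 on gluconate:

import numpy as np

logfold = np.log2(400.0) - np.log2(100.0)  # 2.0
print np.exp2(logfold)                     # 4.0, i.e. a four-fold increase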
Example #15
        When the experimental data is broken into more than one XLS sheet, this method
        concatenates the data into one series and returns it as if it was from one source.
    """
    time_array = array([])
    value_array = array([])
    last_t = 0
    for vp in vp_vec:
        times, values = vp.get_data(index, row, col)
        time_array = hstack([time_array, times + last_t])
        value_array = hstack([value_array, values])
        if len(time_array) > 0:
            last_t = time_array.max()

    return time_array, value_array

_mkdir('../res/victor')

vp_vec = []
for name in ["OD600 20110302_lycopene1"]:
    vp = VictorParser()
    vp.parse_excel("../data/victor/%s.xls" % (name))
    vp_vec.append(vp)

pp = PdfPages('../res/victor/2011-02-28_lycopene1.pdf')

#rcParams['text.usetex'] = True
rcParams['legend.fontsize'] = 12
#rcParams['font.family'] = 'sans-serif'
#rcParams['font.size'] = 8
#rcParams['lines.linewidth'] = 0.3
#rcParams['lines.markersize'] = 2
Example #16
    #_U, s, _V = np.linalg.svd(augmented_matrix, full_matrices=False)
    #print sorted(s)
    
    contributions, _K = LinearRegression.LeastSquares(full_matrix, dG_vector)
    for i, group_name in enumerate(all_group_names):
        print "%s,%.3f" % (group_name, contributions[i, 0])
        
    pyplot.plot(dG_vector, dG_vector-np.dot(full_matrix, contributions), '.')
    pyplot.show()

if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == 'test':
        TestGroupMatrix()
        sys.exit(0)
    
    _mkdir('../res')
    db = SqliteDatabase('../res/gibbs.sqlite', 'w')
    H_nopka = Hatzi(use_pKa=False)
    H_withpka = Hatzi(use_pKa=True)
    H_withpka.ToDatabase(db, 'hatzi_thermodynamics')
    
    #H.ToDatabase(db, 'hatzi_gc')
    #H.I = 0.25
    #H.T = 300;
    #sparse_reaction = {13:-1, 1:-1, 9:2}
    #sparse_reaction = {36:-1, 3981:1}
    #sparse_reaction = {6:-1, 143:-1, 234:1, 5:1}
    #sparse_reaction = {1:-1, 499:-1, 603:1, 86:1}
    #sparse_reaction = {1:-1, 6:-1, 311:-1, 288:1, 5:1, 80:2, 26:1}
    #sparse_reaction = {408:-1, 6:-1, 4092:1, 5:1}
    #sparse_reaction = {588:-1, 1:-1, 114:1, 9:1}
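The contributions come from LinearRegression.LeastSquares, a toolbox helper that is not shown on this page. A minimal stand-in, assuming it returns the least-squares solution of full_matrix * x = dG_vector (the second return value, discarded as _K above, is assumed to be a kernel/nullspace term), might look like:

import numpy as np

def LeastSquares(A, y):
    # Hypothetical stand-in: minimum-norm least-squares solution of A*x = y.
    # The kernel term returned by the real helper is omitted in this sketch.
    x, _residues, _rank, _sv = np.linalg.lstsq(A, y)
    return x, None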
Example #17
File: fit.py Project: issfangks/milo-lab
def read_rbs_calc_results(fname):
    csv_reader = csv.reader(open(fname, 'r'))
    csv_reader.next()
    R_calc = []
    for row in csv_reader:
        (Name,Start_Position,Expression_Level,Kinetic_Score,Sequence) = row
        R_calc.append(float(Expression_Level))
    return array(R_calc)

################################################################################
################################################################################
################################################################################
################################################################################

#(rbs, promoters, data_facs) = read_data('data_FACS.csv')
#(rbs, promoters, data_single) = read_data('data_flu-OD_single.csv')
#(rbs, promoters, data_multi) = read_data('data_flu-OD_multi.csv')

util._mkdir('../res/pro_rbs')

rbs_score_fname = '../res/pro_rbs/rbs_2010-08-18_17-50-19_133.csv'
if (os.path.exists(rbs_score_fname)):
    R_calc = read_rbs_calc_results(rbs_score_fname)
else:
    sys.stderr.write("The RBS calculator score file could not be found; you must "
                     "generate it using the 'rbs-calc' website and put it here: %s"
                     % rbs_score_fname)
    R_calc = None
    
html = HtmlWriter('../res/pro_rbs/fit.html')
(b, R, P) = fit_params('data_flu-OD_multi', html, R_calc)
Example #18
def main():
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()

    if True:
        grad.load_nist_data(nist,
                            alberty,
                            skip_missing_reactions=False,
                            T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)

        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data(
            "../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient1")

    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")

    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist,
                            alberty,
                            skip_missing_reactions=True,
                            T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")

    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")

    elif False:  # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter

    elif False:
        util._mkdir("../res/nist/fig")
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))

        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())

        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" %
                              (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" %
                                  (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" %
                                  (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))

            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
Example #19
        default=False,
        help="A flag for running the TEST only (without TRAIN)")
    parser.add_argument(
        "-d",
        "--from_database",
        action="store_true",
        default=False,
        help="A flag for loading the data from the DB instead of "
        "the CSV files (saves time but no debug information)")
    return parser


if __name__ == '__main__':
    parser = MakeOpts()
    args = parser.parse_args()
    util._mkdir('../res')
    db = SqliteDatabase('../res/gibbs.sqlite', 'w')

    if args.transformed:
        prefix = 'bgc'
    else:
        prefix = 'pgc'

    if args.test_only:
        html_writer = HtmlWriter('../res/%s_test.html' % prefix)
    elif args.train_only:
        html_writer = HtmlWriter('../res/%s_train.html' % prefix)
    else:
        html_writer = HtmlWriter('../res/%s.html' % prefix)

    G = GroupContribution(db=db,
Example #20
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list,
                      html_writer,
                      thermo,
                      pH=None,
                      section_prefix="test",
                      balance_water=True,
                      override_bounds={})

    csv_output = csv.writer(open('%s.csv' % output_prefix, 'w'))
    csv_output.writerow(['pH'] + pathway_names)

    util._mkdir(output_prefix)
    shadow_csvs = {}
    for d in data:
        path = '%s/%s.csv' % (output_prefix, d['entry'])
        shadow_csvs[d['entry']] = csv.writer(open(path, 'w'))
        shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

    pH_vec = ParseConcentrationRange(conc_range)
    obd_mat = []
    for pH in pH_vec.flat:
        logging.info("pH = %.1f" % (pH))
        data = GetAllOBDs(pathway_list,
                          html_writer=None,
                          thermo=thermo,
                          pH=pH,
                          section_prefix="",
                          balance_water=True,
                          override_bounds={})
        obds = [d['OBD'] for d in data]
        obd_mat.append(obds)
        csv_output.writerow([data[0]['pH']] + obds)

        for d in data:
            if type(d['reaction prices']) != types.FloatType:
                prices = list(d['reaction prices'].flat)
                shadow_csvs[d['entry']].writerow([pH] + prices)

    obd_mat = np.matrix(
        obd_mat)  # rows are pathways and columns are concentrations

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)

    html_writer.close()
Example #21
def Main():
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()

    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)

    print 'Will read pathway definitions from %s' % input_filename

    # Make thermodynamic and kinetic data containers
    thermo = estimators[options.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)

    # Uniform kinetic data
    kin_data = kinetic_data.UniformKineticData(kcat=100, km=1e-4)

    # Create a kegg instance
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)

    print 'Executing Protein Cost analysis'
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    mtdfs = []
    protein_scores = []
    names = []
    num_atp = []
    path_lengths = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print 'Skipping pathway', pathway_data.name
            continue

        print 'Analyzing pathway', pathway_data.name

        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()

        protein_opt = protein_optimizer.ProteinOptimizer(
            model, thermo_data, kin_data)
        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)

        # Solve MTDF.
        mtdf_res = mtdf_opt.FindMTDF(model_bounds)
        mtdf_status = mtdf_res.status
        if mtdf_status.IsFailure() or mtdf_status.IsInfeasible():
            print '\tFailed to optimize', pathway_data.name
            continue

        # Solve protein.
        protein_res = protein_opt.FindOptimum(model_bounds)
        protein_status = protein_res.status
        if protein_status.IsFailure() or protein_status.IsInfeasible():
            print '\tFailed to optimize', pathway_data.name
            continue

        mtdfs.append(mtdf_res.opt_val)
        protein_scores.append(protein_res.opt_val)
        names.append(model.name)

        net_reaction = mtdf_res.net_reaction.sparse
        atp_produced = net_reaction.get(2, 0)
        num_atp.append(atp_produced)
        path_lengths.append(len(mtdf_res.reaction_ids))

        pylab.figure()
        pylab.title(model.name)
        dGr0_tag = mtdf_res.dGr0_tag.flatten().tolist()
        dgmtdf = mtdf_res.dGr_tag.flatten().tolist()
        dgprotein = protein_res.dGr_tag.flatten().tolist()
        dgbio = mtdf_res.dGr_bio.flatten().tolist()
        dg0_profile = np.cumsum([0] + dGr0_tag)
        dgmtdf_profile = np.cumsum([0] + dgmtdf)
        dgprotein_profile = np.cumsum([0] + dgprotein)
        dgbio_profile = np.cumsum([0] + dgbio)

        rxn_range = pylab.arange(len(mtdf_res.reaction_ids) + 1)
        pylab.plot(rxn_range,
                   dg0_profile,
                   'b--',
                   linewidth=2,
                   label='Standard Conditions')
        pylab.plot(rxn_range,
                   dgbio_profile,
                   'c--',
                   linewidth=2,
                   label='Biological Conditions')
        mtdf_label = 'MTDF Optimized (MTDF = %.2g kJ/mol)' % mtdf_res.opt_val
        pylab.plot(rxn_range,
                   dgmtdf_profile,
                   'r-',
                   linewidth=2,
                   label=mtdf_label)
        pc_label = 'Protein Optimized (Cost = %.2g)' % protein_res.opt_val
        pylab.plot(rxn_range,
                   dgprotein_profile,
                   'g-',
                   linewidth=2,
                   label=pc_label)
        pylab.xticks(rxn_range[:-1] + 0.5, mtdf_res.reaction_ids)
        pylab.xlabel('Reaction step')
        pylab.ylabel('Cumulative dG (kJ/mol)')
        pylab.legend(loc='upper right', prop=LEGEND_FONT)

    pylab.figure()
    pylab.plot(num_atp, protein_scores, 'b.')
    #pylab.xlabel('MTDF (kJ/mol)')
    pylab.xlabel('Net ATP Production')
    pylab.ylabel('Protein Cost')
    for x, y, s in zip(num_atp, protein_scores, names):
        pylab.text(x, y, s, fontsize=10)

    max_protein = np.max(protein_scores)
    pylab.plot([0, 0], [0, max_protein], 'r--', label='0 ATP Produced')
    pylab.plot([1, 1], [0, max_protein], 'g--', label='1 ATP Produced')
    pylab.plot([2, 2], [0, max_protein], 'b--', label='2 ATP Produced')

    #pylab.yscale('log')
    pylab.xticks([])
    pylab.xlim((-1, 3))
    pylab.legend()

    odbs = np.tanh(np.array(mtdfs) / (2 * RT))

    pylab.figure()
    pylab.plot(protein_scores, odbs, 'b.')
    pylab.xlabel('Protein Cost')
    pylab.ylabel('ODB (unitless)')

    #for x,y,s in zip(protein_scores, length_scaled_cost, names):
    #    pylab.text(x, y, s, fontsize=10)
    pylab.show()
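For a sense of scale on the odbs line: assuming RT is in kJ/mol at 298.15 K (an assumption consistent with the kJ/mol labels elsewhere on this page), an MTDF of 5 kJ/mol maps to an ODB of roughly 0.77:

import numpy as np

RT = 8.314e-3 * 298.15         # ~2.48 kJ/mol (assumed definition of RT)
print np.tanh(5.0 / (2 * RT))  # ~0.77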
Example #22
def main():
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()
    
    if True:
        grad.load_nist_data(nist, alberty, skip_missing_reactions=False, T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)
        
        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0). 
        grad.anchors = grad.load_dG0_data("../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient1")
        
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")
    
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True, T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")
        
    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")
    
    elif False: # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter
        
    elif False:
        util._mkdir("../res/nist/fig")
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))
                
        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())
        
        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write("  <tr><td>C%05d</td><td>%s</td><td>" % (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))
            
            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
Example #23
        value_array = hstack([value_array, value])
        if len(time_array) > 0:
            last_t = time_array.max()

    return (time_array, value_array)


name_list = ["2010-12-16 rubisco prk"]

vp_vec = []
for name in name_list:
    vp = TecanParser()
    vp.parse_excel("../data/tecan/%s.xls" % (name))
    vp_vec.append(vp)

util._mkdir("../res/tecan")
pp = PdfPages("../res/tecan/%s.pdf" % name_list[0])

# rcParams['text.usetex'] = True
rcParams["legend.fontsize"] = 12
# rcParams['font.family'] = 'sans-serif'
# rcParams['font.size'] = 8
# rcParams['lines.linewidth'] = 0.3
# rcParams['lines.markersize'] = 2
# rcParams['figure.figsize'] = [5, 10]
# rcParams['figure.subplot.hspace'] = 0.3
# figure()

plot_growth_rate = True
fit_window_size = 1.5  # hours
fit_start_threshold = 0.01
Example #24
    #print sorted(s)

    contributions, _K = LinearRegression.LeastSquares(full_matrix, dG_vector)
    for i, group_name in enumerate(all_group_names):
        print "%s,%.3f" % (group_name, contributions[i, 0])

    pyplot.plot(dG_vector, dG_vector - np.dot(full_matrix, contributions), '.')
    pyplot.show()


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == 'test':
        TestGroupMatrix()
        sys.exit(0)

    _mkdir('../res')
    db = SqliteDatabase('../res/gibbs.sqlite', 'w')
    H_nopka = Hatzi(use_pKa=False)
    H_withpka = Hatzi(use_pKa=True)
    H_withpka.ToDatabase(db, 'hatzi_thermodynamics')

    #H.ToDatabase(db, 'hatzi_gc')
    #H.I = 0.25
    #H.T = 300;
    #sparse_reaction = {13:-1, 1:-1, 9:2}
    #sparse_reaction = {36:-1, 3981:1}
    #sparse_reaction = {6:-1, 143:-1, 234:1, 5:1}
    #sparse_reaction = {1:-1, 499:-1, 603:1, 86:1}
    #sparse_reaction = {1:-1, 6:-1, 311:-1, 288:1, 5:1, 80:2, 26:1}
    #sparse_reaction = {408:-1, 6:-1, 4092:1, 5:1}
    #sparse_reaction = {588:-1, 1:-1, 114:1, 9:1}
Example #25
def Main():
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()
    
    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)
        
    print 'Will read pathway definitions from %s' % input_filename

    # Make thermodynamic and kinetic data containers
    thermo = estimators[options.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)
    
    # Uniform kinetic data
    kin_data = kinetic_data.UniformKineticData(kcat=100, km=1e-4)
    
    # Create a kegg instance
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)
    
    print 'Executing Protein Cost analysis'
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    mtdfs = []
    protein_scores = []
    names = []
    num_atp = []
    path_lengths = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print 'Skipping pathway', pathway_data.name
            continue
        
        print 'Analyzing pathway', pathway_data.name
                
        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()
        
        protein_opt = protein_optimizer.ProteinOptimizer(model, thermo_data, kin_data)
        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)
        
        # Solve MTDF.
        mtdf_res = mtdf_opt.FindMTDF(model_bounds)
        mtdf_status = mtdf_res.status
        if mtdf_status.IsFailure() or mtdf_status.IsInfeasible():
            print '\tFailed to optimize', pathway_data.name
            continue

        # Solve protein.
        protein_res = protein_opt.FindOptimum(model_bounds)
        protein_status = protein_res.status
        if protein_status.IsFailure() or protein_status.IsInfeasible():          
            print '\tFailed to optimize', pathway_data.name
            continue

        mtdfs.append(mtdf_res.opt_val)
        protein_scores.append(protein_res.opt_val)
        names.append(model.name)
        
        net_reaction = mtdf_res.net_reaction.sparse
        atp_produced = net_reaction.get(2, 0)
        num_atp.append(atp_produced)
        path_lengths.append(len(mtdf_res.reaction_ids))
        
        pylab.figure()
        pylab.title(model.name)
        dGr0_tag = mtdf_res.dGr0_tag.flatten().tolist()
        dgmtdf = mtdf_res.dGr_tag.flatten().tolist()
        dgprotein = protein_res.dGr_tag.flatten().tolist()
        dgbio = mtdf_res.dGr_bio.flatten().tolist()
        dg0_profile = np.cumsum([0] + dGr0_tag)
        dgmtdf_profile = np.cumsum([0] + dgmtdf)
        dgprotein_profile = np.cumsum([0] + dgprotein)
        dgbio_profile = np.cumsum([0] + dgbio)
        
        rxn_range = pylab.arange(len(mtdf_res.reaction_ids) + 1)
        pylab.plot(rxn_range, dg0_profile, 'b--',
                   linewidth=2, label='Standard Conditions')
        pylab.plot(rxn_range, dgbio_profile, 'c--',
                   linewidth=2, label='Biological Conditions')
        mtdf_label = 'MTDF Optimized (MTDF = %.2g kJ/mol)' % mtdf_res.opt_val
        pylab.plot(rxn_range, dgmtdf_profile, 'r-',
                   linewidth=2, label=mtdf_label)
        pc_label = 'Protein Optimized (Cost = %.2g)' % protein_res.opt_val
        pylab.plot(rxn_range, dgprotein_profile, 'g-',
                   linewidth=2, label=pc_label)
        pylab.xticks(rxn_range[:-1] + 0.5, mtdf_res.reaction_ids)
        pylab.xlabel('Reaction step')
        pylab.ylabel('Cumulative dG (kJ/mol)')
        pylab.legend(loc='upper right', prop=LEGEND_FONT)
    
    pylab.figure()
    pylab.plot(num_atp, protein_scores, 'b.')
    #pylab.xlabel('MTDF (kJ/mol)')
    pylab.xlabel('Net ATP Production')
    pylab.ylabel('Protein Cost')
    for x,y,s in zip(num_atp, protein_scores, names):
        pylab.text(x, y, s, fontsize=10)
    
    max_protein = np.max(protein_scores)
    pylab.plot([0,0], [0,max_protein], 'r--', label='0 ATP Produced')
    pylab.plot([1,1], [0,max_protein], 'g--', label='1 ATP Produced')
    pylab.plot([2,2], [0,max_protein], 'b--', label='2 ATP Produced')
    
    #pylab.yscale('log')
    pylab.xticks([])
    pylab.xlim((-1, 3))
    pylab.legend()
    
    odbs = np.tanh(np.array(mtdfs) / (2*RT))
    
    pylab.figure()
    pylab.plot(protein_scores, odbs, 'b.')
    pylab.xlabel('Protein Cost')
    pylab.ylabel('ODB (unitless)')
    
    #for x,y,s in zip(protein_scores, length_scaled_cost, names):
    #    pylab.text(x, y, s, fontsize=10)  
    pylab.show()
Example #26
    def find_path(self, experiment_name, net_reaction):
        """Find a pathway from the source to the target.
        
        Args:    
            experiment_name: a name given to this experiment.
            net_reaction: a Reaction describing the net reaction for the desired paths
        """
        dirname = os.path.join('../res/pathologic/', experiment_name)
        logging.info('Writing output to: %s' % dirname)
        util._mkdir(dirname)
        
        self.html_writer.write('<a href="pathologic/' + experiment_name + '.html">' + experiment_name + '</a><br>\n')
        exp_html = HtmlWriter('../res/pathologic/' + experiment_name + '.html')
        exp_html.write("<p><h1>%s</h1>\n" % experiment_name)

        exp_html.insert_toggle(div_id="__parameters__", start_here=True,
                               label='Show Parameters')
        
        f, S, compounds, reactions = self.kegg_pathologic.get_unique_cids_and_reactions()

        exp_html.write('<h2>Conditions:</h2>\n')
        exp_html.write_ul(['Optimization method: %s' % self.thermodynamic_method,
                           'Concentration range: %g M < C < %g M' % (self.thermo.c_range[0], self.thermo.c_range[1]),
                           "Max &Delta;<sub>r</sub>G' = %.1f" % self.maximal_dG,
                           'pH = %g' % self.thermo.pH,
                           'I = %g' % self.thermo.I,
                           'T = %g' % self.thermo.T,
                           'Max no. reactions: %d' % (self.max_reactions or -1),
                           'Max no. solutions: %d' % (self.max_solutions or -1),
                           'Overall Reaction: %s' % net_reaction.to_hypertext(),
                           '%d reactions' % len(reactions),
                           '%d unique compounds' % len(compounds)])

        exp_html.div_end()
        exp_html.write('</br>\n')
        
        logging.debug("All compounds:")
        for i, compound in enumerate(compounds):
            logging.debug("%05d) C%05d = %s" % (i, compound.cid, compound.name))
        logging.debug("All reactions:")
        for i, reaction in enumerate(reactions):
            logging.debug("%05d) R%05d = %s" % (i, reaction.rid, str(reaction)))

        output_kegg_file = open(dirname + '/kegg_pathway.txt', 'w')
        exp_html.write('<a href="%s/kegg_pathway.txt">All solutions in KEGG format</a></br>\n'
                       % experiment_name)
        
        # Find a solution with a minimal total flux
        logging.info("Preparing LP solver for the minimal total flux problem")
        exp_html.write('<b>Minimum flux</b>')
        slip = Stoichiometric_LP("Pathologic")
        slip.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
        slip.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, 0))
        exp_html.write(' (<a href="%s/%03d_lp.txt">LP file</a>): ' % (experiment_name, 0))
        logging.info("Solving")
        if not slip.solve():
            exp_html.write("<b>There are no solutions!</b>")
            logging.warning("There are no solutions. Quitting!")
            return
        logging.info("writing solution")
        self.write_current_solution(exp_html, slip, experiment_name)

        logging.info("Preparing MILP solver")
        milp = Stoichiometric_LP("Pathologic")
        milp.solution_index = 1
        milp.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
        milp.add_milp_variables()
        if self.max_reactions is not None:
            milp.add_reaction_num_constraint(self.max_reactions)
       
        if self.thermodynamic_method == OptimizationMethods.LOCALIZED:
            milp.add_localized_dGf_constraints(self.thermo)
        else:
            milp.add_dGr_constraints(self.thermo,
                                     optimization=self.thermodynamic_method,
                                     maximal_dG=self.maximal_dG)
        
        index = 0
        while (self.max_solutions is None) or (index < self.max_solutions):
            index += 1
            # create the MILP problem to constrain the previous solutions not to reappear again.
            logging.info("Round %03d, solving using MILP" % (milp.solution_index))
            milp.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, milp.solution_index))
            exp_html.write('<b>Solution #%d</b> (<a href="%s/%03d_lp.txt">LP file</a>): '  % (index, experiment_name, index))
            if not milp.solve():
                exp_html.write("<b>No solution found</b>")
                logging.info("No more solutions. Quitting!")
                break
            logging.info("writing solution")
            self.write_current_solution(exp_html, milp, experiment_name,
                                        output_kegg_file)
            milp.ban_current_solution()
        
        output_kegg_file.close()
        exp_html.close()
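Example #27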
def Main():
    options, _ = MakeOpts().parse_args(sys.argv)
    assert options.experiment_id
    assert options.first_plate_ids and options.second_plate_ids
    assert options.culture_label and options.reporter_label
    assert options.output_dir

    if not path.exists(options.output_dir):
        util._mkdir(options.output_dir)

    imgs_path = path.join(options.output_dir, 'imgs/')
    if not path.exists(imgs_path):
        util._mkdir(imgs_path)

    first_plate_ids = map(str.strip, options.first_plate_ids.split(','))
    second_plate_ids = map(str.strip, options.second_plate_ids.split(','))

    labels_to_ignore = set(
        l.strip() for l in options.labels_to_ignore.split(','))

    print 'Reading plates from experiment %s' % (options.experiment_id)
    db = MySQLDatabase(host='hldbv02',
                       user='******',
                       passwd='a1a1a1',
                       db='tecan')

    filterer = promoter_activity.CultureReporterFilterer(
        options.min_culture_level, options.min_reporter_level)
    reporter_bg_sub = promoter_activity.ReporterBackgroundSubtracter(
        options.background_label)
    culture_shifter = promoter_activity.CultureShifter()
    activity_calc = promoter_activity.ReporterActivityCalculator(
        options.lower_culture_bound,
        options.upper_culture_bound,
        min_reporter_level=options.lower_reporter_bound,
        window_size=options.window_size)

    first_plate_runners = []
    second_plate_runners = []
    print 'Calculating promoter activities for first condition'
    runner1 = PlateActivityRunner(options.culture_label,
                                  options.reporter_label, filterer,
                                  culture_shifter, reporter_bg_sub,
                                  activity_calc)

    for plate_id in first_plate_ids:
        plate = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        runner1.AddPlate(plate)

    runner1.Run()
    first_plate_runners.append(runner1)

    print 'Calculating promoter activities for second condition'
    runner2 = PlateActivityRunner(options.culture_label,
                                  options.reporter_label, filterer,
                                  culture_shifter, reporter_bg_sub,
                                  activity_calc)

    for plate_id in second_plate_ids:
        plate = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        runner2.AddPlate(plate)

    runner2.Run()
    second_plate_runners.append(runner2)

    # Unify strain data.
    print 'Saving figures'
    strains_data = StrainConditionsData(options.background_label)
    for plate_data in first_plate_runners:
        strains_data.AddPlateData('Glucose',
                                  plate_data,
                                  ignore_labels=labels_to_ignore)
    for plate_data in second_plate_runners:
        strains_data.AddPlateData('Gluconate',
                                  plate_data,
                                  ignore_labels=labels_to_ignore)
    strains_data.MakeStrainFigures(imgs_path)
    summary_fignames = strains_data.MakeSummaryFigures(imgs_path, 'Glucose',
                                                       'Gluconate')
    plate_fignames = strains_data.MakePerPlateFigures(imgs_path)

    labels = strains_data.GetStrainLabels()
    condition1_activities, condition1_errs = strains_data.GetMeanMaxActivities(
        labels, 'Glucose')
    condition2_activities, condition2_errs = strains_data.GetMeanMaxActivities(
        labels, 'Gluconate')
    log_1 = np.log2(condition1_activities)
    log_2 = np.log2(condition2_activities)
    diffs = log_2 - log_1
    sorted_diffs = list(np.argsort(diffs))
    sorted_diffs.reverse()
    diffs_data = []
    for i in sorted_diffs:
        logfold = diffs[i]
        fold = np.exp2(logfold)
        if np.isnan(logfold):
            logfold = None
            fold = None

        diffs_data.append({
            'label': labels[i],
            'fold_change': fold,
            'log_fold': logfold
        })

    # Render the template.
    print 'Writing HTML output'
    template_data = {
        'experiment_id': options.experiment_id,
        'first_plate_ids': first_plate_ids,
        'second_plate_ids': second_plate_ids,
        'culture_label': options.culture_label,
        'reporter_label': options.reporter_label,
        'first_plates': first_plate_runners,
        'second_plates': second_plate_runners,
        'strains_data': strains_data,
        'diffs_data': diffs_data,
        'summary_figure_fnames': summary_fignames,
        'per_plate_figure_fnames': plate_fignames
    }
    template_fname = path.join(options.output_dir, 'results.html')
    templates.render_to_file('compare_promoter_activities.html', template_data,
                             template_fname)

    return
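Stripped of the plumbing, the ranking above is a plain log2 fold-change between the two conditions. The same computation on made-up activity numbers:

import numpy as np

cond1 = np.array([10.0, 400.0, 50.0])    # mean max activities, condition 1
cond2 = np.array([80.0, 100.0, 50.0])    # mean max activities, condition 2
diffs = np.log2(cond2) - np.log2(cond1)  # log2 fold-change per strain
for i in np.argsort(diffs)[::-1]:        # most up-regulated first
    print i, 2.0 ** diffs[i], diffs[i]   # strain index, fold, log2 fold
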
                        help='Dump all training data to text files')
    parser.add_argument('-e', '--test',
                        action='store_true', default=False,
                        help='A flag for running the TEST')
    parser.add_argument('-l', '--leave_one_out',
                        action='store_true', default=False,
                        help='A flag for running the Leave One Out analysis')
    return parser
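
Only the tail of MakeOpts() survives in this excerpt. Below is a minimal sketch consistent with every flag referenced above and in the __main__ block that follows; the description string is a guess, and short option letters other than the -e/-l pair shown are omitted because they are not recoverable:

import argparse

def MakeOpts():
    parser = argparse.ArgumentParser(
        description='Train the unified group contribution model')  # guessed
    for flag in ('anchor_all_formations', 'recalc_groups',
                 'recalc_observations', 'recalc_groupvectors',
                 'recalc_matrices', 'dump', 'train'):
        parser.add_argument('--' + flag, action='store_true', default=False)
    parser.add_argument('-e', '--test', action='store_true', default=False,
                        help='A flag for running the TEST')
    parser.add_argument('-l', '--leave_one_out', action='store_true',
                        default=False,
                        help='A flag for running the Leave One Out analysis')
    return parser
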
    
if __name__ == "__main__":
    logger = logging.getLogger('')
    logger.setLevel(logging.DEBUG)

    parser = MakeOpts()
    args = parser.parse_args()
    util._mkdir('../res')
    db = SqliteDatabase('../res/gibbs.sqlite', 'w')
    html_writer = HtmlWriter('../res/ugc.html')
    
    ugc = UnifiedGroupContribution(db, html_writer,
                                   anchor_all=args.anchor_all_formations)
    ugc.LoadGroups(FromDatabase=(not args.recalc_groups))
    ugc.LoadObservations(FromDatabase=(not args.recalc_observations))
    ugc.LoadGroupVectors(FromDatabase=(not args.recalc_groupvectors))
    ugc.LoadData(FromDatabase=(not args.recalc_matrices))
    
    if args.dump:
        ugc.SaveDataToMatfile()
        sys.exit(0)
    if args.train:
        ugc.EstimateKeggCids()
Example #29
                continue
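            # If any override is set, patch a copy so the cached NIST row stays unmodified.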
            if self.override_pMg or self.override_I or self.override_T:
                nist_row_copy = nist_row_data.Clone()
                if self.override_pMg:
                    nist_row_copy.pMg = self.override_pMg
                if self.override_I:
                    nist_row_copy.I = self.override_I
                if self.override_T:
                    nist_row_copy.T = self.override_T
                rows.append(nist_row_copy)
            else:
                rows.append(nist_row_data)
        return rows

    def GetUniqueReactionSet(self):
        return set([row.reaction for row in self.data])


if __name__ == '__main__':
    #logging.getLogger('').setLevel(logging.DEBUG)
    _mkdir("../res/nist")
    html_writer = HtmlWriter("../res/nist/statistics.html")
    nist = Nist()
    fp = open('../res/nist_kegg_ids.txt', 'w')
    for cid in nist.GetAllCids():
        fp.write("C%05d\n" % cid)
    fp.close()
    nist.AnalyzeStats(html_writer)
    nist.AnalyzeConnectivity(html_writer)
    html_writer.close()
Example #30
        value_array = hstack([value_array, value])
        if (len(time_array) > 0):
            last_t = time_array.max()

    return (time_array, value_array)


name_list = ["2010-12-16 rubisco prk"]

vp_vec = []
for name in name_list:
    vp = TecanParser()
    vp.parse_excel("../data/tecan/%s.xls" % (name))
    vp_vec.append(vp)

util._mkdir('../res/tecan')
pp = PdfPages('../res/tecan/%s.pdf' % name_list[0])

#rcParams['text.usetex'] = True
rcParams['legend.fontsize'] = 12
#rcParams['font.family'] = 'sans-serif'
#rcParams['font.size'] = 8
#rcParams['lines.linewidth'] = 0.3
#rcParams['lines.markersize'] = 2
#rcParams['figure.figsize'] = [5, 10]
#rcParams['figure.subplot.hspace'] = 0.3
#figure()

plot_growth_rate = True
fit_window_size = 1.5  # hours
fit_start_threshold = 0.01
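
The fragment ends before the fitting code, but the two parameters above describe a standard sliding-window growth-rate fit: ignore readings until the culture signal clears fit_start_threshold, then regress log(OD) against time within each fit_window_size-hour window and report the steepest slope. A sketch of that idea, assuming exponential growth (not necessarily the project's exact routine):

import numpy as np

def fit_growth_rate(t, od, window=1.5, threshold=0.01):
    """Maximal slope of log(OD) over any `window`-hour stretch, in 1/hour."""
    best = 0.0
    for i in range(len(t)):
        if od[i] < threshold:
            continue  # still below the detection threshold
        mask = (t >= t[i]) & (t <= t[i] + window) & (od > 0)
        if mask.sum() < 2:
            continue  # not enough points in the window to fit a line
        slope = np.polyfit(t[mask], np.log(od[mask]), 1)[0]
        best = max(best, slope)
    return best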
Example #31
    args, _ = MakeOpts(estimators).parse_args(sys.argv)
    input_filename = os.path.abspath(args.input_filename)
    output_filename = os.path.abspath(args.output_filename)
    if not os.path.exists(input_filename):
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)
        
    print 'Will read pathway definitions from %s' % input_filename
    print 'Will write output to %s' % output_filename
    
    db_loc = args.db_filename
    print 'Reading from DB %s' % db_loc
    db = SqliteDatabase(db_loc)

    thermo = estimators[args.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name
    
    kegg = Kegg.getInstance()
    thermo.bounds = deepcopy(kegg.cid2bounds)
    
    dirname = os.path.dirname(output_filename)
    if not os.path.exists(dirname):
        print 'Making output directory %s' % dirname
        _mkdir(dirname)
    
    print 'Executing thermodynamic pathway analysis'
    html_writer = HtmlWriter(output_filename)
    thermo_analyze = ThermodynamicAnalysis(db, html_writer, thermodynamics=thermo)
    thermo_analyze.analyze_pathway(input_filename)

    
Example #32
File: nist.py Project: issfangks/milo-lab
(identical to Example #29 above)
Example #33
        concatenates the data into one series and returns it as if it was from one source.
    """
    time_array = array([])
    value_array = array([])
    last_t = 0
    for vp in vp_vec:
        (time, value) = vp.get_data(index, row, col)
        time_array = hstack([time_array, time + last_t])
        value_array = hstack([value_array, value])
        if (len(time_array) > 0):
            last_t = time_array.max()

    return (time_array, value_array)
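
The helper above stitches several runs into one series by shifting each run's clock by the largest timestamp seen so far. The same logic standalone, with made-up numbers:

from numpy import array, hstack

runs = [(array([0.0, 1.0, 2.0]), array([0.1, 0.2, 0.4])),
        (array([0.0, 1.0]), array([0.5, 0.9]))]
time_array, value_array = array([]), array([])
last_t = 0
for time, value in runs:
    time_array = hstack([time_array, time + last_t])
    value_array = hstack([value_array, value])
    if len(time_array) > 0:
        last_t = time_array.max()
# time_array is now [0, 1, 2, 2, 3]: the second run starts where the first ended.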


_mkdir('../res/victor')

vp_vec = []
for name in ["peter_glugly"]:
    vp = VictorParser()
    vp.parse_excel("../data/victor/%s.xls" % (name))
    vp_vec.append(vp)

pp = PdfPages('../res/victor/2011-02-22_peter_glugly.pdf')

#rcParams['text.usetex'] = True
rcParams['legend.fontsize'] = 12
#rcParams['font.family'] = 'sans-serif'
#rcParams['font.size'] = 8
#rcParams['lines.linewidth'] = 0.3
#rcParams['lines.markersize'] = 2
Example #34
File: fit.py Project: titus0810/milo-lab
    csv_reader = csv.reader(open(fname, 'r'))
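    # the first row is a header: Name, Start_Position, Expression_Level, Kinetic_Score, Sequence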
    csv_reader.next()
    R_calc = []
    for row in csv_reader:
        (Name, Start_Position, Expression_Level, Kinetic_Score, Sequence) = row
        R_calc.append(float(Expression_Level))
    return array(R_calc)


################################################################################
################################################################################
################################################################################
################################################################################

#(rbs, promoters, data_facs) = read_data('data_FACS.csv')
#(rbs, promoters, data_single) = read_data('data_flu-OD_single.csv')
#(rbs, promoters, data_multi) = read_data('data_flu-OD_multi.csv')

util._mkdir('../res/pro_rbs')

rbs_score_fname = '../res/pro_rbs/rbs_2010-08-18_17-50-19_133.csv'
if (os.path.exists(rbs_score_fname)):
    R_calc = read_rbs_calc_results(rbs_score_fname)
else:
    sys.stderr.write("The RBS calculator score file could not be found; you must "
                     "generate it using the 'rbs-calc' website and put it here: %s\n"
                     % rbs_score_fname)
    R_calc = None

html = HtmlWriter('../res/pro_rbs/fit.html')
(b, R, P) = fit_params('data_flu-OD_multi', html, R_calc)
Example #35
    def WriteUniqueReactionReport(self, unique_sparse_reactions,
                                  unique_nist_row_representatives,
                                  unique_data_mat, full_data_mat,
                                  cid2nH_nMg=None):
        
        total_std = full_data_mat[2:4, :].std(1)
        
        fig = plt.figure()
        plt.plot(unique_data_mat[2, :].T, unique_data_mat[3, :].T, '.')
        plt.xlabel(r"$\sigma(\Delta_r G^\circ)$")
        plt.ylabel(r"$\sigma(\Delta_r G^{'\circ})$")
        plt.title(r"$\sigma_{total}(\Delta_r G^\circ) = %.1f$ kJ/mol, "
                  r"$\sigma_{total}(\Delta_r G^{'\circ}) = %.1f$ kJ/mol" %
                  (total_std[0, 0], total_std[1, 0]))
        self.html_writer.embed_matplotlib_figure(fig, width=640, height=480)
        logging.info('std(dG0_r) = %.1f' % total_std[0, 0])
        logging.info('std(dG\'0_r) = %.1f' % total_std[1, 0])
        
        rowdicts = []
        for i, reaction in enumerate(unique_sparse_reactions):
            logging.debug('Analyzing unique reaction: ' + 
                          str(unique_sparse_reactions[i]))
            ddG0 = self.GetDissociation().ReverseTransformReaction(reaction,
                pH=7, I=0.1, pMg=10, T=298.15, cid2nH_nMg=cid2nH_nMg)
            
            d = {}
            d["_reaction"] = reaction.to_hypertext(show_cids=False)
            d["reaction"] = reaction.FullReactionString(show_cids=False) # no hypertext for the CSV output
            d["Reference ID"] = unique_nist_row_representatives[i].ref_id
            d["EC"] = unique_nist_row_representatives[i].ec
            d["E(" + symbol_dr_G0 + ")"] = unique_data_mat[0, i]
            d["E(" + symbol_dr_G0_prime + ")"] = unique_data_mat[1, i]
            d["E(" + symbol_dr_G0 + ")'"] = unique_data_mat[0, i] + ddG0
            d["std(" + symbol_dr_G0 + ")"] = unique_data_mat[2, i]
            d["std(" + symbol_dr_G0_prime + ")"] = unique_data_mat[3, i]
            d["diff"] = unique_data_mat[2, i] - unique_data_mat[3, i]
            d["#observations"] = "%d" % unique_data_mat[4, i]
            
            flag = 0
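            # KEGG compound IDs: C00003 = NAD+, C00004 = NADH, C00005 = NADPH, C00006 = NADP+.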
            c_nad = reaction.sparse.get(3, 0)
            c_nadh = reaction.sparse.get(4, 0)
            c_nadp = reaction.sparse.get(6, 0)
            c_nadph = reaction.sparse.get(5, 0)
            if c_nad == 1 and c_nadh == -1:
                flag = 1
            elif c_nad == -1 and c_nadh == 1:
                flag = -1
            elif c_nadp == 1 and c_nadph == -1:
                flag = 2
            elif c_nadp == -1 and c_nadph == 1:
                flag = -2
            d["Arren Flag"] = flag

            if d["diff"] > self.std_diff_threshold:
                _mkdir('../res/prc_reactions')
                link = "prc_reactions/%s.html" % reaction.name
                d["analysis"] = '<a href="%s">link</a>' % link
                reaction_html_writer = HtmlWriter(os.path.join('../res', link))
                self.AnalyzeSingleReaction(reaction,
                                           html_writer=reaction_html_writer)
            rowdicts.append(d)
        
        result_headers = ["E(" + symbol_dr_G0 + ")",
                          "E(" + symbol_dr_G0_prime + ")", 
                          "E(" + symbol_dr_G0 + ")'",
                          "std(" + symbol_dr_G0 + ")",
                          "std(" + symbol_dr_G0_prime + ")"]
        rowdicts.sort(key=lambda x:x["diff"], reverse=True)
        self.html_writer.write_table(rowdicts, ["reaction", "Reference ID"] + 
                                     result_headers + ["EC", "#observations", "analysis"],
                                     decimal=1)
        csv_writer = csv.DictWriter(open('../res/nist_regression_unique.csv', 'w'),
                                    ["_reaction", "Reference ID", "EC", "#observations"]
                                    + result_headers + ['Arren Flag'],
                                    extrasaction='ignore')
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
Example #36
    """
    well = (row, col)
    time_list = []
    value_list = []
    for time, value in sorted(MES[reading_label][plate_id][well].iteritems()):
        time_list.append(time)
        value_list.append(value)

    time_array = array(time_list)
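    # re-zero at the first reading and convert seconds to hours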
    if len(time_list):
        time_array = (time_array - time_list[0]) / 3600
    return time_array, array(value_list)


MES = CollectData("../data/tecan/PL6-96.tar.gz", number_of_plates=4)
_mkdir('../res/tecan')
pp = PdfPages('../res/tecan/2011-02-06_PL6-96.pdf')

#rcParams['text.usetex'] = True
rcParams['legend.fontsize'] = 12
#rcParams['font.family'] = 'sans-serif'
#rcParams['font.size'] = 8
#rcParams['lines.linewidth'] = 0.3
#rcParams['lines.markersize'] = 2
#rcParams['figure.figsize'] = [5, 10]
#rcParams['figure.subplot.hspace'] = 0.3
#figure()

plot_growth_rate = False
fit_window_size = 1.5  # hours
fit_start_threshold = 0.01
Example #37
        (time, value) = vp.get_data(index, row, col)
        time_array = hstack([time_array, time + last_t])
        value_array = hstack([value_array, value])
        if len(time_array) > 0:
            last_t = time_array.max()

    return (time_array, value_array)


name = "2010-10-07_K12-acetate"
vp_vec = []
vp = VictorParser()
vp.parse_excel("../data/victor/%s.xls" % (name))
vp_vec.append(vp)

util._mkdir("../res/victor")
pp = PdfPages("../res/victor/%s.pdf" % name)

# rcParams['text.usetex'] = True
rcParams["legend.fontsize"] = 12
# rcParams['font.family'] = 'sans-serif'
# rcParams['font.size'] = 8
# rcParams['lines.linewidth'] = 0.3
# rcParams['lines.markersize'] = 2
# rcParams['figure.figsize'] = [5, 10]
# rcParams['figure.subplot.hspace'] = 0.3
# figure()

fit_window_size = 5  # hours
fit_start_threshold = 0.002
Example #38
def Main():
    np.seterr('raise')
    parser = MakeOpts()
    args = parser.parse_args()
    estimators = thermodynamic_estimators.LoadAllEstimators()
    
    input_filename = path.abspath(args.input_filename)
    if not path.exists(input_filename):
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)
        
    print 'Will read pathway definitions from %s' % input_filename

    # Make thermodynamic and kinetic data containers
    thermo = estimators[args.thermodynamics_source]
    print "Using the thermodynamic estimations of: " + thermo.name
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)
    
    # Fetch kinetic data.
    kin_data = kinetic_data.UniformKineticData(kcat=200, km=2e-4, mass=40)
    if args.kinetics_filename is not None:
        print 'Parsing kinetic data from', args.kinetics_filename
        kin_data = kinetic_data.KineticDataWithDefault.FromArrenFile(
            args.kinetics_filename)
        
    """
    kin_data = kinetic_data.KineticDataWithDefault.FromFiles(
        '../data/enzymatics/glycolytic_pathway_enzymes_kcat.csv',
        '../data/enzymatics/glycolytic_pathway_enzymes_km.csv')
    kin_data.SetDefaultKcat(100)
    kin_data.SetDefaultKM(1e-4)
    kin_data.SetDefaultMass(35)
    """
    
    # Create a kegg instance
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories
    out_dir = args.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)
    
    print 'Executing Protein Cost analysis'
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    results = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print 'Skipping pathway', pathway_data.name
            continue
        
        print 'Analyzing pathway', pathway_data.name
                
        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()

        opt = protein_optimizer.ProteinOptimizer(model, thermo_data, kin_data)
        it = feasible_concentrations_iterator.FeasibleConcentrationsIterator(
            model, thermo_data, model_bounds)
        
        # Now solve with the default initial conditions.
        success = None
        result = None
        optima = []
        for i, x0 in enumerate(it):
            result = opt.FindOptimum(model_bounds, initial_concentrations=x0)
            status = result.status
            print '\t%s optimization %d' % (pathway_data.name, i)
            if status.failure:          
                print '\tFailed to optimize', pathway_data.name
                print '\t%s' % status
            elif status.infeasible:      
                print '\t', pathway_data.name, 'is infeasible!'
                print '\t%s' % status
            else:
                print '\t*Optimization successful'
                optima.append(result.opt_val)
                if not success:
                    success = result
                elif result.opt_val < success.opt_val:
                    success = result
        
        mean, error = None, None
        if optima:
            try:
                mean, error = stats.MeanWithConfidenceInterval(optima)
            except Exception, e:
                mean, error = None, None
                print optima
        result_dict = {'result': None,
                       'num_optima': len(optima),
                       'mean_opt': mean,
                       'error': error}
        
        if success is not None:
            success.WriteAllGraphs(pathgraph_dir)
            result_dict['result'] = success
        
            cost = success.opt_val
            if cost is not None:
                print '\t*Protein Cost for', pathway_data.name, '= %.2g' % cost
            if optima:
                print 'Found', len(optima), 'near-optima for', pathway_data.name 
                optima = np.array(optima)
                mean_opt = np.mean(optima)
                mean_diff = np.mean(np.abs(optima - mean_opt))
                print 'Mean optimum', mean_opt
                print 'Mean diff from mean', mean_diff
                print 'Percent diff %s%%' % (100*mean_diff / mean_opt)
                print 'StdDev opt', np.std(optima)
        else:
            # Use default conditions to show the failure
            res = opt.FindOptimum(model_bounds)
            result_dict['result'] = res            
        results.append(result_dict)
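
stats.MeanWithConfidenceInterval is not shown in this excerpt. A plausible implementation, returning the sample mean together with the half-width of a 95% t-based confidence interval (the project's actual definition may differ):

import numpy as np
from scipy import stats as scipy_stats

def MeanWithConfidenceInterval(values, confidence=0.95):
    v = np.asarray(values, dtype=float)
    n = len(v)
    if n < 2:
        raise ValueError('need at least two values')  # a single optimum gives no CI
    sem = v.std(ddof=1) / np.sqrt(n)  # standard error of the mean
    half_width = scipy_stats.t.ppf(0.5 + confidence / 2.0, n - 1) * sem
    return v.mean(), half_width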
Example #39
        (time, value) = vp.get_data(index, row, col)
        time_array = hstack([time_array, time + last_t])
        value_array = hstack([value_array, value])
        if (len(time_array) > 0):
            last_t = time_array.max()

    return (time_array, value_array)


name = "2010-10-07_K12-acetate"
vp_vec = []
vp = VictorParser()
vp.parse_excel("../data/victor/%s.xls" % (name))
vp_vec.append(vp)

util._mkdir('../res/victor')
pp = PdfPages('../res/victor/%s.pdf' % name)

#rcParams['text.usetex'] = True
rcParams['legend.fontsize'] = 12
#rcParams['font.family'] = 'sans-serif'
#rcParams['font.size'] = 8
#rcParams['lines.linewidth'] = 0.3
#rcParams['lines.markersize'] = 2
#rcParams['figure.figsize'] = [5, 10]
#rcParams['figure.subplot.hspace'] = 0.3
#figure()

fit_window_size = 5  # hours
fit_start_threshold = 0.002
Example #40
        concatenates the data into one series and returns it as if it was from one source.
    """
    well = (row, col)
    time_list = []
    value_list = []
    for time, value in sorted(MES[reading_label][plate_id][well].iteritems()):
        time_list.append(time)
        value_list.append(value)

    time_array = array(time_list)
    if len(time_list):
        time_array = (time_array - time_list[0]) / 3600
    return time_array, array(value_list)

MES = CollectData("../data/tecan/PL6-96.tar.gz", number_of_plates=4)
_mkdir('../res/tecan')
pp = PdfPages('../res/tecan/2011-02-06_PL6-96.pdf')

#rcParams['text.usetex'] = True
rcParams['legend.fontsize'] = 12
#rcParams['font.family'] = 'sans-serif'
#rcParams['font.size'] = 8
#rcParams['lines.linewidth'] = 0.3
#rcParams['lines.markersize'] = 2
#rcParams['figure.figsize'] = [5, 10]
#rcParams['figure.subplot.hspace'] = 0.3
#figure()

plot_growth_rate = False
fit_window_size = 1.5 # hours
fit_start_threshold = 0.01