def getEkinProject(data, xerror=None, yerror=None, sep='__'):
    """Build an EkinProject from a dict of datasets.

    data maps label -> ([x],[y]) or ([x],[y],[xerr],[yerr]); a value may
    also be a dict of sub-datasets, in which case each is inserted under
    the name 'label<sep>sublabel'.
    xerror/yerror: constant per-point uncertainties applied when the data
    tuple does not carry its own error lists.
    Returns the populated EkinProject (mode 'General').
    """
    E = EkinProject(mode='General')
    for d in data:
        if isinstance(data[d], dict):
            # nested data: one Ekin dataset per sub-label
            for lbl in data[d]:
                name = str(d) + sep + str(lbl)
                xy = data[d][lbl]
                ek = EkinDataset(xy=xy)
                E.insertDataset(ek, name)
        else:
            #print data[d]
            if len(data[d]) == 4:
                x, y, xerrs, yerrs = data[d]
            else:
                x, y = data[d]
                xerrs = []
                yerrs = []
                # no errors supplied with the data - fall back to the
                # constant uncertainties, if any were given
                if xerror is not None:
                    xerrs = [xerror for i in x]
                if yerror is not None:
                    yerrs = [yerror for i in y]
            ek = EkinDataset(xy=[x, y], xerrs=xerrs, yerrs=yerrs)
            E.insertDataset(ek, d)
            #print ek.errors
    return E
def fitPropagationTest(): """Tests the propagation of fit data direct from a dict - no importing""" start = time.time() p = Pipeline() conf = { 'model1': 'linear', 'model2': 'Michaelis-Menten', 'model3': 'sigmoid', 'variable1': 'a', 'variable2': 'Km', 'variable3': 'tm', #'xerror':.1,'yerror':0.05, } p.createConfig('temp.conf', **conf) data = Utilities.createNestedData() Em = EkinProject() E, fits = p.processFits(data, Em=Em) print 'final fits', fits fname = os.path.join(p.workingdir, 'results') Em.saveProject(fname) p.saveEkinPlotstoImages(Em, fname) print 'completed fit propagation test' print 'took %s seconds' % round((time.time() - start), 2) print '-------------------' return
def analyseHill(ekindicts): """make hist of n coefficents for hill fits""" import pylab pylab.rc('text', usetex=True) f = pylab.figure() f.suptitle('n distributions- No linear (case 3)') i = 1 for e in ekindicts: ekindata = ekindicts[e] proteins = ekindata.keys() nvals = [] for prot in proteins: edata = ekindata[prot] E = EkinProject(data=edata) for d in E.datasets: fdata = E.getMetaData(d) if fdata != None and fdata.has_key('model'): if fdata['model'] == 'Modified Hill': n = fdata['n'] if n < 5 and n > -5: nvals.append(n) print 'n=', n ax = f.add_subplot(2, 2, i) n, b, patches = pylab.hist(nvals, 30, histtype='bar', alpha=0.8) std = round(numpy.std(nvals), 2) ave = round(numpy.mean(nvals), 2) ax.set_title(e + ' mean= ' + str(ave) + r' $\sigma$= ' + str(std)) i += 1 f.subplots_adjust(hspace=0.4) f.savefig('n_hist.png') return
def sampleData(self):
    """Create a fresh project filled with built-in sample data and show it."""
    self.E = EkinProject()
    self.E.createSampleData()
    self.plotframe.setProject(self.E)
    self.datasets = sorted(self.E.datasets)
    self.replot()
    self.updateSelector()
    return
def createdb(local=None, server=None, project=None, username=None, norecs=1000): """Create and add some test data""" if local != None: if os.path.exists(local): for i in ['.lock', '.index', '']: try: os.remove(local + i) except: pass DB = PDatabase(local=local) elif server != None: DB = PDatabase(server=server, username=username, password='******', project=project) import string import DNAtool.mutation as mutation choices = ['a', 'b', 'c', 'd'] DB.addField('choice', 'text') DB.addField('stab', 'text') DB.addField('activity', 'text') #DB.addField('ekin', 'General') E = EkinProject() data = E.readDataset('Ekin/test.Ekindat') E.insertDataset(data['data'], 'test', fit=data['fit']) print 'creating dummy data..' j = 0 count = 0 for i in range(norecs): if j > 3: j = 0 c = '' for k in range(10): c += random.choice(string.letters) DB.add(c) DB.data[c].choice = choices[j] DB.data[c].DNASeq = simulate_sequence(300) AAseqs3, AAseqs1 = mutation.translate(DB.data[c].DNASeq) DB.addProtseq(c, AAseqs3[1][5:80], 1) DB.data[c].stab = str(round(random.normalvariate(1, 2), 3)) DB.data[c].activity = str(round(random.normalvariate(30, 4), 3)) #DB.data[c].ekin = E j += 1 count += 1 if count > 3000: print 'saving..' DB.commit() DB.db.cacheMinimize() count = 0 DB.commit() return DB
def showMetaData(self, ekinproj=None, ekindata=None, dataset=None, fdata=None, silent=False): """Print html of fit and metadata for the given dataset""" if fdata == None: if ekinproj == None and ekindata != None: E = EkinProject(data=ekindata) else: E = ekinproj fdata = E.getMetaData(dataset) fsock = None if silent == True: saveout = sys.stdout sys.stdout = fsock = StringIO.StringIO() print '<table id="mytable">' print '<tr>' print '<td class="alt" style="bold" colspan=2>%s</td><tr>' % dataset kys = fdata.keys() ignore = ['error'] for k in sorted(fdata): if k in ignore: continue if fdata[k] == None: continue elif type(fdata[k]) is types.DictType: print '<td>%s</td>' % k print '<td> <table id="mytable">' for n in fdata[k]: val = fdata[k][n][1] print '<td class="alt">%s</td><td>%.2f</td><tr>' % (n, val) print '</table></td><tr>' elif type(fdata[k]) is types.StringType: print '<td class="alt">%s</td><td>%s</td><tr>' % (k, fdata[k]) else: print '<td class="alt">%s</td><td>%.2f</td><tr>' % (k, fdata[k]) print '</table>' if silent == True: sys.stdout = saveout if fsock == None: return '' else: return fsock.getvalue() return
def getCSV(self): """Import a csv file""" self.E = EkinProject() from PEATDB.Ekin.IO import Importer importer = Importer(self, parent_win=self.mainwin) newdata = importer.import_multiple() if newdata == None: return for n in newdata.keys(): self.E.insertDataset(newdata[n], n, update=None) print 'imported %s datasets' % len(self.E.datasets) self.showDatasetSelector() self.showPreview() return
def loadEkin(self):
    """Load the ekin prj"""
    filename = tkFileDialog.askopenfilename(
        defaultextension='.ekinprj',
        initialdir=os.getcwd(),
        filetypes=[("ekinprj", "*.ekinprj"),
                   ("All files", "*.*")],
        parent=self.mainwin)
    # dialog cancel (or a bad path) - nothing to do
    if not os.path.isfile(filename):
        return
    prj = EkinProject()
    prj.openProject(filename)
    self.E = prj
    self.showDatasetSelector()
    self.showPreview()
    return
def loadEkinProj(self, E=None):
    """Load an ekin project file.

    If E (an EkinProject) is supplied it is shown directly; otherwise
    the user is prompted for an .ekinprj file, which is unpickled and
    cached in self.ekinprojects keyed by filename.
    """
    import os
    if E is None:
        import tkFileDialog
        filename = tkFileDialog.askopenfilename(
            defaultextension='.ekinprj',
            filetypes=[("Ekin project", "*.ekinprj"),
                       ("All files", "*.*")],
            parent=self.mainwin)
        # askopenfilename returns '' on cancel, so require a real file
        if filename and os.path.isfile(filename):
            import pickle
            # binary mode so the pickle loads on every platform; close
            # the file even if unpickling fails
            fd = open(filename, 'rb')
            try:
                data = pickle.load(fd)
            finally:
                fd.close()
            E = EkinProject(data=data)
            self.ekinprojects[filename] = E
        else:
            return
    self.currprj = E
    self.showEkinProject(E)
    return
def showEkinPlots(self, ekindata=None, project=None, filename=None, datasets='ALL', title='Ekin plots', outfile=None, imgpath=None, path='', normalise=False, showfitvars=False, plotoption=1, columns=2, legend=False, size=(8, 6), logx=False, logy=False): """Plot ekin datasets from the provided ekin project data""" def downloadLink(): #do csv download link for data displayed print '<table id="mytable" valign=top>' cfname = tempfile.mktemp('.csv', dir=csvpath) E.exportCSV(filename=cfname) p = os.path.split(path)[0] print '<td><a href=%s title="right-click to save as"> download data </a>' \ %os.path.join(p, 'csv', os.path.basename(cfname)) print '</td></tr>' print '</table>' csvpath = os.path.join(os.path.split(imgpath)[0], 'csv') print project if ekindata != None: #convert from ekindata E = EkinProject(data=ekindata, mode='NMR titration') elif project != None: #just passed object E = project elif filename != None: #load project from file E = EkinProject() E.openProject(project) else: return E.checkDatasets() #if outfile is given, we override imgpath if outfile != None and imgpath == None: imgpath = os.path.dirname(outfile) if imgpath != None: tempfile.tempdir = imgpath size = (8, 6) if datasets == 'ALL': #we plot all the datasets datasets = E.datasets if plotoption == 1: if columns > 2: size = (4, 3) imagenames = {} for d in datasets: imgfile = self.maketempImage() name = os.path.basename(imgfile) E.plotDatasets(d, filename=imgfile, size=size, linecolor='r', normalise=normalise, showfitvars=showfitvars, legend=legend, logx=logx, logy=logy) imagenames[d] = name elif plotoption == 3: name = self.maketempImage() E.plotDatasets(datasets, filename=name, plotoption=3, size=size, normalise=normalise, legend=legend, logx=logx, logy=logy) if outfile != None: saveout = sys.stdout fsock = open(outfile, 'w') sys.stdout = fsock self.doheader(title) downloadLink() print '<table id="mytable" align=center cellspacing="0" borderwidth=1>' row = 1 c = 1 datasets.sort() if plotoption == 
1: for d in datasets: if not imagenames.has_key(d): continue if c == 1: print '<tr>' print '<td> <img src=%s/%s align=center></td>' % ( path, imagenames[d]) print '<td class="alt">' #use ekinproject to supply formatted fit and model info here.. self.showMetaData(ekinproj=E, dataset=d) print '</td>' c = c + 1 if c >= columns: print '</tr>' row = row + 1 c = 1 elif plotoption == 3: print '<td> <img src=%s/%s align=center></td>' % ( path, os.path.basename(name)) print '<td class="alt">' #use ekinproject to supply formatted fit and model info here.. x = 1 for d in datasets: if x > 2: n = True x = 0 else: n = False if n == False: print '<td>' self.showMetaData(ekinproj=E, dataset=d) x += 1 print '</td>' c = c + 1 if c >= columns: print '</tr>' row = row + 1 c = 1 print '</table>' if outfile != None: sys.stdout.flush() sys.stdout = saveout fsock.close() return
def run(self, callback=None):
    """Do initial import/fitting run with the current config

    Pipeline driver: imports every file in self.queue, optionally
    averages replicates and regroups the data, then per group either
    fits the configured models (propagating fits through processFits)
    or stores the raw data, saving ekin projects/plots/CSVs along the
    way. callback, if given, receives the percent progress after each
    group. Returns the dict of results keyed by name label.
    """
    self.stop = False
    self.loadModels()
    self.prepareData()
    print 'processing files in queue..'
    self.parseLabels()
    imported = {} #raw data
    results = {} #fitted data
    #print self.queue
    # import each queued file; unreadable files are skipped
    for key in self.queue:
        filename = self.queue[key]
        lines = self.openRaw(filename)
        if lines == None:
            continue
        data = self.doImport(lines)
        imported[key] = data
    #rebuild dict into a nested structure if it's flat (i.e. from single files)
    '''from Data import NestedData
    D = NestedData(imported)
    D.buildNestedStructure([0,2])
    D.show()
    imported = D.data
    self.namelabels = None'''
    #try to average replicates here before we process
    if self.replicates == 1:
        if self.namelabels != None:
            imported = Utilities.addReplicates(imported, self.namelabels)
        else:
            print 'no replicates detected from labels'
    #re-arrange the imported dict if we want to group our output per field
    if self.groupbyfields == 1:
        imported = Utilities.arrangeDictbySecondaryKey(
            imported, self.namelabels)
    total = len(imported)
    #print imported
    #print self.namelabels
    c = 0.0  # progress counter for the callback
    for key in imported:
        # allow a gui to cancel mid-run by setting self.stop
        if self.stop == True:
            print 'cancelled'
            return
        #set filename
        fname = os.path.basename(key)
        fname = os.path.join(self.workingdir, fname)
        data = imported[key]
        if self.function1 != '':
            data = self.doProcessingStep(data, fname)
        # map the file key to a display label where one was parsed
        if self.namelabels == None or not self.namelabels.has_key(key):
            namelabel = key
        else:
            namelabel = self.namelabels[key]
        #print namelabel, key
        #print data
        #if we have models to fit this means we might need to propagate fit data
        if self.model1 != '':
            Em = EkinProject()
            #grouping by file labels handled here
            if self.groupbyname == 1:
                #we don't pass the last model if it has to be
                #reserved for a final round of fitting from the files dict
                models = self.models[:-1]
                variables = self.variables[:-1]
                E, fits = self.processFits(rawdata=data, Em=Em,
                                           models=models,
                                           variables=variables)
            else:
                E, fits = self.processFits(rawdata=data, Em=Em)
            results[namelabel] = fits
            #print E.datasets, namelabel
        else:
            #if no fitting we just put the data in ekin
            Em = Utilities.getEkinProject(data)
            results[namelabel] = data
        Em.saveProject(fname)
        Em.exportDatasets(fname, append=True)
        if self.model1 != '':
            self.saveFitstoCSV(Em, fname)
        if self.saveplots == 1:
            self.saveEkinPlotstoImages(Em, fname)
        c += 1.0
        if callback != None:
            callback(c / total * 100)
    #if grouped by file names then we process that here from results
    if self.groupbyname == 1:
        results = Utilities.extractSecondaryKeysFromDict(results)
        Em = EkinProject()
        #print results
        # final fitting round over the regrouped per-file results
        E, fits = self.processFits(rawdata=results, Em=Em)
        fname = os.path.join(self.workingdir, 'final')
        Em.saveProject(os.path.join(self.workingdir, fname))
        Em.exportDatasets(os.path.join(self.workingdir, fname))
        if self.model1 != '':
            self.saveFitstoCSV(Em, fname)
        self.saveEkinPlotstoImages(Em, fname)
    print 'processing done'
    print 'results saved to %s' % self.workingdir
    self.results = results
    return results
def main(): """Run some analysis""" from optparse import OptionParser parser = OptionParser() app = NMRTitration() DB=None; E=None parser.add_option("-f", "--file", dest="file", help="Open a local db") parser.add_option("-e", "--ekinprj", dest="ekinprj", help="Open an ekin project") parser.add_option("-s", "--server", dest="server", help="field") parser.add_option("-t", "--analysis", dest="analysis", action='store_true', help="titr db analysis", default=False) parser.add_option("-r", "--refit", dest="refit", action='store_true', help="refit specific ekin data", default=False) parser.add_option("-u", "--getexperrs", dest="getexperrs", action='store_true', help="get exp uncertainties", default=False) parser.add_option("-m", "--addmeta", dest="addmeta", action='store_true', help="add meta data for NMR", default=False) parser.add_option("-p", "--protein", dest="protein", help="protein") parser.add_option("-c", "--col", dest="col", help="field") parser.add_option("-a", "--atom", dest="atom", help="atom") parser.add_option("-x", "--export", dest="export", action='store_true', help="export db", default=False) parser.add_option("-b", "--benchmark", dest="benchmark", action='store_true', help="benchmark some stuff", default=False) parser.add_option("-g", "--gui", dest="gui", action='store_true', help="start gui app", default=False) opts, remainder = parser.parse_args() if opts.file != None and os.path.exists(opts.file): app.loadDB(opts.file) elif opts.server != None: DB = PDatabase(server='localhost', username='******', password='******', project='titration_db', port=8080) if opts.gui == True: app.main() app.mainwin.mainloop() return yuncerts = {'H':0.03,'N':0.1,'C':0.2} try: yuncert=yuncerts[opts.atom] except: yuncert=None if opts.ekinprj != None: E = EkinProject() E.openProject(opts.ekinprj) #some tit db analysis if opts.analysis == True and opts.server != None: complete = ['HEWL', 'Bovine Beta-Lactoglobulin', 'Plastocyanin (Anabaena variabilis)', 'Plastocyanin 
(Phormidium)', 'Glutaredoxin', 'Protein G B1','Xylanase (Bacillus subtilus)'] if opts.col == None: print 'provide a column' else: app.analyseTitDB(DB, opts.col)#, complete) #app.addpKaTables(DB, complete) elif opts.benchmark == True: app.benchmarkExpErr(DB) elif opts.col != None or E != None: app.titDBUtils(DB, opts.col, opts.protein, a=opts.atom, E=E, refit=opts.refit, addmeta=opts.addmeta, getexperrs=opts.getexperrs, yuncert=yuncert) elif opts.export == True: app.exportAll(DB, col=opts.col)
def importOldProj(datadir, local=None, server=None, project=None,
                  username=None):
    """Import old peat projects

    Migrates an old-format PEAT_DB project at datadir into a new
    PDatabase (local file or server), copying meta/userfields, every
    protein record (ekin-typed columns are wrapped in EkinProject
    objects), then dropping 'File' columns which cannot be migrated.
    Commits and closes the new db before returning.
    """
    import PEAT_DB.Database as peatDB
    from PEAT_DB.PEAT_dict import PEAT_dict, sub_dict
    import copy
    if local != None:
        newDB = PDatabase(local=local)
    elif server != None:
        newDB = PDatabase(server=server, username=username, port=8080,
                          password='******', project=project)
    print newDB
    PT = peatDB.Database(datadir, Tk=False)
    oldDB = PT.DB
    print 'got old peat_db with %s proteins' % len(PT.proteins)
    print PT.DB.keys()
    #import meta stuff like userfields, table
    for p in newDB.meta.special:
        if not p in PT.DB.keys():
            continue
        print 'adding', p
        # deep-copy so the new db does not share mutable state with the old
        for k in PT.DB[p]:
            newDB.meta[p][k] = copy.deepcopy(PT.DB[p][k])
    # flag the persistent meta object as changed for ZODB
    newDB.meta._p_changed = 1
    for p in PT.proteins:
        # special keys were already handled above
        if p in newDB.meta.special:
            continue
        name = oldDB[p]['Name']
        rec = PEATRecord(name=name)
        for col in oldDB[p].keys():
            cdata = oldDB[p][col]
            recdata = {}
            if col == 'name':
                cdata = oldDB[p]['Name']
            # ekin-typed user fields become EkinProject objects;
            # empty projects are skipped entirely
            if oldDB['userfields'].has_key(col) and oldDB['userfields'][col][
                    'field_type'] in ekintypes:
                E = EkinProject(data=cdata)
                E.length = len(E.datasets)
                if len(E.datasets) == 0:
                    continue
                cdata = E
            if type(cdata) == sub_dict:
                for k in cdata.keys():
                    recdata[k] = copy.deepcopy(cdata[k])
            else:
                recdata = cdata
            if cdata != '' and cdata != None:
                rec.addField(col, data=recdata)
        newDB.add(p, rec)
    print newDB.meta.userfields
    #remove any file cols, too hard to import
    for m in newDB.meta.userfields.keys()[:]:
        if newDB.meta.userfields[m]['field_type'] == 'File':
            newDB.deleteField(m)
    newDB.commit(user='******', note='import')
    newDB.close()
    print 'import done'
    return
def main():
    """Run some analysis

    Command-line entry point for Van't Hoff / CD analysis: loads an
    ekin project (or simulates CD data) and applies the chosen fitting
    method to the selected dataset.
    """
    from optparse import OptionParser
    parser = OptionParser()
    app = VantHoff()
    parser.add_option("-f", "--file", dest="file",
                      help="Open a local db")
    parser.add_option("-e", "--ekinprj", dest="ekinprj",
                      help="Open an ekin project")
    parser.add_option("-d", "--dataset", dest="dataset",
                      help="Dataset name")
    parser.add_option(
        "-m", "--method", dest="method", default=1, type='int',
        help="Choose method - 1: Van't Hoff plot, 2: Schellman, "
             "3: Differential fit, 4: Breslauer")
    parser.add_option("-b", "--benchmark", dest="benchmark",
                      action='store_true', help="Test", default=False)
    parser.add_option("-a", "--all", dest="all", action='store_true',
                      help="Do all datasets in ekinprj", default=False)
    parser.add_option("-w", "--width", dest="width", default=50,
                      type='int',
                      help="Width of transition region to fit for method 1")
    parser.add_option(
        "-s", "--smoothing", dest="smoothing", default=5, type='int',
        help="Degree of smoothing to apply in method 2 (default 5)")
    parser.add_option("-i", "--invert", dest="invert",
                      action='store_true', help="Invert raw data",
                      default=False)
    opts, remainder = parser.parse_args()
    if opts.file is not None and os.path.exists(opts.file):
        app.loadDB(opts.file)
    if opts.ekinprj is not None and os.path.exists(opts.ekinprj):
        E = EkinProject()
        E.openProject(opts.ekinprj)
        d = opts.dataset
    else:
        # no project supplied - fall back to simulated CD data
        x, y = app.simulateCD()
        E = EkinProject()
        d = 'cdtest'
        E.insertDataset(xydata=[x, y], newname=d)
    if opts.all == True:
        # NOTE(review): original code called self.doAll(E, methods),
        # but neither 'self' nor 'methods' exists in this module-level
        # function (NameError). Assumed app.doAll over the selected
        # method - confirm intended signature.
        app.doAll(E, opts.method)
    if opts.benchmark == True:
        app.benchmark(E, d, method=opts.method)
        #app.plotCorrelation()
    else:
        if opts.method == 1:
            app.fitVantHoff(E, d, transwidth=opts.width,
                            invert=opts.invert, figname=d)
        elif opts.method == 2:
            app.fitElwellSchellman(E, d, transwidth=opts.width,
                                   invert=opts.invert, figname=d)
        elif opts.method == 3:
            app.fitDifferentialCurve(E, d, smooth=opts.smoothing,
                                     invert=opts.invert, figname=d)
        elif opts.method == 4:
            app.breslauerMethod(E, d, invert=opts.invert)