Exemple #1
0
def analyseHill(ekindicts):
    """make hist of n coefficents for hill fits"""

    import pylab
    pylab.rc('text', usetex=True)
    f=pylab.figure()
    f.suptitle('n distributions- No linear (case 3)')
    i=1
    for e in ekindicts:
        ekindata = ekindicts[e]
        proteins = ekindata.keys()
        nvals = []
        for prot in proteins:
            edata = ekindata[prot]
            E = EkinProject(data=edata)
            for d in E.datasets:
                fdata = E.getMetaData(d)
                if fdata != None and fdata.has_key('model'):
                    if fdata['model'] == 'Modified Hill':
                        n=fdata['n']
                        if n<5 and n>-5:
                            nvals.append(n)
                            print 'n=', n

        ax = f.add_subplot(2,2,i)
        n, b, patches = pylab.hist(nvals, 30, histtype='bar', alpha=0.8)
        std = round(numpy.std(nvals), 2)
        ave = round(numpy.mean(nvals), 2)
        ax.set_title(e +' mean= '+str(ave)+r' $\sigma$= '+str(std))
        i+=1
    f.subplots_adjust(hspace=0.4)
    f.savefig('n_hist.png')
    return
Exemple #2
0
def getEkinProject(data, xerror=None, yerror=None, sep='__'):
    """Build an EkinProject from a dict of labelled x/y data.

    data: dict whose values take one of these forms
         {label:([x],[y]),..} or
         {label:([x],[y],[xerr],[yerr]),..} or
         {label:{sublabel: xy,..},..} - one dataset per sublabel, named
         '<label><sep><sublabel>', with xy handed to EkinDataset unchanged.
    xerror, yerror: optional constant error applied to every point of an
         entry that carries no explicit error lists.
    sep: separator used when naming nested datasets.

    Returns the populated EkinProject (created with mode='General').
    """

    E = EkinProject(mode='General')
    for d in data:
        entry = data[d]
        # isinstance also routes dict subclasses (OrderedDict, defaultdict)
        # to the nested branch; the former exact type check sent them to
        # the tuple unpacking below.
        if isinstance(entry, dict):
            for lbl in entry:
                name = str(d)+sep+str(lbl)
                E.insertDataset(EkinDataset(xy=entry[lbl]), name)
            continue
        if len(entry) == 4:
            x, y, xerrs, yerrs = entry
        else:
            x, y = entry
            xerrs = []
            yerrs = []
            if xerror is not None:
                xerrs = [xerror for _ in x]
            if yerror is not None:
                yerrs = [yerror for _ in y]
        ek = EkinDataset(xy=[x, y], xerrs=xerrs, yerrs=yerrs)
        E.insertDataset(ek, d)
    return E
Exemple #3
0
def analyseHill(ekindicts):
    """make hist of n coefficents for hill fits"""

    import pylab
    pylab.rc('text', usetex=True)
    f = pylab.figure()
    f.suptitle('n distributions- No linear (case 3)')
    i = 1
    for e in ekindicts:
        ekindata = ekindicts[e]
        proteins = ekindata.keys()
        nvals = []
        for prot in proteins:
            edata = ekindata[prot]
            E = EkinProject(data=edata)
            for d in E.datasets:
                fdata = E.getMetaData(d)
                if fdata != None and fdata.has_key('model'):
                    if fdata['model'] == 'Modified Hill':
                        n = fdata['n']
                        if n < 5 and n > -5:
                            nvals.append(n)
                            print 'n=', n

        ax = f.add_subplot(2, 2, i)
        n, b, patches = pylab.hist(nvals, 30, histtype='bar', alpha=0.8)
        std = round(numpy.std(nvals), 2)
        ave = round(numpy.mean(nvals), 2)
        ax.set_title(e + ' mean= ' + str(ave) + r' $\sigma$= ' + str(std))
        i += 1
    f.subplots_adjust(hspace=0.4)
    f.savefig('n_hist.png')
    return
Exemple #4
0
def fitPropagationTest():
    """Tests the propagation of fit data direct from a dict - no importing"""

    start = time.time()
    p = Pipeline()
    conf = {
        "model1": "linear",
        "model2": "Michaelis-Menten",
        "model3": "sigmoid",
        "variable1": "a",
        "variable2": "Km",
        "variable3": "tm",  #'xerror':.1,'yerror':0.05,
    }
    p.createConfig("temp.conf", **conf)
    data = Utilities.createNestedData()
    Em = EkinProject()
    E, fits = p.processFits(data, Em=Em)
    print "final fits", fits
    fname = os.path.join(p.workingdir, "results")
    Em.saveProject(fname)
    p.saveEkinPlotstoImages(Em, fname)
    print "completed fit propagation test"
    print "took %s seconds" % round((time.time() - start), 2)
    print "-------------------"
    return
Exemple #5
0
def fitPropagationTest():
    """Tests the propagation of fit data direct from a dict - no importing"""

    start = time.time()
    p = Pipeline()
    conf = {
        'model1': 'linear',
        'model2': 'Michaelis-Menten',
        'model3': 'sigmoid',
        'variable1': 'a',
        'variable2': 'Km',
        'variable3': 'tm',  #'xerror':.1,'yerror':0.05,
    }
    p.createConfig('temp.conf', **conf)
    data = Utilities.createNestedData()
    Em = EkinProject()
    E, fits = p.processFits(data, Em=Em)
    print 'final fits', fits
    fname = os.path.join(p.workingdir, 'results')
    Em.saveProject(fname)
    p.saveEkinPlotstoImages(Em, fname)
    print 'completed fit propagation test'
    print 'took %s seconds' % round((time.time() - start), 2)
    print '-------------------'
    return
Exemple #6
0
def createdb(local=None,
             server=None,
             project=None,
             username=None,
             norecs=1000):
    """Create and add some test data"""
    if local != None:
        if os.path.exists(local):
            for i in ['.lock', '.index', '']:
                try:
                    os.remove(local + i)
                except:
                    pass
        DB = PDatabase(local=local)
    elif server != None:
        DB = PDatabase(server=server,
                       username=username,
                       password='******',
                       project=project)

    import string
    import DNAtool.mutation as mutation

    choices = ['a', 'b', 'c', 'd']
    DB.addField('choice', 'text')
    DB.addField('stab', 'text')
    DB.addField('activity', 'text')
    #DB.addField('ekin', 'General')
    E = EkinProject()
    data = E.readDataset('Ekin/test.Ekindat')
    E.insertDataset(data['data'], 'test', fit=data['fit'])
    print 'creating dummy data..'
    j = 0
    count = 0
    for i in range(norecs):
        if j > 3: j = 0
        c = ''
        for k in range(10):
            c += random.choice(string.letters)
        DB.add(c)
        DB.data[c].choice = choices[j]
        DB.data[c].DNASeq = simulate_sequence(300)
        AAseqs3, AAseqs1 = mutation.translate(DB.data[c].DNASeq)
        DB.addProtseq(c, AAseqs3[1][5:80], 1)
        DB.data[c].stab = str(round(random.normalvariate(1, 2), 3))
        DB.data[c].activity = str(round(random.normalvariate(30, 4), 3))
        #DB.data[c].ekin = E
        j += 1
        count += 1
        if count > 3000:
            print 'saving..'
            DB.commit()
            DB.db.cacheMinimize()
            count = 0

    DB.commit()
    return DB
Exemple #7
0
    def showMetaData(self,
                     ekinproj=None,
                     ekindata=None,
                     dataset=None,
                     fdata=None,
                     silent=False):
        """Render the fit and meta data of a dataset as an html table.

        ekinproj: EkinProject holding the dataset (used when fdata is None).
        ekindata: raw project data, wrapped in an EkinProject when no
            ekinproj is given.
        dataset: dataset name, shown in the table header.
        fdata: metadata dict; fetched with getMetaData(dataset) when None.
        silent: if true, nothing is printed and the html is returned.

        Keys are listed in sorted order. The 'error' key and None values
        are skipped, dict values become a nested table of their entries'
        second element, strings are shown as is and anything else is
        formatted with two decimals.

        Returns the html when silent, otherwise '' (the html is printed).
        """

        if fdata is None:
            if ekinproj is None and ekindata is not None:
                E = EkinProject(data=ekindata)
            else:
                E = ekinproj
            fdata = E.getMetaData(dataset)
        if fdata is None:
            # no metadata stored for this dataset: emit only the header
            fdata = {}

        # Rows are collected first instead of swapping sys.stdout: the old
        # redirect was never undone when a value failed to format.
        rows = ['<table id="mytable">',
                '<tr>',
                '<td class="alt" style="bold" colspan=2>%s</td><tr>' % dataset]
        ignore = ['error']
        for k in sorted(fdata):
            value = fdata[k]
            if k in ignore or value is None:
                continue
            if isinstance(value, dict):
                rows.append('<td>%s</td>' % k)
                rows.append('<td> <table id="mytable">')
                for n in value:
                    val = value[n][1]
                    rows.append('<td class="alt">%s</td><td>%.2f</td><tr>' % (n, val))
                rows.append('</table></td><tr>')
            elif isinstance(value, (str, type(u''))):
                # unicode strings count as text too; they used to reach
                # the numeric branch and fail
                rows.append('<td class="alt">%s</td><td>%s</td><tr>' % (k, value))
            else:
                rows.append('<td class="alt">%s</td><td>%.2f</td><tr>' % (k, value))
        rows.append('</table>')

        html = '\n'.join(rows)
        if silent:
            return html + '\n'
        print(html)
        return ''
Exemple #8
0
 def getCSV(self):
     """Import a csv file"""
     self.E = EkinProject()
     from PEATDB.Ekin.IO import Importer
     importer = Importer(self, parent_win=self.mainwin)
     newdata = importer.import_multiple()
     if newdata == None: return
     for n in newdata.keys():
         self.E.insertDataset(newdata[n], n, update=None)
     print 'imported %s datasets' % len(self.E.datasets)
     self.showDatasetSelector()
     self.showPreview()
     return
Exemple #9
0
def createdb(local=None, server=None, project=None, username=None, norecs=1000):
    """Create and add some test data"""
    if local != None:
        if os.path.exists(local):
            for i in ['.lock','.index','']:
                try:
                    os.remove(local+i)
                except:
                    pass
        DB = PDatabase(local=local)
    elif server!=None:
        DB = PDatabase(server=server, username=username,
                    password='******', project=project)

    import string
    import DNAtool.mutation as mutation
    
    choices = ['a','b','c','d']
    DB.addField('choice', 'text')
    DB.addField('stab', 'text')
    DB.addField('activity', 'text')
    #DB.addField('ekin', 'General')
    E = EkinProject()
    data=E.readDataset('Ekin/test.Ekindat')
    E.insertDataset(data['data'], 'test', fit=data['fit'])
    print 'creating dummy data..'
    j=0
    count=0
    for i in range(norecs):
        if j>3: j=0
        c=''
        for k in range(10):
            c += random.choice(string.letters)
        DB.add(c)
        DB.data[c].choice = choices[j]
        DB.data[c].DNASeq = simulate_sequence(300)
        AAseqs3,AAseqs1 = mutation.translate(DB.data[c].DNASeq)
        DB.addProtseq(c, AAseqs3[1][5:80], 1)
        DB.data[c].stab = str(round(random.normalvariate(1,2),3))
        DB.data[c].activity = str(round(random.normalvariate(30,4),3))
        #DB.data[c].ekin = E
        j+=1
        count+=1
        if count>3000:
            print 'saving..'
            DB.commit()
            DB.db.cacheMinimize()
            count=0

    DB.commit()
    return DB
Exemple #10
0
    def showMetaData(self, ekinproj=None, ekindata=None, dataset=None, fdata=None, silent=False):
        """Produce an html table of the fit and meta data of a dataset.

        ekinproj / ekindata: where to look the metadata up when ``fdata``
            is None; ekindata is wrapped in an EkinProject if no project
            is passed.
        dataset: name of the dataset, printed as the table heading.
        fdata: metadata dict to render directly.
        silent: return the html instead of printing it.

        Entries are written in sorted key order, skipping 'error' and
        None values. Dict values are rendered as a nested table using
        element [1] of each item, strings verbatim, other values as
        '%.2f'.

        Returns the html text when silent is true, else ''.
        """

        if fdata is None:
            if ekinproj is None and ekindata is not None:
                E = EkinProject(data=ekindata)
            else:
                E = ekinproj
            fdata = E.getMetaData(dataset)
        if fdata is None:
            # nothing stored for the dataset - only the heading is shown
            fdata = {}

        # Build the text in memory. Redirecting sys.stdout, as this used
        # to do, left the redirect in place whenever formatting raised.
        out = []
        emit = out.append
        emit('<table id="mytable">')
        emit('<tr>')
        emit('<td class="alt" style="bold" colspan=2>%s</td><tr>' % dataset)
        ignore = ['error']
        text_types = (str, type(u''))   # plain and unicode strings
        for k in sorted(fdata):
            if k in ignore:
                continue
            item = fdata[k]
            if item is None:
                continue
            if isinstance(item, dict):
                emit('<td>%s</td>' %k)
                emit('<td> <table id="mytable">')
                for n in item:
                    emit('<td class="alt">%s</td><td>%.2f</td><tr>' %(n, item[n][1]))
                emit('</table></td><tr>')
            elif isinstance(item, text_types):
                emit('<td class="alt">%s</td><td>%s</td><tr>' %(k, item))
            else:
                emit('<td class="alt">%s</td><td>%.2f</td><tr>' %(k, item))
        emit('</table>')

        html = '\n'.join(out)
        if silent:
            return html + '\n'
        print(html)
        return ''
Exemple #11
0
 def sampleData(self):
     """Switch to a new project filled by createSampleData() and redraw."""
     project = EkinProject()
     E = self.E = project
     E.createSampleData()
     self.plotframe.setProject(E)
     names = sorted(self.E.datasets)
     self.datasets = names
     self.replot()
     self.updateSelector()
     return
Exemple #12
0
    def loadEkin(self):
        """Ask for an .ekinprj file and open it as the current project.

        The current project is left untouched when the dialog is
        cancelled or the chosen path is not an existing file. After a
        successful load the dataset selector and preview are refreshed.
        """

        filename = tkFileDialog.askopenfilename(defaultextension='.ekinprj',
                                                initialdir=os.getcwd(),
                                                filetypes=[
                                                    ("ekinprj", "*.ekinprj"),
                                                    ("All files", "*.*")
                                                ],
                                                parent=self.mainwin)
        # A cancelled dialog yields '' or, with some Tk builds, an empty
        # tuple that os.path.isfile cannot handle - test emptiness first.
        if not filename or not os.path.isfile(filename):
            return
        self.E = EkinProject()
        self.E.openProject(filename)
        self.showDatasetSelector()
        self.showPreview()
        return
Exemple #13
0
def getEkinProject(data, xerror=None, yerror=None, sep='__'):
    """Get an ekin project from a dict of the form
         {label:([x],[y]),..} or
         {label:([x],[y],[xerr],[yerr]),..} or
         {label:{sublabel: xy,..},..}

    Nested dicts produce one dataset per sublabel, named
    str(label) + sep + str(sublabel), with xy passed straight to
    EkinDataset. For two-element entries, xerror / yerror (if given) are
    repeated for every point as constant errors.

    Returns the EkinProject, created with mode='General'.
    """

    E = EkinProject(mode='General')
    for d in data:
        value = data[d]
        # Accept any dict subclass here. With an exact type comparison an
        # OrderedDict or defaultdict ended up in the tuple branch.
        nested = isinstance(value, dict)
        if nested:
            for lbl in value:
                name = str(d) + sep + str(lbl)
                ek = EkinDataset(xy=value[lbl])
                E.insertDataset(ek, name)
        else:
            if len(value) == 4:
                x, y, xerrs, yerrs = value
            else:
                x, y = value
                xerrs = [xerror for _ in x] if xerror is not None else []
                yerrs = [yerror for _ in y] if yerror is not None else []
            ek = EkinDataset(xy=[x, y], xerrs=xerrs, yerrs=yerrs)
            E.insertDataset(ek, d)
    return E
Exemple #14
0
 def getCSV(self):
     """Import a csv file"""
     self.E = EkinProject()
     from PEATDB.Ekin.IO import Importer
     importer = Importer(self,parent_win=self.mainwin)
     newdata = importer.import_multiple()
     if newdata == None: return
     for n in newdata.keys():
         self.E.insertDataset(newdata[n], n, update=None)
     print 'imported %s datasets' %len(self.E.datasets)
     self.showDatasetSelector()
     self.showPreview()
     return
Exemple #15
0
    def loadEkin(self):
        """Load an ekin project chosen through a file dialog.

        Returns without changing anything if no file was picked or the
        path does not point to a file; otherwise the project replaces
        self.E and the selector and preview are updated.
        """

        filename=tkFileDialog.askopenfilename(defaultextension='.ekinprj',
                                                  initialdir=os.getcwd(),
                                                  filetypes=[("ekinprj","*.ekinprj"),
                                                             ("All files","*.*")],
                                                  parent=self.mainwin)
        # Cancelling gives an empty string or, on some Tk versions, an
        # empty tuple; the latter makes os.path.isfile raise.
        if not filename:
            return
        if not os.path.isfile(filename):
            return
        self.E = EkinProject()
        self.E.openProject(filename)
        self.showDatasetSelector()
        self.showPreview()
        return
Exemple #16
0
def main():
    """Command line entry point for the Van't Hoff analysis.

    Opens the ekin project given with -e (or simulates CD data as dataset
    'cdtest' when none is given), then either benchmarks or runs the
    method selected with -m on dataset -d. With -a all datasets of the
    project are processed first.
    """

    from optparse import OptionParser
    parser = OptionParser()
    app = VantHoff()
    parser.add_option("-f", "--file", dest="file",
                        help="Open a local db")
    parser.add_option("-e", "--ekinprj", dest="ekinprj",
                        help="Open an ekin project")
    parser.add_option("-d", "--dataset", dest="dataset",
                        help="Dataset name")
    parser.add_option("-m", "--method", dest="method", default=1, type='int',
        help="Choose method - 1: Van't Hoff plot, 2: Schellman, 3: Differential fit, 4: Breslauer")
    parser.add_option("-b", "--benchmark", dest="benchmark", action='store_true',
                       help="Test", default=False)
    parser.add_option("-a", "--all", dest="all", action='store_true',
                       help="Do all datasets in ekinprj", default=False)
    parser.add_option("-w", "--width", dest="width", default=50, type='int',
                       help="Width of transition region to fit for method 1")
    parser.add_option("-s", "--smoothing", dest="smoothing", default=5, type='int',
                       help="Degree of smoothing to apply in method 2 (default 5)")
    parser.add_option("-i", "--invert", dest="invert", action='store_true',
                       help="Invert raw data", default=False)

    opts, remainder = parser.parse_args()

    if opts.file is not None and os.path.exists(opts.file):
        app.loadDB(opts.file)
    if opts.ekinprj is not None and os.path.exists(opts.ekinprj):
        E = EkinProject()
        E.openProject(opts.ekinprj)
        d = opts.dataset
    else:
        # no project given: fall back to simulated data
        x,y = app.simulateCD()
        E = EkinProject()
        d='cdtest'
        E.insertDataset(xydata=[x,y], newname=d)
    if opts.all == True:
        # This used to read self.doAll(E, methods); neither name exists in
        # a module-level function. The method list uses the 'method N'
        # labels that the GUI passes to doAll.
        app.doAll(E, methods=['method %d' % opts.method])
    if opts.benchmark == True:
        app.benchmark(E,d,method=opts.method)

        #app.plotCorrelation()
    else:
        if opts.method == 1:
            app.fitVantHoff(E,d,transwidth=opts.width,invert=opts.invert,figname=d)
        elif opts.method == 2:
            app.fitElwellSchellman(E,d,transwidth=opts.width,invert=opts.invert,figname=d)
        elif opts.method == 3:
            app.fitDifferentialCurve(E,d,smooth=opts.smoothing,invert=opts.invert,figname=d)
        elif opts.method == 4:
            app.breslauerMethod(E,d,invert=opts.invert)
Exemple #17
0
 def loadEkinProj(self, E=None):
     """Show an ekin project, loading it from a file when none is given.

     E: the EkinProject to display. When None, a file dialog asks for an
        .ekinprj file, which is unpickled, wrapped in an EkinProject and
        registered in self.ekinprojects under its filename.

     Nothing changes if the dialog is cancelled or the chosen path is
     not a file.
     """
     import os
     if E is None:
         import pickle
         import tkFileDialog
         filename=tkFileDialog.askopenfilename(defaultextension='.ekinprj',
                                               filetypes=[("Ekin project","*.ekinprj"),
                                                          ("All files","*.*")],
                                               parent=self.mainwin)
         # A cancelled dialog returns an empty value, not None. The old
         # `!= None` test let it through and a None project was shown.
         if not filename or not os.path.isfile(filename):
             return
         # NOTE: unpickling runs code embedded in the file - only open
         # project files from trusted sources.
         with open(filename) as fd:
             data = pickle.load(fd)
         E = EkinProject(data=data)
         self.ekinprojects[filename] = E
     self.currprj = E
     self.showEkinProject(E)
     return
Exemple #18
0
class VantHoff(Plugin):
    """A plugin to do Van't Hoff Analysis of temperature melting curves"""
    """Author: Damien Farrell"""

    # Capability flags. 'uses_sidepane' is checked in _doFrame to embed the
    # GUI in a child frame of the parent instead of a separate Toplevel.
    capabilities = ['gui','uses_sidepane']
    # presumably modules the plugin host checks before loading - TODO confirm
    requires = ['pylab','numpy']
    # Used as the window title when the plugin opens its own Toplevel.
    menuentry = "Van't Hoff Analysis"

    # Maps public method names to button labels; _doFrame creates one
    # button for every method listed here.
    gui_methods = {'getCSV': 'Import CSV',
                    'loadEkin':'Load Ekin Proj',
                    'saveEkin':'Save Ekin Proj',
                    'doAnalysis':"Do Analysis",
                    #'benchmark': 'Do Benchmark',
                    'close':'Close' }
    about = "A plugin to do Van't Hoff Analysis of temperature melting curves"
    # Gas constant; fitVantHoff divides by 1000 to report values in kJ.
    R = 8.3144

    def __init__(self):
        """Start in the user's home directory with no project loaded."""
        home = os.path.expanduser("~")
        self.path = home
        self.pltConfig()
        self.E = None

    def main(self, parent):
        if parent==None:
            return
        self.parent = parent
        self.DB = parent.DB
        self.xydata = None
        self._doFrame()
        return

    def _doFrame(self):
        """Build the plugin window: action buttons plus analysis options.

        Uses a child frame of the parent when 'uses_sidepane' is among the
        capabilities, otherwise a separate Toplevel window. Creates the
        doall, conversions and methods check button groups and the
        smoothing (sm) and transition width (tw) entry fields.
        """
        if 'uses_sidepane' not in self.capabilities:
            self.mainwin=Toplevel()
            self.mainwin.title(self.menuentry)
            self.mainwin.geometry('800x600+200+100')
        else:
            self.mainwin = self.parent.createChildFrame(width=600)

        available = self._getmethods()
        panel = Frame(self.mainwin)
        panel.pack(side=LEFT,fill=BOTH)
        # only methods that have a label in gui_methods get a button
        wanted = [m for m in available if m[0] in self.gui_methods.keys()]
        self._createButtons(wanted, panel)
        self.showDatasetSelector()

        def checkgroup(orient, **extra):
            # shared settings of the three check button groups
            return Pmw.RadioSelect(panel,
                    buttontype = 'checkbutton',
                    orient = orient,
                    labelpos = 'w',
                    **extra)

        def entryfield(value, label):
            return Pmw.EntryField(panel,
                    labelpos = 'w',
                    value = value,
                    label_text = label)

        self.doall = checkgroup('horizontal')
        self.doall.add('Process All')
        self.doall.pack()

        self.conversions = checkgroup('horizontal')
        self.conversions.add('Convert Celsius-Kelvin')
        self.conversions.pack()

        self.methods = checkgroup('vertical', label_text = 'Methods:')
        for label in ('method 1','method 2','method 3', 'method 4'):
            self.methods.add(label)
        self.methods.invoke('method 1')
        self.methods.pack()

        self.sm = entryfield(5, 'Smoothing:')
        self.sm.pack()
        self.tw = entryfield(60, 'Width of transition:')
        self.tw.pack()
        return

    def _getmethods(self):
        """Get a list of all available public methods"""
        import inspect
        mems = inspect.getmembers(self, inspect.ismethod)
        methods = [m for m in mems if not m[0].startswith('_')]
        return methods

    def _createButtons(self, methods, fr=None):
        """Add one button to fr for each entry of methods.

            Each entry is a (method name, bound method) pair: the label is
            looked up in gui_methods by name and the bound method becomes
            the button's command."""
        for entry in methods:
            label = self.gui_methods[entry[0]]
            button = Button(fr, text=label, command=entry[1])
            button.pack(side=TOP,fill=BOTH)
        return

    def close(self):
        self.mainwin.destroy()
        self.plotframe = None
        return

    def showDatasetSelector(self):
        """(Re)build the dataset drop-down from the current project.

        Any existing menu is destroyed first; choosing an entry calls
        showPreview. Does nothing while no project is loaded.
        """
        if self.E==None:
            return
        if hasattr(self, 'dmenu'):
            self.dmenu.destroy()
        names = sorted(self.E.datasets)
        menu = Pmw.OptionMenu(self.mainwin,
                labelpos = 'w',
                label_text = 'Dataset:',
                items = names,
                command=self.showPreview,
                menubutton_width = 8)
        self.dmenu = menu
        menu.pack(side=TOP,fill=BOTH)
        return

    def showPreview(self,event=None):
        """Plot the dataset currently selected in the drop-down.

        The plot panel is created on first use (or after close() reset
        it). ``event`` is accepted so the method can serve as a menu
        callback; it is not used.
        """
        if self.E == None:
            return
        if getattr(self, 'plotframe', None) == None:
            from Ekin.Plotting import PlotPanel
            self.plotframe = PlotPanel(parent=self.mainwin, side=BOTTOM)
        panel = self.plotframe
        panel.setProject(self.E)
        selected = self.dmenu.getcurselection()
        panel.plotCurrent(selected)
        #plt.close(1)
        return

    def getCSV(self):
        """Import a csv file"""
        self.E = EkinProject()
        from PEATDB.Ekin.IO import Importer
        importer = Importer(self,parent_win=self.mainwin)
        newdata = importer.import_multiple()
        if newdata == None: return
        for n in newdata.keys():
            self.E.insertDataset(newdata[n], n, update=None)
        print 'imported %s datasets' %len(self.E.datasets)
        self.showDatasetSelector()
        self.showPreview()
        return

    def loadEkin(self):
        """Load the ekin prj picked in a file dialog.

        The current project is kept if the dialog is cancelled or the
        selection is not an existing file. Otherwise a new EkinProject is
        opened from the file and the selector and preview are refreshed.
        """

        filename=tkFileDialog.askopenfilename(defaultextension='.ekinprj',
                                                  initialdir=os.getcwd(),
                                                  filetypes=[("ekinprj","*.ekinprj"),
                                                             ("All files","*.*")],
                                                  parent=self.mainwin)
        # Guard against the empty value a cancelled dialog returns: some
        # Tk builds hand back an empty tuple, which os.path.isfile rejects.
        if not filename or not os.path.isfile(filename):
            return
        self.E = EkinProject()
        self.E.openProject(filename)
        self.showDatasetSelector()
        self.showPreview()
        return

    def saveEkin(self):
        """save proj"""
        if self.E != None:
            if self.E.filename == None:
                self.E.filename = tkFileDialog.asksaveasfilename(defaultextension='.ekinprj',
                                                          initialdir=os.getcwd(),
                                                          filetypes=[("ekinprj","*.ekinprj"),
                                                                     ("All files","*.*")],
                                                          parent=self.mainwin)

            self.E.saveProject()
            print 'saved ekin proj'
        return

    def doAnalysis(self):
        """Execute from GUI"""
        if self.E == None:
            return
        methods = self.methods.getcurselection()
        if 'Process All' in self.doall.getcurselection():
            self.doAll(methods=methods)
        else:
            if 'method 1' in methods:
                self.fitVantHoff(E=self.E,d=self.dmenu.getcurselection(),
                        transwidth=int(self.tw.getvalue()))
            if 'method 2' in methods:
                self.fitElwellSchellman(E=self.E,d=self.dmenu.getcurselection(),
                                            transwidth=int(self.tw.getvalue()))
            if 'method 3' in methods:
                self.fitDifferentialCurve(E=self.E,d=self.dmenu.getcurselection(),
                                            smooth=int(self.sm.getvalue()))
            if 'method 4' in methods:
                self.breslauerMethod(E=self.E,d=self.dmenu.getcurselection())#,invert=opts.invert)
        return

    def guessMidpoint(self,x,y):
        """Guess the midpoint for the unfolding model.

        Returns the x value whose y lies closest to the middle of the y
        range (min + (max - min)/2). The first such point wins on ties
        and points beyond the shorter of x and y are ignored. Returns 0
        if x is empty; raises ValueError if y is empty.
        """
        lowest = min(y)
        midy = lowest + (max(y) - lowest)/2.0
        points = list(zip(x, y))
        if not points:
            return 0
        # No cap on the accepted distance: the old 1e4 starting threshold
        # made signals with a larger amplitude fall through to 0.
        return min(points, key=lambda p: abs(p[1] - midy))[0]

    def transformCD(self,x,y,transwidth=None,ax=None):
        """Transform raw data into fraction unfolded per temp value, by fitting to
            a general unfolding equation that extracts baseline/slopes"""
        #fit baseline slopes and get intercepts
        d50 = self.guessMidpoint(x,y)
        print 'fitting to get baseline slopes and intercepts..'
        print 'midpoint is %s' %d50
        A,X=Fitting.doFit(expdata=zip(x,y),model='Unfolding',noiter=50,silent=True,
                           guess=False,startvalues=[1,1,1,1,1,d50])
        #print X.getResult()
        fity = X.getFitLine(x)
        fd=X.getFitDict()
        if ax!=None:
            p=ax.plot(x,fity,'r',lw=2)
            self.drawParams(ax,fd)
        #we then use slopes and intercepts get frac unfolded at each temp
        mn = fd['bn']; mu = fd['bd'] #slopes
        #if mu>0.01: mu = 0.01
        yn = fd['an']; yu = fd['ad'] #intercepts
        d50 = fd['d50']; m = fd['m']

        t=[]; f=[]
        #print mu, mn
        for T,yo in zip(x,y):
            fu = (yo-(yn+mn*T)) / ((yu+mu*T)-(yn+mn*T))
            #print fu, (yo-(yn+mn*T)), (m), mu, mn
            #if f>0:
            f.append(fu)
            t.append(T)

        #try to take useful transition region of data
        at,af=t,f
        diff=1e5
        if transwidth != None:
            for i in t:
                d=abs(i-d50)
                if d<diff:
                    mid = t.index(i)
                    diff=d
            L=int(mid-transwidth); U=int(mid+transwidth)
            t,f = t[L:U], f[L:U]

        return at,af,t,f

    def fitVantHoff(self, E=None, d=None, xy=None, transwidth=80, invert=False,
                        show=True, figname=None):
        """Derive fraction unfolded, get K and fit to Van't Hoff.
           see http://www.jbc.org/content/277/43/40717.full
           or http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2144003/
        """
        if E != None:
            if not d in E.datasets:
                print 'no such dataset, %s' %d
                print 'available datasets:', E.datasets
                return
            ek = E.getDataset(d)
            x,y = ek.getxySorted()
        elif xy!=None:
            x,y = xy

        if 'Convert Celsius-Kelvin' in self.conversions.getcurselection():
            x = [i+273 for i in x]
        if invert == True:
            y = [max(y)-i for i in y[:]]

        f=plt.figure(figsize=(18,6))
        ax=f.add_subplot(131)
        p=ax.plot(x,y,'o',alpha=0.6)
        ax.set_xlabel('T(K)'); ax.set_ylabel('mdeg')
        ax.set_title('raw data')

        x1,y1,x,y = self.transformCD(x,y,transwidth,ax)
        cw=csv.writer(open('frac_unfolded_'+d+'.csv','w'))
        cw.writerow(['temp','frac'])
        for i in zip(x1,y1):
            cw.writerow(i)

        #derive lnK vs 1/T
        t=[]; k=[]

        for T,fu in zip(x,y):
            if fu>=1 or fu<=0:
                continue
            K = fu/(1-fu)
            klog = math.log(K)
            k.append(klog)
            t.append(1/T)

        if len(t)<2: return None, None, None

        ax=f.add_subplot(132)
        p=ax.plot(x1,y1,'o',color='g',alpha=0.6)
        ax.set_xlabel('T(K)'); ax.set_ylabel('fu')
        ax.set_title('fraction unfolded')

        ax=f.add_subplot(133)
        p=ax.plot(t,k,'x',mew=2,color='black')
        ax.set_xlabel('1/T')#(r'$1/T ($K^-1)$')
        ax.set_ylabel('ln K')

        formatter = matplotlib.ticker.ScalarFormatter()
        formatter.set_scientific(True)
        formatter.set_powerlimits((0,0))
        ax.xaxis.set_major_formatter(formatter)
        for l in ax.get_xticklabels():
            l.set_rotation(30)

        #fit this van't hoff plot
        A,X=Fitting.doFit(expdata=zip(t,k),model='Linear')
        fitk = X.getFitLine(t)
        p=ax.plot(t,fitk,'r',lw=2)
        fd=X.getFitDict()
        #self.drawParams(ax,fd)

        #slope is deltaH/R/1000 in kJ
        deltaH = -fd['a']*self.R/1000
        deltaS = fd['b']*self.R/1000
        f.suptitle("Method 1 - deltaH: %2.2f deltaS: %2.2f" %(deltaH,deltaS),size=18)
        f.subplots_adjust(bottom=0.15,top=0.85)

        if show==True:
            self.showTkFigure(f)

        if figname == None: figname = d
        figname = figname.replace('.','_')
        fname = figname+'m1'+'.png'
        f.savefig(fname,dpi=300)
        print 'plot saved to %s' %os.path.abspath(fname)
        #plt.close()
        if E!=None:
            fdata = Fitting.makeFitData(X.name,vrs=X.variables)
            E.insertDataset(xydata=[t,k], newname=d+'_vanthoff',replace=True,fit=fdata)
            #E.saveProject()
        return deltaH, deltaS, ax

    def fitElwellSchellman(self,E=None, d=None, xy=None,transwidth=50,
                                invert=False,show=True,figname=None):
        """Fit entire raw data simultaneously to the three main thermodynamic
           parameters (deltaH, deltaCp, Tm) using the Elwell/Schellman method.

           Data comes from dataset d of the ekin project E when E is given,
           otherwise from the xy=(x,y) pair. transwidth is handed on to
           transformCD, invert flips the signal about its maximum first and
           show displays the figure in a Tk window.
           The figure is saved as <figname>m2.png (figname defaults to d) and,
           when E is given, the dG(T) points plus the fit are stored in E as
           dataset d+'_vanthoff2'.
           Returns (deltaH, Tm, deltacp), or None when no data was supplied."""
        if E is not None:
            ek = E.getDataset(d)
            x,y,a, xerr,yerr = ek.getAll()
        elif xy is not None:
            x,y = xy
        else:
            return
        if invert == True:
            top = max(y)
            y = [top-i for i in y]
        f=plt.figure(figsize=(10,5))
        ax=f.add_subplot(121)
        p=ax.plot(x,y,'o',alpha=0.5)
        ax.set_xlabel('T'); ax.set_ylabel('mdeg')
        ax.set_title('raw data')

        #presumably the last two values are the transition region temperatures
        #and the fraction unfolded - confirm against transformCD
        x1,y1,x,y = self.transformCD(x,y,transwidth,ax)

        t=[];dg=[]
        R=8.3144e-3
        for T,fu in zip(x,y):
            #K = fu/(1-fu) is only defined (and its log finite) for 0 < fu < 1
            if fu>=1 or fu<=0:
                continue
            K = fu/(1-fu)
            deltaGt = -R * T * math.log(K)
            dg.append(deltaGt)
            t.append(T)

        ax1=f.add_subplot(122)
        p=ax1.plot(t,dg,'x',mew=2,color='black')
        ax1.set_xlabel('T'); ax1.set_ylabel('dG(T)')
        ax1.set_title('stability curve')

        A,X=Fitting.doFit(expdata=list(zip(t,dg)),model='schellman',grad=1e-9,conv=1e-9)
        fity = X.getFitLine(t)
        p=ax1.plot(t,fity,'r',lw=2)
        fd=X.getFitDict()
        self.drawParams(ax1,fd)
        deltaH=fd['deltaH']; deltacp=fd['deltacp']; Tm=fd['Tm']
        f.suptitle("Method 2 - deltaH: %2.2f deltaCp: %2.2e Tm: %2.2f" %(deltaH,deltacp,Tm),size=18)
        if show == True:
            self.showTkFigure(f)

        if figname is None:
            #xy input carries no dataset name, so fall back to a fixed one
            if d is not None:
                figname = d
            else:
                figname = 'schellman'
        figname = figname.replace('.','_')
        #'m2' keeps this plot from overwriting the method 1 ('m1') image
        fname = figname+'m2'+'.png'
        f.savefig(fname,dpi=300)
        print('plot saved to %s' %os.path.abspath(fname))
        if E is not None:
            fdata = Fitting.makeFitData(X.name,vrs=X.variables)
            E.insertDataset(xydata=[t,dg], newname=d+'_vanthoff2',replace=True,fit=fdata)
            #E.saveProject()
        return deltaH, Tm, deltacp

    def breslauerMethod(self,E=None, d=None, xy=None,invert=False,
                        show=True,figname=None):
        """Finds slope of trans region and plugs this in to equation
        http://www.springerlink.com/content/r34n0201g30563u7/

        Data comes from dataset d of the ekin project E when E is given,
        otherwise from the xy=(x,y) pair. With invert the signal is flipped
        about its maximum before fitting, as in the other fit methods.
        The raw data is fitted to the 'Unfolding' model and deltaH is
        computed as R*Tm^2*m from the fitted midpoint (d50) and slope (m).
        The figure is only saved when figname is given.
        Returns (deltaH, Tm), or None when no data was supplied."""
        if E is not None:
            ek = E.getDataset(d)
            x,y,a, xerr,yerr = ek.getAll()
        elif xy is not None:
            x,y = xy
        else:
            return
        if invert == True:
            top = max(y)
            y = [top-i for i in y]
        f=plt.figure(figsize=(10,6))
        ax=f.add_subplot(111)
        ax.set_xlabel('T')
        p=ax.plot(x,y,'o',alpha=0.5)
        #the guessed midpoint seeds the last start value of the fit
        d50 = self.guessMidpoint(x,y)
        A,X=Fitting.doFit(expdata=list(zip(x,y)),model='Unfolding',conv=1e-7,noiter=60,
                            guess=False,startvalues=[1,1,1,1,1,d50])
        fity = X.getFitLine(x)
        p=ax.plot(x,fity,'r',lw=2)
        fd=X.getFitDict()
        self.drawParams(ax,fd)
        Tm = fd['d50']; m = fd['m']
        R = 8.3144e-3
        deltaH =  R * math.pow(Tm,2) * m
        f.suptitle("Method 4 - deltaH: %2.2f Tm: %2.2f" %(deltaH,Tm),size=18)
        if show == True:
            self.showTkFigure(f)
        if figname is not None:
            figname = figname.replace('.','_')
            f.savefig(figname)
            plt.close(f)
        return deltaH, Tm

    def fitDifferentialCurve(self, E=None, d=None, xy=None,smooth=0,
                                invert=False,show=True,figname=None):
        """Derive differential denaturation curve and fit to get deltaH
           We smooth the unfolding curve and then differentiate and finally
           fit to a 3 parameter equation.
           See http://www.ncbi.nlm.nih.gov/pubmed/10933511

           Data comes from dataset d of the ekin project E when E is given,
           otherwise from the xy=(x,y) pair. smooth is the degree passed to
           smoothListGaussian; 0 picks len(x)/15 (at least 1).
           The differential curve is written to diffcd.csv, the figure is
           saved as <figname>m3 when figname is given and, when E is given,
           the curve and fit are stored in E as dataset d+'_diff'.
           Returns (deltaH, Tm), or None when no data was supplied."""

        if E is not None:
            ek = E.getDataset(d)
            x,y,a, xerr,yerr = ek.getAll()
        elif xy is not None:
            x,y = xy
        else:
            return
        if invert == True:
            top = max(y)
            y = [top-i for i in y]

        leg=[]; lines=[]
        f=plt.figure(figsize=(10,5))
        ax=f.add_subplot(121)
        p=ax.plot(x,y,'x',color='black',mew=3,alpha=0.5)
        leg.append(p); lines.append('original')
        #smooth
        if smooth == 0:
            #a degree of 0 gives the smoother an empty window, so never go below 1
            smooth=max(1, int(len(x)/15.0))
        s=self.smoothListGaussian(y,smooth)
        p=ax.plot(x[:len(s)-1],s[:-1],lw=3)
        leg.append(p); lines.append('smoothed')
        ax.set_title("original data")
        ax.set_xlabel('T')
        ax1=f.add_subplot(122)
        #differentiate
        dx,ds = self.differentiate(x[:len(s)],s)
        #ds = [i/max(ds) for i in ds]
        ds = [i*10 for i in ds]
        #keep a copy of the differential curve on disk; the with block closes the file
        with open('diffcd.csv','w') as csvfile:
            cw=csv.writer(csvfile)
            for row in zip(dx,ds):
                cw.writerow(row)
        p=ax1.plot(dx,ds,'-',lw=1.5,alpha=0.7,color='black')
        leg.append(p); lines.append('differential')
        ax1.set_title("differential denaturation")
        ax1.set_xlabel('T'); ax1.set_ylabel('dsignal/dT')

        A,X=Fitting.doFit(expdata=list(zip(dx,ds)),model='diffDenaturation',grad=1e-9,conv=1e-10)
        fity = X.getFitLine(dx)
        p=ax1.plot(dx,fity,'r',lw=2)
        leg.append(p); lines.append('fit')
        t=X.getFitDict()
        self.drawParams(ax1,t)
        dHkcal=t['deltaH']/4.184
        f.suptitle('Method 3 - deltaH: %2.2f kJ/mol (%2.2f kcal) Tm: %2.2f' %(t['deltaH'],dHkcal,t['Tm']),size=18)
        ax.legend(leg,lines,loc='best',prop=FontProperties(size="smaller"))
        #f.subplots_adjust(hspace=0.8)
        if show == True:
            self.showTkFigure(f)
        if figname is not None:
            figname = figname.replace('.','_')
            f.savefig(figname+'m3',dpi=300)
            plt.close(f)
        if E is not None:
            fdata = Fitting.makeFitData(X.name,vrs=X.variables)
            E.insertDataset(xydata=[dx,ds], newname=d+'_diff',replace=True,fit=fdata)
            #E.saveProject()
        return t['deltaH'],t['Tm']

    def differentiate(self, x,y):
        """Return the first differences of y with the x values they pair with.

        The result is y[i+1]-y[i] as given by numpy.diff; it is not divided by
        the x spacing. x is cut to the same length (one shorter than y)."""
        deltas = numpy.diff(y, n=1)
        return x[:len(deltas)], deltas

    def smoothListGaussian(self,data,degree=5):
        """Smooth data with a gaussian weighted moving window.

        The window is degree*2-1 points wide. The input is padded at both
        ends with its first/last value so the result has the same length as
        data and is not shifted. Returns a list."""
        values = list(data)
        #pad the ends to avoid an offset result
        padded = [values[0]]*(degree-1) + values + [values[-1]]*degree
        window = degree*2-1
        #gaussian weight for each window position, centred on the middle one
        kernel = numpy.array([1/(numpy.exp((4*((pos-degree+1)/float(window)))**2))
                              for pos in range(window)])
        smoothed = []
        for start in range(len(padded)-window):
            chunk = numpy.array(padded[start:start+window])
            smoothed.append(sum(chunk*kernel)/sum(kernel))
        return smoothed

    def invert(self,data):
        """Return data flipped about its maximum, i.e. max(data)-v for each v.

        This is the same inversion the fit methods apply for invert=True.
        An empty input gives an empty list; the input is not modified."""
        values = list(data)
        if not values:
            return []
        top = max(values)
        return [top-v for v in values]

    def simulateCD(self,noise=1.0):
        """Simulate some CD spec data.

        An 'Unfolding' model curve is evaluated from 290 to 380 in steps of
        0.2 and gaussian noise with zero mean and standard deviation noise/2
        is added to every point. The points are also written to cd.csv.
        Returns the x and y lists."""
        x=list(numpy.arange(290,380,0.2))
        X=Fitting.getFitter(model='Unfolding',
                              vrs=[-16, 0.01, -11.6, 0.01, 2.7, 324])
        fity = X.getFitLine(x)
        sigma = noise/2.0
        #the noise is centred on zero so the simulated signal keeps the model's scale
        y = [i+numpy.random.normal(0, sigma) for i in fity]
        with open('cd.csv','w') as csvfile:
            cw=csv.writer(csvfile)
            for row in zip(x,y):
                cw.writerow(row)
        return x,y

    def drawParams(self,ax,d):
        """Write every name=value pair of d as a text label on the axes ax.

        Labels start one twentieth of the axis range in from the top left
        corner and step down by a twentieth of the y range each. Values are
        rounded to 3 decimals."""
        bottom, top = ax.get_ylim()
        left, right = ax.get_xlim()
        step = (top-bottom)/20
        xstep = (right-left)/20
        ypos = top-step
        for name in d:
            label = name+'='+str(round(d[name],3))
            ax.text(left+xstep,ypos,label,fontsize=10)
            ypos -= step
        return

    def pltConfig(self):
        """Set the matplotlib rc defaults used here: a monospace font and
        0.3 horizontal/vertical spacing between subplots"""
        plt.rc('font',family='monospace')
        plt.rc('figure.subplot', wspace=0.3,hspace=0.3)
        return

    def doAll(self, methods=('method 1',)):
        """Process all datasets in ekinprj.

        methods is a sequence holding any of 'method 1', 'method 2' and
        'method 3'; each listed method is run on every dataset of self.E.
        Datasets whose name contains '_diff' or '_vanthoff' are skipped
        (those suffixes are used for the derived datasets the fits store).
        A progress bar is shown while fitting and the collected values are
        shown in a table at the end."""
        E=self.E
        vals={}
        from Dialogs import PEATDialog
        pb=PEATDialog(self.mainwin, option='progressbar',
                                      message='Analysing Data..')
        pb.update_progress(0)
        total = len(E.datasets); count=0
        try:
            for d in E.datasets:
                #count skipped datasets too so the bar can reach 100%
                count += 1
                if '_diff' in d or '_vanthoff' in d:
                    pb.update_progress(float(count)/total*100.0)
                    continue
                vals[d]={}
                name = d
                if 'method 1' in methods:
                    vals[d]['dH1'], vals[d]['dS1'], ax = self.fitVantHoff(E,d,
                                                             transwidth=int(self.tw.getvalue()),
                                                             show=False,figname=name)
                if 'method 2' in methods:
                    vals[d]['dH2'], vals[d]['dTm2'], vals[d]['dCp2'] = self.fitElwellSchellman(E,d,show=False,figname=name)
                if 'method 3' in methods:
                    vals[d]['dH3'], vals[d]['dTm3'] = self.fitDifferentialCurve(E,d,show=False,figname=name)
                pb.update_progress(float(count)/total*100.0)
        finally:
            #never leave the progress dialog open if a fit fails
            pb.close()
        self.showTable(vals)
        return

    def showTable(self, data):
        """Load data into a DictEditor created on the main window"""
        from PEATDB.DictEdit import DictEditor
        editor = DictEditor(self.mainwin)
        editor.loadTable(data)
        return

    def benchmark(self,E=None,d=None, method=1):
        """Test methods with varying parameters, smoothing etc

        method 1 and 2 are run for transition widths 5..135 in steps of 5,
        method 3 for smoothing degrees 1..28 in steps of 3. E and d default
        to the loaded project and the dataset selected in the menu.
        The individual fit plots go to the vh_benchmark folder; deltaH
        against the varied parameter is saved as benchmark_<method>.png and
        benchmark_<method>.csv.
        Raises ValueError for any other method."""
        if method not in (1,2,3):
            raise ValueError('benchmark supports methods 1, 2 and 3, got %r' %(method,))
        if E is None and self.E is not None:
            E = self.E; d=self.dmenu.getcurselection()

        path='vh_benchmark'
        if not os.path.exists(path):
            os.mkdir(path)
        dHvals=[]

        if method == 1:
            xlabel = 'width (K)'
            title = 'method 1: deltaH variation with trans region width fit'
            vals=range(5,140,5)
            for w in vals:
                dH, dS, ax = self.fitVantHoff(E,d,transwidth=w,show=False,
                                              figname=os.path.join(path,'%s_%s.png' %(d,w)))
                if dH is None: dH=0
                dHvals.append(dH)
            #take best values from middle
            #dHvals= dHvals[5:16]
        elif method == 2:
            xlabel = 'width (K)'
            title = 'method 2: deltaH variation with width fit'
            vals=range(5,140,5)
            for w in vals:
                #fitElwellSchellman returns (deltaH, Tm, deltacp)
                dH, dTm, dcp = self.fitElwellSchellman(E,d,transwidth=w,show=False,
                                                       figname=os.path.join(path,'%s_%s.png' %(d,w)))
                dHvals.append(dH)
        else:
            xlabel = 'smoothing degree'
            title = 'method 3: deltaH variation with degree of smoothing'
            vals=range(1,30,3)
            for s in vals:
                dH, dTm = self.fitDifferentialCurve(E,d,smooth=s,show=False,
                                                    figname=os.path.join(path,'%s_%s.png' %(d,s)))
                dHvals.append(dH)
        mean = numpy.mean(dHvals)
        stdev = numpy.std(dHvals)
        f=plt.figure()
        ax=f.add_subplot(111)
        ax.plot(vals, dHvals,lw=2)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('deltaH (kJ)')
        ax.set_title('mean: %2.2f stdev: %2.2f'%(mean, stdev))
        f.suptitle(title)
        f.savefig('benchmark_%s.png' %method)
        with open('benchmark_%s.csv' %method,'w') as csvfile:
            cw=csv.writer(csvfile)
            for row in zip(vals,dHvals):
                cw.writerow(row)
        return

    def benchmarkLimitedData(self, E=None,d=None, method=1):
        """test any method with varying limited data

        Only method 1 is implemented: the Van't Hoff fit is run for
        transition widths 5..135 in steps of 5 and each plot is saved in the
        vh_benchmark folder. E and d default to the loaded project and the
        dataset selected in the menu. Nothing is returned."""
        if E is None and self.E is not None:
            E = self.E; d=self.dmenu.getcurselection()

        path='vh_benchmark'
        if not os.path.exists(path):
            os.mkdir(path)
        if method == 1:
            #iterate the width range itself; looping over an empty list ran no fit
            for w in range(5,140,5):
                self.fitVantHoff(E,d,transwidth=w,show=False,
                                 figname=os.path.join(path,'%s_%s.png' %(d,w)))
        return

    def plotCorrelation(self,x=None,y=None,xlabel='method1',ylabel='method2'):
        """Scatter plot of two sets of values with the squared Pearson
        correlation written on the plot, shown in a Tk window.

        When x is not given both columns are read from compared.csv
        (first column x, second column y).
        This is a plain instance method because it has to call the instance
        method showTkFigure; as a classmethod that call always failed."""
        if x is None:
            with open('compared.csv','r') as data:
                #read the file once and skip blank rows
                rows = [r for r in csv.reader(data) if r]
            x=[float(r[0]) for r in rows]
            y=[float(r[1]) for r in rows]
        f=plt.figure()
        ax=f.add_subplot(111)

        line = ax.scatter(x, y, marker='o',alpha=0.8)
        #diagonal showing where both methods would agree exactly
        cl = numpy.arange(0,max(x)+50)
        ax.plot(cl, cl, 'g', alpha=0.5,lw=2)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_xlim(150,600); ax.set_ylim(150,600)
        ax.set_title('Correlation')
        from scipy import stats
        cc = str(round(pow(stats.pearsonr(x,y)[0],2),2))
        ax.text(400,180, r'$r^2= %s$' %cc, fontsize=16)
        self.showTkFigure(f)
        return

    def showTkFigure(self, fig):
        """Show the matplotlib figure fig in a new Tk Toplevel window, with a
        navigation toolbar attached"""
        from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2TkAgg
        window = Toplevel()
        figcanvas = FigureCanvasTkAgg(fig, master=window)
        widget = figcanvas.get_tk_widget()
        widget.pack(side=TOP, fill=X, expand=1)
        toolbar = NavigationToolbar2TkAgg(figcanvas, window)
        toolbar.update()
        figcanvas._tkcanvas.pack(side=BOTTOM, fill=BOTH, expand=1)
        return
Exemple #19
0
def main():
    """Run some analysis from the command line.

    Loads the ekin project given with -e (dataset chosen with -d), or
    simulates a CD dataset called 'cdtest' when no project is given, then
    does exactly one of:
      -a  run the chosen method on all datasets of the project
      -b  benchmark the chosen method on the dataset
      otherwise a single fit of the dataset with the chosen method."""

    from optparse import OptionParser
    parser = OptionParser()
    app = VantHoff()
    parser.add_option("-f", "--file", dest="file", help="Open a local db")
    parser.add_option("-e",
                      "--ekinprj",
                      dest="ekinprj",
                      help="Open an ekin project")
    parser.add_option("-d", "--dataset", dest="dataset", help="Dataset name")
    parser.add_option(
        "-m",
        "--method",
        dest="method",
        default=1,
        type='int',
        help=
        "Choose method - 1: Van't Hoff plot, 2: Schellman, 3: Differential fit, 4: Breslauer"
    )
    parser.add_option("-b",
                      "--benchmark",
                      dest="benchmark",
                      action='store_true',
                      help="Test",
                      default=False)
    parser.add_option("-a",
                      "--all",
                      dest="all",
                      action='store_true',
                      help="Do all datasets in ekinprj",
                      default=False)
    parser.add_option("-w",
                      "--width",
                      dest="width",
                      default=50,
                      type='int',
                      help="Width of transition region to fit for methods 1 and 2")
    parser.add_option(
        "-s",
        "--smoothing",
        dest="smoothing",
        default=5,
        type='int',
        help="Degree of smoothing to apply in method 3 (default 5)")
    parser.add_option("-i",
                      "--invert",
                      dest="invert",
                      action='store_true',
                      help="Invert raw data",
                      default=False)

    opts, remainder = parser.parse_args()

    if opts.file is not None and os.path.exists(opts.file):
        app.loadDB(opts.file)
    if opts.ekinprj is not None and os.path.exists(opts.ekinprj):
        E = EkinProject()
        E.openProject(opts.ekinprj)
        d = opts.dataset
    else:
        x, y = app.simulateCD()
        E = EkinProject()
        d = 'cdtest'
        E.insertDataset(xydata=[x, y], newname=d)

    if opts.all == True:
        #doAll works on the project held by the app and takes method names
        #NOTE(review): doAll also uses GUI widgets of the app - confirm it
        #can run without the main window
        app.E = E
        app.doAll(methods=['method %d' % opts.method])
        return

    #everything below fits one named dataset
    if d is None:
        parser.error('a dataset name (-d) is needed with an ekin project')

    if opts.benchmark == True:
        app.benchmark(E, d, method=opts.method)

        #app.plotCorrelation()
    else:
        if opts.method == 1:
            app.fitVantHoff(E,
                            d,
                            transwidth=opts.width,
                            invert=opts.invert,
                            figname=d)
        elif opts.method == 2:
            app.fitElwellSchellman(E,
                                   d,
                                   transwidth=opts.width,
                                   invert=opts.invert,
                                   figname=d)
        elif opts.method == 3:
            app.fitDifferentialCurve(E,
                                     d,
                                     smooth=opts.smoothing,
                                     invert=opts.invert,
                                     figname=d)
        elif opts.method == 4:
            app.breslauerMethod(E, d, invert=opts.invert)
Exemple #20
0
    def run(self, callback=None):
        """Do initial import/fitting run with the current config.

        Every file in self.queue is imported, optionally merged by replicate
        and regrouped by field, then processed and fitted one entry at a
        time. For each entry an ekin project, its exported datasets and
        (depending on the config) fit csv files and plot images are written
        under self.workingdir. With groupbyname set, a last round of fitting
        is done over the collected results and saved as 'final'.

        callback, if given, is called with the percentage done after each
        entry. Returns the results dict (also stored in self.results), or
        None if the run was stopped through self.stop."""

        self.stop=False
        self.loadModels()
        self.prepareData()
        print('processing files in queue..')

        self.parseLabels()
        imported = {}   #raw data
        results = {}    #fitted data

        for key in self.queue:
            filename = self.queue[key]
            lines = self.openRaw(filename)
            if lines is None:
                continue
            imported[key] = self.doImport(lines)

        #try to average replicates here before we process
        if self.replicates == 1:
            if self.namelabels is not None:
                imported = Utilities.addReplicates(imported, self.namelabels)
            else:
                print('no replicates detected from labels')

        #re-arrange the imported dict if we want to group our output per field
        if self.groupbyfields == 1:
            imported = Utilities.arrangeDictbySecondaryKey(imported, self.namelabels)

        total = len(imported)

        c=0.0
        for key in imported:
            if self.stop == True:
                print('cancelled')
                return
            #set filename
            fname = os.path.basename(key)
            fname = os.path.join(self.workingdir, fname)
            data = imported[key]

            if self.function1 != '':
                data = self.doProcessingStep(data, fname)

            if self.namelabels is None or key not in self.namelabels:
                namelabel = key
            else:
                namelabel = self.namelabels[key]

            #if we have models to fit this means we might need to propagate fit data
            if self.model1 != '':
                Em = EkinProject()
                #grouping by file labels handled here
                if self.groupbyname == 1:
                    #we don't pass the last model if it has to be
                    #reserved for a final round of fitting from the files dict
                    models = self.models[:-1]
                    variables = self.variables[:-1]
                    E,fits = self.processFits(rawdata=data, Em=Em,
                                               models=models,variables=variables)
                else:
                    E,fits = self.processFits(rawdata=data, Em=Em)
                results[namelabel] = fits
            else:
                #if no fitting we just put the data in ekin
                Em = Utilities.getEkinProject(data)
                results[namelabel] = data

            Em.saveProject(fname)
            Em.exportDatasets(fname, append=True)
            if self.model1 != '':
                self.saveFitstoCSV(Em, fname)
            if self.saveplots == 1:
                self.saveEkinPlotstoImages(Em, fname)
            c+=1.0
            if callback is not None:
                callback(c/total*100)

        #if grouped by file names then we process that here from results
        if self.groupbyname == 1:
            results = Utilities.extractSecondaryKeysFromDict(results)
            Em = EkinProject()
            E,fits = self.processFits(rawdata=results, Em=Em)
            #fname already includes workingdir; joining it again would give
            #workingdir/workingdir/final for a relative working directory
            fname = os.path.join(self.workingdir, 'final')
            Em.saveProject(fname)
            Em.exportDatasets(fname)
            if self.model1 != '':
                self.saveFitstoCSV(Em, fname)
            self.saveEkinPlotstoImages(Em, fname)
        print('processing done')
        print('results saved to %s' %self.workingdir)
        self.results = results
        return results
Exemple #21
0
    def showEkinPlots(self,
                      ekindata=None,
                      project=None,
                      filename=None,
                      datasets='ALL',
                      title='Ekin plots',
                      outfile=None,
                      imgpath=None,
                      path='',
                      normalise=False,
                      showfitvars=False,
                      plotoption=1,
                      columns=2,
                      legend=False,
                      size=(8, 6),
                      logx=False,
                      logy=False):
        """Plot ekin datasets from the provided ekin project data and print
        an html page with the images, a csv download link and the fit info.

        The project is taken from ekindata (raw ekin data), project (an
        EkinProject) or filename (a saved project), in that order; with none
        of them nothing is done. datasets is 'ALL' or a list of names.
        plotoption 1 makes one image per dataset, 3 one image for all.
        Images are written to imgpath (default: the folder of outfile) and
        referenced in the html relative to path. The html goes to outfile
        when given, otherwise to stdout. size is the image size; with
        plotoption 1 and more than 2 columns it becomes (4, 3)."""

        if ekindata is not None:  #convert from ekindata
            E = EkinProject(data=ekindata, mode='NMR titration')
        elif project is not None:  #just passed object
            E = project
        elif filename is not None:  #load project from file
            E = EkinProject()
            E.openProject(filename)
        else:
            return
        E.checkDatasets()
        #if outfile is given and no imgpath, images go next to outfile
        if outfile is not None and imgpath is None:
            imgpath = os.path.dirname(outfile)
        if imgpath is not None:
            tempfile.tempdir = imgpath
            #csv files live in a 'csv' folder beside the image folder
            csvpath = os.path.join(os.path.split(imgpath)[0], 'csv')
        else:
            csvpath = 'csv'

        def downloadLink():
            #do csv download link for data displayed
            print('<table id="mytable" valign=top>')
            cfname = tempfile.mktemp('.csv', dir=csvpath)
            E.exportCSV(filename=cfname)
            p = os.path.split(path)[0]
            print('<td><a href=%s title="right-click to save as"> download data </a>' \
                      %os.path.join(p, 'csv', os.path.basename(cfname)))
            print('</td></tr>')
            print('</table>')

        if datasets == 'ALL':
            #we plot all the datasets
            datasets = E.datasets
        if plotoption == 1:
            if columns > 2:
                size = (4, 3)
            imagenames = {}
            for d in datasets:
                imgfile = self.maketempImage()
                name = os.path.basename(imgfile)
                E.plotDatasets(d,
                               filename=imgfile,
                               size=size,
                               linecolor='r',
                               normalise=normalise,
                               showfitvars=showfitvars,
                               legend=legend,
                               logx=logx,
                               logy=logy)
                imagenames[d] = name

        elif plotoption == 3:
            name = self.maketempImage()
            E.plotDatasets(datasets,
                           filename=name,
                           plotoption=3,
                           size=size,
                           normalise=normalise,
                           legend=legend,
                           logx=logx,
                           logy=logy)

        #list in name order without re-ordering the project's own list
        datasets = sorted(datasets)

        saveout = sys.stdout
        fsock = None
        if outfile is not None:
            fsock = open(outfile, 'w')
            sys.stdout = fsock
        try:
            self.doheader(title)
            downloadLink()

            print('<table id="mytable" align=center cellspacing="0" borderwidth=1>')
            row = 1
            c = 1
            if plotoption == 1:
                for d in datasets:
                    if d not in imagenames:
                        continue
                    if c == 1:
                        print('<tr>')
                    print('<td> <img src=%s/%s  align=center></td>' % (
                        path, imagenames[d]))
                    print('<td class="alt">')
                    #use ekinproject to supply formatted fit and model info here..
                    self.showMetaData(ekinproj=E, dataset=d)
                    print('</td>')
                    c = c + 1
                    if c >= columns:
                        print('</tr>')
                        row = row + 1
                        c = 1
            elif plotoption == 3:
                print('<td> <img src=%s/%s  align=center></td>' % (
                    path, os.path.basename(name)))
                print('<td class="alt">')
                #use ekinproject to supply formatted fit and model info here..
                x = 1
                for d in datasets:
                    if x > 2:
                        n = True
                        x = 0
                    else:
                        n = False
                    if n == False:
                        print('<td>')
                    self.showMetaData(ekinproj=E, dataset=d)
                    x += 1
                print('</td>')
                c = c + 1
                if c >= columns:
                    print('</tr>')
                    row = row + 1
                    c = 1

            print('</table>')
        finally:
            #always hand stdout back, even if writing the page failed
            if fsock is not None:
                sys.stdout.flush()
                sys.stdout = saveout
                fsock.close()
        return
Exemple #22
0
    def plotpKDCalcs(self, calcs, Ed=None, option=1):
        """Do pKD calcs with exp data plots.

        calcs maps a residue name to a dict of x:y points of its calculated
        titration curve (string keys inside that dict are ignored). Ed is the
        ekin project holding the experimental data.
        option '2' only shows the experimental data, '3' only the fitted
        calculated curves, anything else overlays both for every dataset
        found in both projects. option may be given as an int or a string.
        Experimental datasets in Ed are renamed in place to the pKD naming
        scheme (chain:resnum:residue) so the two sets can be matched."""
        from PEATDB.Ekin.Web import EkinWeb
        from PEATDB.Ekin.Base import EkinProject
        from PEATDB.Ekin.Convert import EkinConvert
        import PEATDB.Ekin.Utils as Utils
        c=calcs
        #the option is compared as a string below, so accept 2 as well as '2'
        option = str(option)

        EW = EkinWeb()

        if option == '2':
            print('<a>Just showing experimental data</a>')
            EW.showEkinPlots(project=Ed, datasets='ALL',
                              path=self.plotsdir,
                              imgpath=self.imagepath)
            return
        #create ekin proj from pKD titcurves
        Ec = EkinProject()
        for r in c.keys():
            xd=[];yd=[]
            for i in c[r]:
                if isinstance(i, types.StringType):
                    continue
                xd.append(i)
                yd.append(c[r][i])
            edata=EkinConvert.xy2ekin([xd,yd])
            Ec.insertDataset(edata, r)

        print('<a>Please wait, fitting calcated curves...</a>')
        sys.stdout.flush()
        Ec.fitDatasets(models=['1 pKa 2 Chemical shifts'], silent=True)

        if option == '3':
            print('<a>Just showing pKD data</a>')
            EW.showEkinPlots(project=Ec, datasets='ALL',
                              path=self.plotsdir,
                              imgpath=self.imagepath)
            return

        #nothing to overlay without experimental data
        if Ed is None:
            return

        #transform exp data names to match pKD ones
        s=':'
        usechainid = True
        #if pKD names have no chain id, we don't need one for exp names
        if len(Ec.datasets) > 0 and Ec.datasets[0].startswith(':'):
            usechainid=False
        for d in Ed.datasets[:]:
            r = Ed.getMetaData(d)
            if r is not None:
                if r['chain_id'] is None or usechainid == False:
                    chain = ''
                else:
                    chain = r['chain_id']
                new = chain+s+Utils.leadingZeros(r['res_num'],4)+s+r['residue']
                #same residue measured on another atom: keep the names unique
                if new in Ed.datasets:
                    atom = r['atom']
                    new = new + '_' + atom
                Ed.renameDataset(d, new)

        #now we overlay the same datasets in Ed and Ec
        #also handles cases where same residue multiple times for diff atoms in exp data
        for d in Ed.datasets:
            if d in Ec.datasets:
                Ep = EkinProject()
                cdata = Ec.getDataset(d)
                Ep.insertDataset(cdata, d+'_pKD')
                Ep.setFitData(d+'_pKD', Ec.getFitData(d))
                ddata = Ed.getDataset(d)
                Ep.insertDataset(ddata, d+'_exp')
                Ep.setFitData(d+'_exp', Ed.getFitData(d))
                EW.showEkinPlots(project=Ep, datasets='ALL', plotoption=3,
                                 normalise=True, legend=True,
                                 path=self.plotsdir,
                                 imgpath=self.imagepath)

        return
Exemple #23
0
    def showEkinPlots(self, ekindata=None, project=None, filename=None,
                            datasets='ALL',
                            title='Ekin plots',
                            outfile=None, imgpath=None, path='',
                            normalise=False, showfitvars=False,
                            plotoption=1, columns=2, legend=False, size=(8,6),
                            logx=False, logy=False):
        """Plot ekin datasets from the provided ekin project data and print
        an html page with the images, a csv download link and the fit info.

        The project is taken from ekindata (raw ekin data), project (an
        EkinProject) or filename (a saved project), in that order; with none
        of them nothing is done. datasets is 'ALL' or a list of names.
        plotoption 1 makes one image per dataset, 3 one image for all.
        Images are written to imgpath (default: the folder of outfile) and
        referenced in the html relative to path. The html goes to outfile
        when given, otherwise to stdout. size is the image size; with
        plotoption 1 and more than 2 columns it becomes (4,3)."""

        if ekindata is not None: #convert from ekindata
            E = EkinProject(data=ekindata, mode='NMR titration')
        elif project is not None: #just passed object
            E = project
        elif filename is not None: #load project from file
            E = EkinProject()
            E.openProject(filename)
        else:
            return
        E.checkDatasets()
        #if outfile is given and no imgpath, images go next to outfile
        if outfile is not None and imgpath is None:
            imgpath = os.path.dirname(outfile)
        if imgpath is not None:
            tempfile.tempdir = imgpath
            #csv files live in a 'csv' folder beside the image folder
            csvpath = os.path.join(os.path.split(imgpath)[0], 'csv')
        else:
            csvpath = 'csv'

        def downloadLink():
            #do csv download link for data displayed
            print('<table id="mytable" valign=top>')
            cfname = tempfile.mktemp('.csv', dir=csvpath)
            E.exportCSV(filename=cfname)
            p = os.path.split(path)[0]
            print('<td><a href=%s title="right-click to save as"> download data </a>' \
                      %os.path.join(p, 'csv', os.path.basename(cfname)))
            print('</td></tr>')
            print('</table>')

        if datasets == 'ALL':
            #we plot all the datasets
            datasets = E.datasets
        if plotoption == 1:
            if columns>2:
                size=(4,3)
            imagenames={}
            for d in datasets:
                imgfile = self.maketempImage()
                name = os.path.basename(imgfile)
                E.plotDatasets(d, filename=imgfile, size=size, linecolor='r',
                                    normalise=normalise,showfitvars=showfitvars,legend=legend,
                                    logx=logx, logy=logy)
                imagenames[d] = name

        elif plotoption == 3:
            name = self.maketempImage()
            E.plotDatasets(datasets, filename=name, plotoption=3, size=size,
                                normalise=normalise, legend=legend,
                                logx=logx, logy=logy)

        #list in name order without re-ordering the project's own list
        datasets = sorted(datasets)

        saveout = sys.stdout
        fsock = None
        if outfile is not None:
            fsock = open(outfile, 'w')
            sys.stdout = fsock
        try:
            self.doheader(title)
            downloadLink()

            print('<table id="mytable" align=center cellspacing="0" borderwidth=1>')
            row=1;c=1
            if plotoption == 1:
                for d in datasets:
                    if d not in imagenames:
                        continue
                    if c==1:
                        print('<tr>')
                    print('<td> <img src=%s/%s  align=center></td>' % (path, imagenames[d]))
                    print('<td class="alt">')
                    #use ekinproject to supply formatted fit and model info here..
                    self.showMetaData(ekinproj=E, dataset=d)
                    print('</td>')
                    c=c+1
                    if c >= columns:
                        print('</tr>')
                        row=row+1
                        c=1
            elif plotoption == 3:
                print('<td> <img src=%s/%s  align=center></td>' % (path, os.path.basename(name)))
                print('<td class="alt">')
                #use ekinproject to supply formatted fit and model info here..
                x=1
                for d in datasets:
                    if x>2:
                        n=True
                        x=0
                    else:
                        n=False
                    if n==False:
                        print('<td>')
                    self.showMetaData(ekinproj=E, dataset=d)
                    x+=1
                print('</td>')
                c=c+1
                if c >= columns:
                    print('</tr>')
                    row=row+1
                    c=1

            print('</table>')
        finally:
            #always hand stdout back, even if writing the page failed
            if fsock is not None:
                sys.stdout.flush()
                sys.stdout = saveout
                fsock.close()
        return
Exemple #24
0
def importOldProj(datadir,
                  local=None,
                  server=None,
                  project=None,
                  username=None):
    """Import an old-format PEAT project into a new PDatabase.

    datadir  -- location handed to the old ``PEAT_DB.Database`` reader
    local    -- target for a local database; takes precedence over
                ``server`` when both are given
    server   -- host of a remote target database (port 8080 is used)
    project  -- remote project name (only used together with ``server``)
    username -- remote user name (only used together with ``server``)

    Raises ValueError when neither ``local`` nor ``server`` is supplied.
    The new database is committed and closed before returning.
    """
    import PEAT_DB.Database as peatDB
    from PEAT_DB.PEAT_dict import PEAT_dict, sub_dict
    import copy
    if local is not None:
        newDB = PDatabase(local=local)
    elif server is not None:
        newDB = PDatabase(server=server,
                          username=username,
                          port=8080,
                          password='******',
                          project=project)
    else:
        #without a target there is no newDB to fill; fail with a clear
        #message rather than an unbound-variable error further down
        raise ValueError('importOldProj needs either local or server')

    print(newDB)
    PT = peatDB.Database(datadir, Tk=False)
    oldDB = PT.DB
    print('got old peat_db with %s proteins' % len(PT.proteins))

    print(PT.DB.keys())
    #import meta stuff like userfields, table
    for p in newDB.meta.special:
        if not p in PT.DB.keys():
            continue
        print('adding %s' % (p,))
        for k in PT.DB[p]:
            newDB.meta[p][k] = copy.deepcopy(PT.DB[p][k])
    #the nested assignments above are flagged by hand as a change
    newDB.meta._p_changed = 1

    for p in PT.proteins:
        if p in newDB.meta.special:
            continue

        name = oldDB[p]['Name']
        rec = PEATRecord(name=name)
        for col in oldDB[p].keys():
            cdata = oldDB[p][col]
            recdata = {}
            if col == 'name':
                cdata = oldDB[p]['Name']

            #columns whose user field type is an ekin type are wrapped in
            #an EkinProject; projects without datasets are not imported
            if oldDB['userfields'].has_key(col) and oldDB['userfields'][col][
                    'field_type'] in ekintypes:
                E = EkinProject(data=cdata)
                E.length = len(E.datasets)
                if len(E.datasets) == 0:
                    continue
                cdata = E

            #old sub_dict values are copied key by key into a plain dict
            if type(cdata) == sub_dict:
                for k in cdata.keys():
                    recdata[k] = copy.deepcopy(cdata[k])
            else:
                recdata = cdata
            if cdata != '' and cdata != None:
                rec.addField(col, data=recdata)
        newDB.add(p, rec)
    print(newDB.meta.userfields)
    #remove any file cols, too hard to import
    for m in list(newDB.meta.userfields.keys()):
        if newDB.meta.userfields[m]['field_type'] == 'File':
            newDB.deleteField(m)
    newDB.commit(user='******', note='import')
    newDB.close()
    print('import done')
Exemple #25
0
def importOldProj(datadir,local=None, server=None,
                    project=None, username=None):
    """Import old peat projects.

    Reads the old project found at ``datadir`` with ``PEAT_DB.Database`` and
    copies its meta data and protein records into a new PDatabase, which is
    either local (``local`` given) or remote (``server`` given, together
    with ``username`` and ``project``; port 8080 is used). ``local`` wins
    when both are supplied.

    Raises ValueError if neither ``local`` nor ``server`` is given.
    The new database is committed and closed at the end.
    """
    import PEAT_DB.Database as peatDB
    from PEAT_DB.PEAT_dict import PEAT_dict, sub_dict
    import copy
    if local is not None:
        newDB = PDatabase(local=local)
    elif server is not None:
        newDB = PDatabase(server=server, username=username, port=8080,
                          password='******', project=project)
    else:
        #previously this fell through and failed later on the unbound newDB
        raise ValueError('importOldProj needs either local or server')

    print(newDB)
    PT = peatDB.Database(datadir, Tk=False)
    oldDB = PT.DB
    print('got old peat_db with %s proteins' % len(PT.proteins))

    print(PT.DB.keys())
    #import meta stuff like userfields, table
    for p in newDB.meta.special:
        if not p in PT.DB.keys():
            continue
        print('adding %s' % (p,))
        for k in PT.DB[p]:
            newDB.meta[p][k] = copy.deepcopy(PT.DB[p][k])
    #mark the meta object as changed after the nested assignments
    newDB.meta._p_changed = 1

    for p in PT.proteins:
        if p in newDB.meta.special:
            continue

        name = oldDB[p]['Name']
        rec = PEATRecord(name=name)
        for col in oldDB[p].keys():
            cdata = oldDB[p][col]
            recdata = {}
            if col == 'name':
                cdata = oldDB[p]['Name']

            #ekin-typed columns become EkinProject objects, empty ones
            #are left out of the new record
            if oldDB['userfields'].has_key(col) and oldDB['userfields'][col]['field_type'] in ekintypes:
                E = EkinProject(data=cdata)
                E.length = len(E.datasets)
                if len(E.datasets) == 0:
                    continue
                cdata = E

            #sub_dict values are deep-copied into an ordinary dict
            if type(cdata) == sub_dict:
                for k in cdata.keys():
                    recdata[k] = copy.deepcopy(cdata[k])
            else:
                recdata = cdata
            if cdata != '' and cdata != None:
                rec.addField(col, data=recdata)
        newDB.add(p, rec)
    print(newDB.meta.userfields)
    #remove any file cols, too hard to import
    for m in list(newDB.meta.userfields.keys()):
        if newDB.meta.userfields[m]['field_type'] == 'File':
            newDB.deleteField(m)
    newDB.commit(user='******', note='import')
    newDB.close()
    print('import done')
Exemple #26
0
def main():
    """Command line entry point for the NMR titration tools.

    Parses the options, optionally opens a local db (-f), a server db (-s)
    and/or an ekin project (-e), then runs exactly one of: the gui (-g),
    the titration db analysis (-t, needs -s and -c), the benchmark (-b),
    the titration utilities (when -c or -e is given) or the export (-x).
    """
    from optparse import OptionParser
    parser = OptionParser()
    app = NMRTitration()
    DB = None
    E = None
    parser.add_option("-f", "--file", dest="file",
                        help="Open a local db")
    parser.add_option("-e", "--ekinprj", dest="ekinprj",
                        help="Open an ekin project")
    parser.add_option("-s", "--server", dest="server", help="server")
    parser.add_option("-t", "--analysis", dest="analysis", action='store_true',
                       help="titr db analysis", default=False)
    parser.add_option("-r", "--refit", dest="refit", action='store_true',
                       help="refit specific ekin data", default=False)
    parser.add_option("-u", "--getexperrs", dest="getexperrs", action='store_true',
                       help="get exp uncertainties", default=False)
    parser.add_option("-m", "--addmeta", dest="addmeta", action='store_true',
                       help="add meta data for NMR", default=False)
    parser.add_option("-p", "--protein", dest="protein", help="protein")
    parser.add_option("-c", "--col", dest="col", help="field")
    parser.add_option("-a", "--atom", dest="atom", help="atom")
    parser.add_option("-x", "--export", dest="export", action='store_true',
                       help="export db", default=False)
    parser.add_option("-b", "--benchmark", dest="benchmark", action='store_true',
                       help="benchmark some stuff", default=False)
    parser.add_option("-g", "--gui", dest="gui", action='store_true',
                       help="start gui app", default=False)

    opts, remainder = parser.parse_args()
    if opts.file is not None and os.path.exists(opts.file):
        app.loadDB(opts.file)
    elif opts.server is not None:
        #NOTE(review): the value passed to -s is not used, the connection
        #always goes to localhost/titration_db -- confirm this is intended
        DB = PDatabase(server='localhost', username='******',
                       password='******', project='titration_db',
                       port=8080)

    if opts.gui:
        app.main()
        app.mainwin.mainloop()
        return

    #y uncertainty per atom type; None when -a is missing or unknown
    yuncerts = {'H':0.03,'N':0.1,'C':0.2}
    yuncert = yuncerts.get(opts.atom)

    if opts.ekinprj is not None:
        E = EkinProject()
        E.openProject(opts.ekinprj)

    #some tit db analysis
    if opts.analysis and opts.server is not None:
        #only referenced by the commented-out calls below
        complete = ['HEWL', 'Bovine Beta-Lactoglobulin',
                    'Plastocyanin (Anabaena variabilis)',
                    'Plastocyanin (Phormidium)',
                    'Glutaredoxin',
                    'Protein G B1','Xylanase (Bacillus subtilus)']
        if opts.col is None:
            print('provide a column')
        else:
            app.analyseTitDB(DB, opts.col)#, complete)
            #app.addpKaTables(DB, complete)

    elif opts.benchmark:
        app.benchmarkExpErr(DB)
    elif opts.col is not None or E is not None:
        app.titDBUtils(DB, opts.col, opts.protein, a=opts.atom, E=E,
                        refit=opts.refit, addmeta=opts.addmeta,
                        getexperrs=opts.getexperrs, yuncert=yuncert)
    elif opts.export:
        app.exportAll(DB, col=opts.col)
Exemple #27
0
    def run(self, callback=None):
        """Do initial import/fitting run with the current config.

        Every file in self.queue is imported, optionally merged by
        replicate and/or regrouped per field, processed, fitted and saved
        (ekin project, exported datasets, fit csv, plots) under
        self.workingdir.

        callback -- optional function called after each imported item with
                    the percentage done so far
        Returns the results dict (also stored as self.results), or None if
        self.stop was set while the items were being processed.
        """

        self.stop = False
        self.loadModels()
        self.prepareData()
        print('processing files in queue..')

        self.parseLabels()
        imported = {}  #raw data
        results = {}  #fitted data

        for key in self.queue:
            filename = self.queue[key]
            lines = self.openRaw(filename)
            if lines is None:
                continue
            data = self.doImport(lines)
            imported[key] = data

        #try to average replicates here before we process
        if self.replicates == 1:
            if self.namelabels is not None:
                imported = Utilities.addReplicates(imported, self.namelabels)
            else:
                print('no replicates detected from labels')

        #re-arrange the imported dict if we want to group our output per field
        if self.groupbyfields == 1:
            imported = Utilities.arrangeDictbySecondaryKey(
                imported, self.namelabels)

        total = len(imported)

        c = 0.0
        for key in imported:
            if self.stop == True:
                print('cancelled')
                return
            #set filename
            fname = os.path.basename(key)
            fname = os.path.join(self.workingdir, fname)
            data = imported[key]

            if self.function1 != '':
                data = self.doProcessingStep(data, fname)

            if self.namelabels is None or not self.namelabels.has_key(key):
                namelabel = key
            else:
                namelabel = self.namelabels[key]

            #if we have models to fit this means we might need to propagate fit data
            if self.model1 != '':
                Em = EkinProject()
                #grouping by file labels handled here
                if self.groupbyname == 1:
                    #we don't pass the last model if it has to be
                    #reserved for a final round of fitting from the files dict
                    models = self.models[:-1]
                    variables = self.variables[:-1]
                    E, fits = self.processFits(rawdata=data,
                                               Em=Em,
                                               models=models,
                                               variables=variables)
                else:
                    E, fits = self.processFits(rawdata=data, Em=Em)
                results[namelabel] = fits
            else:
                #if no fitting we just put the data in ekin
                Em = Utilities.getEkinProject(data)
                results[namelabel] = data

            Em.saveProject(fname)
            Em.exportDatasets(fname, append=True)
            if self.model1 != '':
                self.saveFitstoCSV(Em, fname)
            if self.saveplots == 1:
                self.saveEkinPlotstoImages(Em, fname)
            c += 1.0
            if callback is not None:
                callback(c / total * 100)

        #if grouped by file names then we process that here from results
        if self.groupbyname == 1:
            results = Utilities.extractSecondaryKeysFromDict(results)
            Em = EkinProject()
            E, fits = self.processFits(rawdata=results, Em=Em)
            #fname already contains workingdir; joining workingdir onto it a
            #second time sent the project and the csv/plots to different
            #places whenever workingdir was a relative path
            fname = os.path.join(self.workingdir, 'final')
            Em.saveProject(fname)
            Em.exportDatasets(fname)
            if self.model1 != '':
                self.saveFitstoCSV(Em, fname)
            self.saveEkinPlotstoImages(Em, fname)
        print('processing done')
        print('results saved to %s' % self.workingdir)
        self.results = results
        return results
Exemple #28
0
class VantHoff(Plugin):
    """A plugin to do Van't Hoff Analysis of temperature melting curves"""
    """Author: Damien Farrell"""

    #'uses_sidepane' is checked in _doFrame to choose between a child frame
    #of the parent application and a separate Toplevel window
    capabilities = ['gui', 'uses_sidepane']
    requires = ['pylab', 'numpy']
    #used as the window title when a separate Toplevel is created
    menuentry = "Van't Hoff Analysis"

    #method name -> button label; only the public methods listed here get a
    #button in the plugin window
    gui_methods = {
        'getCSV': 'Import CSV',
        'loadEkin': 'Load Ekin Proj',
        'saveEkin': 'Save Ekin Proj',
        'doAnalysis': "Do Analysis",
        #'benchmark': 'Do Benchmark',
        'close': 'Close'
    }
    about = "A plugin to do Van't Hoff Analysis of temperature melting curves"
    #gas constant, presumably in J/(mol K): fitVantHoff divides by 1000 to
    #report kJ -- TODO confirm units
    R = 8.3144

    def __init__(self):
        """Start in the user's home directory with plotting configured and
        no ekin project loaded"""
        self.path = os.path.expanduser("~")
        self.pltConfig()
        #stays None until a project is imported or loaded
        self.E = None

    def main(self, parent):
        """Attach the plugin to the parent application and build its window.
        Nothing happens when no parent is given"""
        if not (parent == None):
            self.parent = parent
            self.DB = parent.DB
            self.xydata = None
            self._doFrame()

    def _doFrame(self):
        """Build the plugin window: one button per gui method, the dataset
        selector and the option widgets that doAnalysis reads"""
        if 'uses_sidepane' not in self.capabilities:
            self.mainwin = Toplevel()
            self.mainwin.title(self.menuentry)
            self.mainwin.geometry('800x600+200+100')
        else:
            self.mainwin = self.parent.createChildFrame(width=600)

        available = self._getmethods()
        fr = Frame(self.mainwin)
        fr.pack(side=LEFT, fill=BOTH)
        #only methods that have a label in gui_methods get a button
        shown = [entry for entry in available if entry[0] in self.gui_methods]
        self._createButtons(shown, fr)
        self.showDatasetSelector()

        def checkGroup(orient, **extra):
            """Make a group of check buttons inside the button frame"""
            return Pmw.RadioSelect(fr,
                                   buttontype='checkbutton',
                                   orient=orient,
                                   labelpos='w',
                                   **extra)

        def entryField(label, start):
            """Make a labelled entry field inside the button frame"""
            return Pmw.EntryField(fr,
                                  labelpos='w',
                                  value=start,
                                  label_text=label)

        self.doall = checkGroup('horizontal')
        self.doall.add('Process All')
        self.doall.pack()

        self.conversions = checkGroup('horizontal')
        self.conversions.add('Convert Celsius-Kelvin')
        self.conversions.pack()

        self.methods = checkGroup('vertical', label_text='Methods:')
        for number in (1, 2, 3, 4):
            self.methods.add('method %s' % number)
        #method 1 is ticked by default
        self.methods.invoke('method 1')
        self.methods.pack()

        self.sm = entryField('Smoothing:', 5)
        self.sm.pack()
        self.tw = entryField('Width of transition:', 60)
        self.tw.pack()

    def _getmethods(self):
        """Get a list of all available public methods"""
        import inspect
        mems = inspect.getmembers(self, inspect.ismethod)
        methods = [m for m in mems if not m[0].startswith('_')]
        return methods

    def _createButtons(self, methods, fr=None):
        """Create one button per entry of methods inside fr. Each entry is
            indexed as (method name, callable): the name is looked up in
            gui_methods for the button text, the callable is the command"""
        for entry in methods:
            label = self.gui_methods[entry[0]]
            button = Button(fr, text=label, command=entry[1])
            button.pack(side=TOP, fill=BOTH)

    def close(self):
        """Destroy the plugin window and drop the reference to the preview
        panel"""
        self.mainwin.destroy()
        #the panel lived inside the destroyed window
        self.plotframe = None

    def showDatasetSelector(self):
        """(Re)build the option menu listing the datasets of the current
        project in sorted order. Does nothing while no project is loaded"""
        if self.E == None:
            return
        #replace an existing menu instead of stacking a second one
        if hasattr(self, 'dmenu'):
            self.dmenu.destroy()
        menu = Pmw.OptionMenu(self.mainwin,
                              labelpos='w',
                              label_text='Dataset:',
                              items=sorted(self.E.datasets),
                              command=self.showPreview,
                              menubutton_width=8)
        self.dmenu = menu
        menu.pack(side=TOP, fill=BOTH)

    def showPreview(self, event=None):
        """Plot the dataset currently selected in the dataset menu, creating
        the plot panel first if there is none. event is not used, it only
        lets the method serve as the menu command"""
        if self.E == None:
            return
        if getattr(self, 'plotframe', None) == None:
            from Ekin.Plotting import PlotPanel
            self.plotframe = PlotPanel(parent=self.mainwin, side=BOTTOM)
        panel = self.plotframe
        panel.setProject(self.E)
        panel.plotCurrent(self.dmenu.getcurselection())

    def getCSV(self):
        """Import datasets from csv through the ekin Importer into a new
        project, then refresh the dataset menu and the preview.

        NOTE(review): the current project is replaced by an empty one before
        the importer runs, so it is also lost when nothing is imported"""
        self.E = EkinProject()
        from PEATDB.Ekin.IO import Importer
        newdata = Importer(self, parent_win=self.mainwin).import_multiple()
        if newdata == None:
            return
        for label in newdata.keys():
            self.E.insertDataset(newdata[label], label, update=None)
        print('imported %s datasets' % len(self.E.datasets))
        self.showDatasetSelector()
        self.showPreview()

    def loadEkin(self):
        """Ask for an .ekinprj file and open it as the current project.
        Nothing changes unless the chosen name is an existing file"""

        kinds = [("ekinprj", "*.ekinprj"), ("All files", "*.*")]
        filename = tkFileDialog.askopenfilename(parent=self.mainwin,
                                                defaultextension='.ekinprj',
                                                initialdir=os.getcwd(),
                                                filetypes=kinds)
        if os.path.isfile(filename):
            self.E = EkinProject()
            self.E.openProject(filename)
            self.showDatasetSelector()
            self.showPreview()

    def saveEkin(self):
        """Save the current ekin project.

        A project without a filename triggers a save-as dialog first. If
        that dialog is cancelled nothing is saved and the project keeps no
        filename, so the next call asks again. Does nothing when no project
        is loaded.
        """
        if self.E is None:
            return
        if self.E.filename is None:
            chosen = tkFileDialog.asksaveasfilename(
                defaultextension='.ekinprj',
                initialdir=os.getcwd(),
                filetypes=[("ekinprj", "*.ekinprj"), ("All files", "*.*")],
                parent=self.mainwin)
            #a cancelled dialog gives an empty result; storing it would make
            #saveProject run without a usable name and stop later calls
            #from prompting again
            if not chosen:
                return
            self.E.filename = chosen

        self.E.saveProject()
        print('saved ekin proj')

    def doAnalysis(self):
        """Run the ticked methods from the GUI: on every dataset through
        doAll when 'Process All' is ticked, otherwise on the dataset selected
        in the menu only"""
        if self.E == None:
            return
        methods = self.methods.getcurselection()
        if 'Process All' in self.doall.getcurselection():
            self.doAll(methods=methods)
            return

        #the widgets are only read when the matching method is ticked
        steps = [
            ('method 1',
             lambda: self.fitVantHoff(E=self.E,
                                      d=self.dmenu.getcurselection(),
                                      transwidth=int(self.tw.getvalue()))),
            ('method 2',
             lambda: self.fitElwellSchellman(
                 E=self.E,
                 d=self.dmenu.getcurselection(),
                 transwidth=int(self.tw.getvalue()))),
            ('method 3',
             lambda: self.fitDifferentialCurve(
                 E=self.E,
                 d=self.dmenu.getcurselection(),
                 smooth=int(self.sm.getvalue()))),
            ('method 4',
             lambda: self.breslauerMethod(E=self.E,
                                          d=self.dmenu.getcurselection())),
        ]
        for label, action in steps:
            if label in methods:
                action()

    def guessMidpoint(self, x, y):
        """Guess the midpoint for the unfolding model.

        Returns the x value whose y lies closest to the value halfway
        between min(y) and max(y); the first such x wins on ties. Returns 0
        when there are no (x, y) pairs to look at.
        """
        midy = min(y) + (max(y) - min(y)) / 2.0
        midx = 0
        closest = None
        for xi, yi in zip(x, y):
            dist = abs(yi - midy)
            #no fixed starting distance: a hard-coded one made the search
            #return 0 for data on a large y scale
            if closest is None or dist < closest:
                midx = xi
                closest = dist
        return midx

    def transformCD(self, x, y, transwidth=None, ax=None):
        """Transform raw data into fraction unfolded per temp value, by
        fitting to a general unfolding equation that extracts the baseline
        slopes and intercepts.

        x, y       -- raw data
        transwidth -- number of points kept on each side of the fitted
                      midpoint for the transition region; None keeps all
        ax         -- optional axes on which the fit line and the fitted
                      parameters are drawn
        Returns (at, af, t, f): x values and fraction unfolded for all
        points, followed by the same two lists cut to the transition region.
        """
        #fit baseline slopes and get intercepts
        d50 = self.guessMidpoint(x, y)
        print('fitting to get baseline slopes and intercepts..')
        print('midpoint is %s' % d50)
        A, X = Fitting.doFit(expdata=list(zip(x, y)),
                             model='Unfolding',
                             noiter=50,
                             silent=True,
                             guess=False,
                             startvalues=[1, 1, 1, 1, 1, d50])
        fity = X.getFitLine(x)
        fd = X.getFitDict()
        if ax is not None:
            ax.plot(x, fity, 'r', lw=2)
            self.drawParams(ax, fd)
        #we then use slopes and intercepts get frac unfolded at each temp
        mn = fd['bn']
        mu = fd['bd']  #slopes
        yn = fd['an']
        yu = fd['ad']  #intercepts
        d50 = fd['d50']

        t = []
        f = []
        for T, yo in zip(x, y):
            fu = (yo - (yn + mn * T)) / ((yu + mu * T) - (yn + mn * T))
            f.append(fu)
            t.append(T)

        #try to take useful transition region of data
        at, af = t, f
        if transwidth is not None and t:
            #index of the point nearest to the fitted midpoint
            mid = min(range(len(t)), key=lambda i: abs(t[i] - d50))
            #clamp at 0: a negative start would count from the end of the
            #list and cut away the transition region
            L = max(0, int(mid - transwidth))
            U = int(mid + transwidth)
            t, f = t[L:U], f[L:U]

        return at, af, t, f

    def fitVantHoff(self,
                    E=None,
                    d=None,
                    xy=None,
                    transwidth=80,
                    invert=False,
                    show=True,
                    figname=None):
        """Derive fraction unfolded, get K and fit to Van't Hoff.
           see http://www.jbc.org/content/277/43/40717.full
           or http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2144003/

           E          -- ekin project to take dataset d from; the lnK vs 1/T
                         data is stored back into it as d + '_vanthoff'
           d          -- dataset name, also used to name the csv and png
                         files written to the current directory
           xy         -- (x, y) data, used when no project is given
           transwidth -- points kept either side of the midpoint
           invert     -- flip the signal (max(y) - y) before fitting
           show       -- display the figure in the plugin window
           figname    -- base name of the saved plot, defaults to d
           Returns (deltaH, deltaS, ax), (None, None, None) when fewer than
           two usable points remain, or None when there is no data to use.
        """
        if E is not None:
            if not d in E.datasets:
                print('no such dataset, %s' % d)
                print('available datasets: %s' % (E.datasets,))
                return
            ek = E.getDataset(d)
            x, y = ek.getxySorted()
        elif xy is not None:
            x, y = xy
        else:
            #nothing to fit, same early exit as the other fit methods
            return

        if 'Convert Celsius-Kelvin' in self.conversions.getcurselection():
            x = [i + 273 for i in x]
        if invert == True:
            top = max(y)
            y = [top - i for i in y]

        f = plt.figure(figsize=(18, 6))
        ax = f.add_subplot(131)
        ax.plot(x, y, 'o', alpha=0.6)
        ax.set_xlabel('T(K)')
        ax.set_ylabel('mdeg')
        ax.set_title('raw data')

        x1, y1, x, y = self.transformCD(x, y, transwidth, ax)
        #close the file once written instead of leaving the handle open
        with open('frac_unfolded_' + d + '.csv', 'w') as csvfile:
            cw = csv.writer(csvfile)
            cw.writerow(['temp', 'frac'])
            cw.writerows(zip(x1, y1))

        #derive lnK vs 1/T
        t = []
        k = []

        for T, fu in zip(x, y):
            #K is only defined for 0 < fu < 1
            if fu >= 1 or fu <= 0:
                continue
            K = fu / (1 - fu)
            klog = math.log(K)
            k.append(klog)
            #1.0 forces true division, integer temperatures gave 0 here
            t.append(1.0 / T)

        if len(t) < 2: return None, None, None

        ax = f.add_subplot(132)
        ax.plot(x1, y1, 'o', color='g', alpha=0.6)
        ax.set_xlabel('T(K)')
        ax.set_ylabel('fu')
        ax.set_title('fraction unfolded')

        ax = f.add_subplot(133)
        ax.plot(t, k, 'x', mew=2, color='black')
        ax.set_xlabel('1/T')  #(r'$1/T ($K^-1)$')
        ax.set_ylabel('ln K')

        formatter = matplotlib.ticker.ScalarFormatter()
        formatter.set_scientific(True)
        formatter.set_powerlimits((0, 0))
        ax.xaxis.set_major_formatter(formatter)
        for l in ax.get_xticklabels():
            l.set_rotation(30)

        #fit this van't hoff plot
        A, X = Fitting.doFit(expdata=list(zip(t, k)), model='Linear')
        fitk = X.getFitLine(t)
        ax.plot(t, fitk, 'r', lw=2)
        fd = X.getFitDict()

        #slope is deltaH/R/1000 in kJ
        deltaH = -fd['a'] * self.R / 1000
        deltaS = fd['b'] * self.R / 1000
        f.suptitle("Method 1 - deltaH: %2.2f deltaS: %2.2f" % (deltaH, deltaS),
                   size=18)
        f.subplots_adjust(bottom=0.15, top=0.85)

        if show == True:
            self.showTkFigure(f)

        if figname is None: figname = d
        figname = figname.replace('.', '_')
        fname = figname + 'm1' + '.png'
        f.savefig(fname, dpi=300)
        print('plot saved to %s' % os.path.abspath(fname))
        if E is not None:
            fdata = Fitting.makeFitData(X.name, vrs=X.variables)
            E.insertDataset(xydata=[t, k],
                            newname=d + '_vanthoff',
                            replace=True,
                            fit=fdata)
        return deltaH, deltaS, ax

    def fitElwellSchellman(self,
                           E=None,
                           d=None,
                           xy=None,
                           transwidth=50,
                           invert=False,
                           show=True,
                           figname=None):
        """Fit entire raw data simultaneously to the three main thermodynamic
           parameters using Elwell/Schellman method.

           E          -- ekin project to take dataset d from; the dG vs T
                         data is stored back into it as d + '_vanthoff2'
           d          -- dataset name, also the default name of the plot
           xy         -- (x, y) data, used when no project is given
           transwidth -- points kept either side of the midpoint
           invert     -- flip the signal (max(y) - y) before fitting
           show       -- display the figure in the plugin window
           figname    -- base name of the saved plot, defaults to d
           Returns (deltaH, Tm, deltacp), or None when there is no data.
        """
        if E is not None:
            ek = E.getDataset(d)
            x, y, a, xerr, yerr = ek.getAll()
        elif xy is not None:
            x, y = xy
        else:
            return
        if invert == True:
            top = max(y)
            y = [top - i for i in y]
        f = plt.figure(figsize=(10, 5))
        ax = f.add_subplot(121)
        ax.plot(x, y, 'o', alpha=0.5)
        ax.set_xlabel('T')
        #the signal goes on the y axis (was a second set_xlabel call)
        ax.set_ylabel('mdeg')
        ax.set_title('raw data')

        x1, y1, x, y = self.transformCD(x, y, transwidth, ax)

        t = []
        dg = []
        R = 8.3144e-3
        for T, fu in zip(x, y):
            #K is only defined for 0 < fu < 1
            if fu >= 1 or fu <= 0:
                continue
            K = fu / (1 - fu)
            deltaGt = -R * T * math.log(K)
            dg.append(deltaGt)
            t.append(T)

        ax1 = f.add_subplot(122)
        ax1.plot(t, dg, 'x', mew=2, color='black')
        ax1.set_xlabel('T')
        ax1.set_ylabel('dG(T)')
        #title belongs to the stability plot, not to the raw data plot
        ax1.set_title('stability curve')

        A, X = Fitting.doFit(expdata=list(zip(t, dg)),
                             model='schellman',
                             grad=1e-9,
                             conv=1e-9)
        fity = X.getFitLine(t)
        ax1.plot(t, fity, 'r', lw=2)
        fd = X.getFitDict()
        self.drawParams(ax1, fd)
        deltaH = fd['deltaH']
        deltacp = fd['deltacp']
        Tm = fd['Tm']
        f.suptitle("Method 2 - deltaH: %2.2f deltaCp: %2.2e Tm: %2.2f" %
                   (deltaH, deltacp, Tm),
                   size=18)
        if show == True:
            self.showTkFigure(f)

        if figname is None: figname = d
        figname = figname.replace('.', '_')
        #'m2' keeps this plot from overwriting the method 1 plot, which is
        #saved under the same base name with 'm1'
        fname = figname + 'm2' + '.png'
        f.savefig(fname, dpi=300)
        print('plot saved to %s' % os.path.abspath(fname))
        if E is not None:
            fdata = Fitting.makeFitData(X.name, vrs=X.variables)
            E.insertDataset(xydata=[t, dg],
                            newname=d + '_vanthoff2',
                            replace=True,
                            fit=fdata)
        return deltaH, Tm, deltacp

    def breslauerMethod(self,
                        E=None,
                        d=None,
                        xy=None,
                        invert=False,
                        show=True,
                        figname=None):
        """Finds slope of trans region and plugs this in to equation
        http://www.springerlink.com/content/r34n0201g30563u7/

        The data comes from dataset d of project E, or from xy when no
        project is given. The figure is shown when show is True and saved
        (then closed) when figname is given. invert is accepted but not
        used. Returns (deltaH, Tm), or None when there is no data."""
        if E != None:
            x, y, a, xerr, yerr = E.getDataset(d).getAll()
        elif xy != None:
            x, y = xy
        else:
            return

        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111)
        ax.set_xlabel('T')
        ax.plot(x, y, 'o', alpha=0.5)

        #fit the unfolding model, starting from the guessed midpoint
        start = [1, 1, 1, 1, 1, self.guessMidpoint(x, y)]
        A, X = Fitting.doFit(expdata=zip(x, y),
                             model='Unfolding',
                             conv=1e-7,
                             noiter=60,
                             guess=False,
                             startvalues=start)
        ax.plot(x, X.getFitLine(x), 'r', lw=2)
        params = X.getFitDict()
        self.drawParams(ax, params)

        Tm = params['d50']
        slope = params['m']
        R = 8.3144e-3
        deltaH = R * math.pow(Tm, 2) * slope
        fig.suptitle("Method 4 - deltaH: %2.2f Tm: %2.2f" % (deltaH, Tm),
                     size=18)
        if show == True:
            self.showTkFigure(fig)
        if figname != None:
            figname = figname.replace('.', '_')
            fig.savefig(figname)
            plt.close()
        return deltaH, Tm

    def fitDifferentialCurve(self,
                             E=None,
                             d=None,
                             xy=None,
                             smooth=0,
                             invert=False,
                             show=True,
                             figname=None):
        """Derive differential denaturation curve and fit to get deltaH
           We smooth the unfolding curve and then differentiate and finally
           fit to a 3 parameter equation.
           See http://www.ncbi.nlm.nih.gov/pubmed/10933511

           E       -- ekin project to take dataset d from; the differential
                      data is stored back into it as d + '_diff'
           d       -- dataset name
           xy      -- (x, y) data, used when no project is given
           smooth  -- degree passed to smoothListGaussian; 0 picks
                      len(x)/15
           invert  -- flip the signal (max(y) - y) before smoothing
           show    -- display the figure in the plugin window
           figname -- when given the plot is saved as figname + 'm3' and
                      the figure is closed
           The differential curve is also written to diffcd.csv in the
           current directory.
           Returns (deltaH, Tm), or None when there is no data."""

        if E is not None:
            ek = E.getDataset(d)
            x, y, a, xerr, yerr = ek.getAll()
        elif xy is not None:
            x, y = xy
        else:
            return
        if invert == True:
            top = max(y)
            y = [top - i for i in y]

        leg = []
        lines = []
        f = plt.figure(figsize=(10, 5))
        ax = f.add_subplot(121)
        p = ax.plot(x, y, 'x', color='black', mew=3, alpha=0.5)
        #plot returns a list of lines; the legend wants the lines themselves
        leg.extend(p)
        lines.append('original')
        #smooth
        if smooth == 0:
            smooth = int(len(x) / 15.0)
        s = self.smoothListGaussian(y, smooth)
        p = ax.plot(x[:len(s) - 1], s[:-1], lw=3)
        leg.extend(p)
        lines.append('smoothed')
        ax.set_title("original data")
        ax.set_xlabel('T')
        ax1 = f.add_subplot(122)
        #differentiate
        dx, ds = self.differentiate(x[:len(s)], s)
        ds = [i * 10 for i in ds]
        #close the file once written instead of leaving the handle open
        with open('diffcd.csv', 'w') as csvfile:
            csv.writer(csvfile).writerows(zip(dx, ds))
        p = ax1.plot(dx, ds, '-', lw=1.5, alpha=0.7, color='black')
        leg.extend(p)
        lines.append('differential')
        ax1.set_title("differential denaturation")
        ax1.set_xlabel('T')
        ax1.set_ylabel('dsignal/dT')

        A, X = Fitting.doFit(expdata=list(zip(dx, ds)),
                             model='diffDenaturation',
                             grad=1e-9,
                             conv=1e-10)
        fity = X.getFitLine(dx)
        p = ax1.plot(dx, fity, 'r', lw=2)
        leg.extend(p)
        lines.append('fit')
        t = X.getFitDict()
        self.drawParams(ax1, t)
        dHkcal = t['deltaH'] / 4.184
        f.suptitle('Method 3 - deltaH: %2.2f kJ/mol (%2.2f kcal) Tm: %2.2f' %
                   (t['deltaH'], dHkcal, t['Tm']),
                   size=18)
        ax.legend(leg, lines, loc='best', prop=FontProperties(size="smaller"))
        if show == True:
            self.showTkFigure(f)
        if figname is not None:
            figname = figname.replace('.', '_')
            f.savefig(figname + 'm3', dpi=300)
            plt.close()
        if E is not None:
            fdata = Fitting.makeFitData(X.name, vrs=X.variables)
            E.insertDataset(xydata=[dx, ds],
                            newname=d + '_diff',
                            replace=True,
                            fit=fdata)
        return t['deltaH'], t['Tm']

    def differentiate(self, x, y):
        """Return the first differences of y with a matching slice of x.

        The differences are taken with numpy.diff (order 1) and are not
        divided by the spacing of x. x is truncated to the number of
        differences so both returned sequences have the same length.
        """
        deltas = numpy.diff(y, 1)
        npoints = len(deltas)
        return x[:npoints], deltas

    def smoothListGaussian(self, data, degree=5):
        """Gaussian data smoothing function.

        Each output point is the weighted mean of a window of
        ``2 * degree - 1`` input points centred on it. The input is padded
        with copies of its first and last values so the result has the same
        length as ``data``.

        data: sequence of numbers to smooth.
        degree: half-width of the window; values below 1 are treated as 1,
            which leaves the data unsmoothed.
        Returns a list of smoothed values (empty if ``data`` is empty).
        """
        values = list(data)
        if not values:
            return []
        # A degree below 1 would give a non-positive window and an empty
        # weight vector, which cannot be multiplied with the data slices.
        degree = max(int(degree), 1)
        window = degree * 2 - 1
        #buffer data to avoid offset result
        padded = [values[0]] * (degree - 1) + values + [values[-1]] * degree
        weight = numpy.array([
            1 / (numpy.exp((4 * ((i - degree + 1) / float(window)))**2))
            for i in range(window)
        ])
        # the normalisation is the same for every window, so compute it once
        norm = sum(weight)
        smoothed = [0.0] * (len(padded) - window)
        for i in range(len(smoothed)):
            smoothed[i] = sum(numpy.array(padded[i:i + window]) * weight) / norm
        return smoothed

    def invert(self, data):
        """Return the items of data as a new list.

        NOTE(review): despite the name, the items are copied in their
        original order with their values unchanged.
        """
        return list(data)

    def simulateCD(self, noise=1.0):
        """Simulate some CD spec data.

        Evaluates the 'Unfolding' model with fixed parameters over x values
        from 290 up to (not including) 380 in steps of 0.2, adds zero-mean
        normal noise to every point and writes the x, y pairs to 'cd.csv'
        in the current directory.

        noise: noise level; the standard deviation of the added noise is
            ``noise / 2.0`` (0.5 for the default).
        Returns the x and y lists.
        """
        x = list(numpy.arange(290, 380, 0.2))
        X = Fitting.getFitter(model='Unfolding',
                              vrs=[-16, 0.01, -11.6, 0.01, 2.7, 324])
        fity = X.getFitLine(x)
        sigma = noise / 2.0
        # The noise is centred on zero: drawing it around the signal value
        # and then adding it to the signal would double the signal.
        y = [i + numpy.random.normal(0, sigma) for i in fity]
        with open('cd.csv', 'w') as out:
            csv.writer(out).writerows(zip(x, y))
        return x, y

    def drawParams(self, ax, d):
        """Write every entry of d onto ax as a 'key=value' text label.

        Values are rounded to 3 places. Labels start one twentieth of the
        axis ranges in from the top-left corner of the current limits and
        each following label is placed one twentieth of the y range lower.
        """
        bottom, top = ax.get_ylim()
        left, right = ax.get_xlim()
        ystep = (top - bottom) / 20
        xstep = (right - left) / 20
        ypos = top - ystep
        for key, value in d.items():
            label = key + '=' + str(round(value, 3))
            ax.text(left + xstep, ypos, label, fontsize=10)
            ypos -= ystep

    def pltConfig(self):
        """Apply the matplotlib rc settings used for the figures.

        Sets the subplot spacing (hspace and wspace of 0.3) and selects the
        monospace font family.
        """
        # usetex text rendering and a larger axes title size are left disabled
        spacing = {'hspace': 0.3, 'wspace': 0.3}
        plt.rc('figure.subplot', **spacing)
        plt.rc('font', family='monospace')

    def doAll(self, methods=('method 1',)):
        """Process all datasets in ekinprj.

        Runs the selected fitting methods on every dataset of self.E whose
        name contains neither '_diff' nor '_vanthoff', shows a progress bar
        while doing so and finally displays the collected values in a table.

        methods: collection holding any of 'method 1', 'method 2' and
            'method 3'.
        """
        E = self.E
        vals = {}
        from Dialogs import PEATDialog
        # Work on a snapshot of the names: the fit methods may insert derived
        # datasets into E while we loop, and only the datasets that are
        # actually processed should count towards the progress total.
        names = [d for d in E.datasets
                 if '_diff' not in d and '_vanthoff' not in d]
        total = len(names)
        pb = PEATDialog(self.mainwin,
                        option='progressbar',
                        message='Analysing Data..')
        try:
            pb.update_progress(0)
            for count, d in enumerate(names, 1):
                res = vals[d] = {}
                name = d
                if 'method 1' in methods:
                    res['dH1'], res['dS1'], _ax = self.fitVantHoff(
                        E,
                        d,
                        transwidth=int(self.tw.getvalue()),
                        show=False,
                        figname=name)
                if 'method 2' in methods:
                    # NOTE(review): benchmark() unpacks this call as
                    # (dH, dcp, dTm); confirm which order is correct.
                    res['dH2'], res['dTm2'], res['dCp2'] = \
                        self.fitElwellSchellman(E,
                                                d,
                                                show=False,
                                                figname=name)
                if 'method 3' in methods:
                    res['dH3'], res['dTm3'] = self.fitDifferentialCurve(
                        E, d, show=False, figname=name)
                pb.update_progress(float(count) / total * 100.0)
        finally:
            # close the dialog even when one of the fits raises
            pb.close()
        self.showTable(vals)
        return

    def showTable(self, data):
        """Show results in table.

        Creates a DictEditor with self.mainwin and loads data into it.
        """
        from PEATDB.DictEdit import DictEditor
        DictEditor(self.mainwin).loadTable(data)

    def benchmark(self, E=None, d=None, method=1):
        """Test methods with varying parameters, smoothing etc.

        Repeats one fitting method while varying a single setting and
        records the deltaH of each run:

        * method 1: fitVantHoff, transwidth 5..135 in steps of 5
        * method 2: fitElwellSchellman, transwidth 5..135 in steps of 5
        * method 3: fitDifferentialCurve, smooth 1..28 in steps of 3

        Each run is given a figure name inside the 'vh_benchmark' folder
        (created if missing). The deltaH values are plotted against the
        varied setting in 'benchmark_<method>.png' and written to
        'benchmark_<method>.csv'.

        E, d: project and dataset name; when E is None the current project
            and the dataset selected in self.dmenu are used.
        method: 1, 2 or 3.
        Raises ValueError for any other method.
        """
        # Fail early: otherwise an unknown method only surfaces later as a
        # NameError on the names set inside the branches below.
        if method not in (1, 2, 3):
            raise ValueError('method must be 1, 2 or 3, got %r' % (method,))
        if E is None and self.E is not None:
            E = self.E
            d = self.dmenu.getcurselection()

        path = 'vh_benchmark'
        if not os.path.exists(path):
            os.mkdir(path)
        dHvals = []

        if method == 1:
            xlabel = 'width (K)'
            title = 'method 1: deltaH variation with trans region width fit'
            vals = range(5, 140, 5)
            for w in vals:
                dH, dS, ax = self.fitVantHoff(E,
                                              d,
                                              transwidth=w,
                                              show=False,
                                              figname=os.path.join(
                                                  path, '%s_%s.png' % (d, w)))
                # a missing result is recorded as 0 so the statistics and
                # the plot below still work
                if dH is None:
                    dH = 0
                dHvals.append(dH)
        elif method == 2:
            xlabel = 'width (K)'
            title = 'method 2: deltaH variation with width fit'
            vals = range(5, 140, 5)
            for w in vals:
                dH, dcp, dTm = self.fitElwellSchellman(
                    E,
                    d,
                    transwidth=w,
                    show=False,
                    figname=os.path.join(path, '%s_%s.png' % (d, w)))
                dHvals.append(dH)
        else:
            xlabel = 'smoothing degree'
            title = 'method 3: deltaH variation with degree of smoothing'
            vals = range(1, 30, 3)
            for s in vals:
                dH, dTm = self.fitDifferentialCurve(E,
                                                    d,
                                                    smooth=s,
                                                    show=False,
                                                    figname=os.path.join(
                                                        path,
                                                        '%s_%s.png' % (d, s)))
                dHvals.append(dH)
        mean = numpy.mean(dHvals)
        stdev = numpy.std(dHvals)
        f = plt.figure()
        ax = f.add_subplot(111)
        ax.plot(vals, dHvals, lw=2)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('deltaH (kJ)')
        ax.set_title('mean: %2.2f stdev: %2.2f' % (mean, stdev))
        f.suptitle(title)
        f.savefig('benchmark_%s.png' % method)
        # the figure is only written to disk, so release it afterwards
        plt.close(f)
        with open('benchmark_%s.csv' % method, 'w') as out:
            csv.writer(out).writerows(zip(vals, dHvals))
        return

    def benchmarkLimitedData(self, E=None, d=None, method=1):
        """test any method with varying limited data

        Only method 1 is implemented: fitVantHoff is run for transition
        widths 5..135 in steps of 5, with a figure name for each run inside
        the 'vh_benchmark' folder (created if missing). Other method values
        do nothing beyond creating the folder.

        E, d: project and dataset name; when E is None the current project
            and the dataset selected in self.dmenu are used.
        """
        if E is None and self.E is not None:
            E = self.E
            d = self.dmenu.getcurselection()

        path = 'vh_benchmark'
        if not os.path.exists(path):
            os.mkdir(path)
        if method == 1:
            # Iterate the widths themselves; looping over the empty result
            # list meant that no fit was ever run.
            for w in range(5, 140, 5):
                self.fitVantHoff(E,
                                 d,
                                 transwidth=w,
                                 show=False,
                                 figname=os.path.join(
                                     path, '%s_%s.png' % (d, w)))
        return

    @classmethod
    def plotCorrelation(self,
                        x=None,
                        y=None,
                        xlabel='method1',
                        ylabel='method2'):
        """Scatter-plot x against y and annotate the squared Pearson r.

        This is a classmethod, so ``self`` is the class itself.

        x, y: sequences of numbers. When x is None both are read from the
            first two columns of 'compared.csv' in the current directory
            (blank rows are ignored), replacing any y that was passed.
        xlabel, ylabel: axis labels.

        The axes are fixed to 150..600 and a y = x guide line is drawn from
        0 up to max(x) + 50. The figure is handed to showTkFigure.
        """
        if x is None:
            # read the file once and close it, instead of rewinding a
            # handle that is never released
            with open('compared.csv', 'r') as data:
                rows = [r for r in csv.reader(data) if r]
            x = [float(r[0]) for r in rows]
            y = [float(r[1]) for r in rows]
        f = plt.figure()
        ax = f.add_subplot(111)

        ax.scatter(x, y, marker='o', alpha=0.8)
        cl = numpy.arange(0, max(x) + 50)
        ax.plot(cl, cl, 'g', alpha=0.5, lw=2)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_xlim(150, 600)
        ax.set_ylim(150, 600)
        ax.set_title('Correlation')
        # import from the public namespace; scipy.stats.stats is deprecated
        from scipy.stats import pearsonr
        cc = str(round(pow(pearsonr(x, y)[0], 2), 2))
        ax.text(400, 180, r'$r^2= %s$' % cc, fontsize=16)
        # NOTE(review): this call goes through the class, so it only works
        # if showTkFigure can be called without an instance.
        self.showTkFigure(f)
        return

    @staticmethod
    def showTkFigure(fig):
        """Show a matplotlib figure in a new Tk Toplevel window.

        The figure canvas is packed into the window together with a
        matplotlib navigation toolbar.

        This is a staticmethod because it uses no instance state; it can be
        called as ``self.showTkFigure(fig)`` or through the class, which the
        classmethod plotCorrelation relies on.

        fig: the matplotlib figure to display.
        """
        from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
        try:
            from matplotlib.backends.backend_tkagg import NavigationToolbar2Tk
        except ImportError:
            # older matplotlib releases only provide the TkAgg-suffixed name
            from matplotlib.backends.backend_tkagg import \
                NavigationToolbar2TkAgg as NavigationToolbar2Tk
        fr = Toplevel()
        canvas = FigureCanvasTkAgg(fig, master=fr)
        canvas.get_tk_widget().pack(side=TOP, fill=X, expand=1)
        mtoolbar = NavigationToolbar2Tk(canvas, fr)
        mtoolbar.update()
        canvas._tkcanvas.pack(side=BOTTOM, fill=BOTH, expand=1)
        return