def kineticsTest():
    """Tests kinetics data for paper.

    Builds a 'kineticsdata' import/fit configuration, hands it to
    Pipeline.createConfig as 'temp.conf', queues the rep1 test folder
    and runs the pipeline. Returns None.
    """
    colheaderlabels = 'wt 5,wt 3,wt 2,68 5,68 3,68 2,138 5,138 3,138 2,248 5,248 3,248 2'
    rowheaderlabels = '3.2,1.6,0.8,0.4,0.2,0.1,0.05,0.025'
    # NOTE(review): the 'delimeter' spelling is kept as-is because the key is
    # read at runtime; presumably it matches what the config parser expects.
    conf = {
        'format': 'kineticsdata',
        'delimeter': 'tab',
        'rowstart': 3,
        'colend': 12,
        'rowrepeat': 9,
        'colheaderlabels': colheaderlabels,
        'rowheaderlabels': rowheaderlabels,
        'decimalsymbol': ',',
        'xformat': '%M:%S',
        'groupbyname': 1,
        'parsenamesindex': 2,
        'parsemethod': 'numeric',
        'model1': 'linear',
        'model2': 'Michaelis-Menten',
        'model3': '1 pKa 2 Chemical shifts',
        'variable1': 'a',
        'variable2': 'Km',
        'variable3': 'pKa',
        #'xerror':.1,'yerror':0.05,
    }
    # One Pipeline is enough: the original created an instance before the
    # config dict and then replaced it with a second one before first use.
    p = Pipeline()
    p.createConfig('temp.conf', **conf)
    path = 'testfiles/kineticsdata/jan/setF/MM/rep1'
    p.addFolder(path)
    p.run()
    return
def multiFolderTest():
    """Handling of multiple folders in a hierarchy with replicates.

    Creates one 'ph<N>' sub-folder per value in 2..9 under
    'testfiles/multifolders', fills each with three replicate files and
    then runs the pipeline on the top-level folder.
    """
    pipeline = Pipeline()
    settings = {
        'format': 'databycolumn',
        'groupbyname': 1,
        'parsenamesindex': 0,
        'parsemethod': 'numeric',
        'replicates': 1,
        #'saveplots':1,
        'model1': 'linear',
        'variable1': 'a',
        'model2': 'sigmoid',
        'variable2': 'tm'
    }
    pipeline.createConfig('temp.conf', **settings)

    root = 'testfiles/multifolders'
    Utilities.createDirectory(root)
    names = Utilities.createRandomStrings(3, 6)
    datestamp = str(datetime.date.today())

    for ph in range(2, 10):
        # the value handed to createTempData follows a sigmoid in 'ph',
        # so the expected outcome of the fit is known in advance
        value = 1 / (1 + exp((ph - 4) / 1.04))
        phfolder = os.path.join(root, 'ph' + str(ph))
        Utilities.createDirectory(phfolder)
        for rep in range(1, 4):
            # one file per replicate
            repfile = ''.join(['r', str(rep), '_', datestamp, '.txt'])
            Utilities.createTempData(os.path.join(phfolder, repfile), names, value)

    pipeline.addFolder(root)
    pipeline.run()
    return
def kineticsTest():
    """Tests kinetics data for paper.

    Builds a "kineticsdata" import/fit configuration, hands it to
    Pipeline.createConfig as "temp.conf", queues the rep1 test folder
    and runs the pipeline. Returns None.
    """
    colheaderlabels = "wt 5,wt 3,wt 2,68 5,68 3,68 2,138 5,138 3,138 2,248 5,248 3,248 2"
    rowheaderlabels = "3.2,1.6,0.8,0.4,0.2,0.1,0.05,0.025"
    # NOTE(review): the "delimeter" spelling is kept as-is because the key is
    # read at runtime; presumably it matches what the config parser expects.
    conf = {
        "format": "kineticsdata",
        "delimeter": "tab",
        "rowstart": 3,
        "colend": 12,
        "rowrepeat": 9,
        "colheaderlabels": colheaderlabels,
        "rowheaderlabels": rowheaderlabels,
        "decimalsymbol": ",",
        "xformat": "%M:%S",
        "groupbyname": 1,
        "parsenamesindex": 2,
        "parsemethod": "numeric",
        "model1": "linear",
        "model2": "Michaelis-Menten",
        "model3": "1 pKa 2 Chemical shifts",
        "variable1": "a",
        "variable2": "Km",
        "variable3": "pKa",
        #'xerror':.1,'yerror':0.05,
    }
    # One Pipeline is enough: the original created an instance before the
    # config dict and then replaced it with a second one before first use.
    p = Pipeline()
    p.createConfig("temp.conf", **conf)
    path = "testfiles/kineticsdata/jan/setF/MM/rep1"
    p.addFolder(path)
    p.run()
    return
def replicatesTest():
    """Tests handling of replicates.

    Generates grouped data for three replicate folders that share the
    same dataset names, then runs the pipeline on the parent folder.
    """
    pipeline = Pipeline()
    settings = {
        'format': 'databycolumn',
        'groupbyname': 1,
        'parsenamesindex': 1,
        'parsemethod': 'numeric',
        'replicates': 1,
        'model1': 'linear',
        'variable1': 'a',
        'model2': 'sigmoid',
        'variable2': 'tm'
    }
    pipeline.createConfig('temp.conf', **settings)

    root = 'testfiles/replicates'
    Utilities.createDirectory(root)
    # the same names are passed for every replicate folder
    names = Utilities.createRandomStrings(3, 6)
    for repname in ['rep1', 'rep2', 'rep3']:
        Utilities.createGroupedData(os.path.join(root, repname), names=names)

    pipeline.addFolder(root)
    pipeline.run()
    return
def __init__(self, parent=None, rawfile=None, conffile=None):
    """Create the application window and the Pipeline object behind it.

    With a parent the app lives in a new Toplevel; without one it
    initialises itself as a Frame and uses its master as the main
    window. conffile is passed to Pipeline; rawfile is not used here.
    """
    self.parent = parent
    if self.parent:
        self.main = Toplevel()
        self.master = self.main
    else:
        Frame.__init__(self)
        self.main = self.master
    self.main.title('DataPipeline Desktop')

    # centre an 800x600 window on the screen
    width = 800
    height = 600
    screenwidth = self.main.winfo_screenwidth()
    screenheight = self.main.winfo_screenheight()
    left = (screenwidth/2)-(width/2)
    top = (screenheight/2)-(height/2)
    self.main.geometry('%dx%d+%d+%d' % (width, height, left, top))
    self.main.protocol('WM_DELETE_WINDOW', self.quit)

    # the pipeline object does all the non-gui work
    self.p = Pipeline(conffile)
    self.setupVars()
    self.setupGUI()

    # empty the log widget and send stdout to this object
    self.log.delete(1.0, END)
    sys.stdout = self
    #sys.stderr = self
    return
def fitPropagationTest():
    """Tests the propagation of fit data direct from a dict - no importing.

    Nested data from Utilities is passed straight to Pipeline.processFits;
    the resulting project and plot images are saved under the pipeline's
    working directory and the elapsed time is printed.
    """
    started = time.time()
    pipeline = Pipeline()
    settings = {
        'model1': 'linear',
        'model2': 'Michaelis-Menten',
        'model3': 'sigmoid',
        'variable1': 'a',
        'variable2': 'Km',
        'variable3': 'tm',
        #'xerror':.1,'yerror':0.05,
    }
    pipeline.createConfig('temp.conf', **settings)

    nested = Utilities.createNestedData()
    project = EkinProject()
    # first element of the returned pair is not needed here
    _, fits = pipeline.processFits(nested, Em=project)
    print('%s %s' % ('final fits', fits))

    outname = os.path.join(pipeline.workingdir, 'results')
    project.saveProject(outname)
    pipeline.saveEkinPlotstoImages(project, outname)

    elapsed = round((time.time() - started), 2)
    print('completed fit propagation test')
    print('took %s seconds' % elapsed)
    print('-------------------')
    return
def peakDetectionTest(path=None, noise=0.08):
    """Use pre-processing funcs to detect peaks.

    path: folder in which 'spectraldata.txt' is written; defaults to
        "testfiles" when None.
    noise: forwarded to Utilities.createSimulatedSpectralData.

    Returns np.mean of the per-dataset fraction of simulated peaks that
    also appear in the pipeline results.
    """
    if path is None:
        path = "testfiles"
    names = Utilities.createRandomStrings(8, 6)
    fname = os.path.join(path, 'spectraldata.txt')
    peaks = Utilities.createSimulatedSpectralData(fname, names, noise=noise)
    conf = {
        'format': 'databycolumn',
        'saveplots': 1,
        'marker': '-',
        'markers': '-,x',
        'alpha': 0.7,
        'normalise': 1,
        'function1': 'smooth',
        'function2': 'baselinecorrection',
        'function3': 'detectpeaks'
    }
    p = Pipeline()
    p.createConfig('temp.conf', **conf)
    p.openRaw(fname)
    results = p.run()

    # compare predicted peaks, using the first entry of the results dict;
    # list() keeps the indexing valid whether keys() is a list or a view
    successrates = []
    res = results[list(results.keys())[0]]
    for name in peaks:
        orig = set(peaks[name])
        if not orig:
            # no simulated peaks for this dataset: skip it instead of
            # dividing by zero below
            continue
        pred = set(res[name][0])
        s = float(len(orig.intersection(pred))) / len(orig)
        successrates.append(s)
    return np.mean(successrates)
def replicatesTest():
    """Tests handling of replicates.

    Generates grouped data for three replicate folders that share the
    same dataset names, then runs the pipeline on the parent folder.
    """
    pipeline = Pipeline()
    settings = {
        "format": "databycolumn",
        "groupbyname": 1,
        "parsenamesindex": 1,
        "parsemethod": "numeric",
        "replicates": 1,
        "model1": "linear",
        "variable1": "a",
        "model2": "sigmoid",
        "variable2": "tm",
    }
    pipeline.createConfig("temp.conf", **settings)

    root = "testfiles/replicates"
    Utilities.createDirectory(root)
    # the same names are passed for every replicate folder
    names = Utilities.createRandomStrings(3, 6)
    for repname in ["rep1", "rep2", "rep3"]:
        Utilities.createGroupedData(os.path.join(root, repname), names=names)

    pipeline.addFolder(root)
    pipeline.run()
    return
def multiFolderTest():
    """Handling of multiple folders in a hierarchy with replicates.

    Creates one "ph<N>" sub-folder per value in 2..9 under
    "testfiles/multifolders", fills each with three replicate files and
    then runs the pipeline on the top-level folder.
    """
    pipeline = Pipeline()
    settings = {
        "format": "databycolumn",
        "groupbyname": 1,
        "parsenamesindex": 0,
        "parsemethod": "numeric",
        "replicates": 1,
        #'saveplots':1,
        "model1": "linear",
        "variable1": "a",
        "model2": "sigmoid",
        "variable2": "tm",
    }
    pipeline.createConfig("temp.conf", **settings)

    root = "testfiles/multifolders"
    Utilities.createDirectory(root)
    names = Utilities.createRandomStrings(3, 6)
    datestamp = str(datetime.date.today())

    for ph in range(2, 10):
        # the value handed to createTempData follows a sigmoid in 'ph',
        # so the expected outcome of the fit is known in advance
        value = 1 / (1 + exp((ph - 4) / 1.04))
        phfolder = os.path.join(root, "ph" + str(ph))
        Utilities.createDirectory(phfolder)
        for rep in range(1, 4):
            # one file per replicate
            repfile = "".join(["r", str(rep), "_", datestamp, ".txt"])
            Utilities.createTempData(os.path.join(phfolder, repfile), names, value)

    pipeline.addFolder(root)
    pipeline.run()
    return
def doTest(info, name='test', path='testfiles'):
    """Run one import test described by info.

    info: sequence whose first item is the config dict and whose second
        item is the raw file name, looked up inside path.
    name: label printed before the test starts.
    The pipeline is only run when its model1 setting is not empty.
    """
    print('running test %s' % name)
    pipeline = Pipeline()
    settings = info[0]
    rawname = info[1]
    pipeline.createConfig(os.path.join(pipeline.defaultpath, 'temp.conf'), **settings)
    rawlines = pipeline.openRaw(os.path.join(path, rawname))
    # the imported data itself is not inspected; only the import is exercised
    pipeline.doImport(rawlines)
    if not pipeline.model1 == '':
        pipeline.run()
    return
def peakDetectionTest(path=None, noise=0.08):
    """Use pre-processing funcs to detect peaks.

    path: folder in which "spectraldata.txt" is written; defaults to
        "testfiles" when None.
    noise: forwarded to Utilities.createSimulatedSpectralData.

    Returns np.mean of the per-dataset fraction of simulated peaks that
    also appear in the pipeline results.
    """
    if path is None:
        path = "testfiles"
    names = Utilities.createRandomStrings(8, 6)
    fname = os.path.join(path, "spectraldata.txt")
    peaks = Utilities.createSimulatedSpectralData(fname, names, noise=noise)
    conf = {
        "format": "databycolumn",
        "saveplots": 1,
        "marker": "-",
        "markers": "-,x",
        "alpha": 0.7,
        "normalise": 1,
        "function1": "smooth",
        "function2": "baselinecorrection",
        "function3": "detectpeaks",
    }
    p = Pipeline()
    p.createConfig("temp.conf", **conf)
    p.openRaw(fname)
    results = p.run()

    # compare predicted peaks, using the first entry of the results dict;
    # list() keeps the indexing valid whether keys() is a list or a view
    successrates = []
    res = results[list(results.keys())[0]]
    for name in peaks:
        orig = set(peaks[name])
        if not orig:
            # no simulated peaks for this dataset: skip it instead of
            # dividing by zero below
            continue
        pred = set(res[name][0])
        s = float(len(orig.intersection(pred))) / len(orig)
        successrates.append(s)
    return np.mean(successrates)
def fitPropagationTest():
    """Tests the propagation of fit data direct from a dict - no importing.

    Nested data from Utilities is passed straight to Pipeline.processFits;
    the resulting project and plot images are saved under the pipeline's
    working directory and the elapsed time is printed.
    """
    started = time.time()
    pipeline = Pipeline()
    settings = {
        "model1": "linear",
        "model2": "Michaelis-Menten",
        "model3": "sigmoid",
        "variable1": "a",
        "variable2": "Km",
        "variable3": "tm",
        #'xerror':.1,'yerror':0.05,
    }
    pipeline.createConfig("temp.conf", **settings)

    nested = Utilities.createNestedData()
    project = EkinProject()
    # first element of the returned pair is not needed here
    _, fits = pipeline.processFits(nested, Em=project)
    print("%s %s" % ("final fits", fits))

    outname = os.path.join(pipeline.workingdir, "results")
    project.saveProject(outname)
    pipeline.saveEkinPlotstoImages(project, outname)

    elapsed = round((time.time() - started), 2)
    print("completed fit propagation test")
    print("took %s seconds" % elapsed)
    print("-------------------")
    return
def doTest(info, name="test", path="testfiles"):
    """Run one import test described by info.

    info: sequence whose first item is the config dict and whose second
        item is the raw file name, looked up inside path.
    name: label printed before the test starts.
    The pipeline is only run when its model1 setting is not empty.
    """
    print("running test %s" % name)
    pipeline = Pipeline()
    settings = info[0]
    rawname = info[1]
    pipeline.createConfig(os.path.join(pipeline.defaultpath, "temp.conf"), **settings)
    rawlines = pipeline.openRaw(os.path.join(path, rawname))
    # the imported data itself is not inspected; only the import is exercised
    pipeline.doImport(rawlines)
    if not pipeline.model1 == "":
        pipeline.run()
    return
def main():
    """Command-line entry point for running a pipeline.

    Options:
        -c/--conf     conf file to parse (only used when no project is given)
        -f/--file     raw file to open
        -d/--dir      folder of raw files to add to the queue
        -p/--project  project file to load in place of a fresh Pipeline
    """
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("-c", "--conf", dest="conf",
                      help="Provide a conf file", metavar="FILE")
    parser.add_option("-f", "--file", dest="file",
                      help="Raw file", metavar="FILE")
    parser.add_option("-d", "--dir", dest="directory",
                      help="Folder of raw files")
    parser.add_option("-p", "--project", dest="project",
                      help="Project file", metavar="FILE")
    # positional arguments are not used
    opts, _ = parser.parse_args()

    # NOTE(review): the nesting below is reconstructed; it assumes only the
    # conf handling belongs to the no-project branch and that file/dir/run
    # apply in both cases - confirm against the original layout.
    if opts.project is not None:
        P = loadProject(opts.project)
    else:
        # a fresh Pipeline is only needed when no project is loaded
        P = Pipeline()
        if opts.conf is not None:
            P.parseConfig(opts.conf)
    if opts.file is not None:
        P.openRaw(opts.file)
    if opts.directory is not None:
        P.addFolder(opts.directory)
    P.run()
def groupbyFieldsTest():
    """Tests grouping by fields function using NMRdata.

    Runs the pipeline over the '.inp' files in 'testfiles/NMRdata' with
    the 'groupbyfields' option switched on.
    """
    datafolder = 'testfiles/NMRdata'
    settings = {
        'format': 'databycolumn',
        'colheaderlabels': '15N,1H',
        'parsenamesindex': 0,
        'parsemethod': 'numeric',
        'delimeter': ' ',
        'groupbyfields': 1,
        'extension': '.inp'
    }
    pipeline = Pipeline()
    pipeline.createConfig('temp.conf', **settings)
    pipeline.addFolder(datafolder)
    pipeline.run()
    return
def multiFileTest():
    """Test handling of single datasets per file with grouping per filename.

    The input files are generated by Utilities.createSingleFileData
    before the pipeline is configured and run on the folder.
    """
    datafolder = 'testfiles/singlefiles'
    Utilities.createSingleFileData(datafolder)
    settings = {
        'format': 'databycolumn',
        'groupbyname': 1,
        'parsenamesindex': '0,1',
        'parsemethod': 'both',
        'model1': 'linear',
        'variable1': 'a',
        'model2': 'sigmoid',
        'variable2': 'tm'
    }
    pipeline = Pipeline()
    pipeline.createConfig('temp.conf', **settings)
    pipeline.addFolder(datafolder)
    pipeline.run()
    return
def preProcessingTest():
    """Test processing steps like differentiation of the data.

    Writes a generated data file with Utilities.createCDData, then runs
    the pipeline on it with the 'differentiate' and 'gaussiansmooth'
    functions configured ahead of a gaussian fit.
    """
    datafolder = "testfiles"
    names = Utilities.createRandomStrings(8, 6)
    rawfile = os.path.join(datafolder, 'preprocessingtest.txt')
    Utilities.createCDData(rawfile, names, 300, .5)
    settings = {
        'format': 'databycolumn',
        'model1': 'gaussian',
        'function1': 'differentiate',
        'function2': 'gaussiansmooth',
        'iterations': 100,
        'variable1': 'a',
        'saveplots': 1
    }
    pipeline = Pipeline()
    pipeline.createConfig('temp.conf', **settings)
    pipeline.openRaw(rawfile)
    pipeline.run()
    return
def groupedFilesTest():
    """Tests the processing and grouping of multiple files with the same
    sets of datasets in all files.

    The input files are generated by Utilities.createGroupedData before
    the pipeline is configured and run on the folder.
    """
    datafolder = 'testfiles/grouped'
    Utilities.createGroupedData(datafolder)
    settings = {
        'format': 'databycolumn',
        'groupbyname': 1,
        'parsenamesindex': 0,
        'parsemethod': 'numeric',
        'model1': 'linear',
        'variable1': 'a',
        'model2': 'sigmoid',
        'variable2': 'tm'
    }
    pipeline = Pipeline()
    pipeline.createConfig('temp.conf', **settings)
    pipeline.addFolder(datafolder)
    pipeline.run()
    return
def groupbyFieldsTest():
    """Tests grouping by fields function using NMRdata.

    Runs the pipeline over the ".inp" files in "testfiles/NMRdata" with
    the "groupbyfields" option switched on.
    """
    datafolder = "testfiles/NMRdata"
    settings = {
        "format": "databycolumn",
        "colheaderlabels": "15N,1H",
        "parsenamesindex": 0,
        "parsemethod": "numeric",
        "delimeter": " ",
        "groupbyfields": 1,
        "extension": ".inp",
    }
    pipeline = Pipeline()
    pipeline.createConfig("temp.conf", **settings)
    pipeline.addFolder(datafolder)
    pipeline.run()
    return
def multiFileTest():
    """Test handling of single datasets per file with grouping per filename.

    The input files are generated by Utilities.createSingleFileData
    before the pipeline is configured and run on the folder.
    """
    datafolder = "testfiles/singlefiles"
    Utilities.createSingleFileData(datafolder)
    settings = {
        "format": "databycolumn",
        "groupbyname": 1,
        "parsenamesindex": "0,1",
        "parsemethod": "both",
        "model1": "linear",
        "variable1": "a",
        "model2": "sigmoid",
        "variable2": "tm",
    }
    pipeline = Pipeline()
    pipeline.createConfig("temp.conf", **settings)
    pipeline.addFolder(datafolder)
    pipeline.run()
    return
def groupedFilesTest():
    """Tests the processing and grouping of multiple files with the same
    sets of datasets in all files.

    The input files are generated by Utilities.createGroupedData before
    the pipeline is configured and run on the folder.
    """
    datafolder = "testfiles/grouped"
    Utilities.createGroupedData(datafolder)
    settings = {
        "format": "databycolumn",
        "groupbyname": 1,
        "parsenamesindex": 0,
        "parsemethod": "numeric",
        "model1": "linear",
        "variable1": "a",
        "model2": "sigmoid",
        "variable2": "tm",
    }
    pipeline = Pipeline()
    pipeline.createConfig("temp.conf", **settings)
    pipeline.addFolder(datafolder)
    pipeline.run()
    return
def preProcessingTest():
    """Test processing steps like differentiation of the data.

    Writes a generated data file with Utilities.createCDData, then runs
    the pipeline on it with the "differentiate" and "gaussiansmooth"
    functions configured ahead of a gaussian fit.
    """
    datafolder = "testfiles"
    names = Utilities.createRandomStrings(8, 6)
    rawfile = os.path.join(datafolder, "preprocessingtest.txt")
    Utilities.createCDData(rawfile, names, 300, 0.5)
    settings = {
        "format": "databycolumn",
        "model1": "gaussian",
        "function1": "differentiate",
        "function2": "gaussiansmooth",
        "iterations": 100,
        "variable1": "a",
        "saveplots": 1,
    }
    pipeline = Pipeline()
    pipeline.createConfig("temp.conf", **settings)
    pipeline.openRaw(rawfile)
    pipeline.run()
    return
def setUp(self):
    """Prepare a fresh Pipeline and the paths used by the tests.

    confpath points at 'temp.conf' inside the pipeline's default path;
    filepath points at the 'testfiles' folder next to the DataPipeline
    module.
    """
    packagedir = os.path.dirname(DataPipeline.__file__)
    self.p = Pipeline()
    self.confpath = os.path.join(self.p.defaultpath, 'temp.conf')
    self.filepath = os.path.join(packagedir, 'testfiles')
class PipeApp(Frame, GUI_help):
    """Data pipe GUI for importing and fitting of raw data.

    This class uses ekin to provide an automated pipeline for fitting raw
    text data files and propagating errors. Uses a config file to store
    the pipeline settings.
    """

    def __init__(self, parent=None, rawfile=None, conffile=None):
        """Create the application window and the Pipeline object behind it.

        parent: when given, the app lives in a new Toplevel; otherwise it
            initialises itself as a Frame and uses its master as the
            main window.
        rawfile: accepted but not used by this constructor.
        conffile: passed to the Pipeline constructor.
        """
        self.parent = parent
        if not self.parent:
            Frame.__init__(self)
            self.main = self.master
        else:
            self.main = Toplevel()
            self.master = self.main
        self.main.title('DataPipeline Desktop')
        # centre an 800x600 window on the screen
        ws = self.main.winfo_screenwidth()
        hs = self.main.winfo_screenheight()
        w = 800
        h = 600
        x = (ws/2)-(w/2)
        y = (hs/2)-(h/2)
        self.main.geometry('%dx%d+%d+%d' % (w, h, x, y))
        self.main.protocol('WM_DELETE_WINDOW', self.quit)
        # pipeline object is used for everything except gui stuff
        self.p = Pipeline(conffile)
        self.setupVars()
        self.setupGUI()
        # redirect stdout to log win
        self.log.delete(1.0, END)
        sys.stdout = self
        #sys.stderr = self
        return

    def setupVars(self):
        """Create the tk variables shown in the info pane."""
        self.conffilevar = StringVar()
        self.queuefilesvar = IntVar()
        self.currfilevar = StringVar()
        return

    def setupGUI(self):
        """Build the menu bar, info pane and the paned main area."""
        self.createMenuBar()
        # info pane along the bottom edge
        self.infopane = Frame(self.main, height=20)
        self.infopane.pack(side=BOTTOM, fill=BOTH, pady=4)
        self.updateinfoPane()
        Label(self.infopane, text='Conf file:').pack(side=LEFT)
        Label(self.infopane, textvariable=self.conffilevar,
              fg='darkblue').pack(side=LEFT, padx=4)
        Label(self.infopane, text='Files in queue:').pack(side=LEFT, padx=4)
        Label(self.infopane, textvariable=self.queuefilesvar,
              fg='darkblue').pack(side=LEFT)
        Label(self.infopane, text='Current file:').pack(side=LEFT, padx=4)
        Label(self.infopane, textvariable=self.currfilevar,
              fg='darkblue').pack(side=LEFT)
        # horizontal split holding two vertical panes
        self.m = PanedWindow(self.main, orient=HORIZONTAL,
                             sashwidth=3, showhandle=True)
        self.m1 = PanedWindow(self.m, orient=VERTICAL,
                              sashwidth=3, showhandle=True)
        self.m2 = PanedWindow(self.m, orient=VERTICAL,
                              sashwidth=3, showhandle=True)
        self.m.pack(side=TOP, fill=BOTH, expand=1)
        # first pane: raw file contents above the plot previewer
        self.m.add(self.m1)
        self.rawcontents = Pmw.ScrolledText(self.m1,
                labelpos = 'n',
                label_text='Raw File Contents',
                rowheader=1,
                columnheader=1,
                Header_foreground = 'blue',
                rowheader_width = 3,
                usehullsize = 1,
                hull_width = 500,
                hull_height = 300,
                text_wrap='none')
        self.m1.add(self.rawcontents)
        self.previewer = PlotPreviewer(self.m1, app=self)
        self.m1.add(self.previewer)
        # second pane: queue manager above the log
        self.m.add(self.m2)
        self.queueFrame = queueManager(app=self)
        self.m2.add(self.queueFrame)
        self.log = Pmw.ScrolledText(self.m2,
                labelpos = 'n',
                label_text='Logs',
                usehullsize = 1,
                hull_width = 400,
                hull_height = 500,
                text_wrap='word')
        self.m2.add(self.log)
        return

    def updateinfoPane(self):
        """Refresh the info pane variables from the pipeline object."""
        if hasattr(self.p, 'configurationfile'):
            self.conffilevar.set(self.p.configurationfile)
        self.queuefilesvar.set(len(self.p.queue))
        self.currfilevar.set(self.p.filename)
        return

    def createMenuBar(self):
        """Create the menu bar for the application."""
        self.menu = Menu(self.main)
        self.file_menu = {'01Open Raw File(s)': {'cmd': self.openRaw},
                          '02Load Config File': {'cmd': self.loadConfig},
                          '03Edit Current Config': {'cmd': self.editConfig},
                          '04Create Config': {'cmd': self.createConfig},
                          '05Quit': {'cmd': self.quit}}
        self.file_menu = self.create_pulldown(self.menu, self.file_menu)
        self.menu.add_cascade(label='File', menu=self.file_menu['var'])
        self.project_menu = {'01Load Project': {'cmd': self.loadProject},
                             '02Save Project': {'cmd': self.saveProject}}
        self.project_menu = self.create_pulldown(self.menu, self.project_menu)
        self.menu.add_cascade(label='Project', menu=self.project_menu['var'])
        self.run_menu = {'01Execute': {'cmd': self.execute}}
        self.run_menu = self.create_pulldown(self.menu, self.run_menu)
        self.menu.add_cascade(label='Run', menu=self.run_menu['var'])
        self.queue_menu = {'01Add files to queue': {'cmd': self.addtoQueue},
                           '02Add folder to queue': {'cmd': self.addFolder},
                           '03Clear queue': {'cmd': self.clearQueue}}
        self.queue_menu = self.create_pulldown(self.menu, self.queue_menu)
        self.menu.add_cascade(label='Queue', menu=self.queue_menu['var'])
        self.utils_menu = {'01Show Config Helper': {'cmd': self.launchHelper},
                           '02Model Design': {'cmd': self.launchModelDesigner},
                           '03Launch Ekin': {'cmd': self.openEkin},
                           '04Text Editor': {'cmd': self.startTextEditor},
                           '05Batch File Rename': {'cmd': self.batchFileRename},
                           '06Clear Log': {'cmd': self.clearLog},
                           '07Run Tests': {'cmd': self.runTests}}
        self.utils_menu = self.create_pulldown(self.menu, self.utils_menu)
        self.menu.add_cascade(label='Utilities', menu=self.utils_menu['var'])
        self.help_menu = {'01Online Help': {'cmd': self.help},
                          '02About': {'cmd': self.about}}
        self.help_menu = self.create_pulldown(self.menu, self.help_menu)
        self.menu.add_cascade(label='Help', menu=self.help_menu['var'])
        self.main.config(menu=self.menu)
        return

    def openRaw(self, filename=None):
        """Open a raw file and show its contents and preview.

        When filename is None a file dialog is shown. Nothing happens if
        the dialog is cancelled or the file does not exist.
        """
        if filename is None:
            filename = self.openFilename()
        # a cancelled dialog gives an empty value, which must not reach
        # os.path.exists
        if not filename or not os.path.exists(filename):
            return
        # now we open the first file only
        lines = self.p.openRaw(filename)
        self.updateinfoPane()
        self.showRawFile(lines)
        self.showPreview()
        return

    def createConfig(self):
        """Ask for a file name and create a config file there."""
        filename = self.saveFilename()
        if filename:
            self.p.createConfig(filename)
        return

    def loadConfig(self, filename=None):
        """Parse a config file, asking for one if filename is None."""
        if filename is None:
            filename = self.openFilename('conf')
        if not filename:
            return
        self.p.parseConfig(filename)
        self.updateinfoPane()
        return

    def reloadConfig(self):
        """Re-parse the pipeline's current config file."""
        # getattr: the attribute may be missing (see the hasattr check in
        # updateinfoPane); None makes loadConfig ask for a file instead
        self.loadConfig(getattr(self.p, 'configurationfile', None))
        return

    def editConfig(self):
        """Open the pipeline's current config file in the text editor."""
        # same fallback as reloadConfig: None makes editFile show a dialog
        self.editFile(getattr(self.p, 'configurationfile', None))
        return

    def editFile(self, filename=None):
        """Edit a file in a TextEditor window with a 'Reload Config' action."""
        if filename is None:
            filename = self.openFilename('conf')
        if not filename:
            return
        tf = TextEditor(parent=self, title=filename)
        tf.openFile(filename)
        tf.addFunction('Reload Config', self.reloadConfig)
        return

    def showRawFile(self, lines):
        """Show raw file contents, one numbered row per entry in lines."""
        c = self.rawcontents
        c.delete("1.0", END)
        c.component('columnheader').delete("1.0", END)
        rowheader = c.component('rowheader')
        rowheader.delete("1.0", END)
        for count, raw in enumerate(lines):
            # only plain strings are stripped; anything else is shown as-is
            if isinstance(raw, str):
                line = raw.strip()
            else:
                line = raw
            c.insert(END, '%s\n' % line)
            rowheader.insert(END, str(count)+'\n')
        return

    def stopCurrent(self):
        """Flag the pipeline to stop; used as the progress dialog's cancel."""
        self.p.stop = True
        print('cancel pressed.. please wait')
        return

    def execute(self):
        """Run current files in queue, showing a progress dialog."""
        if len(self.p.queue) == 0:
            return
        from Dialogs import ProgressDialog
        self.pb = ProgressDialog(self.main, cancel=self.stopCurrent)
        # after() must receive the callable itself; the original passed the
        # result of calling updateValue() instead
        self.pb.after(100, self.pb.updateValue)
        self.p.run(callback=self.pb.updateValue)
        self.pb.close()
        return

    def showPreview(self, lines=None):
        """Show how the data looks with the import formatting applied."""
        self.previewer.update()
        return

    def addtoQueue(self, files=None):
        """Add files to the queue, asking for them if files is None."""
        if files is None:
            files = self.openFilenames()
        if not files:
            # dialog cancelled or nothing supplied
            return
        self.p.addtoQueue(files)
        self.updateinfoPane()
        self.queueFrame.update()
        return

    def updateFromQueue(self):
        """Check current file open after a queue deletion."""
        if self.p.filename not in self.p.queue.values():
            self.clearCurrentFile()
        return

    def clearCurrentFile(self):
        """Close the current file and clear the widgets that show it."""
        self.p.closeFile()
        self.updateinfoPane()
        self.rawcontents.clear()
        self.previewer.clear()
        return

    def addFolder(self, path=None):
        """Add a folder to the queue, asking for one if path is None."""
        if path is None:
            path = self.openDirectory()
        if not path:
            # dialog cancelled or nothing supplied
            return
        self.p.addFolder(path)
        self.updateinfoPane()
        self.queueFrame.update()
        return

    def clearQueue(self):
        """Clear the queue through the queue manager frame."""
        self.queueFrame.clear()
        return

    def runTests(self):
        """Run the basic format tests from the Testing module."""
        import Testing
        Testing.formatTests(Testing.basictests)
        print('tests completed ok')
        return

    def openEkin(self, fname=None):
        """Open ekin; fname is accepted but not used."""
        EK = EkinApp(parent=self)
        return

    def launchModelDesigner(self):
        """Open the model designer, loading the models file if one is set."""
        self.modelapp = ModelDesignApp(parent=self)
        if self.p.modelsfile != '':
            self.modelapp.loadModelsFile(self.p.modelsfile)
        return

    def launchHelper(self):
        """Open the config helper dialog."""
        wz = HelperDialog(parent=self)
        return

    def openFilename(self, ext=('txt', 'csv', 'xls')):
        """Ask for a single file to open and return the chosen name.

        ext: one extension or a list/tuple of extensions (without the
            dot) offered as file type filters; 'All files' is always
            appended.
        """
        if not isinstance(ext, (list, tuple)):
            ext = [ext]
        filetypes = [('%s files' % e, '*.%s' % e) for e in ext]
        filetypes.append(("All files", "*.*"))
        # the dialog option takes one extension string, not a list
        if ext:
            defaultext = ext[0]
        else:
            defaultext = ''
        filename = tkFileDialog.askopenfilename(defaultextension=defaultext,
                                                initialdir=self.p.savedir,
                                                filetypes=filetypes,
                                                parent=self.main)
        return filename

    def openFilenames(self, ext='txt'):
        """Ask for one or more files to open and return the selection."""
        filetypes = [('%s files' % ext, '*.%s' % ext)]
        filetypes.append(("All files", "*.*"))
        filename = tkFileDialog.askopenfilenames(defaultextension=ext,
                                                 initialdir=self.p.savedir,
                                                 filetypes=filetypes,
                                                 parent=self.main)
        return filename

    def saveFilename(self, ext=''):
        """Ask for a file name to save to and return it."""
        if ext != '':
            filetypes = [('%s files' % ext, '*.%s' % ext)]
            defaultext = '.' + ext
        else:
            filetypes = []
            # no extension requested: do not pass a bare '.'
            defaultext = ''
        filetypes.append(("All files", "*.*"))
        filename = tkFileDialog.asksaveasfilename(defaultextension=defaultext,
                                                  initialdir=self.p.savedir,
                                                  filetypes=filetypes,
                                                  parent=self.main)
        return filename

    def openDirectory(self):
        """Ask for a folder, starting in the current working directory."""
        folder = tkFileDialog.askdirectory(parent=self.main,
                                           initialdir=os.getcwd(),
                                           title='Select folder')
        return folder

    def loadProject(self, filename=None):
        """Load a pickled pipeline project and refresh the GUI from it.

        When filename is None a file dialog is shown; a cancelled dialog
        does nothing. On an unpickling error a message is printed and the
        current pipeline is kept. After loading, the config is written to
        '<project name without extension>.conf'.
        """
        if filename is None:
            filename = self.openFilename('proj')
        if not filename:
            return
        # SECURITY: pickle.load can run arbitrary code; only load project
        # files from a trusted source.
        # NOTE(review): text mode is kept from the original; binary pickles
        # would need 'rb' - confirm how saveProject writes the file.
        with open(filename, 'r') as f:
            try:
                self.p = pickle.load(f)
            except Exception as e:
                print('failed to load project')
                print('%s %s' % ('Error returned:', e))
                return
        print('%s %s' % ('loaded project', filename))
        # index 0 is the path without extension; the original used index 1
        # (the extension), which produced a file called e.g. '.proj.conf'
        name = os.path.splitext(filename)[0]
        self.p.writeConfig(filename='%s.conf' % name)
        self.updateinfoPane()
        self.queueFrame.update()
        self.previewer.update()
        if self.p.lines is not None:
            self.showRawFile(self.p.lines)
        return