def testSaveAndQueryAnalysis(self):
    """Persist three analyses and query them back by name and by status.

    Only ``pog2`` is created with an explicit status; the assertions expect
    the other two rows to be reported with the "NEW" status.
    """
    obj1 = Analysis(name="pog1")
    obj2 = Analysis(name="pog2", currentstatus="COMPLETE")
    obj3 = Analysis(name="pog3")

    AnalysisUtils.setInputFiles(obj1, self.input_files, self.input_types)

    for analysis in (obj1, obj2, obj3):
        self.session.add(analysis)
    self.session.commit()

    # assertEqual (instead of assertTrue(a == b)) reports both values on
    # failure.
    by_name = self.session.query(Analysis).filter_by(name='pog1').all()
    self.assertEqual(len(by_name), 1)
    self.assertEqual(by_name[0].id, 1)
    self.assertEqual(by_name[0].currentstatus, "NEW")

    new_rows = self.session.query(Analysis).filter_by(
        currentstatus='NEW').all()
    self.assertEqual(len(new_rows), 2)

    # Debug dump of the first NEW row.  The single-argument print form
    # behaves the same under Python 2 and Python 3.
    for key, value in new_rows[0].__dict__.items():
        print("%s %s" % (key, value))
    print(new_rows[0].status)
def run(self):
    """Run every command of the wrapped analysis through the shell.

    The analysis' ``output_strings`` list is reset, then each command object
    returned by ``self.analysis.getCommands()`` is executed in turn.  Any
    non-empty stdout/stderr text is stored with
    ``AnalysisUtils.addOutputString``.  Once all commands have run, the
    collected output is written to the log line by line.

    Returns:
        True, always.  The exit status of the commands is not inspected.
    """
    analysis = self.analysis
    logging.info(" ========> AnalysisRunner for %20s called run" %
                 (analysis.name))

    # We may want to put the output into an array for multiple commands.
    analysis.output_strings = []

    cmds = analysis.getCommands()
    logging.info(
        " ========> AnalysisRunner for %20s called run for %s commands" %
        (analysis.name, len(analysis.commands)))

    for cmdobj in cmds:
        cmd = cmdobj.command
        logging.info(
            " ========> AnalysisRunner for %20s running comand %s" %
            (analysis.name, cmd))

        # With shell=True the command is handed to the shell as one string.
        p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE, close_fds=True)

        # communicate() waits for the process and drains both pipes.  It is
        # called unconditionally: guarding it with poll() would drop the
        # output of any command that finished before the first poll.
        out, err = p.communicate()

        if out:
            AnalysisUtils.addOutputString(analysis, out)
            sys.stdout.flush()
        if err:
            AnalysisUtils.addOutputString(analysis, err)
            sys.stderr.flush()

    logging.info(
        " ========> AnalysisRunner for %20s finished command: Output is" %
        (analysis.name))

    for tmp in analysis.output_strings:
        for t in tmp.output_string.split("\n"):
            logging.info(" ========> Analysis %20s Output %s" %
                         (analysis.name, t))

    return True
def testGetInput(self):
    """The analysis should hold as many input files as were supplied."""
    AnalysisUtils.setInputFiles(self.anaobj, self.inputs, ['dir'])

    stored = self.anaobj.input_files
    stored_count = len(stored)
    supplied_count = len(self.inputs)

    # Assertion that the test framework collates
    self.assertEqual(stored_count, supplied_count)
def testGetOutput(self):
    """Running the analysis should produce at least one output string."""
    AnalysisUtils.setInputFiles(self.anaobj, self.inputs, ['dir'])

    # The runner is kept on the test instance, as before.
    self.runner = AnalysisRunner(self.anaobj)
    self.runner.run()

    collected = AnalysisUtils.getOutputStrings(self.runner.analysis)
    collected_count = len(collected)
    self.assertTrue(collected_count > 0)
def testCreateAnalysis(self):
    """Construct analyses in memory and initialise the one given inputs.

    There are no assertions; the test passes when nothing raises.
    """
    primary = Analysis(name="pog1")
    # The next two objects are only constructed; nothing else uses them.
    completed = Analysis(name="pog2", currentstatus="COMPLETE")
    spare = Analysis(name="pog3")

    AnalysisUtils.setInputFiles(primary, self.input_files, self.input_types)

    scratch_dir = "/tmp"
    primary.output_dir = scratch_dir
    primary.working_dir = scratch_dir

    primary.init()
def setUp(self):
    """Create and initialise a "Bowtie2" analysis on files under ../testdata."""
    self.factory = AnalysisFactory()

    analysis = self.factory.createAnalysisFromModuleName("Bowtie2")
    self.ana = analysis

    # Command-line parameters: -x points at the genome index.
    analysis.param = " -x ../testdata/databases/Arabidopsis_TAIR.9.171 "

    fastq_inputs = ["../testdata/FoxP2_SL167.fastq"]
    fastq_types = ['fastq']
    AnalysisUtils.setInputFiles(analysis, fastq_inputs, fastq_types)

    analysis.init()
def testRun(self):
    """Run the analysis and check that some output was captured."""
    AnalysisUtils.setInputFiles(self.anaobj, self.inputs, ['dir'])

    self.runner = AnalysisRunner(self.anaobj)
    self.runner.run()

    # Show the captured output for debugging.
    captured = AnalysisUtils.getOutputStrings(self.anaobj)
    print(captured)

    # The output strings are fetched again for the assertion, as before.
    captured_count = len(AnalysisUtils.getOutputStrings(self.anaobj))
    self.assertTrue(captured_count > 0)
def getCommands(self):
    """Build the FastQC java command for the first input file.

    Disk space and input files are checked on every call.  The command is
    appended to ``self.commands`` only when an identical command is not
    already queued, so repeated calls do not grow the list with duplicates.

    Returns:
        ``self.commands``.

    Raises:
        Exception: if ``AnalysisUtils.checkInputFiles`` returns False.
    """
    AnalysisUtils.checkDiskSpace(self)

    # Explicit comparison kept: the helper's return contract is not visible
    # here, and a None result must not be treated as a failure.
    if AnalysisUtils.checkInputFiles(self) == False:
        raise Exception("Input files [%s] don't exist = can't continue" %
                        (self.input_files))

    command = (
        "java -Xmx1024M -Djava.awt.headless=true -Djava.awt.headlesslib=true -classpath "
        + self.classpath + " " + " -Dfastqc.output_dir=" + self.working_dir
        + " uk.ac.babraham.FastQC.FastQCApplication "
        + self.input_files[0].input_file)

    # getattr tolerates entries that are not command objects.
    already_queued = any(
        getattr(queued, 'command', None) == command
        for queued in self.commands)

    if not already_queued:
        self.commands.append(
            AnalysisCommand(command=command,
                            command_rank=len(self.commands) + 1))

    return self.commands
def testCreateNewAnalysisRunner(self):  # Function gets called automatically
    """New instance should create successfully"""
    runner = AnalysisRunner(self.ana)

    # The result is not inspected; the lookup only has to complete.
    AnalysisUtils.getInputFiles(runner.analysis)

    self.assertTrue(runner)
def testCreateMummer(self):
    """A Mummer analysis with two FASTA inputs should yield one nucmer command."""
    mummer = Mummer()
    self.assertTrue(mummer)
    self.assertTrue(
        AnalysisUtils.setInputFiles(mummer, self.input_files,
                                    ['fasta', 'fasta']))

    mummer.init()

    tmpfiles = AnalysisUtils.getInputFiles(mummer)
    self.assertEqual(len(tmpfiles), 2)

    commands = mummer.getCommands()
    print(commands)
    self.assertEqual(len(commands), 1)

    # Plain containment check.  str.index() raises ValueError on a miss and
    # returns 0 for a match at the very start, so `index(...) > 0` either
    # errored out or rejected a valid match.
    expected = 'tools/macosx/MUMmer3.23/nucmer --maxgap=500 --mincluster=100'
    actual = commands[0].command
    self.assertTrue(expected in actual,
                    "%r not found in %r" % (expected, actual))
def testCreateAnalysis(self):
    """Create three analyses, persist them, and check ids, names and statuses."""
    input_files = ['pog1.fa', 'pog2.fa', 'pog3.fa']
    input_types = ['fasta', 'fasta', 'fasta']

    obj1 = Analysis(name="pog1")
    obj2 = Analysis(name="pog2", currentstatus="COMPLETE")
    obj3 = Analysis(name="pog3")

    # Two input files are attached by hand before the helper is called with
    # the full three-file list.
    # NOTE(review): whether setInputFiles replaces or extends the entries
    # appended here is not visible from this test - confirm.
    if1 = AnalysisInputFile(input_file='pog1.fa', input_file_rank=1)
    if2 = AnalysisInputFile(input_file='pog2.fa', input_file_rank=2)
    obj1.input_files.append(if1)
    obj1.input_files.append(if2)
    AnalysisUtils.setInputFiles(obj1, input_files, input_types)

    for analysis in (obj1, obj2, obj3):
        self.session.add(analysis)
    self.session.commit()

    # assertEqual (instead of assertTrue(a == b)) reports both values on
    # failure.
    self.assertEqual(obj1.id, 1)
    self.assertEqual(obj2.name, "pog2")
    self.assertEqual(obj2.currentstatus, "COMPLETE")

    by_name = self.session.query(Analysis).filter_by(name='pog1').all()
    self.assertEqual(len(by_name), 1)
    self.assertEqual(by_name[0].id, 1)
    self.assertEqual(by_name[0].currentstatus, "NEW")

    new_rows = self.session.query(Analysis).filter_by(
        currentstatus='NEW').all()
    self.assertEqual(len(new_rows), 2)

    # Debug dump of the first NEW row.  The single-argument print form
    # behaves the same under Python 2 and Python 3.
    for key, value in new_rows[0].__dict__.items():
        print("%s %s" % (key, value))
    print(new_rows[0].status)
def postProcessOutput(self):
    """Extract the alignment summary figures from the captured output.

    After delegating to ``AnalysisUtils.postProcessOutput``, every line of
    every entry in ``self.output_strings`` is matched against the summary
    patterns below.  The captured values are stored, as strings, in
    ``self.summary_data``.  Percentages are stored exactly as captured
    (for the sample below, including the surrounding parentheses).
    """
    AnalysisUtils.postProcessOutput(self)

    # Sample of the text being parsed:
    #
    # 3 reads; of these:
    #   3 (100.00%) were unpaired; of these:
    #     3 (100.00%) aligned 0 times
    #     0 (0.00%) aligned exactly 1 time
    #     0 (0.00%) aligned >1 times
    # 0.00% overall alignment rate

    # (pattern, key for group 1, key for group 2 or None when the pattern
    # has a single group)
    rules = (
        (r'(\d+) reads', 'Number_of_Reads', None),
        (r' +(\d+) (.*?) aligned 0 times',
         'Aligned 0 Times', 'Percent Aligned 0 Times'),
        (r' +(\d+) (.*?) aligned exactly 1 time',
         'Aligned Exactly 1 Time', 'Percent Aligned Exactly 1 Time'),
        # The percentage is group 2; it used to be filled from group 1,
        # which duplicated the read count.
        (r' +(\d+) (.*?) aligned >1 times',
         'Aligned >1 Time', 'Percent Aligned >1 Time'),
        (r'(.*) overall alignment rate', 'Overall Alignment Rate', None),
    )

    summary = {}

    for entry in self.output_strings:
        for line in entry.output_string.split("\n"):
            for pattern, first_key, second_key in rules:
                found = re.match(pattern, line)
                if not found:
                    continue
                summary[first_key] = found.group(1)
                if second_key is not None:
                    summary[second_key] = found.group(2)

    self.summary_data = summary
def postProcessOutput(self):
    """Summarise the 'Basic Statistics' section of the FastQC output data.

    After delegating to ``AnalysisUtils.postProcessOutput``, the data
    returned by ``self.readOutputFastqcData()`` is read.  Selected rows of
    its 'Basic Statistics' module are copied into ``self.summary_data``, and
    that module's status is stored in ``self.output_status``.  Every summary
    key is always present; a value is None when its row was not found.
    """
    AnalysisUtils.postProcessOutput(self)

    output_dat = self.readOutputFastqcData()
    status = output_dat['Basic Statistics']['status']

    # Row label in the module data -> key used in the summary.
    wanted = {
        "Encoding": 'Encoding',
        "Sequence length": 'Sequence Length',
        "Total Sequences": 'Total Sequences',
        "Filename": 'Filename',
        "%GC": '%GC',
    }

    summary = dict.fromkeys(wanted.values())  # every value defaults to None

    for row in output_dat['Basic Statistics']['moddata']:
        label, value = row[0], row[1]
        if label in wanted:
            summary[wanted[label]] = value

    self.summary_data = summary
    self.output_status = status
def init(self):
    """Derive the FastQC output directory and register expected outputs.

    The directory name is built from the first input file and stored in
    ``self.fastqc_dir``.  Each name in ``self.expected_output_filelist`` is
    then registered, prefixed with that directory, through
    ``AnalysisUtils.addExpectedOutputFile``.

    Raises:
        Exception: if the analysis has no input files.
    """
    super(FastQCAnalysis, self).init()

    if not self.input_files:
        raise Exception(
            "No input files for FastQCAnalysis module. Can't init")

    fileparts = FileUtils.getFileParts(self.input_files[0].input_file)
    extension = fileparts['fileext']

    if extension == ".fastq":
        stem = fileparts['filestub']
    elif extension == ".gz":
        # Drop the inner ".fastq" from the stub of a gzipped input.
        stem = fileparts['filestub'].replace(".fastq", "")
    else:
        stem = fileparts['basename']

    fastqc_dir = stem + "_fastqc/"
    self.fastqc_dir = fastqc_dir

    for expected_name in self.expected_output_filelist:
        AnalysisUtils.addExpectedOutputFile(self, fastqc_dir + expected_name)
def testRunMummer(self):
    """Run Mummer on two FASTA inputs and check its output and delta file."""
    mummer = Mummer()
    self.assertTrue(mummer)
    self.assertTrue(
        AnalysisUtils.setInputFiles(mummer, self.input_files,
                                    ['fasta', 'fasta']))

    runner = AnalysisRunner(mummer)
    self.assertTrue(runner.run())

    self.assertEqual(len(mummer.output_strings), 1)

    # Plain containment check.  str.index() raises ValueError on a miss and
    # returns 0 for a match at the very start, so `index(...) > 0` either
    # errored out or rejected a valid match.
    marker = '4: FINISHING DATA'
    output_text = mummer.output_strings[0].output_string
    self.assertTrue(marker in output_text,
                    "%r not found in command output" % (marker,))

    self.assertTrue(FileUtils.fileExists('../testout/mummer.delta'))
def setInputFiles(self, input_files, input_types):
    """Attach input files to this analysis, then re-run ``init()``.

    Both arguments are passed unchanged to ``AnalysisUtils.setInputFiles``.
    The helper's result is not returned; this method returns None.
    """
    AnalysisUtils.setInputFiles(self, input_files, input_types)

    # Re-initialise now that the inputs have been set.
    self.init()
def testSetInputFiles(self):
    """Setting the input files on the analysis should report success."""
    accepted = AnalysisUtils.setInputFiles(self.anaobj, self.inputs, ['dir'])
    self.assertTrue(accepted)
def getCommands(self):
    """Build the bowtie/samtools command list for every input file.

    A non-empty ``self.commands`` is returned as-is.  Otherwise the command,
    expected-output and temp-output lists are reset, the two binaries, the
    input files and the disk space are checked, and the commands are built.
    For each input file:

    * a ``.gz`` input first gets a ``gunzip -c`` command that unpacks it
      into the working directory;
    * an alignment pipeline (bowtie piped into ``samtools view`` and
      ``samtools sort``) is queued, followed by ``samtools index``;
    * the ``.bam`` and ``.bam.bai`` files are registered as expected
      outputs.

    Returns:
        ``self.commands``.

    Raises:
        Exception: if a binary or the input files are missing, or if
            ``self.param`` is None.  Errors raised while building the
            per-file commands are logged and re-raised.
    """
    if self.commands:
        return self.commands

    logging.info(" ========> Analysis %20s Getting commands" % (self.name))

    self.commands = []
    self.expected_output_files = []
    self.temp_output_files = []

    tmpdir = self.working_dir

    btbin = self.bowtiebindir + self.bowtiebinname
    stbin = self.samtoolsbindir + self.samtoolsbinname

    self.calculateSpaceNeeded()

    if FileUtils.fileExists(btbin) == False:
        raise Exception("Binary file [%s] doesn't exist = can't continue" %
                        btbin)

    if FileUtils.fileExists(stbin) == False:
        raise Exception("Binary file [%s] doesn't exist = can't continue" %
                        stbin)

    if AnalysisUtils.checkInputFiles(self) == False:
        raise Exception("Input files [%s] don't exist = can't continue" %
                        (self.input_files))

    AnalysisUtils.checkDiskSpace(self)

    # NOTE(review): if building fails part-way through this loop,
    # self.commands is left partially filled and the early return above
    # hands that partial list to the next caller.
    for fobj in self.input_files:
        f = fobj.input_file

        try:
            if f.endswith(".gz"):
                tmpf = f.replace(".gz", "")
                fparts = FileUtils.getFileParts(tmpf)

                command = ("gunzip -c " + f + " > " + tmpdir + "/" +
                           fparts['basename'])

                # Wrapped like every other entry: consumers of this list
                # read the `.command` attribute, which a bare string lacks.
                self.commands.append(AnalysisCommand(command=command))

                # NOTE(review): this records the input path minus ".gz",
                # not the unpacked copy under the working directory -
                # confirm which one the temp-file handling expects.
                self.temp_output_files.append(tmpf)

                f = tmpdir + "/" + fparts['basename']

            fparts = FileUtils.getFileParts(f)
            fstub = fparts['filestub']

            samtoolsoutfile = tmpdir + "/" + fstub + ".bam"

            if self.param is None:
                raise Exception(
                    "No parameters entered for bowtie = needs -x <genomeindex>"
                )

            command1 = (btbin + " " + self.param + " " + f + " | " + stbin +
                        " view -bS - | " + stbin + " sort - " + tmpdir +
                        "/" + fstub)
            logging.info(" ========> Analysis %20s command 1 : %s" %
                         (self.name, command1))

            command2 = stbin + " index " + samtoolsoutfile
            logging.info(" ========> Analysis %20s command 3 : %s" %
                         (self.name, command2))

            self.expected_output_files.append(
                AnalysisExpectedOutputFile(
                    expected_output_file=fstub + ".bam"))
            self.expected_output_files.append(
                AnalysisExpectedOutputFile(
                    expected_output_file=fstub + ".bam.bai"))

            self.commands.append(AnalysisCommand(command=command1))
            self.commands.append(AnalysisCommand(command=command2))

        except Exception as e:
            logging.info(
                " ========> Analysis %20s Failed building command list [%s]"
                % (self.name, e))
            raise

    return self.commands