예제 #1
0
    def testSaveAndQueryAnalysis(self):
        """Persist three analyses and read them back by name and by status.

        Two of the three objects are created without an explicit status and
        the test expects both to be found under ``currentstatus == "NEW"``
        after the commit.
        """
        obj1 = Analysis(name="pog1")
        obj2 = Analysis(name="pog2", currentstatus="COMPLETE")
        obj3 = Analysis(name="pog3")

        AnalysisUtils.setInputFiles(obj1, self.input_files, self.input_types)

        self.session.add(obj1)
        self.session.add(obj2)
        self.session.add(obj3)

        self.session.commit()

        # Query by name: exactly one row, carrying the first id.
        obj = self.session.query(Analysis).filter_by(name='pog1').all()

        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the comparison fails.
        self.assertEqual(len(obj), 1)
        self.assertEqual(obj[0].id, 1)

        self.assertEqual(obj[0].currentstatus, "NEW")

        # Query by status: pog1 and pog3 were created without a status.
        obj = self.session.query(Analysis).filter_by(currentstatus='NEW').all()

        self.assertEqual(len(obj), 2)

        # Debug dump of the first match; written so that it parses and prints
        # the same text on Python 2 and Python 3.
        for key, value in obj[0].__dict__.items():
            print("%s %s" % (key, value))

        print(obj[0].status)
예제 #2
0
    def run(self):
        """Run every command of the wrapped analysis and capture its output.

        The commands returned by ``self.analysis.getCommands()`` are executed
        through the shell one after another.  Non-empty stdout and stderr of
        each command are attached to the analysis with
        ``AnalysisUtils.addOutputString``; once all commands have finished,
        every collected output string is written to the log line by line.

        Returns:
            True, unconditionally.  Exit codes of the commands are not
            inspected.
        """
        logging.info(" ========> AnalysisRunner for %20s called run" %
                     (self.analysis.name))

        self.analysis.output_strings = []

        # We may want to put the output into an array for multiple commands.

        cmds = self.analysis.getCommands()

        # Report the size of the list that is actually executed below.
        logging.info(
            " ========> AnalysisRunner for %20s called run for %s commands" %
            (self.analysis.name, len(cmds)))

        for cmdobj in cmds:

            cmd = cmdobj.command
            logging.info(
                " ========> AnalysisRunner for %20s running comand %s" %
                (self.analysis.name, cmd))

            # Open a pipe
            p = Popen([cmd],
                      shell=True,
                      stdout=PIPE,
                      stderr=PIPE,
                      close_fds=True)

            # communicate() drains both pipes and waits for the child to
            # exit, so a single unconditional call is all that is needed.
            # Guarding it with "while p.poll() == None" lost the output of
            # any command that had already exited before the first poll.
            (out, err) = p.communicate()

            if out:
                AnalysisUtils.addOutputString(self.analysis, out)
                sys.stdout.flush()

            if err:
                AnalysisUtils.addOutputString(self.analysis, err)
                sys.stderr.flush()

        logging.info(
            " ========> AnalysisRunner for %20s finished command: Output is" %
            (self.analysis.name))

        for tmp in self.analysis.output_strings:
            for t in tmp.output_string.split("\n"):
                logging.info(" ========> Analysis %20s Output %s" %
                             (self.analysis.name, t))

        return True
예제 #3
0
    def testGetInput(self):
        """The analysis must hold as many input files as were handed over."""
        AnalysisUtils.setInputFiles(self.anaobj, self.inputs, ['dir'])

        attached = self.anaobj.input_files
        expected_count = len(self.inputs)

        # Assertion that the test framework collates
        self.assertEqual(len(attached), expected_count)
예제 #4
0
    def testGetOutput(self):
        """Running the analysis must leave at least one output string on it."""
        AnalysisUtils.setInputFiles(self.anaobj, self.inputs, ['dir'])

        runner = AnalysisRunner(self.anaobj)
        self.runner = runner
        runner.run()

        captured = AnalysisUtils.getOutputStrings(runner.analysis)
        self.assertTrue(len(captured) > 0)
예제 #5
0
    def testCreateAnalysis(self):
        """Smoke test: an analysis with input files and /tmp dirs can init().

        There are no assertions; the test passes when nothing raises.
        """
        first = Analysis(name="pog1")
        # The next two objects are only constructed, never used afterwards.
        second = Analysis(name="pog2", currentstatus="COMPLETE")
        third = Analysis(name="pog3")

        AnalysisUtils.setInputFiles(first, self.input_files, self.input_types)

        # Chained assignment binds left to right: output_dir, then working_dir.
        first.output_dir = first.working_dir = "/tmp"

        first.init()
예제 #6
0
    def setUp(self):
        """Create a "Bowtie2" analysis through the factory and init() it.

        The fixture stores the factory on ``self.factory`` and the analysis
        on ``self.ana``, sets its parameter string and attaches one fastq
        input file before calling ``init()``.
        """
        factory = AnalysisFactory()
        self.factory = factory

        analysis = factory.createAnalysisFromModuleName("Bowtie2")
        self.ana = analysis
        analysis.param = " -x ../testdata/databases/Arabidopsis_TAIR.9.171 "

        read_files = ["../testdata/FoxP2_SL167.fastq"]
        read_types = ['fastq']
        AnalysisUtils.setInputFiles(analysis, read_files, read_types)

        analysis.init()
예제 #7
0
    def testRun(self):
        """Run the fixture analysis and require at least one output string."""
        AnalysisUtils.setInputFiles(self.anaobj, self.inputs, ['dir'])

        self.runner = AnalysisRunner(self.anaobj)

        self.runner.run()

        tmpstr = AnalysisUtils.getOutputStrings(self.anaobj)

        # Debug output; this form parses on both Python 2 and Python 3.
        print(tmpstr)

        # Assert on the value already fetched instead of fetching it again.
        self.assertTrue(len(tmpstr) > 0)
예제 #8
0
    def getCommands(self):
        """Append the FastQC java command line and return ``self.commands``.

        The command is built for the first entry of ``self.input_files`` only
        and uses ``self.classpath`` and ``self.working_dir``.  Every call
        appends one more ``AnalysisCommand`` to ``self.commands``.

        Raises:
            Exception: when ``AnalysisUtils.checkInputFiles(self)`` compares
                equal to False.
        """
        AnalysisUtils.checkDiskSpace(self)

        inputs_ok = AnalysisUtils.checkInputFiles(self)
        # Deliberately "== False": the helper's return contract is not
        # visible here, so the original comparison is kept as is.
        if inputs_ok == False:
            raise Exception("Input files [%s] don't exist = can't continue" %
                            (self.input_files))

        java_prefix = ("java -Xmx1024M -Djava.awt.headless=true "
                       "-Djava.awt.headlesslib=true -classpath ")

        # Assemble the command piece by piece, left to right.
        command = java_prefix + self.classpath + "  -Dfastqc.output_dir="
        command = command + self.working_dir
        command += " uk.ac.babraham.FastQC.FastQCApplication "
        command += self.input_files[0].input_file

        rank = len(self.commands) + 1
        self.commands.append(
            AnalysisCommand(command=command, command_rank=rank))

        return self.commands
예제 #9
0
    def testCreateNewAnalysisRunner(self):
        """New instance should create successfully"""
        # Function gets called automatically
        runner = AnalysisRunner(self.ana)

        # The returned list is not inspected; the call is kept so that any
        # error raised while fetching the input files still fails the test.
        AnalysisUtils.getInputFiles(runner.analysis)

        self.assertTrue(runner)
예제 #10
0
    def testCreateMummer(self):
        """A Mummer analysis with two fasta inputs yields one nucmer command."""
        mummer = Mummer()

        self.assertTrue(mummer)
        self.assertTrue(
            AnalysisUtils.setInputFiles(mummer, self.input_files,
                                        ['fasta', 'fasta']))

        mummer.init()
        tmpfiles = AnalysisUtils.getInputFiles(mummer)

        self.assertEqual(len(tmpfiles), 2)

        commands = mummer.getCommands()

        # Debug output; this form parses on both Python 2 and Python 3.
        print(commands)

        self.assertEqual(len(commands), 1)

        # Substring check.  The previous "index(...) > 0" raised ValueError
        # (an error, not a failure) when the text was missing and would have
        # failed if the text were at offset 0.
        self.assertIn(
            'tools/macosx/MUMmer3.23/nucmer --maxgap=500 --mincluster=100',
            commands[0].command)
예제 #11
0
    def testCreateAnalysis(self):
        """Create three analyses, commit them and verify ids and statuses.

        Input files are attached to the first analysis twice: once by
        appending ``AnalysisInputFile`` objects directly and once through
        ``AnalysisUtils.setInputFiles``.
        """
        input_files = ['pog1.fa', 'pog2.fa', 'pog3.fa']
        input_types = ['fasta', 'fasta', 'fasta']

        obj1 = Analysis(name="pog1")
        obj2 = Analysis(name="pog2", currentstatus="COMPLETE")
        obj3 = Analysis(name="pog3")

        if1 = AnalysisInputFile(input_file='pog1.fa', input_file_rank=1)
        if2 = AnalysisInputFile(input_file='pog2.fa', input_file_rank=2)

        obj1.input_files.append(if1)
        obj1.input_files.append(if2)

        AnalysisUtils.setInputFiles(obj1, input_files, input_types)

        self.session.add(obj1)
        self.session.add(obj2)
        self.session.add(obj3)

        self.session.commit()

        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the comparison fails.
        self.assertEqual(obj1.id, 1)
        self.assertEqual(obj2.name, "pog2")
        self.assertEqual(obj2.currentstatus, "COMPLETE")

        obj = self.session.query(Analysis).filter_by(name='pog1').all()

        self.assertEqual(len(obj), 1)
        self.assertEqual(obj[0].id, 1)

        self.assertEqual(obj[0].currentstatus, "NEW")

        # pog1 and pog3 were created without an explicit status.
        obj = self.session.query(Analysis).filter_by(currentstatus='NEW').all()

        self.assertEqual(len(obj), 2)

        # Debug dump of the first match; written so that it parses and prints
        # the same text on Python 2 and Python 3.
        for key, value in obj[0].__dict__.items():
            print("%s %s" % (key, value))

        print(obj[0].status)
예제 #12
0
    def postProcessOutput(self):
        """Extract the alignment summary from the captured output strings.

        After delegating to ``AnalysisUtils.postProcessOutput``, every line of
        every entry in ``self.output_strings`` is matched against the summary
        format shown below and the captured values are stored, as strings, in
        ``self.summary_data``.  Keys whose line never appears are simply
        absent.  The "Percent ..." values are the raw text between the count
        and the word "aligned", e.g. ``"(100.00%)"`` including parentheses.
        """
        AnalysisUtils.postProcessOutput(self)

        # Expected text (presumably the bowtie2 alignment summary):
        #3 reads; of these:
        #  3 (100.00%) were unpaired; of these:
        #    3 (100.00%) aligned 0 times
        #    0 (0.00%) aligned exactly 1 time
        #    0 (0.00%) aligned >1 times
        #  0.00% overall alignment rate

        tmpdat = {}

        for str1obj in self.output_strings:
            # "line" instead of "str" so the builtin is not shadowed.
            for line in str1obj.output_string.split("\n"):
                # Raw strings keep "\d" a regex escape, not a string escape.
                match1 = re.match(r'(\d+) reads', line)
                match2 = re.match(r' +(\d+) (.*?) aligned 0 times', line)
                match3 = re.match(r' +(\d+) (.*?) aligned exactly 1 time',
                                  line)
                match4 = re.match(r' +(\d+) (.*?) aligned >1 times', line)
                match5 = re.match(r'(.*) overall alignment rate', line)

                if match1:
                    tmpdat['Number_of_Reads'] = match1.group(1)

                if match2:
                    tmpdat['Aligned 0 Times'] = match2.group(1)
                    tmpdat['Percent Aligned 0 Times'] = match2.group(2)

                if match3:
                    tmpdat['Aligned Exactly 1 Time'] = match3.group(1)
                    tmpdat['Percent Aligned Exactly 1 Time'] = match3.group(2)

                if match4:
                    tmpdat['Aligned >1 Time'] = match4.group(1)
                    # group(2) is the percentage; group(1) (the count) was
                    # stored here before by mistake.
                    tmpdat['Percent Aligned >1 Time'] = match4.group(2)

                if match5:
                    tmpdat['Overall Alignment Rate'] = match5.group(1)

        self.summary_data = tmpdat
예제 #13
0
    def postProcessOutput(self):
        """Summarise the 'Basic Statistics' module of the FastQC data.

        After delegating to ``AnalysisUtils.postProcessOutput``, the result of
        ``self.readOutputFastqcData()`` is read.  Selected rows of its
        'Basic Statistics' table are copied into ``self.summary_data`` and the
        module's status is stored in ``self.output_status``.  Summary entries
        whose row is missing stay ``None``.
        """
        AnalysisUtils.postProcessOutput(self)

        output_dat = self.readOutputFastqcData()
        basic_stats = output_dat['Basic Statistics']

        status = basic_stats['status']

        # Row label in the data -> key used in the summary.
        summary_keys = {
            "Encoding": 'Encoding',
            "Sequence length": 'Sequence Length',
            "Total Sequences": 'Total Sequences',
            "Filename": 'Filename',
            "%GC": '%GC',
        }

        # Start every summary entry at None so absent rows are still reported.
        tmpdat = dict.fromkeys(summary_keys.values())

        for row in basic_stats['moddata']:
            key = row[0]
            value = row[1]

            if key in summary_keys:
                tmpdat[summary_keys[key]] = value

        self.summary_data = tmpdat
        self.output_status = status
예제 #14
0
    def init(self):
        """Derive the FastQC output directory and register expected outputs.

        Calls the parent ``init()``, then names the output directory after the
        first input file: ``<filestub>_fastqc/`` for ``.fastq`` files, the
        same with a ``.fastq`` infix removed for ``.gz`` files, and
        ``<basename>_fastqc/`` otherwise.  The name is stored in
        ``self.fastqc_dir`` and every entry of
        ``self.expected_output_filelist`` is registered beneath it through
        ``AnalysisUtils.addExpectedOutputFile``.

        Raises:
            Exception: when there are no input files.
        """
        super(FastQCAnalysis, self).init()

        # Truthiness also covers input_files being None.
        if not self.input_files:
            raise Exception(
                "No input files for FastQCAnalysis module. Can't init")

        fileparts = FileUtils.getFileParts(self.input_files[0].input_file)
        fileext = fileparts['fileext']

        # Named fastqc_dir rather than "dir" to leave the builtin alone.
        if fileext == ".fastq":
            fastqc_dir = fileparts['filestub'] + "_fastqc/"
        elif fileext == ".gz":
            fastqc_dir = fileparts['filestub'].replace(".fastq",
                                                       "") + "_fastqc/"
        else:
            fastqc_dir = fileparts['basename'] + "_fastqc/"

        self.fastqc_dir = fastqc_dir

        for f in self.expected_output_filelist:
            AnalysisUtils.addExpectedOutputFile(self, fastqc_dir + f)
예제 #15
0
    def testRunMummer(self):
        """Run Mummer end to end and check its output text and delta file."""
        mummer = Mummer()

        self.assertTrue(mummer)
        self.assertTrue(
            AnalysisUtils.setInputFiles(mummer, self.input_files,
                                        ['fasta', 'fasta']))

        runner = AnalysisRunner(mummer)

        self.assertTrue(runner.run())

        self.assertEqual(len(mummer.output_strings), 1)

        # Substring check.  The previous "index(...) > 0" raised ValueError
        # (an error, not a failure) when the marker was missing and would
        # have failed if the marker were at offset 0.
        self.assertIn('4: FINISHING DATA',
                      mummer.output_strings[0].output_string)

        self.assertTrue(FileUtils.fileExists('../testout/mummer.delta'))
예제 #16
0
    def setInputFiles(self, input_files, input_types):
        """Attach input files to this object, then call ``self.init()``.

        Both arguments are passed unchanged to
        ``AnalysisUtils.setInputFiles``.

        Args:
            input_files: presumably a list of input file paths -- confirm
                against ``AnalysisUtils.setInputFiles``.
            input_types: presumably one type label per input file -- confirm
                against ``AnalysisUtils.setInputFiles``.
        """
        AnalysisUtils.setInputFiles(self, input_files, input_types)

        # init() runs only after the input files have been attached.
        self.init()
예제 #17
0
    def testSetInputFiles(self):
        """setInputFiles must return a truthy value for the fixture inputs."""
        outcome = AnalysisUtils.setInputFiles(self.anaobj, self.inputs,
                                              ['dir'])

        self.assertTrue(outcome)
예제 #18
0
    def getCommands(self):
        """Build, once, the shell commands for this analysis and return them.

        If ``self.commands`` is already non-empty it is returned unchanged.
        Otherwise the command, expected-output and temp-file lists are reset
        and, for each input file, the following is queued:

        * for ``.gz`` inputs, a ``gunzip -c`` into the working directory;
        * the aligner piped through ``samtools view`` and ``samtools sort``;
        * ``samtools index`` on the resulting ``.bam``.

        ``<stub>.bam`` and ``<stub>.bam.bai`` are recorded as expected output
        files for every input.

        Returns:
            ``self.commands``, a list of ``AnalysisCommand`` objects.

        Raises:
            Exception: when either binary or the input files are reported
                missing, or when ``self.param`` is None.
        """
        if self.commands:
            return self.commands

        logging.info(" ========> Analysis %20s Getting commands" % (self.name))

        self.commands = []
        self.expected_output_files = []
        self.temp_output_files = []

        tmpdir = self.working_dir

        btbin = self.bowtiebindir + self.bowtiebinname
        stbin = self.samtoolsbindir + self.samtoolsbinname

        self.calculateSpaceNeeded()

        # "== False" is kept on purpose for the helper checks below: their
        # return contract is not visible here and a None result must not be
        # treated as a failure.
        for binary in (btbin, stbin):
            if FileUtils.fileExists(binary) == False:
                raise Exception(
                    "Binary file [%s] doesn't exist = can't continue" %
                    binary)

        if AnalysisUtils.checkInputFiles(self) == False:
            raise Exception("Input files [%s] don't exist = can't continue" %
                            (self.input_files))

        AnalysisUtils.checkDiskSpace(self)

        for fobj in self.input_files:
            f = fobj.input_file
            try:

                if f.endswith(".gz"):
                    # Strip only the trailing suffix; str.replace would also
                    # remove a ".gz" occurring earlier in the path.
                    tmpf = f[:-len(".gz")]
                    fparts = FileUtils.getFileParts(tmpf)
                    unzipped = tmpdir + "/" + fparts['basename']
                    command = "gunzip -c " + f + " > " + unzipped
                    # Wrapped in AnalysisCommand like every other entry; the
                    # runner reads ".command" from each item, which a bare
                    # string does not have.
                    self.commands.append(AnalysisCommand(command=command))
                    # NOTE(review): this records the input path minus ".gz",
                    # not the file gunzip writes (unzipped) -- confirm which
                    # one the temp-file handling expects.
                    self.temp_output_files.append(tmpf)
                    f = unzipped

                fparts = FileUtils.getFileParts(f)
                fstub = fparts['filestub']

                samtoolsoutfile = tmpdir + "/" + fstub + ".bam"

                if self.param is None:
                    raise Exception(
                        "No parameters entered for bowtie = needs -x <genomeindex>"
                    )

                # Align, convert SAM to BAM and sort in a single pipeline.
                command1 = btbin + " " + self.param + " " + f + " | " + stbin + " view -bS - | " + stbin + " sort - " + tmpdir + "/" + fstub

                logging.info(" ========> Analysis %20s command 1 : %s" %
                             (self.name, command1))

                command2 = stbin + " index " + samtoolsoutfile

                # Label fixed: this is the second command, not a third.
                logging.info(" ========> Analysis %20s command 2 : %s" %
                             (self.name, command2))

                self.expected_output_files.append(
                    AnalysisExpectedOutputFile(expected_output_file=fstub +
                                               ".bam"))
                self.expected_output_files.append(
                    AnalysisExpectedOutputFile(expected_output_file=fstub +
                                               ".bam.bai"))

                self.commands.append(AnalysisCommand(command=command1))
                self.commands.append(AnalysisCommand(command=command2))

            except Exception as e:
                # Log which analysis failed, then let the error propagate.
                logging.info(
                    " ========> Analysis %20s Failed building command list [%s]"
                    % (self.name, e))
                raise

        return self.commands