Esempio n. 1
0
    def testCreateMummerDeltaFile(self):
        """Parse the delta file and spot-check the alignments it yields.

        The reference and query sequence dictionaries are loaded first
        because MummerDeltaFile is constructed from them.
        """
        refseqs = FastaFile.getSequenceDict(self.input_files[0])
        qryseqs = FastaFile.getSequenceDict(self.input_files[1])

        # assertEqual/assertIn report the offending values on failure,
        # which assertTrue(a == b) cannot do.
        self.assertEqual(len(refseqs), 87)
        self.assertEqual(len(qryseqs), 34)

        mdf = MummerDeltaFile(self.deltafile, refseqs, qryseqs)

        self.assertTrue(mdf)

        mdf.parse()

        alns = mdf.alns

        self.assertEqual(len(alns), 54)
        self.assertIn('GG739631.1', alns)

        idalns = alns['GG739631.1']

        self.assertEqual(len(idalns), 14)

        # Spot-check the last of the 14 alignments stored under this id.
        self.assertEqual(idalns[13].qstart, 293765)
Esempio n. 2
0
    def getCommands(self):
        """Build the two shell commands that call and then filter variants.

        Resets ``self.commands`` and ``self.output_files``, sets
        ``self.basename`` from the first input file, and appends the raw and
        filtered VCF paths to ``self.expected_output_files``.  When
        ``self.regionstr`` is non-empty it is passed to ``samtools mpileup``
        through ``-r`` and is embedded in both output file names.

        Returns:
            ``self.commands``, holding two ``AnalysisCommand`` objects with
            ``command_rank`` 1 and 2.

        Raises:
            Exception: if ``self.checkInputFiles()`` returns ``False``.
        """
        self.commands = []
        self.output_files = []

        self.checkDiskSpace()

        # NOTE(review): the returned dictionary is not used in this method.
        # The call is kept so that any error it raises for an unreadable
        # reference still surfaces here -- confirm whether it can be dropped,
        # since it re-reads the reference on every invocation.
        FastaFile.getSequenceDict(self.refgenome, False)

        if self.checkInputFiles() == False:
            # One-element tuple so a tuple-valued input_files cannot be
            # mistaken for multiple format arguments.
            raise Exception("Input files [%s] don't exist = can't continue" %
                            (self.input_files,))

        fileparts = FileUtils.getFileParts(self.input_files[0])

        self.basename = fileparts['basename']

        # Need to set dbtype somewhere

        out_prefix = self.working_dir + "/" + self.basename
        regstr = ""

        if self.regionstr != "":
            regstr = " -r " + self.regionstr
            # Keep per-region outputs apart by putting the region in the name.
            out_prefix = out_prefix + "." + self.regionstr

        outfile1 = out_prefix + ".raw.vcf"
        outfile2 = out_prefix + ".flt.vcf"

        self.expected_output_files.append(outfile1)
        self.expected_output_files.append(outfile2)

        # Pile up reads against the reference and pipe into bcftools.
        command1 = (self.samtools + " mpileup -uf " + self.refgenome + " " +
                    self.input_files[0] + " " + regstr + " | " +
                    self.bcftools + " view  -bvcg -  > " + outfile1)

        # Convert the raw calls and apply the vcfutils depth filter.
        command2 = (self.bcftools + " view " + outfile1 + " | " +
                    self.vcfutils + " varFilter -D100 > " + outfile2)

        # Single-argument parenthesised form prints identically on
        # Python 2 and Python 3.
        print("Command %s" % command1)
        print("Command %s" % command2)

        for command in (command1, command2):
            self.commands.append(
                AnalysisCommand(command=command,
                                command_rank=len(self.commands) + 1))

        return self.commands
Esempio n. 3
0
    def getCommands(self):
        """Build commands for every ``self.chunk``-sized reference region.

        Reads the sequence dictionary for ``self.refgenome`` and walks each
        sequence in windows of ``self.chunk`` positions starting at 1.  For
        each window a "SamtoolsMpileup" analysis is created with the region
        string ``name:start-end``, and the commands it returns are collected
        into ``self.commands``.

        Returns:
            ``self.commands`` with the commands of all regions, in order.

        Raises:
            Exception: if ``self.checkInputFiles()`` returns ``False``.
            ValueError: if ``self.chunk`` is smaller than 1, which would
                otherwise make the window loop run forever.
        """
        self.commands = []
        self.output_files = []

        self.checkDiskSpace()

        # Single-argument parenthesised form prints identically on
        # Python 2 and Python 3.
        print("Reading genome file")
        seqs = FastaFile.getSequenceDict(self.refgenome, False)

        if self.checkInputFiles() == False:
            # One-element tuple so a tuple-valued input_files cannot be
            # mistaken for multiple format arguments.
            raise Exception("Input files [%s] don't exist = can't continue" %
                            (self.input_files,))

        fileparts = FileUtils.getFileParts(self.input_files[0])

        self.basename = fileparts['basename']

        if self.chunk < 1:
            raise ValueError("chunk must be >= 1, got %r" % (self.chunk,))

        for seqid in seqs:

            # Named seqlen rather than len so the builtin is not shadowed.
            seqlen = seqs[seqid]['len']

            start = 1

            # '<=' so a final window that starts exactly on the last
            # position (including a length-1 sequence) is still emitted.
            while start <= seqlen:
                end = min(start + self.chunk - 1, seqlen)

                regionstr = "%s:%d-%d" % (seqid, start, end)

                tmpana = AnalysisFactory.createAnalysisFromModuleName("SamtoolsMpileup")

                tmpana.setInputFiles(self.input_files, self.input_types)

                tmpana.refgenome = self.refgenome
                tmpana.regionstr = regionstr
                tmpana.init()

                self.commands.extend(tmpana.getCommands())

                start += self.chunk

        return self.commands
Esempio n. 4
0
def read_sequences(seqfile):
    """Return the result of ``FastaFile.getSequenceDict`` for *seqfile*."""
    return FastaFile.getSequenceDict(seqfile)