Esempio n. 1
0
    def protoexoniterator(self, name):
        """Filter and cluster the flank file of one exon, then grade it.

        Args:
            name: ``(core, flanks)`` pair. ``core`` is forwarded as the
                taxon and is also used to locate the exon name inside
                ``flanks``; ``flanks`` is the path stem from which every
                intermediate file name is derived.

        Returns:
            ``"<exon>,passed"`` (newline-terminated) when ``countheaders``
            reports exactly one header in the clustered output,
            ``"<exon>,failed"`` when it reports more than one, and ``None``
            when it reports none (both outputs are deleted in that case).
        """
        core, flanks = name

        match_out = flanks + ".flankfilt"
        cdhitout = match_out + ".cdhit"

        self.matchExonerateCdhit(taxon=core,
                                 flanks=flanks,
                                 fasta=flanks + ".exonerate",
                                 output=match_out)
        runShell(
            self.cdhitest.format(cd_input=match_out,
                                 cd_output=cdhitout,
                                 identity=self.identity,
                                 memory=self.memory).split())
        count = countheaders(cdhitout)

        # `core` is data, not a pattern: escape it so names containing regex
        # metacharacters cannot break or alter the match. When the pattern
        # does not match, `flanks` is used unchanged as the exon name.
        exonname = re.sub(r"^.+.%s.(.+).fq.+" % re.escape(core), r"\1", flanks)

        if not self.keep:
            os.remove(cdhitout + ".clstr")

        if not count:
            # Nothing survived: drop both outputs regardless of self.keep.
            os.remove(match_out)
            os.remove(cdhitout)
            return None

        status = "failed" if count > 1 else "passed"
        return "%s,%s\n" % (exonname, status)
Esempio n. 2
0
    def protoexoniterator(self, path):
        """Run the ``self.cdhitest`` command on *path*.

        Args:
            path: value substituted for ``filt_cons`` in the command
                template.

        Returns:
            ``path`` with ``".cdhit"`` appended.
        """
        command = self.cdhitest.format(filt_cons=path,
                                       identity=self.identity,
                                       memory=self.memory)
        runShell(command.split())

        clustered = path + ".cdhit"
        return clustered
Esempio n. 3
0
    def protoexoniterator(self, core_filename):
        """Run the exonerate / orient / cluster chain for one input file.

        Args:
            core_filename: ``(core, filename)`` pair. ``core`` is passed to
                ``getoriented`` as the taxon; ``filename`` is the input path
                from which all output names are derived.

        Returns:
            ``None`` when ``checkExon`` yields no exon name or when
            ``countheaders`` reports zero headers; otherwise
            ``"<exon>,failed"`` for more than one header and
            ``"<exon>,passed"`` for the remaining case, each terminated by a
            newline.
        """
        core, filename = core_filename

        extra_args = self.ryo
        nucl_ext, mito_ext, filt_ext = self.extentions

        exonname, content = self.checkExon(filename)
        if not exonname:
            return None

        # e.g. ./.tmp_ReadingFramesPercomorph/E0013.fasta
        exon_fasta = self.checkAndCreateFile(exonname[1], content)

        # The first element of exonname selects which extension is used.
        if exonname[0] == 'nucl':
            exonerate_out = filename + nucl_ext
        else:
            exonerate_out = filename + mito_ext
        oriented_out = exonerate_out + filt_ext
        clustered_out = filename + ".exonerate.cdhit"

        exonerate_cmd = self.exonerate.format(cdhit_out=filename,
                                              exon_file=exon_fasta).split()
        runShell((exonerate_cmd + extra_args, exonerate_out), type="stdout")

        self.getoriented(fasta=exonerate_out, taxon=core, output=oriented_out)

        cluster_cmd = self.cdhitest.format(identity=self.identity,
                                           input=oriented_out,
                                           output=clustered_out)
        runShell(cluster_cmd.split())

        count = countheaders(clustered_out)

        if not self.keep:
            # exonerate_out is intentionally left in place here.
            leftovers = (oriented_out,
                         clustered_out + ".clstr",
                         filename + ".clstr")
            for leftover in leftovers:
                os.remove(leftover)

        if count == 0:
            for dead in (filename, exonerate_out, clustered_out):
                os.remove(dead)
            return None

        label = exonname[1].replace(".fasta", "")
        return "%s,%s\n" % (label, "failed" if count > 1 else "passed")
Esempio n. 4
0
    def protoexoniterator(self, item):
        """Run the bam1 | bam2 command chain for one exon.

        Args:
            item: ``(exon, spps)`` pair; double quotes are stripped from the
                exon string and from every entry of ``spps`` before use.

        Returns:
            None. The three stages are handed to ``runShell`` with
            ``type="pipestdout"``.
        """
        raw_exon, raw_spps = item
        exon = raw_exon.replace('"', '')
        spps = [entry.replace('"', '') for entry in raw_spps]

        addn = self.threads - 1

        first_stage = self.bam1.format(stem=self.stem, addn=addn).split() + spps
        second_stage = self.bam2.format(addn=addn).split()
        destination = self.bam12o.format(stem=self.stem, exon=exon)

        runShell((first_stage, second_stage, destination), type="pipestdout")
Esempio n. 5
0
    def run(self):
        """Execute every entry of ``self.listofopts`` in order.

        For each ``key -> options`` entry the ``'args'`` command is run,
        the paths listed under ``'deletion'`` are removed when they exist
        (skipped entirely when ``self.keep`` is truthy), and finally
        ``self.tc_class.label`` is called with the key.
        """
        for key, options in self.listofopts.items():
            runShell(options['args'])

            disposable = () if self.keep else options['deletion']
            for candidate in disposable:
                if os.path.exists(candidate):
                    os.remove(candidate)

            self.tc_class.label(key)
Esempio n. 6
0
    def proto_processor(self, name=None):
        """Run the velveth/velvetg commands on *name* and keep the contigs.

        Args:
            name: path of the input file. An empty file (size 0) is deleted
                and nothing else happens.

        Returns:
            None. When ``<name>.initial/contigs.fa`` is non-empty it is
            handed to ``writeLong`` with output ``<name>.initialVelvet``;
            the ``<name>.initial`` directory is removed afterwards either
            way.
        """
        if not os.path.getsize(name):
            os.remove(name)
            return

        runShell(self.velveth.format(file=name, assem=self.assem).split())
        runShell(self.velvetg.format(file=name).split())

        velvet_dir = "{file}.initial".format(file=name)
        contigs = ospj(velvet_dir, "contigs.fa")

        if os.path.getsize(contigs):
            long_out = "{file}.initialVelvet".format(file=name)
            self.writeLong(fasta=contigs, output=long_out)

        shutil.rmtree(velvet_dir)
Esempio n. 7
0
    def proto_run(self, completecore):
        """Trim and then align the sequences of one core file.

        Two commands are built from ``self.cmd`` and run in order: first the
        ``trimNonHomologousFragments`` program on ``completecore +
        self.suffix``, then ``alignSequences`` on the trimmed nucleotide
        output. ``-gc_def`` is only added when ``self.mitopatt`` matches the
        base name of *completecore*.

        NOTE(review): the keyword originally written here was mangled to
        ``h**o`` (a syntax error); it is restored as ``homo`` on the
        assumption that ``self.cmd`` carries a ``{homo}`` placeholder --
        confirm against the template definition.

        Args:
            completecore: path stem; every input and output file name is
                this stem plus a fixed suffix.

        Returns:
            None. Writes ``"processed: <completecore>"`` to stdout.
        """
        trimnt_out = completecore + ".unaligned.NT_trimNonHomologousFragments.fasta"
        trimaa_out = completecore + ".unaligned.AA_trimNonHomologousFragments.fasta"

        alnnt_out = completecore + ".NT_aligned.fasta"
        alnaa_out = completecore + ".AA_aligned.fasta"

        is_mito = re.findall(self.mitopatt, os.path.basename(completecore))
        # Same option for both commands; empty unless the name matched.
        gc_def = "-gc_def %s" % self.gc_def if is_mito else ""

        runShell(
            self.cmd.format(jar_compiled=self.jar_comp,
                            program="trimNonHomologousFragments",
                            seq=completecore + self.suffix,
                            homo="-min_homology_to_keep_seq %s" %
                            self.homovalue,
                            NT=trimnt_out,
                            AA=trimaa_out,
                            gc_def=gc_def).strip().split())

        runShell(
            self.cmd.format(jar_compiled=self.jar_comp,
                            program="alignSequences",
                            seq=trimnt_out,
                            homo="",
                            NT=alnnt_out,
                            AA=alnaa_out,
                            gc_def=gc_def).strip().split())

        if not self.keep:
            leftovers = (
                completecore + ".unaligned_stats.csv",
                completecore + ".unaligned_mask_detail_NT.fasta",
                trimnt_out,
                trimaa_out,
            )
            # Best-effort cleanup: handle each file on its own so that one
            # missing file does not leave the remaining ones behind.
            for leftover in leftovers:
                try:
                    os.remove(leftover)
                except FileNotFoundError:
                    pass

        sys.stdout.write("processed: %s\n" % completecore)
Esempio n. 8
0
    def preparebwaDB(self, masterfasta, fore, reve):
        """Run the mem, sort, rmdup, index and bam2fq commands in sequence.

        Args:
            masterfasta: file name resolved against the first entry of
                ``fishlifedat.__path__``.
            fore: value substituted for ``fore`` in the ``mem1`` template.
            reve: value substituted for ``reve`` in the ``mem1`` template.

        Returns:
            None. Every stage is delegated to ``runShell``.
        """
        masterfasta = ospj(fishlifedat.__path__[0], masterfasta)
        addn = self.threads - 1

        # mem1 is chained into mem2.
        mem_first = self.mem1.format(masterfasta=masterfasta,
                                     stem=self.stem,
                                     threads=self.threads,
                                     fore=fore,
                                     reve=reve).split()
        mem_second = self.mem2.format(stem=self.stem, addn=addn).split()
        runShell((mem_first, mem_second), type="pipe")

        sort_cmd = self.sort.format(stem=self.stem, addn=addn).split()
        sort_target = self.sorto.format(stem=self.stem)
        runShell((sort_cmd, sort_target), type="stdout")

        rmdup_cmd = self.rmdup.format(stem=self.stem).split()
        runShell(rmdup_cmd)

        index_cmd = self.index.format(stem=self.stem,
                                      threads=self.threads).split()
        runShell(index_cmd)

        bam2fq_cmd = self.bam2fq.format(stem=self.stem, addn=addn).split()
        bam2fq_target = self.bam2fqo.format(stem=self.stem)
        runShell((bam2fq_cmd, bam2fq_target), type="stdout")
Esempio n. 9
0
    def run(self):
        """Run the preprocess and atram commands for every core directory.

        For each ``(core, path)`` in ``self.check_corenames``:

        1. collect the directory entries matching ``self.fastq`` and
           ``self.velvet``; when either list is empty the core is only
           labelled and skipped;
        2. map both lists through ``self.addPathRunAt`` in a process pool;
        3. run the ``self.preprocess`` command once and the ``self.atram``
           command once per initial fasta;
        4. map the resulting ``<assambler>.<core>*`` files through
           ``self.moveRunAt``;
        5. clean up (selected intermediates when ``self.runat`` is unset and
           ``self.keep`` is falsy, the whole working directory when
           ``self.runat`` is set) and label the core.

        Raises:
            SystemExit: when ``self.check_corenames`` is empty.
        """
        if not self.check_corenames:
            # sys.exit() instead of the interactive-only exit() helper,
            # which is injected by the site module and may be absent.
            sys.exit()

        # Experimental
        if self.runat is not None and "linux" in sys.platform:
            # requested by C1 staff
            # Passed as an argument list so that a path containing
            # whitespace remains a single argument.
            runShell(["lfs", "setstripe", str(self.path), "-c", "1"])
        # Experimental

        for core, path in self.check_corenames:

            # List the directory once and filter it twice.
            entries = os.listdir(path)
            fastqs = [(core, path, f) for f in entries
                      if re.search(self.fastq, f)]
            initfas = [(core, path, f) for f in entries
                       if re.search(self.velvet, f)]

            if not fastqs or not initfas:
                self.tc_class.label(core)
                continue

            with Pool(processes=self.threads) as p:
                # move files if there is a given directory
                # in the self.runat variable
                fastq_tar = p.map(self.addPathRunAt, fastqs)
                initfas = p.map(self.addPathRunAt, initfas)

            if self.runat is not None:
                # From here on work inside the runat directory.
                path = ospj(self.runat, core)

            db_prefix = ospj(path, core)

            # preprocessing
            cmdpre = self.preprocess.format(db_prefix=db_prefix,
                                            tmp_path=path).split() + fastq_tar
            runShell(cmdpre)

            # atram
            for exon in initfas:
                cmdatram = self.atram.format(
                    tmp_path=path,
                    db_prefix=db_prefix,
                    init_combi_fa=exon,
                    prefix=ospj(path, self.assambler)).split()

                runShell(cmdatram)

            toshort = [(core, s) for s in glob.glob(
                ospj(path, "%s.%s" % (self.assambler, core + "*")))]

            with Pool(processes=self.threads) as p:
                # it also moves files if the self.runat
                # variable is provided
                p.map(self.moveRunAt, toshort)

            if self.runat is None:

                if not self.keep:
                    patterns = ("*blast*",
                                "*sqlite*",
                                "*.atram.log",
                                "*.atram_preprocessor.log",
                                "*.all_contigs.fasta")
                    to_rm = []
                    for pattern in patterns:
                        to_rm.extend(glob.glob(ospj(path, pattern)))

                    with Pool(processes=self.threads) as p:
                        p.map(os.remove, to_rm)

            else:
                # The runat copy is scratch space: remove it entirely.
                shutil.rmtree(path)

            self.tc_class.label(core)