コード例 #1
0
 def screen(self, srrs=[]):
     """Screen every SRR accession for virus-like contigs and bud them.

     For each accession in ``srrs`` a ``screener.Screener`` is created, the
     accession is screened against the virus database, the alignments are
     dumped to a file and assembled, and the resulting contigs are screened
     against the CDD database. Each putative virus contig is wrapped in a
     ``virus_contig.VirusContig`` and stored, keyed by its name, in one dict
     that is shared by all accessions; that dict is passed to the
     screener's ``bud`` once per accession.

     The process is terminated with ``sys.exit()`` as soon as an accession
     yields no putative virus contigs.
     """
     budding_pool = {}
     for accession in srrs:
         print("Screening {0}".format(accession), file=sys.stderr)
         scr = screener.Screener(self.wd, accession, self.dbs['virusdb'],
                                 self.dbs['cdd'])
         alignments = scr.screen_srr(scr.srr, scr.virus_db.path)
         dump = scr.vdbdump.run(scr.srr, alignments)
         assembled = scr.assemble(dump.dump_to_file())
         hits = scr.cdd_screen(assembled, scr.cdd_db.path,
                               os.path.join(scr.wd, 'rpst'))

         # Guard clause: nothing to bud for this accession, stop the run.
         if len(hits) == 0:
             print("No contigs with virus motifs detected")
             sys.exit()

         for seq_id in hits:
             record = scr.assembler.parser.sequences[seq_id]
             # Contig names are numbered by how many contigs exist so far,
             # counting those from previously screened accessions.
             contig = virus_contig.VirusContig(
                 "ctg_{}".format(len(budding_pool)),
                 record.sequence, accession, record.header,
                 self.flank_len, scr.wd)
             budding_pool[contig.name] = contig
             print("Prepared {} for budding".format(contig.name))
         print("Budding {} contigs".format(len(budding_pool)))
         scr.bud(budding_pool)
コード例 #2
0
 def screen(self, srrs=[]):
     """Screen each SRR accession against the virus database and sort matches.

     For every accession in ``srrs`` a ``screener.Screener`` is created from
     ``self.wd`` and the ``'virusdb'`` / ``'cdd'`` entries of ``self.dbs``,
     the accession is screened, and ``sort_matches`` is called with
     ``self.sam`` and ``self.wd``.

     NOTE(review): this excerpt stops right after the ``sort_matches`` call;
     whatever the method does afterwards is not visible here.
     """
     # Not used in the visible part of the method.
     vrs_ctgs = {}
     for i in srrs:
         # Progress message goes to stderr.
         print("Screening {0}".format(i), file=sys.stderr)
         s = screener.Screener(self.wd, i, self.dbs['virusdb'],
                               self.dbs['cdd'])
         # The result is only consumed by the disabled vdbdump call below.
         srr_alignments = s.screen_srr(s.srr, s.virus_db.path)
         #vdb_parser = s.vdbdump.run(s.srr, srr_alignments)
         # NOTE(review): the same self.sam / self.wd pair is passed for every
         # accession -- confirm this is not meant to be a per-accession path.
         sortit = s.sort_matches(self.sam, self.wd)
コード例 #3
0
# Collect the input and output file names from the parsed arguments, echoing
# each one together with its position.
for index, filename in enumerate(args.in_files):
    print('index = ', index, filename)
    in_filename_phase.append(filename)

for index, filename in enumerate(args.out_files):
    print('index = ', index, filename)
    out_filename_phase.append(filename)

# Report the raw command-line argument count once when debugging.
if debug_level > 1:
    print('args :', len(sys.argv))

# NOTE: this rebinds the name `screener` to the new Screener instance, so the
# object that provided `Screener` is no longer reachable under that name.
screener = screener.Screener()

screener.set_debug_level(debug_level)

# Reload the screener table only when a truncate option was given.
if truncate_table:
    screener.screener_table_reload(truncate_table)

screener.amfi_load_db()
screener.isin_load_db()
# screener.load_amfi_data(in_amfi_filename)

# Only the first input file is loaded.
screener.screener_load_data(in_filename_phase[0])

# Phase 1 and phase 2 dumps go to the first and second output files.
screener.screener_dump_phase1(out_filename_phase[0])
screener.screener_dump_phase2(out_filename_phase[1])
screener.screener_dump_phase3(out_filename_phase[2], out_filename_phase[3],
コード例 #4
0
    def screen(self, inputs=[], intype="srr"):
        """Screen each input against the virus database and assemble contigs.

        For every entry in ``inputs`` a ``screener.Screener`` is created, the
        screening step is run unless a non-empty SAM file already exists for
        that entry, the matches are sorted, and the ``weak_<input>.fasta``
        file in the entry's working directory is assembled.

        Args:
            inputs: Iterable of input names (SRR accessions or file names).
            intype: Declared input kind; one of "srr", "fasta" or "fastq".

        Raises:
            ValueError: If ``intype`` is not one of the supported kinds.

        NOTE(review): the method ends the process with ``sys.exit(0)`` right
        after assembling the first input, so the CDD screening and budding
        code after that call never runs. This looks like a development
        short-circuit and is kept as is -- confirm before removing it.
        """
        vrs_ctgs = {}
        # Command-line flags for the declared input type.
        # NOTE(review): the list is built but not yet passed to the screener.
        blastcmds = []
        if intype == "srr":
            blastcmds.append("-srr")
        elif intype == "fasta":
            blastcmds.append("-query")
        elif intype == "fastq":
            # extend() keeps the flag list flat; append() nested a list in it.
            blastcmds.extend(["-infmt", "fastq", "-query"])
        else:
            raise ValueError(
                "Input type %s isn't supported; try srr, fasta or fastq, please."
                % intype)
        for i in inputs:
            print("Screening {0}".format(i), file=sys.stderr)
            s = screener.Screener(self.wd, i, self.dbs['virusdb'],
                                  self.dbs['cdd'])
            wd = os.path.join(self.wd, i)
            # Decide whether this input is a file or an SRR accession.
            # A name containing a dot, or one not starting with "SRR", is
            # treated as a file. (A two-character slice can never contain
            # "SRR", so the prefix is tested with startswith instead.)
            intype = "srr"
            if '.' in i or not i.startswith("SRR"):
                print("Input file detected: %s" % i)
                intype = "query"
            # Run the screening step only when the SAM file is missing or
            # empty; an existing non-empty file is reused.
            sambasename = "%s.sam" % i
            samfile = os.path.join(wd, sambasename)
            if os.path.isfile(samfile) and os.path.getsize(samfile) > 0:
                print(
                    "There's already a sam file at %s, skipping magicblast." %
                    samfile)
            else:
                print(
                    "Could not find samfile at location %s, running magicblast."
                    % samfile)
                s.screen_srr(s.srr, s.virus_db.path, samfile)

            # NOTE(review): the file checked above is "<input>.sam" but the
            # sort step reads "magicblast.sam" -- confirm which name the
            # screening step actually writes.
            samfile = os.path.join(wd, "magicblast.sam")

            print("sam is %s and current working dir is %s" % (samfile, wd))
            s.sort_matches(samfile, wd)

            weak_fasta = os.path.join(wd, "weak_%s.fasta" % i)
            contigs = s.assemble(weak_fasta)
            # Development short-circuit: stop after the first assembly.
            sys.exit(0)

            # Everything below is unreachable while the exit above is in place.
            putative_virus_contigs = s.cdd_screen(contigs, s.cdd_db.path,
                                                  os.path.join(s.wd, 'rpst'))

            # NOTE(review): a threshold of 1000 hits looks like a debugging
            # value -- confirm the intended minimum.
            if len(putative_virus_contigs) > 1000:
                for j in putative_virus_contigs:

                    print("working on contig # j %s" %
                          s.assembler.parser.sequences)
                    c = virus_contig.VirusContig(
                        "ctg_" + str(len(vrs_ctgs)),
                        s.assembler.parser.sequences[j].sequence, i,
                        s.assembler.parser.sequences[j].header, self.flank_len,
                        s.wd)
                    vrs_ctgs[c.name] = c
                    print("Prepared {} for budding".format(c.name))
                print("Budding {} contigs".format(len(vrs_ctgs)))
                s.bud(vrs_ctgs)
            else:
                print("No contigs with virus motifs detected")
                sys.exit()