def screen(self, srrs=None):
    """Screen each SRR accession and bud the contigs that pass the CDD screen.

    For every accession a ``screener.Screener`` is built from ``self.wd`` and
    the ``'virusdb'`` / ``'cdd'`` entries of ``self.dbs``. The accession is
    passed through ``screen_srr``, ``vdbdump.run``, ``assemble`` and
    ``cdd_screen``; every contig returned by ``cdd_screen`` is wrapped in a
    ``virus_contig.VirusContig`` and the accumulated contigs are handed to
    ``Screener.bud``.

    Args:
        srrs: Iterable of SRR accession identifiers. ``None`` (the default)
            is treated as "nothing to screen".

    Returns:
        None.

    Raises:
        SystemExit: via ``sys.exit()`` as soon as one accession yields no
            putative virus contigs.
    """
    if srrs is None:
        srrs = []

    # Contigs accumulate across accessions, so the "ctg_<n>" names stay
    # unique for the whole call.
    vrs_ctgs = {}
    for srr in srrs:
        print("Screening {0}".format(srr), file=sys.stderr)
        s = screener.Screener(self.wd, srr, self.dbs['virusdb'], self.dbs['cdd'])
        srr_alignments = s.screen_srr(s.srr, s.virus_db.path)
        vdb_parser = s.vdbdump.run(s.srr, srr_alignments)
        contigs = s.assemble(vdb_parser.dump_to_file())
        putative_virus_contigs = s.cdd_screen(
            contigs, s.cdd_db.path, os.path.join(s.wd, 'rpst'))

        if not putative_virus_contigs:
            print("No contigs with virus motifs detected")
            # NOTE(review): this terminates the whole process, so any
            # accessions after this one are never screened -- confirm
            # that is intended before changing it.
            sys.exit()

        for j in putative_virus_contigs:
            assembled = s.assembler.parser.sequences[j]
            c = virus_contig.VirusContig(
                "ctg_" + str(len(vrs_ctgs)),
                assembled.sequence,
                srr,
                assembled.header,
                self.flank_len,
                s.wd)
            vrs_ctgs[c.name] = c
            print("Prepared {} for budding".format(c.name))
        print("Budding {} contigs".format(len(vrs_ctgs)))
        s.bud(vrs_ctgs)
def screen(self, srrs=None):
    """Screen each SRR accession and sort the resulting matches.

    For every accession a ``screener.Screener`` is built from ``self.wd`` and
    the ``'virusdb'`` / ``'cdd'`` entries of ``self.dbs``, ``screen_srr`` is
    run with the virus database path, and ``sort_matches`` is called with
    ``self.sam`` and ``self.wd``.

    NOTE(review): this variant stops after sorting; no assembly or CDD
    screening step is present here -- confirm that is intended.

    Args:
        srrs: Iterable of SRR accession identifiers. ``None`` (the default)
            is treated as "nothing to screen".

    Returns:
        None.
    """
    if srrs is None:
        srrs = []

    for srr in srrs:
        print("Screening {0}".format(srr), file=sys.stderr)
        s = screener.Screener(self.wd, srr, self.dbs['virusdb'], self.dbs['cdd'])
        # The vdb-dump step that used to consume the screen_srr() result is
        # disabled in this version, so the return value is not kept.
        s.screen_srr(s.srr, s.virus_db.path)
        s.sort_matches(self.sam, self.wd)
# use the argument as pattern for index, filename in enumerate(args.in_files): print('index = ', index, filename) in_filename_phase.append(filename) for index, filename in enumerate(args.out_files): print('index = ', index, filename) out_filename_phase.append(filename) if debug_level > 1: print('args :', len(sys.argv)) if debug_level > 1: print('args :', len(sys.argv)) screener = screener.Screener() screener.set_debug_level(debug_level) if truncate_table: screener.screener_table_reload(truncate_table) screener.amfi_load_db() screener.isin_load_db() # screener.load_amfi_data(in_amfi_filename) screener.screener_load_data(in_filename_phase[0]) screener.screener_dump_phase1(out_filename_phase[0]) screener.screener_dump_phase2(out_filename_phase[1]) screener.screener_dump_phase3(out_filename_phase[2], out_filename_phase[3],
def screen(self, inputs=None, intype="srr"):
    """Run the magicblast / sort / assemble steps for each input.

    For every input a ``screener.Screener`` is built from ``self.wd`` and the
    ``'virusdb'`` / ``'cdd'`` entries of ``self.dbs``. ``screen_srr`` is run
    unless a non-empty ``<input>.sam`` already exists in the per-input
    directory; then ``sort_matches`` and ``assemble`` are called.

    Args:
        inputs: Iterable of input names (SRR accessions or file names).
            ``None`` (the default) is treated as "nothing to screen".
        intype: Declared input type; one of ``"srr"``, ``"fasta"`` or
            ``"fastq"``. It is validated only -- whether an individual input
            is a file or an accession is detected per input.

    Returns:
        None.

    Raises:
        ValueError: if ``intype`` is not a supported input type.
        SystemExit: via ``sys.exit(0)`` right after the first input has been
            assembled (see the NOTE in the loop body).
    """
    # NOTE(review): the original also collected magicblast input flags per
    # type here ("-srr", "-query", and "infmt fastq -query"), but the list
    # was never passed on, so only the validation is kept.
    if intype not in ("srr", "fasta", "fastq"):
        raise ValueError(
            "Input type %s isn't supported; try srr, fasta or fastq, please."
            % intype)
    if inputs is None:
        inputs = []

    vrs_ctgs = {}
    for i in inputs:
        print("Screening {0}".format(i), file=sys.stderr)
        s = screener.Screener(self.wd, i, self.dbs['virusdb'], self.dbs['cdd'])
        wd = os.path.join(self.wd, i)

        # Anything containing a dot or not starting with "SRR" is treated as
        # a file. The previous test looked for "SRR" inside a two-character
        # slice, which can never match, so every input was reported as a file.
        if '.' in i or not i.startswith("SRR"):
            print("Input file detected: %s" % i)

        # Reuse an existing sam file only when it is non-empty; a missing or
        # zero-size file means magicblast has to be (re)run.
        samfile = os.path.join(wd, "%s.sam" % i)
        if os.path.isfile(samfile) and os.path.getsize(samfile) > 0:
            print(
                "There's already a sam file at %s, skipping magicblast."
                % samfile)
        else:
            print(
                "Could not find samfile at location %s, running magicblast."
                % samfile)
            s.screen_srr(s.srr, s.virus_db.path, samfile)

        # NOTE(review): sorting reads "magicblast.sam", not the "<input>.sam"
        # path checked above -- confirm which file screen_srr() writes.
        samfile = os.path.join(wd, "magicblast.sam")
        print("sam is %s and current working dir is %s" % (samfile, wd))
        s.sort_matches(samfile, wd)

        weak_fasta = os.path.join(wd, "weak_%s.fasta" % i)
        contigs = s.assemble(weak_fasta)

        # NOTE(review): unconditional exit -- the process stops after the
        # first input is assembled, and everything below is unreachable.
        # Kept as-is because it looks like deliberate work-in-progress
        # scaffolding; remove it to re-enable CDD screening and budding.
        sys.exit(0)

        putative_virus_contigs = s.cdd_screen(
            contigs, s.cdd_db.path, os.path.join(s.wd, 'rpst'))
        # NOTE(review): the 1000-contig threshold looks like a debugging
        # value -- confirm before re-enabling this branch.
        if len(putative_virus_contigs) > 1000:
            for j in putative_virus_contigs:
                print("working on contig # j %s" % s.assembler.parser.sequences)
                c = virus_contig.VirusContig(
                    "ctg_" + str(len(vrs_ctgs)),
                    s.assembler.parser.sequences[j].sequence,
                    i,
                    s.assembler.parser.sequences[j].header,
                    self.flank_len,
                    s.wd)
                vrs_ctgs[c.name] = c
                print("Prepared {} for budding".format(c.name))
            print("Budding {} contigs".format(len(vrs_ctgs)))
            s.bud(vrs_ctgs)
        else:
            print("No contigs with virus motifs detected")
            sys.exit()