def main():
    """Run the scaffold-accuracy pipeline as three external commands.

    The steps, in order, are:

    1. ``create_readoids.py`` writes ``<options.output>.readoids.fasta``
       from ``args[1]``.
    2. ``run_nucmer.py`` is given ``args[0]`` and the readoids file, with
       ``<options.output>.scaffold_accuracy`` as its ``-p`` prefix.
    3. ``assess_readoids_coords.py`` is given the ``.coords`` file derived
       from that prefix and writes to ``<options.table>.scaffold_accuracy``.

    Each command line is echoed to stderr before it is executed.

    Returns:
        0 once all three commands have been launched.
    """

    def _run_logged(command):
        # Echo the command on its own line. The newline keeps consecutive
        # "Running:" messages (and anything else written to stderr) from
        # being glued together on a single line.
        rc = RunCommand(command)
        sys.stderr.write("Running: " + rc.get_command() + "\n")
        rc.run_command()

    readoids = options.output + ".readoids.fasta"
    _run_logged([
        'create_readoids.py',
        '-l', str(options.rd_ln),
        '-d', str(options.distance),
        '-w', str(options.window_size),
        '-o', readoids,
        args[1],
    ])

    detail_output = options.output + ".scaffold_accuracy"
    _run_logged(['run_nucmer.py', '-p', detail_output, args[0], readoids])

    # The third step reads "<prefix>.coords", built from the same prefix
    # that was handed to run_nucmer.py above.
    coords_file = detail_output + ".coords"
    table_output = options.table + ".scaffold_accuracy"
    _run_logged([
        'assess_readoids_coords.py',
        '-c', coords_file,
        '-p', readoids,
        '-f', args[1],
        '-r', args[0],
        '-v', detail_output,
        '-l', str(options.rd_ln),
        '-d', str(options.distance),
        '-e', str(options.error),
        '-o', table_output,
    ])
    return 0
def main():
    """Optionally build a BLAST database, then run one BLAST search.

    When ``options.make_blastdb`` is set, ``constant.MAKEBLASTDB`` is run on
    ``args[0]`` first ('prot' if ``options.is_protein``, otherwise 'nucl').

    The search itself is picked by the first matching flag:
    ``vec_screen``, ``ncbi_screen``, ``mito_screen``, ``rRNA_screen``;
    otherwise the program registered in ``TASKS`` for ``options.task`` is
    used.  An unknown task prints a message and exits with status -1.

    Returns:
        0 after the BLAST command object has been invoked.
    """
    task = options.task

    if options.make_blastdb:
        db_type = 'prot' if options.is_protein else 'nucl'
        rc = RunCommand(
            [constant.MAKEBLASTDB, '-in', args[0], '-dbtype', db_type])
        print("Running command: " + rc.get_command() + '\n')
        rc.run_command()

    def _io_kwargs():
        # Keyword arguments shared by every search variant.  Built lazily so
        # that args/options are only touched once a variant is selected.
        return dict(query=args[1],
                    db=args[0],
                    outfmt=options.outfmt,
                    out=options.output,
                    num_threads=options.threads)

    if options.vec_screen:
        settings = _io_kwargs()
        settings.update(evalue=700, reward=1, penalty=-5, gapopen=3,
                        gapextend=3, dust='yes', searchsp=1750000000000,
                        task=task)
        cmd = NcbiblastnCommandline(**settings)
    elif options.ncbi_screen:
        # lcase_masking is passed explicitly for this screen; soft_masking is
        # not passed at all.
        settings = _io_kwargs()
        settings.update(dust='yes', perc_identity=90, lcase_masking='true',
                        task='megablast')
        cmd = NcbiblastnCommandline(**settings)
    elif options.mito_screen:
        # Same masking choices as the NCBI screen, with a stricter identity
        # cutoff and the plain blastn task.
        settings = _io_kwargs()
        settings.update(dust='yes', perc_identity=98.6, lcase_masking='true',
                        task='blastn')
        cmd = NcbiblastnCommandline(**settings)
    elif options.rRNA_screen:
        # NOTE(review): lcase_masking and no_greedy are given as empty
        # strings here, unlike the 'true' used by the screens above.  If the
        # command-line wrapper treats them as boolean switches, an empty
        # string may leave them disabled -- confirm the intent.
        settings = _io_kwargs()
        settings.update(dust='yes', perc_identity=95, lcase_masking='',
                        task='megablast', evalue=1e-9, window_size=120,
                        gapextend=2, gapopen=4, no_greedy='', penalty=-4,
                        reward=3, word_size=12, xdrop_gap=20)
        cmd = NcbiblastnCommandline(**settings)
    elif options.task in TASKS:
        program = TASKS[options.task]
        settings = _io_kwargs()
        settings.update(evalue=options.evalue,
                        max_target_seqs=options.max_targets)
        # Only the blastn wrapper is handed an explicit task argument.
        if re.search("Ncbiblastn", str(program)):
            settings['task'] = task
        cmd = program(**settings)
    else:
        print("Unrecognized blast task, " + options.task)
        sys.exit(-1)

    print("Running BLAST command: " + str(cmd) + '\n')
    _stdout, _stderr = cmd()
    return 0
def main():
    """Run the RNAmmer command and, when requested, the RDP command.

    The inputs are validated first via
    ``__check_inputs(options.classify, options.rdp_out)``; if that check
    fails, a message is printed and the process exits with status -1.
    The RDP step only runs when ``options.classify`` is set, and it reads
    ``options.rnammer_out`` produced by the first step.

    Returns:
        0 when the requested commands have been run.
    """
    if not __check_inputs(options.classify, options.rdp_out):
        print("If classifying hits, must supply classify flag and rdp output file.")
        sys.exit(-1)

    steps = [__build_rnammer_cmd(args[0], options.rnammer_out, options.gene,
                                 options.superkingdom)]
    rc = RunCommand(steps[0])
    print("RUNNING: " + rc.get_command())
    rc.run_command()

    if options.classify:
        rc = RunCommand(__build_rdp_cmd(options.rnammer_out, options.rdp_out))
        print("RUNNING: " + rc.get_command())
        rc.run_command()

    return 0
def main():
    """Order and orient query sequences using the delta file in ``args[0]``.

    The work is delegated to module helpers: the reference and query are read
    from the delta file, tiling information is gathered (plus circular
    alignments when ``options.circular`` is set), and the tilings are parsed
    into sequences and detail records.  The details go to
    ``<options.output>.ono.details.txt``, the sequences to
    ``<options.output>.interim.fasta``, and ``make_standard_assembly_files.py``
    is then run on the interim FASTA (with ``-r`` when ``options.rename``).

    Returns:
        0 on completion.
    """

    def _say(*parts):
        # Space-joined output, matching a multi-item print statement.
        print(" ".join(str(part) for part in parts))

    delta = args[0]
    out_prefix = options.output

    # Inputs named by the delta file.
    reference, query = _get_query_and_reference_from_delta_file(delta)
    query_seqs = _get_query_sequences(query)
    reference_list, reference_lengths = _get_reference_details(reference)

    # Alignment (tiling) information.
    tiling_command = _get_show_tilings_command(
        out_prefix, options.id, options.coverage, options.circular, delta)
    reference_tilings = _get_tilings_information(tiling_command)

    # Circular query information is only collected on request.
    if options.circular:
        circular_queries = _get_circular_alignments(
            _get_circular_show_tilings_command(out_prefix, delta))
    else:
        circular_queries = {}

    _say("Ordering and orienting using", delta)
    _say("Reference", reference)
    _say("Query", query)

    ono_sequences_list, ono_details_list = _parse_tilings(
        reference_list, reference_lengths, reference_tilings, query_seqs,
        circular_queries)

    # Write the details table and the interim FASTA.
    _write_details_to_file(ono_details_list, out_prefix + ".ono.details.txt")
    interim_fasta = out_prefix + ".interim.fasta"
    SeqIO.write(ono_sequences_list, interim_fasta, "fasta")

    make_assembly_command = [
        "make_standard_assembly_files.py",
        "-S", interim_fasta,
        "-o", out_prefix + ".ono",
    ]
    if options.rename:
        make_assembly_command.append('-r')

    rc = RunCommand(make_assembly_command)
    _say("Executing", rc.get_command())
    rc.run_command()
    return 0
def main():
    """Order and orient query contigs along a reference from a delta file.

    ``args[0]`` is a delta file whose first line names the reference and the
    query FASTA, separated by a single space.  A show-tiling command is run
    on the delta file and its output is walked line by line:

    * A line starting with ">" opens a new reference sequence.
    * Any other line is a tab-separated placement of one query sequence,
      which is written to the interim FASTA and reversed when its
      orientation field is not "+".
    * With ``options.circular``, a placement whose reference start is
      negative is treated as wrapping around the reference origin.  The
      query is cut where it crosses reference position 1: the "right" piece
      is written immediately, and the "left" piece is held back and written
      after the remaining placements on that reference.

    Query sequences that were never placed are appended at the end, the
    interim FASTA is written, and ``make_standard_assembly_files.py`` is run
    on it (with ``-r`` when ``options.rename`` is set).

    Changes from the previous implementation:

    * The delta file handle is now closed after the header is read.
    * A held-back left piece is reversed only for reverse-orientation hits,
      both when a new reference starts and at end of input.  Previously the
      new-reference path always reversed it.
    * Orientation is tracked with a flag instead of searching the log
      message for "reverse", which also matched query names containing it.
    """

    def _say(*parts):
        # Space-joined output, matching a multi-item print statement.
        print(" ".join(str(part) for part in parts))

    delta = args[0]
    with open(delta, 'r') as delta_file:
        reference, query_path = delta_file.readline().rstrip().split(" ")

    # presumably a mapping of sequence name -> sequence; it is indexed by
    # query name and sliced below -- verify against SimpleFastaFile.
    query_seqs = SimpleFastaFile(query_path).get_sequence()

    # Start from a clean interim file.
    interim_fasta = options.output + ".interim.fasta"
    if os.path.isfile(interim_fasta):
        rc = RunCommand(("rm", interim_fasta))
        print(rc.get_command())
        rc.run_command()
    ono_fasta = SimpleFastaFile(interim_fasta)

    def _write_left(name, fragment, message, is_reverse):
        # Emit a held-back left piece and mark its query as consumed.
        print(message)
        ono_fasta.addSeq(name + "_left", fragment)
        if is_reverse:
            ono_fasta.reverseSeq(name + "_left")
        query_seqs[name] = 0

    _say("Ordering and orienting using", delta)
    _say("Reference", reference)
    _say("Query", query_path)

    tiling_args = ['-R', '-v 5', '-V 0', '-u', options.output + '.unplaced']
    if options.circular:
        tiling_args += ["-c"]
    rc = RunCommand(__build_showtiling_command(delta, tiling_args))
    _say("Executing:", rc.get_command())
    print("")

    # A non-zero left_cut means a left piece is waiting to be written.
    left_cut = 0
    left_reversed = False
    for line in rc.run_command().splitlines():
        wraps = False
        if line.startswith(">"):
            # A wrap on the previous reference leaves its left piece pending.
            if left_cut:
                _write_left(left_query, left_side, left_text, left_reversed)
            ref, _bases, _unit = line.rstrip().split(" ")
            _say("Ordering to", ref)
            left_cut = 0
            continue

        (rstart, _rend, _qstart, _qend, cov, identity, ori,
         query) = line.rstrip().split("\t")

        # A negative reference start marks an alignment that wraps around
        # the end of a circular reference sequence.
        if int(rstart) < 0 and options.circular:
            wraps = True
            wrap_args = [
                '-R', '-a', '-v 5', '-g -1', '-V 0', '-u',
                options.output + '.unplaced', '-c'
            ]
            wrap_rc = RunCommand(__build_showtiling_command(delta, wrap_args))
            for wrap_line in wrap_rc.run_command().splitlines():
                align = wrap_line.rstrip().split("\t")
                # Look for this query's alignment that begins at reference
                # position 1; its query coordinate is the cut site.
                if not (int(align[0]) == 1 and align[12] == query):
                    continue
                print("\tStoring left side of the query to print at the end of alignments to this reference sequence")
                if ori == "+":
                    left_cut = int(align[2]) - 1
                    left_side = query_seqs[query][:left_cut]
                    left_query = query
                    left_reversed = False
                    left_text = "\tWriting left side of " + query + " (1 to " + str(
                        left_cut
                    ) + ") aligned " + cov + " at " + identity + "% identity in the forward orientation."
                    right_cut = int(align[2]) - 1
                    print("\tWriting right side of " + query + " (" + str(
                        right_cut + 1
                    ) + " to end) aligned " + cov + " at " + identity +
                          "% identity starting at 1 in the forward orientation.")
                    right_side = query_seqs[query][right_cut:]
                    ono_fasta.addSeq(query + "_right", right_side)
                else:
                    left_cut = int(align[2])
                    left_side = query_seqs[query][left_cut:]
                    left_query = query
                    left_reversed = True
                    left_text = "\tWriting left side of " + query + "(" + str(
                        left_cut + 1
                    ) + " to end) aligned " + cov + " at " + identity + "% identity in the reverse orientation."
                    right_cut = int(align[2])
                    _say("\tWriting right side of " + query + " (1 to " + str(
                        right_cut
                    ) + ") aligned " + cov, "at", identity +
                         "% identity starting at 1 in the reverse orientation.")
                    right_side = query_seqs[query][:right_cut]
                    ono_fasta.addSeq(query + "_right", right_side)
                    ono_fasta.reverseSeq(query + "_right")
            if not left_cut:
                print("\tIt appears this query sequence overlaps, but no clean cut site was found. Printing as normal.")
                wraps = False

        # No wrap: write the whole query in its aligned orientation.
        if not wraps:
            if ori == "+":
                _say("\tWriting", query, "aligned", cov, "at",
                     identity + "% identity starting at", rstart,
                     "in the forward orientation.")
                ono_fasta.addSeq(query, query_seqs[query])
            else:
                _say("\tWriting", query, "aligned", cov, "at",
                     identity + "% identity starting at", rstart,
                     "in the reverse orientation.")
                ono_fasta.addSeq(query, query_seqs[query])
                ono_fasta.reverseSeq(query)
            # Mark as placed so it is not written again as unaligned.
            query_seqs[query] = 0

    # If the last reference had a wrap, its left piece is still pending.
    if left_cut:
        _write_left(left_query, left_side, left_text, left_reversed)

    # Append any sequences that were never placed.
    for contig in query_seqs:
        if query_seqs[contig]:
            _say("Writing unaligned sequence:", contig)
            ono_fasta.addSeq(contig, query_seqs[contig])
    ono_fasta.writeSeqFile()

    make_assembly_command = [
        "make_standard_assembly_files.py", "-S", interim_fasta,
        "-o", options.output + ".ono"
    ]
    if options.rename:
        make_assembly_command += ['-r']
    rc = RunCommand(make_assembly_command)
    _say("Executing", rc.get_command())
    rc.run_command()
def main():
    """Optionally build a BLAST database, then run one BLAST search.

    When ``options.make_blastdb`` is set, ``constant.MAKEBLASTDB`` is run on
    ``args[0]`` first ('prot' if ``options.is_protein``, otherwise 'nucl').

    The search is picked by the first matching flag: ``vec_screen``,
    ``ncbi_screen``, ``rRNA_screen``; otherwise the program registered in
    ``TASKS`` for ``options.task`` is used.  An unknown task prints a message
    and exits with status -1.

    Returns:
        0 after the BLAST command object has been invoked.
    """
    task = options.task

    if options.make_blastdb:
        db_type = 'prot' if options.is_protein else 'nucl'
        rc = RunCommand(
            [constant.MAKEBLASTDB, '-in', args[0], '-dbtype', db_type])
        print("Running command: " + rc.get_command() + '\n')
        rc.run_command()

    def _io_kwargs():
        # Keyword arguments shared by every search variant.  Built lazily so
        # that args/options are only touched once a variant is selected.
        return dict(query=args[1],
                    db=args[0],
                    outfmt=options.outfmt,
                    out=options.output,
                    num_threads=options.threads)

    # NOTE(review): the ncbi and rRNA screens pass lcase_masking (and
    # no_greedy) as empty strings.  If the command-line wrapper treats these
    # as boolean switches, an empty string may leave them disabled --
    # confirm the intent.
    if options.vec_screen:
        settings = _io_kwargs()
        settings.update(evalue=700, reward=1, penalty=-5, gapopen=3,
                        gapextend=3, dust='yes', searchsp=1750000000000,
                        task=task)
        cmd = NcbiblastnCommandline(**settings)
    elif options.ncbi_screen:
        settings = _io_kwargs()
        settings.update(dust='yes', perc_identity=90, lcase_masking='',
                        task='megablast')
        cmd = NcbiblastnCommandline(**settings)
    elif options.rRNA_screen:
        settings = _io_kwargs()
        settings.update(dust='yes', perc_identity=95, lcase_masking='',
                        task='megablast', evalue=1e-9, window_size=120,
                        gapextend=2, gapopen=4, no_greedy='', penalty=-4,
                        reward=3, word_size=12, xdrop_gap=20)
        cmd = NcbiblastnCommandline(**settings)
    elif options.task in TASKS:
        program = TASKS[options.task]
        settings = _io_kwargs()
        settings.update(evalue=options.evalue,
                        max_target_seqs=options.max_targets)
        # Only the blastn wrapper is handed an explicit task argument.
        if re.search("Ncbiblastn", str(program)):
            settings['task'] = task
        cmd = program(**settings)
    else:
        print("Unrecognized blast task, " + options.task)
        sys.exit(-1)

    print("Running BLAST command: " + str(cmd) + '\n')
    _stdout, _stderr = cmd()
    return 0