Beispiel #1
0
def main():
    """Run the scaffold-accuracy pipeline as three external commands.

    Each stage is wrapped in ``RunCommand``, announced on stderr and then
    executed, in this order:

    1. ``create_readoids.py`` on ``args[1]`` with ``-o`` set to
       ``<options.output>.readoids.fasta``.
    2. ``run_nucmer.py`` on ``args[0]`` and the readoids file, with ``-p``
       set to ``<options.output>.scaffold_accuracy``.
    3. ``assess_readoids_coords.py`` on ``<prefix>.coords`` and the inputs
       above, with ``-o`` set to ``<options.table>.scaffold_accuracy``.

    Reads the module-level ``options`` and ``args``.

    Returns:
        0 after the three commands have been run.
    """

    def announce_and_run(command):
        # Terminate the log line explicitly: sys.stderr.write() adds no
        # newline, so without it consecutive "Running:" messages (and any
        # output of the commands themselves) end up on one line.
        rc = RunCommand(command)
        sys.stderr.write("Running:  " + rc.get_command() + "\n")
        rc.run_command()

    # Stage 1: build the readoids file from args[1].
    readoids = options.output + ".readoids.fasta"
    announce_and_run([
        'create_readoids.py',
        '-l', str(options.rd_ln),
        '-d', str(options.distance),
        '-w', str(options.window_size),
        '-o', readoids,
        args[1],
    ])

    # Stage 2: run nucmer on args[0] and the readoids.
    detail_output = options.output + ".scaffold_accuracy"
    announce_and_run(
        ['run_nucmer.py', '-p', detail_output, args[0], readoids])

    # Stage 3: assess the coords file named after the stage-2 prefix.
    coords_file = detail_output + ".coords"
    # NOTE(review): the table prefix comes from options.table while every
    # other output here is derived from options.output -- confirm intended.
    table_output = options.table + ".scaffold_accuracy"
    announce_and_run([
        'assess_readoids_coords.py',
        '-c', coords_file,
        '-p', readoids,
        '-f', args[1],
        '-r', args[0],
        '-v', detail_output,
        '-l', str(options.rd_ln),
        '-d', str(options.distance),
        '-e', str(options.error),
        '-o', table_output,
    ])

    return 0
Beispiel #2
0
def main():
    # set up command
    cmd = None
    task = options.task

    if options.make_blastdb:
        type = 'nucl'
        if options.is_protein:
            type = 'prot'
        cmd_list = [constant.MAKEBLASTDB, '-in', args[0], '-dbtype', type]
        rc = RunCommand(cmd_list)
        print "Running command:  " + rc.get_command() + '\n'
        rc.run_command()

    if options.vec_screen:
        cmd = NcbiblastnCommandline(query=args[1], db=args[0], evalue=700, outfmt=options.outfmt, reward=1,
            penalty=-5, gapopen=3, gapextend=3, dust='yes', searchsp=1750000000000,
            out=options.output, task=task, num_threads=options.threads)
    elif options.ncbi_screen:
        # Note: new NCBI requirements say to use lcase_masking (See GAAG-510 for documentation)
        cmd = NcbiblastnCommandline(query=args[1], db=args[0], outfmt=options.outfmt, dust='yes',
            perc_identity=90, lcase_masking='true', task='megablast',
            out=options.output, num_threads=options.threads)
        #Blast documentation says soft_masking option if not specified defaults to true so even though
        #this biopython class does not support it the feature should be enabled by default
    elif options.mito_screen:
        # Note: new NCBI requirements say to use lcase_masking (See GAAG-510 for documentation)
        cmd = NcbiblastnCommandline(query=args[1], db=args[0], outfmt=options.outfmt, dust='yes',
            perc_identity=98.6, lcase_masking='true', task='blastn',
            out=options.output, num_threads=options.threads)
        #Blast documentation says soft_masking option if not specified defaults to true so even though
        #this biopython class does not support it the feature should be enabled by default
    elif options.rRNA_screen:
        cmd = NcbiblastnCommandline(query=args[1], db=args[0], outfmt=options.outfmt, dust='yes',
            perc_identity=95, lcase_masking='', task='megablast',
            out=options.output, num_threads=options.threads, evalue=1e-9, window_size=120, gapextend=2, gapopen=4,
            no_greedy='', penalty=-4, reward=3, word_size=12, xdrop_gap=20)
            #Options not supported: in_pssm='',soft_masking='true', matrix=5000000, max_intron_length=18, db_gencode=3,
    else:
        if options.task in TASKS:
            program = TASKS[options.task]
            if re.search("Ncbiblastn", str(program)):
                cmd = program(query=args[1], db=args[0], evalue=options.evalue,
                    outfmt=options.outfmt, out=options.output,
                    num_threads=options.threads, max_target_seqs=options.max_targets,
                    task=task)
            else:
                cmd = program(query=args[1], db=args[0], evalue=options.evalue,
                    outfmt=options.outfmt, out=options.output,
                    num_threads=options.threads, max_target_seqs=options.max_targets)
        else:
            print "Unrecognized blast task, " + options.task
            sys.exit(-1)

    print "Running BLAST command:  " + str(cmd) + '\n'
    out, err = cmd()
#    print "out: out\terr: err"

    return 0
Beispiel #3
0
def main():

    if __check_inputs(options.classify, options.rdp_out):
        rnammer_cmd = __build_rnammer_cmd(args[0], options.rnammer_out,
                                          options.gene, options.superkingdom)
        rc = RunCommand(rnammer_cmd)
        print "RUNNING:  " + rc.get_command()
        rc.run_command()

        if options.classify:
            rdp_cmd = __build_rdp_cmd(options.rnammer_out, options.rdp_out)
            rc = RunCommand(rdp_cmd)
            print "RUNNING:  " + rc.get_command()
            rc.run_command()

        return 0

    else:
        print "If classifying hits, must supply classify flag and rdp output file."
        sys.exit(-1)
Beispiel #4
0
def main():

    delta = args[0]

    # get information about our inputs
    reference, query = _get_query_and_reference_from_delta_file(delta)
    query_seqs = _get_query_sequences(query)
    reference_list, reference_lengths = _get_reference_details(reference)

    # get alignment information
    command = _get_show_tilings_command(options.output, options.id,
                                        options.coverage, options.circular,
                                        delta)
    reference_tilings = _get_tilings_information(command)

    # see if we need circular query information
    circular_queries = {}
    if options.circular:
        command = _get_circular_show_tilings_command(options.output, delta)
        circular_queries = _get_circular_alignments(command)

    print "Ordering and orienting using", delta
    print "Reference", reference
    print "Query", query

    # get ono information from our gathered data
    ono_sequences_list, ono_details_list = _parse_tilings(
        reference_list, reference_lengths, reference_tilings, query_seqs,
        circular_queries)

    # print out our output
    _write_details_to_file(ono_details_list,
                           options.output + ".ono.details.txt")
    interim_fasta = options.output + ".interim.fasta"
    SeqIO.write(ono_sequences_list, interim_fasta, "fasta")
    make_assembly_command = [
        "make_standard_assembly_files.py", "-S", interim_fasta, "-o",
        options.output + ".ono"
    ]

    if options.rename:
        make_assembly_command += ['-r']

    rc = RunCommand(make_assembly_command)
    print "Executing", rc.get_command()
    rc.run_command()

    return 0
Beispiel #5
0
def main():

    delta = args[0]
    delta_file = open(delta, 'r')
    reference, query = delta_file.readline().rstrip().split(" ")
    query_fasta = SimpleFastaFile(query)
    query_seqs = query_fasta.get_sequence()

    if os.path.isfile(options.output + ".interim.fasta"):
        remove_command = "rm", options.output + ".interim.fasta"
        rc = RunCommand(remove_command)
        print rc.get_command()
        rc.run_command()

    ono_fasta = SimpleFastaFile(options.output + ".interim.fasta")

    print "Ordering and orienting using", delta
    print "Reference", reference
    print "Query", query

    arg_list = ['-R', '-v 5', '-V 0', '-u', options.output + '.unplaced']
    if options.circular:
        arg_list += ["-c"]
    rc = RunCommand(__build_showtiling_command(delta, arg_list))
    print "Executing:", rc.get_command()
    print ""

    left_cut = 0

    for line in rc.run_command().splitlines():

        wraps = False
        if (line.startswith(">")):

            #IF THERE WAS A WRAP ON THE PREVIOUS REFERENCE SEQUENCE
            if (left_cut):
                print left_text
                ono_fasta.addSeq(left_query + "_left", left_side)
                ono_fasta.reverseSeq(left_query + "_left")
                query_seqs[left_query] = 0

            ref, bases, null = line.rstrip().split(" ")
            print "Ordering to", ref
            left_cut = 0

        else:

            rstart, rend, qstart, qend, cov, id, ori, query = line.rstrip(
            ).split("\t")
            #CHECK TO SEE IF THIS IS AN ALIGNMENT WHICH WRAPS AROUND THE END OF A REFERENCE SEQUENCE
            if (int(rstart) < 0 and options.circular):
                #print "WRAP", line.rstrip().split("\t")
                wraps = True

                arg_list = [
                    '-R', '-a', '-v 5', '-g -1', '-V 0', '-u',
                    options.output + '.unplaced', '-c'
                ]
                rc = RunCommand(__build_showtiling_command(delta, arg_list))
                #print "Executing:",rc.get_command()

                for line in (rc.run_command().splitlines()):
                    align = line.rstrip().split("\t")
                    #print align
                    #print align[0], align[12], query
                    if (int(align[0]) == 1 and align[12] == query):
                        #print "CLEAN WRAP FOUND!", align
                        #PRINT RIGHT SIDE OF QUERY AND STORE LEFT CUT SITE
                        if (ori == "+"):
                            print "\tStoring left side of the query to print at the end of alignments to this reference sequence"
                            left_cut = int(align[2]) - 1
                            left_side = query_seqs[query][:left_cut]
                            left_query = query
                            left_text = "\tWriting left side of " + query + " (1 to " + str(
                                left_cut
                            ) + ") aligned " + cov + " at " + id + "% identity in the forward orientation."

                            right_cut = int(align[2]) - 1
                            print "\tWriting right side of " + query + " (" + str(
                                right_cut + 1
                            ) + " to end) aligned " + cov + " at " + id + "% identity starting at 1 in the forward orientation."
                            right_side = query_seqs[query][right_cut:]
                            ono_fasta.addSeq(query + "_right", right_side)
                        else:
                            print "\tStoring left side of the query to print at the end of alignments to this reference sequence"
                            left_cut = int(align[2])
                            left_side = query_seqs[query][left_cut:]
                            left_query = query
                            left_text = "\tWriting left side of " + query + "(" + str(
                                left_cut + 1
                            ) + " to end) aligned " + cov + " at " + id + "% identity in the reverse orientation."

                            right_cut = int(align[2])
                            print "\tWriting right side of " + query + " (1 to " + str(
                                right_cut
                            ) + ") aligned " + cov, "at", id + "% identity starting at 1 in the reverse orientation."
                            right_side = query_seqs[query][:right_cut]
                            ono_fasta.addSeq(query + "_right", right_side)
                            ono_fasta.reverseSeq(query + "_right")

                if not left_cut:
                    print "\tIt appears this query sequence overlaps, but no clean cut site was found.  Printing as normal."
                    wraps = False

            #IF THERE IS NO WRAP
            if (not wraps):
                if ori == "+":
                    print "\tWriting", query, "aligned", cov, "at", id + "% identity starting at", rstart, "in the forward orientation."
                    ono_fasta.addSeq(query, query_seqs[query])
                else:
                    print "\tWriting", query, "aligned", cov, "at", id + "% identity starting at", rstart, "in the reverse orientation."
                    ono_fasta.addSeq(query, query_seqs[query])
                    ono_fasta.reverseSeq(query)
                query_seqs[query] = 0

    #IF THE LAST REFERENCE HAD A WRAP, PRINT LEFT SIDE
    if (left_cut):
        print left_text
        ono_fasta.addSeq(left_query + "_left", left_side)
        if "reverse" in left_text:
            ono_fasta.reverseSeq(left_query + "_left")
        query_seqs[left_query] = 0

    #PRINT ANY UNALIGNED SEQUENCES
    for contig in query_seqs:
        if query_seqs[contig]:
            print "Writing unaligned sequence:", contig
            ono_fasta.addSeq(contig, query_seqs[contig])

    ono_fasta.writeSeqFile()
    make_assembly_command = [
        "make_standard_assembly_files.py", "-S",
        options.output + ".interim.fasta", "-o", options.output + ".ono"
    ]
    if options.rename:
        make_assembly_command += ['-r']
    rc = RunCommand(make_assembly_command)
    print "Executing", rc.get_command()
    rc.run_command()
Beispiel #6
0
def main():
    # set up command
    cmd = None
    task = options.task

    if options.make_blastdb:
        type = 'nucl'
        if options.is_protein:
            type = 'prot'
        cmd_list = [constant.MAKEBLASTDB, '-in', args[0], '-dbtype', type]
        rc = RunCommand(cmd_list)
        print "Running command:  " + rc.get_command() + '\n'
        rc.run_command()

    if options.vec_screen:
        cmd = NcbiblastnCommandline(query=args[1],
                                    db=args[0],
                                    evalue=700,
                                    outfmt=options.outfmt,
                                    reward=1,
                                    penalty=-5,
                                    gapopen=3,
                                    gapextend=3,
                                    dust='yes',
                                    searchsp=1750000000000,
                                    out=options.output,
                                    task=task,
                                    num_threads=options.threads)
    elif options.ncbi_screen:
        cmd = NcbiblastnCommandline(query=args[1],
                                    db=args[0],
                                    outfmt=options.outfmt,
                                    dust='yes',
                                    perc_identity=90,
                                    lcase_masking='',
                                    task='megablast',
                                    out=options.output,
                                    num_threads=options.threads)
        #soft_masking='true',
    elif options.rRNA_screen:
        cmd = NcbiblastnCommandline(query=args[1],
                                    db=args[0],
                                    outfmt=options.outfmt,
                                    dust='yes',
                                    perc_identity=95,
                                    lcase_masking='',
                                    task='megablast',
                                    out=options.output,
                                    num_threads=options.threads,
                                    evalue=1e-9,
                                    window_size=120,
                                    gapextend=2,
                                    gapopen=4,
                                    no_greedy='',
                                    penalty=-4,
                                    reward=3,
                                    word_size=12,
                                    xdrop_gap=20)
        #Options not supported: in_pssm='',soft_masking='true', matrix=5000000, max_intron_length=18, db_gencode=3,
    else:
        if options.task in TASKS:
            program = TASKS[options.task]
            if re.search("Ncbiblastn", str(program)):
                cmd = program(query=args[1],
                              db=args[0],
                              evalue=options.evalue,
                              outfmt=options.outfmt,
                              out=options.output,
                              num_threads=options.threads,
                              max_target_seqs=options.max_targets,
                              task=task)
            else:
                cmd = program(query=args[1],
                              db=args[0],
                              evalue=options.evalue,
                              outfmt=options.outfmt,
                              out=options.output,
                              num_threads=options.threads,
                              max_target_seqs=options.max_targets)
        else:
            print "Unrecognized blast task, " + options.task
            sys.exit(-1)

    print "Running BLAST command:  " + str(cmd) + '\n'
    out, err = cmd()

    return 0