Ejemplo n.º 1
0
def main():
    """Join the contigs of a FASTG file according to a reference and write a new file.

    Workflow, in the order the code performs it:

    1. Print the banner and call ``require_commands()``.
    2. Resolve the BLAST directory: when ``options.which_blast`` is empty, fall
       back to ``GO_DEP_PATH/ncbi-blast`` if an executable ``blastn`` is there.
    3. Require both ``blastn`` and ``makeblastdb`` to be executable.
    4. Run ``del_complementary``, ``check_db``, ``blast_and_call_new_matrix``
       and ``write_fasta`` on ``options.in_fastg_file``.
    5. Call ``remove_temp_files`` and print the elapsed wall-clock time.

    Exits with status 1 (via ``SystemExit``) when a required BLAST binary is
    not executable, so calling shells and pipelines can detect the failure.
    """
    time0 = time.time()
    sys.stdout.write(
        "\nThis script would join the spades fastg contigs according to the reference."
        "\nIt would add extra gap nodes (N) and/or overlap nodes (?) in between the connectible nodes and generate "
        " a new fastg file."
        "\n"
        "\nThis is a BETA version:"
        "\nAlthough it will not produce error connections, it usually replicates the same right connection."
        "\nDon't be surprised if you find any other bugs.\n")
    require_commands()
    # `options` is a module-level name that is only read (never rebound) here,
    # so no `global` declaration is needed.
    # NOTE(review): presumably require_commands() populates it -- confirm.
    if not options.which_blast:
        try_this_bin = os.path.join(GO_DEP_PATH, "ncbi-blast", "blastn")
        if os.path.isfile(try_this_bin) and executable(try_this_bin):
            options.which_blast = os.path.split(try_this_bin)[0]
    # Both tools are mandatory; stop at the first one that cannot be executed.
    for blast_tool in ("blastn", "makeblastdb"):
        tool_path = os.path.join(options.which_blast, blast_tool)
        if not executable(tool_path):
            sys.stdout.write(tool_path + " not accessible!")
            # Non-zero status: this is an error exit, not a normal termination.
            sys.exit(1)
    # fastg to fasta
    fasta_file = options.in_fastg_file
    del_complementary(fasta_file)
    # make blast database if not made
    include_index = check_db(which_blast=options.which_blast)
    # NOTE(review): assumes read_fasta() returns (names, sequences), making this
    # the length of the first reference sequence -- confirm against read_fasta.
    len_db = len(read_fasta(options.reference_fa_base)[1][0])
    # make blast
    new_fasta_matrix = blast_and_call_new_matrix(
        fasta_file=fasta_file,
        index_files=include_index,
        out_file=fasta_file + '.blast_in',
        len_db=len_db,
        which_blast=options.which_blast)
    # write out fastg, reusing the input file's extension for the output name
    write_fasta(
        out_file=fasta_file + '.Ncontigs_added.' + fasta_file.split('.')[-1],
        matrix=new_fasta_matrix,
        overwrite=False)
    remove_temp_files(fasta_file)
    sys.stdout.write('\n\nTotal cost: '+str(time.time()-time0)+'\n\n')
def require_options():
    """Parse and validate the command line (optparse version).

    Returns:
        tuple: ``(options, args)`` as produced by ``OptionParser.parse_args()``;
        ``args`` holds the positional arguments (the usage string describes
        them as ``*.fastg`` files).

    Raises:
        SystemExit: with status 1 when no positional argument is given or when
            ``blastn`` / ``makeblastdb`` is not executable.

    Side effects:
        When ``--which-blast`` is not given and an executable ``blastn`` exists
        under ``GO_DEP_PATH/ncbi-blast``, ``options.which_blast`` is set to that
        directory.
    """
    usage = "Usage: rm_low_coverage_duplicated_contigs.py *.fastg"
    parser = OptionParser(usage=usage)
    # type=float makes user-supplied thresholds numeric like their defaults;
    # without it a value given on the command line would arrive as a string.
    parser.add_option('--cov-t', dest='coverage_threshold', default=0.12, type=float,
                      help='With ratio (coverage of query/coverage of subject) below which, '
                           'the query would be exposed to discarded. Default: 0.12')
    parser.add_option('--len-t', dest='length_threshold', default=0.9, type=float,
                      help='With overlap (length of hit of query/ length of query) above which, '
                           'the query would be exposed to discarded. Default: 0.9')
    parser.add_option('--blur', dest='blur_bases', default=False, action='store_true',
                      help='Replace hit low-coverage bases with N.')
    parser.add_option('--keep-temp', dest='keep_temp', default=False, action='store_true',
                      help='Keep temp blast files.')
    parser.add_option("--which-blast", dest="which_blast", default="",
                      help="Assign the path to BLAST binary files if not added to the path.")
    parser.add_option('-o', dest='output_dir',
                      help='Output directory. Default: along with the original file')
    parser.add_option('-t', '--threads', dest="threads", default=4, type=int,
                      help="Threads of blastn.")
    options, args = parser.parse_args()
    if not args:
        parser.print_help()
        sys.stdout.write('\n######################################\nERROR: Insufficient REQUIRED arguments!\n\n')
        sys.exit(1)
    if not options.which_blast:
        # Fall back to the bundled BLAST directory when it holds a usable blastn.
        try_this_bin = os.path.join(GO_DEP_PATH, "ncbi-blast", "blastn")
        if os.path.isfile(try_this_bin) and executable(try_this_bin):
            options.which_blast = os.path.split(try_this_bin)[0]
    for blast_tool in ("blastn", "makeblastdb"):
        tool_path = os.path.join(options.which_blast, blast_tool)
        if not executable(tool_path):
            sys.stdout.write(tool_path + " not accessible!")
            sys.exit(1)
    # The former check of `options.treat_no_hits` was removed: this parser
    # defines no option with that dest, so reading the attribute raised
    # AttributeError on every otherwise-valid invocation.
    return options, args
def require_options():
    """Parse and validate the command line (argparse version).

    Returns:
        tuple: ``(options, options.assemblies)`` -- the parsed namespace and
        the list of positional assembly files.

    Raises:
        SystemExit: raised by argparse itself when no assembly file is given
            (``nargs='+'`` requires at least one), and with status 1 when
            ``blastn`` / ``makeblastdb`` is not executable.

    Side effects:
        When ``--which-blast`` is not given, the bundled ``blastn`` under
        ``GO_DEP_PATH/ncbi-blast`` is probed with ``-version``; if the probe
        output does not contain "not found", ``options.which_blast`` is set to
        that directory, otherwise the probe output is echoed to stdout.
    """
    usage = "Usage: rm_low_coverage_duplicated_contigs.py *.fastg"
    parser = ArgumentParser(usage=usage)
    parser.add_argument(
        'assemblies',
        metavar='assemblies',
        type=str,
        nargs='+',
        help=
        "Input FASTG format assembly graph files (split the files by spaces).")
    # type=float makes user-supplied thresholds numeric like their defaults;
    # without it a value given on the command line would arrive as a string.
    parser.add_argument(
        '--cov-t',
        dest='coverage_threshold',
        default=0.12,
        type=float,
        help='With ratio (coverage of query/coverage of subject) below which, '
        'the query would be exposed to discarded. Default: 0.12')
    parser.add_argument(
        '--len-t',
        dest='length_threshold',
        default=0.9,
        type=float,
        help=
        'With overlap (length of hit of query/ length of query) above which, '
        'the query would be exposed to discarded. Default: 0.9')
    parser.add_argument('--blur',
                        dest='blur_bases',
                        default=False,
                        action='store_true',
                        help='Replace hit low-coverage bases with N.')
    parser.add_argument('--keep-temp',
                        dest='keep_temp',
                        default=False,
                        action='store_true',
                        help='Keep temp blast files.')
    parser.add_argument(
        "--which-blast",
        dest="which_blast",
        default="",
        help="Assign the path to BLAST binary files if not added to the path.")
    parser.add_argument(
        '-o',
        dest='output_dir',
        help='Output directory. Default: along with the original file')
    parser.add_argument('-t',
                        '--threads',
                        dest="threads",
                        default=4,
                        type=int,
                        help="Threads of blastn.")
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version="GetOrganelle v{version}".format(version=get_versions()))
    # With nargs='+', parse_args() already aborts when no assembly is given,
    # so the former `if not options.assemblies` branch could never run and
    # has been dropped.
    options = parser.parse_args()
    if not options.which_blast:
        try_this_bin = os.path.join(GO_DEP_PATH, "ncbi-blast", "blastn")
        if os.path.isfile(try_this_bin) and executable(try_this_bin):
            # Probe the bundled binary through the shell; stderr is merged into
            # stdout so a shell-level "not found" message is captured as well.
            output, err = subprocess.Popen(try_this_bin + " -version",
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.STDOUT,
                                           shell=True).communicate()
            probe_text = output.decode("utf8")
            if "not found" in probe_text:
                sys.stdout.write(probe_text + "\n")
            else:
                options.which_blast = os.path.split(try_this_bin)[0]
    for blast_tool in ("blastn", "makeblastdb"):
        tool_path = os.path.join(options.which_blast, blast_tool)
        if not executable(tool_path):
            sys.stdout.write(tool_path + " not accessible!")
            # Non-zero status: this is an error exit, not a normal termination.
            sys.exit(1)
    # The former check of `options.treat_no_hits` was removed: this parser
    # defines no argument with that dest, so reading the attribute raised
    # AttributeError on every otherwise-valid invocation.
    return options, options.assemblies
    dest="which_spades",
    default="",
    help="Assign the path to SPAdes binary files if not added to the path. "
    "Default: try GetOrganelleDep/" + SYSTEM_NAME +
    "/SPAdes first, then $PATH")
# Parse the command line. `parser` and its option definitions are created
# above this point and are not fully visible here.
options, args = parser.parse_args()
# All four of seed_dir, fastq_file_1, fastq_file_2 and output_sh_file must be
# set (truthy); otherwise show the help text and stop.
if not (options.seed_dir and options.fastq_file_1 and options.fastq_file_2
        and options.output_sh_file):
    parser.print_help()
    sys.stdout.write('\nERROR: Insufficient arguments!\n')
    exit()
# The two fastq paths must differ.
if options.fastq_file_1 == options.fastq_file_2:
    raise IOError("1st fastq file should NOT be the same with 2nd fastq file!")
# When no Bowtie2 directory was given, fall back to GO_DEP_PATH/bowtie2 if an
# executable `bowtie2` is found there.
if not options.which_bowtie2:
    try_this_bin = os.path.join(GO_DEP_PATH, "bowtie2", "bowtie2")
    if os.path.isfile(try_this_bin) and executable(try_this_bin):
        options.which_bowtie2 = os.path.split(try_this_bin)[0]
# Same fallback for SPAdes, looking for GO_DEP_PATH/SPAdes/bin/spades.py.
if not options.which_spades:
    try_this_bin = os.path.join(GO_DEP_PATH, "SPAdes", "bin", "spades.py")
    if os.path.isfile(try_this_bin) and executable(try_this_bin):
        options.which_spades = os.path.split(try_this_bin)[0]
# A missing tool only produces a warning here; execution continues.
# NOTE(review): the warning text has no trailing newline.
if not executable(os.path.join(options.which_bowtie2, "bowtie2")):
    sys.stdout.write("Warning: " +
                     os.path.join(options.which_bowtie2, "bowtie2") +
                     " not accessible!")
if not executable(os.path.join(options.which_spades, "spades.py")):
    sys.stdout.write("Warning: " +
                     os.path.join(options.which_spades, "spades.py") +
                     " not accessible!")

# Open "<output_sh_file>.sh" for writing (truncates an existing file).
# NOTE(review): the matching close() is not visible in this span -- confirm
# the handle is closed further down.
out_f_h = open(options.output_sh_file + '.sh', 'w')
Ejemplo n.º 5
0
            else:
                sys.stdout.write("skipped\n")
            if changed:
                os.remove(seed_file)


# check BLAST and make blast database
# If a bundled BLAST directory exists under DEP_DIR/SYSTEM_NAME, both tools
# must be present in it; each one is given mode 0o755 (rwxr-xr-x).
if os.path.exists(os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast")):
    files_to_check = ["blastn", "makeblastdb"]
    for check_f in files_to_check:
        check_file_path = os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast", check_f)
        if not os.path.exists(check_file_path):
            raise EnvironmentError(check_file_path + " not exists!")
        os.chmod(check_file_path, 0o755)
# TODO set overwrite=True
# Prefer the bundled BLAST directory; otherwise pass which_blast="" for a
# system-wide installation. The database is overwritten unless RESUME is set.
# NOTE(review): the fallback branch tests "blastn", while the first branch and
# the error message below are about "makeblastdb" -- confirm which binary
# initialize_notation_database() actually needs.
if executable(os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast", "makeblastdb")):
    initialize_notation_database(which_blast=os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast"), overwrite=not RESUME)
elif executable("blastn"):
    initialize_notation_database(which_blast="", overwrite=not RESUME)
else:
    # NOTE(review): the download URL below points at the "magicblast"
    # executables directory -- confirm this is the intended BLAST distribution.
    raise EnvironmentError("makeblastdb not found in the $PATH nor in " +
                           os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast") + "!\n"
                           "change directory to GetOrganelle and git clone git://github.com/Kinggerm/GetOrganelleDep\n"
                           "or get BLAST via http://ftp.ncbi.nlm.nih.gov/blast/executables/magicblast/LATEST")

# check Bowtie2 and build seed index
if os.path.exists(os.path.join(DEP_DIR, SYSTEM_NAME, "bowtie2")):
    files_to_check = ["bowtie2", "bowtie2-align-l", "bowtie2-build", "bowtie2-build-l"]
    for check_f in files_to_check:
        check_file_path = os.path.join(DEP_DIR, SYSTEM_NAME, "bowtie2", check_f)
        if not os.path.exists(check_file_path):