def main():
    """Run the contig-joining pipeline on ``options.in_fastg_file``.

    Steps, in order:

    1. Print the banner and call ``require_commands()`` (presumably this
       populates the module-level ``options`` -- confirm against its
       definition).
    2. Resolve the BLAST directory: when ``options.which_blast`` is empty,
       try the bundled ``GO_DEP_PATH/ncbi-blast``; then require both
       ``blastn`` and ``makeblastdb`` to be executable there.
    3. Call ``del_complementary``, ``check_db``, ``read_fasta`` and
       ``blast_and_call_new_matrix`` and write the resulting matrix with
       ``write_fasta`` to ``<input>.Ncontigs_added.<input extension>``.
    4. Call ``remove_temp_files`` and print the elapsed wall time.

    Exits with status 1 (via ``sys.exit``) when a required BLAST binary is
    not accessible, so calling shells/pipelines can detect the failure.
    """
    global options
    time0 = time.time()
    sys.stdout.write(
        "\nThis script would join the spades fastg contigs according to the reference."
        "\nIt would add extra gap nodes (N) and/or overlap nodes (?) in between the connectible nodes and generate "
        " a new fastg file."
        "\n"
        "\nThis is a BETA version:"
        "\nAlthough it will not produce error connections, it usually replicates the same right connection."
        "\nDon't be surprised if you find any other bugs.\n")
    require_commands()

    # Fall back to the BLAST shipped with the dependency bundle when the
    # user did not point to one explicitly.
    if not options.which_blast:
        try_this_bin = os.path.join(GO_DEP_PATH, "ncbi-blast", "blastn")
        if os.path.isfile(try_this_bin) and executable(try_this_bin):
            options.which_blast = os.path.split(try_this_bin)[0]

    # Both tools are needed later; an empty which_blast makes the joined
    # path a bare command name.
    for tool_name in ("blastn", "makeblastdb"):
        tool_path = os.path.join(options.which_blast, tool_name)
        if not executable(tool_path):
            sys.stdout.write(tool_path + " not accessible!")
            # Non-zero status: this is an error, not a normal termination.
            sys.exit(1)

    # fastg to fasta
    fasta_file = options.in_fastg_file
    del_complementary(fasta_file)

    # make blast database if not made
    include_index = check_db(which_blast=options.which_blast)
    len_db = len(read_fasta(options.reference_fa_base)[1][0])

    # make blast
    new_fasta_matrix = blast_and_call_new_matrix(
        fasta_file=fasta_file,
        index_files=include_index,
        out_file=fasta_file + '.blast_in',
        len_db=len_db,
        which_blast=options.which_blast)

    # write out fastg
    out_name = fasta_file + '.Ncontigs_added.' + fasta_file.split('.')[-1]
    write_fasta(out_file=out_name, matrix=new_fasta_matrix, overwrite=False)

    remove_temp_files(fasta_file)
    sys.stdout.write('\n\nTotal cost: ' + str(time.time() - time0) + '\n\n')
def require_options():
    """Parse and validate the command line of rm_low_coverage_duplicated_contigs.py.

    Builds an ``OptionParser``, parses ``sys.argv`` and resolves the BLAST
    directory: when ``--which-blast`` is not given, the bundled
    ``GO_DEP_PATH/ncbi-blast`` is tried first; afterwards both ``blastn``
    and ``makeblastdb`` must be executable under ``options.which_blast``
    (an empty value yields the bare command name).

    Returns:
        tuple: ``(options, args)`` as produced by ``parser.parse_args()``;
        ``args`` holds the positional arguments (the usage string asks for
        ``*.fastg`` files).

    Exits with status 1 (via ``sys.exit``) when no positional argument is
    given or when a required BLAST binary is not accessible.
    """
    usage = "Usage: rm_low_coverage_duplicated_contigs.py *.fastg"
    parser = OptionParser(usage=usage)
    # type=float keeps user-supplied thresholds the same type as the
    # defaults; without it optparse would hand them over as strings.
    parser.add_option('--cov-t', dest='coverage_threshold', default=0.12, type=float,
                      help='With ratio (coverage of query/coverage of subject) below which, '
                           'the query would be exposed to discarded. Default: 0.12')
    parser.add_option('--len-t', dest='length_threshold', default=0.9, type=float,
                      help='With overlap (length of hit of query/ length of query) above which, '
                           'the query would be exposed to discarded. Default: 0.9')
    parser.add_option('--blur', dest='blur_bases', default=False, action='store_true',
                      help='Replace hit low-coverage bases with N.')
    parser.add_option('--keep-temp', dest='keep_temp', default=False, action='store_true',
                      help='Keep temp blast files.')
    parser.add_option("--which-blast", dest="which_blast", default="",
                      help="Assign the path to BLAST binary files if not added to the path.")
    parser.add_option('-o', dest='output_dir',
                      help='Output directory. Default: along with the original file')
    parser.add_option('-t', '--threads', dest="threads", default=4, type=int,
                      help="Threads of blastn.")
    options, args = parser.parse_args()

    if not args:
        parser.print_help()
        sys.stdout.write('\n######################################\nERROR: Insufficient REQUIRED arguments!\n\n')
        sys.exit(1)

    # Prefer the BLAST shipped with the dependency bundle when the user did
    # not specify a location.
    if not options.which_blast:
        try_this_bin = os.path.join(GO_DEP_PATH, "ncbi-blast", "blastn")
        if os.path.isfile(try_this_bin) and executable(try_this_bin):
            options.which_blast = os.path.split(try_this_bin)[0]

    for tool_name in ("blastn", "makeblastdb"):
        tool_path = os.path.join(options.which_blast, tool_name)
        if not executable(tool_path):
            sys.stdout.write(tool_path + " not accessible!")
            sys.exit(1)

    # NOTE: this parser defines no `treat_no_hits` option, so that attribute
    # is deliberately not validated here -- reading it would raise
    # AttributeError on every run.
    return options, args
def require_options():
    """Parse and validate the command line of rm_low_coverage_duplicated_contigs.py.

    Builds an ``ArgumentParser``, parses ``sys.argv`` and resolves the BLAST
    directory: when ``--which-blast`` is not given, the bundled
    ``GO_DEP_PATH/ncbi-blast/blastn`` is probed with ``-version`` and its
    directory is adopted unless the probe fails to start or reports
    "not found"; afterwards both ``blastn`` and ``makeblastdb`` must be
    executable under ``options.which_blast`` (an empty value yields the bare
    command name).

    Returns:
        tuple: ``(options, options.assemblies)`` -- the parsed namespace and
        the list of positional FASTG paths.

    Exits with status 1 (via ``sys.exit``) when no assembly is given or when
    a required BLAST binary is not accessible.
    """
    usage = "Usage: rm_low_coverage_duplicated_contigs.py *.fastg"
    parser = ArgumentParser(usage=usage)
    parser.add_argument(
        'assemblies', metavar='assemblies', type=str, nargs='+',
        help="Input FASTG format assembly graph files (split the files by spaces).")
    # type=float keeps user-supplied thresholds the same type as the
    # defaults; without it argparse would hand them over as strings.
    parser.add_argument(
        '--cov-t', dest='coverage_threshold', default=0.12, type=float,
        help='With ratio (coverage of query/coverage of subject) below which, '
             'the query would be exposed to discarded. Default: 0.12')
    parser.add_argument(
        '--len-t', dest='length_threshold', default=0.9, type=float,
        help='With overlap (length of hit of query/ length of query) above which, '
             'the query would be exposed to discarded. Default: 0.9')
    parser.add_argument('--blur', dest='blur_bases', default=False, action='store_true',
                        help='Replace hit low-coverage bases with N.')
    parser.add_argument('--keep-temp', dest='keep_temp', default=False, action='store_true',
                        help='Keep temp blast files.')
    parser.add_argument(
        "--which-blast", dest="which_blast", default="",
        help="Assign the path to BLAST binary files if not added to the path.")
    parser.add_argument(
        '-o', dest='output_dir',
        help='Output directory. Default: along with the original file')
    parser.add_argument('-t', '--threads', dest="threads", default=4, type=int,
                        help="Threads of blastn.")
    parser.add_argument(
        "-v", "--version", action="version",
        version="GetOrganelle v{version}".format(version=get_versions()))
    options = parser.parse_args()

    if not options.assemblies:
        parser.print_help()
        sys.stdout.write(
            '\n######################################\nERROR: Insufficient REQUIRED arguments!\n\n')
        sys.exit(1)

    if not options.which_blast:
        try_this_bin = os.path.join(GO_DEP_PATH, "ncbi-blast", "blastn")
        if os.path.isfile(try_this_bin) and executable(try_this_bin):
            # Run the bundled binary once to make sure it can actually start
            # on this machine. An argument list (no shell) keeps paths that
            # contain spaces or shell metacharacters intact.
            try:
                output, _ = subprocess.Popen(
                    [try_this_bin, "-version"],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT).communicate()
            except OSError as launch_error:
                # The file exists but could not be executed; report it and
                # fall through to whatever is on $PATH.
                sys.stdout.write(str(launch_error) + "\n")
            else:
                report = output.decode("utf8")
                if "not found" in report:
                    sys.stdout.write(report + "\n")
                else:
                    options.which_blast = os.path.split(try_this_bin)[0]

    for tool_name in ("blastn", "makeblastdb"):
        tool_path = os.path.join(options.which_blast, tool_name)
        if not executable(tool_path):
            sys.stdout.write(tool_path + " not accessible!")
            sys.exit(1)

    # NOTE: this parser defines no `treat_no_hits` argument, so that
    # attribute is deliberately not validated here -- reading it would raise
    # AttributeError on every run.
    return options, options.assemblies
dest="which_spades", default="", help="Assign the path to SPAdes binary files if not added to the path. " "Default: try GetOrganelleDep/" + SYSTEM_NAME + "/SPAdes first, then $PATH") options, args = parser.parse_args() if not (options.seed_dir and options.fastq_file_1 and options.fastq_file_2 and options.output_sh_file): parser.print_help() sys.stdout.write('\nERROR: Insufficient arguments!\n') exit() if options.fastq_file_1 == options.fastq_file_2: raise IOError("1st fastq file should NOT be the same with 2nd fastq file!") if not options.which_bowtie2: try_this_bin = os.path.join(GO_DEP_PATH, "bowtie2", "bowtie2") if os.path.isfile(try_this_bin) and executable(try_this_bin): options.which_bowtie2 = os.path.split(try_this_bin)[0] if not options.which_spades: try_this_bin = os.path.join(GO_DEP_PATH, "SPAdes", "bin", "spades.py") if os.path.isfile(try_this_bin) and executable(try_this_bin): options.which_spades = os.path.split(try_this_bin)[0] if not executable(os.path.join(options.which_bowtie2, "bowtie2")): sys.stdout.write("Warning: " + os.path.join(options.which_bowtie2, "bowtie2") + " not accessible!") if not executable(os.path.join(options.which_spades, "spades.py")): sys.stdout.write("Warning: " + os.path.join(options.which_spades, "spades.py") + " not accessible!") out_f_h = open(options.output_sh_file + '.sh', 'w')
else: sys.stdout.write("skipped\n") if changed: os.remove(seed_file) # check BLAST and make blast database if os.path.exists(os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast")): files_to_check = ["blastn", "makeblastdb"] for check_f in files_to_check: check_file_path = os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast", check_f) if not os.path.exists(check_file_path): raise EnvironmentError(check_file_path + " not exists!") os.chmod(check_file_path, 0o755) # TODO set overwrite=True if executable(os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast", "makeblastdb")): initialize_notation_database(which_blast=os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast"), overwrite=not RESUME) elif executable("blastn"): initialize_notation_database(which_blast="", overwrite=not RESUME) else: raise EnvironmentError("makeblastdb not found in the $PATH nor in " + os.path.join(DEP_DIR, SYSTEM_NAME, "ncbi-blast") + "!\n" "change directory to GetOrganelle and git clone git://github.com/Kinggerm/GetOrganelleDep\n" "or get BLAST via http://ftp.ncbi.nlm.nih.gov/blast/executables/magicblast/LATEST") # check Bowtie2 and build seed index if os.path.exists(os.path.join(DEP_DIR, SYSTEM_NAME, "bowtie2")): files_to_check = ["bowtie2", "bowtie2-align-l", "bowtie2-build", "bowtie2-build-l"] for check_f in files_to_check: check_file_path = os.path.join(DEP_DIR, SYSTEM_NAME, "bowtie2", check_f) if not os.path.exists(check_file_path):