dest="label_ids", help="Label ids by species names. Default - don't label")
# -g: separator used when labeling ids; falls back to "@" when not given.
parser.add_argument("-g", "--separator_for_labeling", action="store", dest="separator_for_labeling",
                    default="@",
                    help="Separator to use for labeling. Default - '@'")
# -r is a negative switch: it stores False into args.label_first, which is True by default.
parser.add_argument("-r", "--label_last", action="store_false", dest="label_first", default=True,
                    help="Place label at the end of id")

args = parser.parse_args()

# FileRoutines.safe_mkdir is called on the output directory before extraction
# (presumably it creates the directory if it is missing - confirm in FileRoutines).
FileRoutines.safe_mkdir(args.output_dir)

# All parsed options are forwarded to the extraction routine; the three
# positional arguments are the cluster directory, the sequence directory and
# the output directory, in that order.
SequenceClusterRoutines.extract_sequences_by_clusters(
    args.input_cluster_dir, args.input_seq_dir, args.output_dir,
    file_with_white_list_cluster_ids=args.white_list_ids,
    mode=args.mode,
    sequence_file_extension=args.seq_extension,
    sequence_file_format=args.format,
    label_species=args.label_ids,
    separator_for_labeling=args.separator_for_labeling,
    species_label_first=args.label_first)
print("Drawing histograms...")

# Stats tables that each get a percent histogram. The same file name is passed
# as the first and the second positional argument of the plotting routine.
stat_files_to_plot = (
    output_evidence_stats,
    output_supported_stats,
    output_swissprot_pfam_or_hints_supported_transcripts_longest_pep_evidence,
    output_swissprot_pfam_and_hints_supported_transcripts_longest_pep_evidence,
    output_swissprot_pfam_or_hints_supported_transcripts_evidence,
    output_swissprot_pfam_and_hints_supported_transcripts_evidence,
)

# Plot options shared by every histogram.
histogram_options = dict(
    data_type=None,
    column_list=(2, ),
    comments="#",
    n_bins=20,
    title="Transcript support by hints",
    extensions=("png", "svg"),
    legend_location="upper center",
    stats_as_legend=True,
)

for stat_file in stat_files_to_plot:
    MatplotlibRoutines.percent_histogram_from_file(stat_file, stat_file, **histogram_options)

print("Creating final directories...")
if args.pfam_db and args.swissprot_db:
    db_or_hints_dir = "supported_by_db_or_hints/"
    db_and_hints_dir = "supported_by_db_and_hints/"
    for directory in db_and_hints_dir, db_or_hints_dir:
        FileRoutines.safe_mkdir(directory)

    # Each shell command moves every file matching the glob into its directory.
    # NOTE(review): args.output is interpolated into the shell command unquoted
    # and the exit status of os.system is ignored - a prefix containing spaces
    # or shell metacharacters would not be handled.
    move_jobs = (
        ("mv %s.supported.transcripts.swissprot_or_pfam_or_hints* %s", db_or_hints_dir),
        ("mv %s.supported.transcripts.swissprot_or_pfam_and_hints* %s", db_and_hints_dir),
    )
    for command_template, target_dir in move_jobs:
        os.system(command_template % (args.output, target_dir))
"--min_species_number", action="store", dest="min_species_number", default=1, type=int, help="Minimum number of species with family to retain family. Default: 1")
# -f: directory for the filtered-family output; the value goes through
# FileRoutines.check_path.
parser.add_argument("-f", "--filtered_families_directory", action="store", dest="filtered_family_dir",
                    default="filtered_fam", type=FileRoutines.check_path,
                    help="Directory to write filtered_families")

args = parser.parse_args()

FileRoutines.safe_mkdir(args.filtered_family_dir)
# Sorted once; this order is reused for the species columns of both headers below.
species_list = sorted(args.species_set)

# A white list and a black list cannot be combined.
if args.white_list_file and args.black_list_file:
    raise ValueError("Black list and white list cant be set simultaneously")

# Both lists always exist; each is filled only if its file was supplied.
black_list = IdList()
white_list = IdList()

if args.black_list_file:
    black_list.read(args.black_list_file)
if args.white_list_file:
    white_list.read(args.white_list_file)

# Two tab-separated tables are opened for writing and receive the same header:
# FAMILYDESC, FAMILY, then one column per species.
# NOTE(review): the second path is built by plain string concatenation, so it
# relies on args.filtered_family_dir ending with a path separator (presumably
# added by FileRoutines.check_path) - confirm.
# Neither handle is closed within the visible fragment.
out_fd = open(args.cafe_file, "w")
filtered_fd = open("%sfiltered_families.cafe" % args.filtered_family_dir, "w")
out_fd.write("FAMILYDESC\tFAMILY\t%s\n" % ("\t".join(species_list)))
filtered_fd.write("FAMILYDESC\tFAMILY\t%s\n" % ("\t".join(species_list)))
# Starts empty; it is filled outside the visible fragment.
species_filtered_fd_list = OrderedDict()
# -d: directory for the intermediate (split) output; the value goes through
# FileRoutines.check_path.
parser.add_argument("-d", "--top_hits_dir", action="store", dest="top_hits_dir", default="top_hits_dir/",
                    type=FileRoutines.check_path,
                    help="Directory to write intermediate(splited) output")
# -r: flag stored as args.retain (False unless given).
parser.add_argument("-r", "--retain_splited_output", action="store_true", dest="retain",
                    help="Retain splited output")

args = parser.parse_args()

FileRoutines.safe_mkdir(args.top_hits_dir)


def handle_input(filename):
    # Handles one input file. Reads the module-level `args`.
    # Only the opening statements of this function are visible here; the rest
    # of its body lies outside the visible source.
    sys.stdout.write("Handling %s\n" % filename)
    not_significant_ids = IdList()
    not_found_ids = IdList()

    # Element 1 of split_filename()'s result is used as the prefix for the
    # index file and the per-file output name.
    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    # presumably Biopython's SearchIO.index_db (index file, input file, format) - confirm import
    hmm_dict = SearchIO.index_db(index_file, filename, args.format)
    # The literal output name "stdout" selects standard output; otherwise a
    # <prefix>.top_hits file is opened inside args.top_hits_dir.
    if args.output == "stdout":
        out_fd = sys.stdout
    else:
        out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
    # Header line of the four-column, tab-separated output.
    out_fd.write("#query\thit\tevalue\tbitscore\n")
action="store", dest="max_memory_per_thread", default="1G", help="Maximum memory per thread. Default - 1G")

args = parser.parse_args()

# --prepare_bam needs both a prefix for the prepared BAM and a temp directory.
if args.prepare_bam and ((not args.prepared_bam_prefix) or (not args.temp_dir)):
    raise ValueError(
        "Options -e/--prepared_bam_prefix and -m/--temp_dir must be set if -p/--prepare_bam option is used"
    )

SamtoolsV1.threads = args.threads

# NOTE(review): the check above is enforced only for --prepare_bam, yet this
# branch also runs when only args.mix_ends is set and still reads
# args.temp_dir and args.prepared_bam_prefix - confirm they are guaranteed to
# be set in that case.
if args.prepare_bam or args.mix_ends:
    FileRoutines.safe_mkdir(FileRoutines.check_path(args.temp_dir))
    prepared_pe_bam_file = "%s.bam" % args.prepared_bam_prefix
    # A separate BAM for unpaired reads is requested only with mix_ends.
    prepared_unpaired_bam_file = (
        "%s.unpaired.bam" % args.prepared_bam_prefix) if args.mix_ends else None
    # The bare string below is a no-op expression holding an earlier form of the call.
    """ SamtoolsV1.prepare_bam_for_read_extraction(args.input, args.prepared_bam, temp_file_prefix=args.temp_dir, max_memory_per_thread=args.max_memory_per_thread) """
    SamtoolsV1.prepare_bam_for_read_extraction(
        args.input, prepared_pe_bam_file,
        temp_file_prefix=args.temp_dir,
        max_memory_per_thread=args.max_memory_per_thread,
        bam_file_to_write_unpaired_reads=prepared_unpaired_bam_file)

# The body of this branch lies outside the visible source.
if args.paired:
dest="output", type=FileRoutines.check_path, help="Output directory")
#parser.add_argument("-p", "--convert_options", action="store", dest="convert_options",
#                    help="Options for convert")
# -d: flag stored as args.dont_negative; selects the first branch below.
parser.add_argument("-d", "--dont_make_negative", action="store_true", dest="dont_negative",
                    help="Dont make negative")

args = parser.parse_args()

# Intermediate results go to a hard-coded relative directory.
temp_dir = "temp/"
FileRoutines.safe_mkdir(temp_dir)

# Both converters get the same thread count.
BioConvert.threads = args.threads
Convert.threads = args.threads

# First pass: args.input is converted into temp_dir.
BioConvert.parallel_convert(args.input, temp_dir)

if args.dont_negative:
    # No second pass: the temp directory itself is renamed to the output path.
    os.rename(temp_dir, args.output)
else:
    # os.listdir returns bare names, so temp_dir is prefixed back onto each
    # before the second pass.
    converted_files = os.listdir(temp_dir)
    converted_files = list(
        map(lambda s: "%s%s" % (temp_dir, s), converted_files))
    Convert.parallel_convert(
        converted_files, args.output,
STAR.path = args.star_dir

# STAR.index is called only when a genome FASTA was supplied.
if args.genome_fasta:
    STAR.index(args.genome_dir, args.genome_fasta, annotation_gtf=args.annotation_gtf,
               junction_tab_file=args.junction_tab_file, sjdboverhang=None,
               genomeSAindexNbases=None, genomeChrBinNbits=None, genome_size=args.genome_size)

# An explicit sample list takes precedence; otherwise the list is obtained
# from the samples directory.
sample_list = args.samples if args.samples else Pipeline.get_sample_list(
    args.samples_dir)

FileRoutines.safe_mkdir(args.output_dir)

for sample in sample_list:
    print("Handling %s" % sample)

    # Per-sample input and output directories, both with a trailing slash.
    sample_dir = "%s/%s/" % (args.samples_dir, sample)
    alignment_sample_dir = "%s/%s/" % (args.output_dir, sample)

    FileRoutines.safe_mkdir(alignment_sample_dir)
    # Four values are unpacked; of these only se_files appears in the visible
    # part of the call below.
    filetypes, forward_files, reverse_files, se_files = FileRoutines.make_lists_forward_and_reverse_files(
        sample_dir)

    print("\tAligning reads...")

    STAR.align_miRNA(
        args.genome_dir, se_files, output_dir=alignment_sample_dir,
args = parser.parse_args()

Trimmomatic.jar_path = args.path_to_trimmomatic_dir
Trimmomatic.threads = args.threads
#print(Trimmomatic.path)
#print(Trimmomatic.jar_path)

# Samples come from a comma-separated option if given, otherwise from every
# entry in the samples directory.
samples = args.samples.split(",") if args.samples else os.listdir(args.samples_dir)

for sample in samples:
    print("Handling %s" % sample)

    # NOTE(review): both paths are built by plain concatenation, so
    # args.samples_dir and args.output_dir are assumed to end with a path
    # separator - confirm how these options are normalised.
    sample_dir = "%s%s/" % (args.samples_dir, sample)
    sample_out_dir = "%s%s/" % (args.output_dir, sample)

    FileRoutines.safe_mkdir(sample_out_dir)

    # sample_out_dir already ends with "/", so these two paths contain "//".
    trimmomatic_log = "%s/trimmomatic.log" % sample_out_dir
    trimmomatic_time_log = "%s/trimmomatic.time.log" % sample_out_dir
    output_prefix = "%s%s.TMF" % (sample_out_dir, sample)

    files_from_sample_dir = os.listdir(sample_dir)
    left_reads_file = None
    right_reads_file = None

    # Read files are recognised by a substring anywhere in the file name.
    # If several names match, the last one listed wins. The stored values are
    # bare file names without sample_dir.
    for filename in files_from_sample_dir:
        if ("_1.fq" in filename) or ("_1.fastq" in filename):
            left_reads_file = filename
        elif ("_2.fq" in filename) or ("_2.fastq" in filename):
            right_reads_file = filename
    # Case where neither read file was found; the body of this branch lies
    # outside the visible source.
    if (left_reads_file is None) and (right_reads_file is None):
dest="blast_dir", default="", help="Directory with BLAST+ binaries")

args = parser.parse_args()

# Elements 1 and 2 of split_filename()'s result are joined into the input name
# (presumably base name and extension - confirm in FileRoutines).
input_filename_list = FileRoutines.split_filename(args.input)
input_filename = input_filename_list[1] + input_filename_list[2]

# workdir_dir already ends with "/", so the peptide path contains "//".
workdir_dir = "%s.transdecoder_dir/" % input_filename
pep_from_longest_orfs = "%s/longest_orfs.pep" % workdir_dir

# Relative working directories for the two searches.
hmmscan_dir = "hmmscan_vs_pfam/"
blastp_dir = "blastp_vs_uniref/"
FileRoutines.safe_mkdir(hmmscan_dir)
FileRoutines.safe_mkdir(blastp_dir)

# hmmscan paths, all placed under hmmscan_dir.
hmmscan_splited_fasta_dir = "%ssplited_fasta_dir/" % hmmscan_dir
splited_domtblout_dir = "%ssplited_domtblout_dir/" % hmmscan_dir
hmmscan_vs_pfam_prefix = "%s.pfam" % input_filename
hmmscan_vs_pfam_output = "%s/%s.hits" % (hmmscan_dir, hmmscan_vs_pfam_prefix)
domtblout_outfile = "%s/%s.domtblout" % (hmmscan_dir, hmmscan_vs_pfam_prefix)

# The blastp hits file is defined only when a BLAST database was given;
# otherwise it is None.
blastp_outfile = "%s%s.blastp.hits" % (
    blastp_dir, input_filename) if args.blast_database else None
blastp_split_dir = "%ssplited_fasta_dir/" % blastp_dir
# NOTE(review): unlike the other directory strings here, this one has no
# trailing "/" - confirm whether that is intended.
blastp_splited_output_dir = "%ssplited_output_dir" % blastp_dir

HMMER3.path = args.hmmer_dir
HMMER3.threads = args.threads