# NOTE(review): fragment of a FASTQ read filter/trim script, collapsed onto one
# physical line (original newlines lost); it is truncated at both edges (starts
# mid-add_argument, ends mid-write call), so only comments are added here.
#
# Visible behavior: compiles the regex N+$ (run of Ns at the END of the read),
# derives an output path "<out_dir><prefix>.filtered<ext>" from the single-end
# input, then streams 4-line FASTQ records via read_entry(). Records with no
# trailing-N run are written unchanged; records whose non-N prefix
# (match.start()) is at least --min_len are written trimmed (the trailing
# write call is cut off by the fragment boundary). Reads shorter than min_len
# after trimming are presumably dropped -- TODO confirm against the full file.
# NOTE(review): se_in_fd/se_out_fd are opened without with/try-finally; the
# close presumably happens past the end of this fragment -- verify.
default="./", help="Directory to write output") parser.add_argument("-m", "--min_len", action="store", dest="min_len", type=int, default=1, help="Minimum length of read to output") args = parser.parse_args() n_regexp = re.compile("N+$") if args.input_se: se_directory, se_prefix, se_extension = split_filename(args.input_se) se_in_fd = open(args.input_se, "r") se_out_file = "%s%s.filtered%s" % (check_path( args.out_dir), se_prefix, se_extension) se_out_fd = open(se_out_file, "w") while True: name, sequence, separator, quality = read_entry(se_in_fd) if name is None: break match = n_regexp.search(sequence) if match is None: se_out_fd.write("%s\n%s\n%s\n%s\n" % (name, sequence, separator, quality)) elif match.start() >= args.min_len: se_out_fd.write("%s\n%s\n%s\n%s\n" %
# NOTE(review): fragment of a substitution-summary script, collapsed onto one
# physical line; truncated at both edges (starts mid-help string, ends at
# "for line in file_fd:" whose body is out of view), so only comments are added.
#
# Visible behavior: finishes argument parsing, expands args.input into a file
# list, optionally loads a gene-alias SynDict (split_values=False keeps alias
# values as single strings), opens stdout or a file for output, then builds a
# per-input-file row name for a TwoLvlDict summary. Name construction honors
# --write_dir_path / --write_ext flags and strips args.suffix_to_remove via
# str.replace (removes EVERY occurrence, not just a suffix -- presumably
# intentional; verify). The first line of each input file is skipped
# (file_fd.readline()) -- presumably a header row; the per-line parsing lives
# past the end of this fragment.
help="Remove nucleotide substitutions from output(preserve only AA substitutions)") parser.add_argument("-c", "--convert_aa_to_single_letter", action="store_true", dest="convert_to_single_letter", help="Convert aminoacids to single letters") args = parser.parse_args() args.input = make_list_of_path_to_files(args.input) gene_alias_dict = SynDict() if args.gene_alias_file: gene_alias_dict.read(args.gene_alias_file, split_values=False) out_fd = sys.stdout if args.output == "stdout" else open(args.output, "w") summary_dict = TwoLvlDict() for filename in args.input: directory, prefix, extension = split_filename(filename) if args.write_dir_path and args.write_ext: name = filename elif args.write_dir_path: name = (directory + prefix) if directory else prefix elif args.write_ext: name = prefix + extension else: name = prefix if args.suffix_to_remove in name: name = name.replace(args.suffix_to_remove, "") summary_dict[name] = OrderedDict() with open(filename, "r") as file_fd: file_fd.readline() for line in file_fd:
# NOTE(review): fragment of a TRF (Tandem Repeats Finder) driver script,
# collapsed onto one physical line; the parallel branch is cut off mid-call,
# so only comments are added.
#
# Visible behavior: single-threaded path runs TRF.search_tandem_repeats with
# the seven classic TRF scoring parameters, then reconstructs TRF's default
# .dat report name "<input_basename>.<w>.<m>.<i>.<pm>.<pi>.<score>.<maxperiod>.dat"
# and converts it to args.output_prefix; multi-threaded path delegates to
# TRF.parallel_search_tandem_repeat (arguments continue past this fragment).
# NOTE(review): disable_html_output=args.enable_html_output pairs a "disable"
# keyword with an "enable"-named flag -- possibly an inverted boolean, or the
# dest is just misleadingly named; confirm against the parser definition
# before changing. Also note make_dat_file=True is hard-coded here.
if args.threads == 1: TRF.search_tandem_repeats( args.input_file, matching_weight=args.matching_weight, mismatching_penalty=args.mismatching_penalty, indel_penalty=args.indel_penalty, match_probability=args.matching_probability, indel_probability=args.indel_probability, min_alignment_score=args.min_score, max_period=args.max_period_size, report_flanking_sequences=args.report_flanking_sequences, make_dat_file=True, disable_html_output=args.enable_html_output) trf_report = "%s.%i.%i.%i.%i.%i.%i.%i.dat" % ( split_filename(args.input_file)[1] + split_filename(args.input_file)[2], args.matching_weight, args.mismatching_penalty, args.indel_penalty, args.matching_probability, args.indel_probability, args.min_score, args.max_period_size) TRF.convert_trf_report(trf_report, args.output_prefix) else: TRF.parallel_search_tandem_repeat( args.input_file, args.output_prefix, matching_weight=args.matching_weight, mismatching_penalty=args.mismatching_penalty, indel_penalty=args.indel_penalty, match_probability=args.matching_probability,
# Collect leaf names from every tree file in a directory and write a
# tree-name -> leaf-names mapping (SynDict) to stdout or a file.
#
# Inputs (CLI):
#   -i/--tree_dir     directory containing one Newick tree per file (required;
#                     check_path presumably normalizes the trailing slash)
#   -f/--tree_format  ete3 Newick flavor passed to Tree(format=...) (default 1)
#   -o/--output_file  destination path, or "stdout" (default)
parser = argparse.ArgumentParser()

parser.add_argument("-i", "--tree_dir", action="store", dest="tree_dir", required=True,
                    type=check_path,
                    help="Directory with trees")
parser.add_argument("-f", "--tree_format", action="store", dest="tree_format", default=1, type=int,
                    help="Format of input trees")
parser.add_argument("-o", "--output_file", action="store", dest="output_file", default="stdout",
                    help="Output file with leaves of trees. Default: stdout")

args = parser.parse_args()

out_fd = sys.stdout if args.output_file == "stdout" else open(args.output_file, "w")

tree_files_list = os.listdir(args.tree_dir)

names_dict = SynDict()
for tree_file in tree_files_list:
    # Key each entry by the file's basename without extension.
    tree_name = split_filename(tree_file)[1]
    # Only the first line of each file is read -- one Newick string per file.
    with open("%s%s" % (args.tree_dir, tree_file), "r") as tree_fd:
        tree = Tree(tree_fd.readline().strip(), format=args.tree_format)
    # Keep leaf nodes only; internal nodes are skipped.
    names_dict[tree_name] = [node.name for node in tree.traverse() if node.is_leaf()]

# BUG FIX: the original called names_dict.write(args.outp_fd, ...), but no
# option stores to "outp_fd" (the -o option stores to "output_file"), so this
# always raised AttributeError. Write to the stream opened above instead.
names_dict.write(out_fd, splited_values=True)

if args.output_file != "stdout":
    out_fd.close()
# NOTE(review): fragment of an iterative-assembly (MaSuRCA-based) script,
# collapsed onto one physical line; it starts mid-statement, contains a stray
# triple-quote whose opening/closing partner is outside this view (presumably
# a commented-out block boundary -- verify), and the loop body continues past
# the fragment end. Only comments are added.
#
# Visible behavior: for each iteration 1..number_of_iterations-1 (note: range
# stop excludes the last index -- confirm that is intentional), builds the
# per-iteration directory/file-name scheme (reference fasta, BioPython
# index_db .idx sqlite index, reverse-complement-augmented reference, k-mer
# file, masurca config, per-side read-file names), creates the iteration
# directory (EAFP: existing dir is tolerated via the ignored OSError), copies
# iteration_reference_file (defined before this fragment) into place, chdirs
# into the iteration dir, and opens the reference via SeqIO.index_db.
abs_path_source_reads.append("%s/%s" % (working_dir, filename)) """ for iteration_index in range(1, args.number_of_iterations): os.chdir(working_dir) iteration = "iteration_%i" % iteration_index iteration_dir = "%s/%s" % (working_dir, iteration) iteration_ref = "%s/%s_reference.fasta" % (iteration_dir, iteration) iteration_ref_index = "%s/%s_reference.idx" % (iteration_dir, iteration) base_prefix = "%s/%s_reference_with_rev_com" % (iteration_dir, iteration) iteration_ref_with_rev_com = "%s/%s_reference_with_rev_com.fasta" % (iteration_dir, iteration) kmer_file = "%s_%i_mer.kmer" % (base_prefix, args.kmer_length) masurca_config_file = "masurca_%s.config" % iteration left_reads_prefix = split_filename(abs_path_left_source_reads)[1] right_reads_prefix = split_filename(abs_path_right_source_reads)[1] left_reads_se = "%s.se.fastq" % left_reads_prefix right_reads_se = "%s.se.fastq" % right_reads_prefix left_reads_filtered = "%s.filtered.fastq" % left_reads_prefix right_reads_filtered = "%s.filtered.fastq" % right_reads_prefix try: os.mkdir(iteration_dir) except OSError: pass shutil.copyfile(iteration_reference_file, iteration_ref) os.chdir(iteration_dir) iteration_reference_dict = SeqIO.index_db(iteration_ref_index, iteration_ref, format="fasta")
# NOTE(review): fragment of a gene-family (.fam) summary script, collapsed
# onto one physical line; truncated at both edges (starts mid-help string,
# ends mid-loop right after fam_dict.read), so only comments are added.
# NOTE(review): the -o/--output help string "Suffix of fam files" is clearly
# copy-pasted from the suffix option and should describe the output file --
# it is a runtime string, so it is left untouched in this comment-only edit.
#
# Visible behavior: opens stdout or a file for output, then builds the list of
# species names either from basenames of files in args.input (with each file's
# own extension as its suffix) or from the sorted --species_set combined with
# the single shared --suffix. Writes a TSV header, then for each species loads
# "<input><species><suffix>" as a SynDict (tab-separated, comma-split values,
# key column 0, value column 1); the per-family counting continues past the
# end of this fragment.
help="Suffix of fam files. Default: .fam") parser.add_argument("-o", "--output", action="store", dest="output", default="stdout", help="Suffix of fam files") args = parser.parse_args() out_fd = sys.stdout if args.output == "stdout" else open(args.output, "w") species_list = [] suffix_list = [] if args.use_basename: for filename in sorted(os.listdir(args.input)): dir, basename, ext = split_filename(filename) species_list.append(basename) suffix_list.append("%s" % ext) else: species_list = sorted(args.species_set) suffix_list = [args.suffix for i in range(0, len(species_list))] out_fd.write("#species\tnumber_of_families\tnumber_of_proteins\n") for species, suffix in zip(species_list, suffix_list): fam_dict = SynDict() fam_dict.read("%s%s%s" % (args.input, species, suffix), separator="\t", split_values=True, values_separator=",", key_index=0, value_index=1)