parser.add_argument("-s", "--suffix_to_remove", action="store", dest="suffix_to_remove", help="Suffix to remove from filename(not count extension)") parser.add_argument("-g", "--gene_alias_file", action="store", dest="gene_alias_file", help="File with aliases of genes (tab-separated, one alias per gene, no header)") parser.add_argument("-w", "--write_dir_path", action="store_true", dest="write_dir_path", help="write directory name(if directory is source of vcf files) in output file. Default: false") parser.add_argument("-e", "--write_ext", action="store_true", dest="write_ext", help="write extensions of vcf files in output file. Default: false") parser.add_argument("-r", "--remove_nucleotide_substitutions", action="store_true", dest="rem_nuc_sub", help="Remove nucleotide substitutions from output(preserve only AA substitutions)") parser.add_argument("-c", "--convert_aa_to_single_letter", action="store_true", dest="convert_to_single_letter", help="Convert aminoacids to single letters") args = parser.parse_args() args.input = make_list_of_path_to_files(args.input) gene_alias_dict = SynDict() if args.gene_alias_file: gene_alias_dict.read(args.gene_alias_file, split_values=False) out_fd = sys.stdout if args.output == "stdout" else open(args.output, "w") summary_dict = TwoLvlDict() for filename in args.input: directory, prefix, extension = split_filename(filename) if args.write_dir_path and args.write_ext: name = filename elif args.write_dir_path: name = (directory + prefix) if directory else prefix elif args.write_ext:
#!/usr/bin/env python __author__ = 'Sergei F. Kliver' import argparse from RouToolPa.Tools.Abstract import Tool from RouToolPa.Routines.File import make_list_of_path_to_files parser = argparse.ArgumentParser() parser.add_argument( "-a", "--group_a_list", action="store", dest="group_a_list", required=True, type=lambda s: make_list_of_path_to_files(s.split(",")), help="Comma-separated list of files/directories with ids from group A") parser.add_argument( "-b", "--group_b_list", action="store", dest="group_b_list", required=True, type=lambda s: make_list_of_path_to_files(s.split(",")), help="Comma-separated list of files/directories with ids from group B") parser.add_argument("-o", "--output", action="store", dest="output", help="Output file with allowed ids. Default - stdout") parser.add_argument( "-m",
"--write_dir_path", action="store_true", dest="write_dir_path", help= "Write directory name(if directory is source of files) in output file. Default: false" ) parser.add_argument( "-e", "--write_ext", action="store_true", dest="write_ext", help= "Write extensions of files with sequences in output file. Default: false") args = parser.parse_args() files_list = sorted(make_list_of_path_to_files(args.input, file_filter)) out_fd = sys.stdout if args.output == "stdout" else open(args.output, "w") if args.write_header: out_fd.write("#file/sample\tnumber_of_sequences\n") for filename in files_list: if args.output != "stdout": print("Counting variants in %s ..." % filename) directory, prefix, extension = split_filename(filename) filetype = detect_filetype_by_extension(filename) number_of_sequences = 0 with open(filename, "r") as seq_fd: try: for record in SeqIO.parse(seq_fd, filetype): number_of_sequences += 1