def handle_input(filename):
    """Extract the top hit for every query in a single HMMER report file.

    Builds a temporary Biopython SearchIO index over *filename*, writes one
    TSV line (query, hit id, evalue, bitscore) per query whose best hit
    passes the inclusion threshold, and collects the rest.

    Returns a tuple (not_significant_ids, not_found_ids):
      - not_significant_ids: queries whose best hit is below the inclusion
        threshold;
      - not_found_ids: queries with no hits at all.

    Relies on module-level ``args`` (format, output, top_hits_dir) and the
    project helpers ``IdList``/``FileRoutines``/``SearchIO``.
    """
    sys.stdout.write("Handling %s\n" % filename)
    not_significant_ids = IdList()
    not_found_ids = IdList()
    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    hmm_dict = SearchIO.index_db(index_file, filename, args.format)
    if args.output == "stdout":
        out_fd = sys.stdout
    else:
        out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
        out_fd.write("#query\thit\tevalue\tbitscore\n")
    try:
        for query in hmm_dict:
            query_result = hmm_dict[query]  # hoist repeated index lookups
            if query_result.hits:
                top_hit = query_result[0]  # hits are ranked; [0] is the best
                if top_hit.is_included:
                    out_fd.write("%s\t%s\t%s\t%s\n" % (query,
                                                       top_hit.id,
                                                       top_hit.evalue,
                                                       top_hit.bitscore))
                else:
                    not_significant_ids.append(query)
            else:
                not_found_ids.append(query)
    finally:
        # Guarantee cleanup even if a write fails: never close sys.stdout,
        # and always drop the temporary SQLite index file.
        if args.output != "stdout":
            out_fd.close()
        os.remove(index_file)
    return not_significant_ids, not_found_ids
def handle_input(filename):
    """Write every included hit of every family in a single HMMER report.

    Builds a temporary Biopython SearchIO index over *filename* and emits
    one TSV line (family, hit id, evalue, bitscore) for each hit that
    passes the inclusion threshold. Unlike the top-hit variant, ALL
    included hits are reported and nothing is returned.

    Relies on module-level ``args`` (format, output, top_hits_dir) and the
    project helpers ``FileRoutines``/``SearchIO``.
    """
    sys.stdout.write("Handling %s\n" % filename)
    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    hmm_dict = SearchIO.index_db(index_file, filename, args.format)
    if args.output == "stdout":
        out_fd = sys.stdout
    else:
        out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
        out_fd.write("#query\thit\tevalue\tbitscore\n")
    try:
        for family in hmm_dict:
            for hit in hmm_dict[family]:
                if hit.is_included:
                    out_fd.write("%s\t%s\t%s\t%s\n" % (family,
                                                       hit.id,
                                                       hit.evalue,
                                                       hit.bitscore))
    finally:
        # Guarantee cleanup even if a write fails: never close sys.stdout,
        # and always drop the temporary SQLite index file.
        if args.output != "stdout":
            out_fd.close()
        os.remove(index_file)
action="store_true", dest="header", default=False, help="Set if header is present in input file") parser.add_argument("-u", "--use_column_value_as_prefix", action="store_true", dest="use_column_value_as_prefix", default=False, help="Use column value as prefix for output files") parser.add_argument( "-r", "--sorted_input", action="store_true", dest="sorted_input", default=False, help= "Input file is sorted. Do it to reduce number of simultaneously opened files" ) args = parser.parse_args() FileRoutines.split_by_column( args.input_file, args.column_number, separator=args.separator, header=args.header, outfile_prefix=args.output_prefix, use_column_value_as_prefix=args.use_column_value_as_prefix, sorted_input=args.sorted_input)
# Draw a percent histogram (hint-support distribution, column 2) for each of
# the evidence/support statistics files produced earlier in this script.
print("Drawing histograms...")
for stat_file in output_evidence_stats, output_supported_stats, \
        output_swissprot_pfam_or_hints_supported_transcripts_longest_pep_evidence, \
        output_swissprot_pfam_and_hints_supported_transcripts_longest_pep_evidence, \
        output_swissprot_pfam_or_hints_supported_transcripts_evidence, \
        output_swissprot_pfam_and_hints_supported_transcripts_evidence:
    # Output images share the stat file's path as prefix; PNG and SVG are written.
    MatplotlibRoutines.percent_histogram_from_file(
        stat_file, stat_file, data_type=None, column_list=(2, ),
        comments="#", n_bins=20, title="Transcript support by hints",
        extensions=("png", "svg"), legend_location="upper center",
        stats_as_legend=True)
# Move the db-support result files into dedicated directories, but only when
# both protein databases (Pfam and SwissProt) were actually used.
print("Creating final directories...")
if args.pfam_db and args.swissprot_db:
    db_or_hints_dir = "supported_by_db_or_hints/"
    db_and_hints_dir = "supported_by_db_and_hints/"
    for directory in db_and_hints_dir, db_or_hints_dir:
        FileRoutines.safe_mkdir(directory)
    # NOTE(review): shell glob via os.system; assumes filenames contain no
    # spaces/metacharacters — subprocess with shell=False would be safer.
    os.system("mv %s.supported.transcripts.swissprot_or_pfam_or_hints* %s" % (args.output, db_or_hints_dir))
    os.system("mv %s.supported.transcripts.swissprot_or_pfam_and_hints* %s" % (args.output, db_and_hints_dir))
dest="label_ids", help="Label ids by species names. Default - don't label") parser.add_argument("-g", "--separator_for_labeling", action="store", dest="separator_for_labeling", default="@", help="Separator to use for labeling. Default - '@'") parser.add_argument("-r", "--label_last", action="store_false", dest="label_first", default=True, help="Place label at the end of id") args = parser.parse_args() FileRoutines.safe_mkdir(args.output_dir) SequenceClusterRoutines.extract_sequences_by_clusters( args.input_cluster_dir, args.input_seq_dir, args.output_dir, file_with_white_list_cluster_ids=args.white_list_ids, mode=args.mode, sequence_file_extension=args.seq_extension, sequence_file_format=args.format, label_species=args.label_ids, separator_for_labeling=args.separator_for_labeling, species_label_first=args.label_first)
import os import argparse from RouToolPa.Tools.Filter import FaCut #from RouToolPa.Tools.Filter import FastQC from RouToolPa.Routines import FileRoutines parser = argparse.ArgumentParser() parser.add_argument("-d", "--sample_directory", action="store", dest="samples_dir", required=True, type=lambda s: FileRoutines.check_path(os.path.abspath(s)), help="Directory with samples") parser.add_argument( "-s", "--samples", action="store", dest="samples", help="Comma-separated list of subdirectories(one per sample) to handle. " "If not set all subdirectories will be considered as containing samples." "In sample directory should one(in case SE reads) or two(in case PE reads) files." "Filenames should should contain '_1.fq' or '_1.fastq' for forward(left) reads, " " '_2.fq' or '_2.fastq' for reverse(right) reads and '.fq' or '.fastq' for SE reads" ) parser.add_argument( "-o", "--output_dir",
# CLI wrapper around FileRoutines.label_column_in_file: prepend/append a label
# to the values of one column of a delimited text file.
import sys
import argparse
from RouToolPa.Routines import FileRoutines

parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input", action="store", dest="input", required=True,
                    help="Input file")
parser.add_argument("-o", "--output", action="store", dest="output", required=True,
                    help="Output file")
# Column index is 0-based, matching the library convention.
parser.add_argument("-c", "--column_index", action="store", dest="column_index",
                    required=True, type=int,
                    help="Column to label")
parser.add_argument("-l", "--label", action="store", dest="label", required=True,
                    help="Label to use")
parser.add_argument("-s", "--column_separator", action="store", dest="column_separator",
                    default="\t",
                    help="Separator used in input file. Default: TAB")
parser.add_argument("-a", "--label_separator", action="store", dest="label_separator",
                    default="@",
                    help="Label separator. Default: @")
parser.add_argument("-p", "--label_position", action="store", dest="label_position",
                    default="first",
                    help="Label position. Allowed: first(default), last")
args = parser.parse_args()

# Lines starting with '#' are treated as comments and passed through unchanged.
FileRoutines.label_column_in_file(args.input, args.label, args.column_index,
                                  args.output,
                                  column_separator=args.column_separator,
                                  label_position=args.label_position,
                                  label_separator=args.label_separator,
                                  comments_prefix="#")
parser.add_argument("-d", "--top_hits_dir", action="store", dest="top_hits_dir", default="top_hits_dir/", type=FileRoutines.check_path, help="Directory to write intermediate(splited) output") parser.add_argument("-r", "--retain_splited_output", action="store_true", dest="retain", help="Retain splited output") args = parser.parse_args() FileRoutines.safe_mkdir(args.top_hits_dir) def handle_input(filename): sys.stdout.write("Handling %s\n" % filename) not_significant_ids = IdList() not_found_ids = IdList() prefix = FileRoutines.split_filename(filename)[1] index_file = "%s.tmp.idx" % prefix hmm_dict = SearchIO.index_db(index_file, filename, args.format) if args.output == "stdout": out_fd = sys.stdout else: out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w") out_fd.write("#query\thit\tevalue\tbitscore\n")
# CLI wrapper around FileRoutines.tsv_remove_by_column_value: drop lines whose
# given column matches any of the supplied values.
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input_file", action="store", dest="input_file",
                    help="Input file")
parser.add_argument("-c", "--column_number", action="store", dest="column_number",
                    type=int,
                    help="Column number(0-based)")
# -v takes precedence over -f; the two help strings below are adjacent
# literals concatenated by Python.
parser.add_argument("-v", "--values", action="store", dest="values",
                    help="Lines with this values in corresponding column will be removed."
                         "Values should be separated by commas")
parser.add_argument("-f", "--values_file", action="store", dest="values_file",
                    help="File with values. Ignored if -v/--values is set")
parser.add_argument("-s", "--separator", action="store", dest="separator",
                    default="\t",
                    help="Separator used in input file. Default: '\t'")
parser.add_argument("-o", "--output_prefix", action="store", dest="output_prefix",
                    default=None,
                    help="Output_prefix")
parser.add_argument("-e", "--header", action="store_true", dest="header",
                    default=False,
                    help="Set if header is present in input file")
args = parser.parse_args()

# At least one source of filter values is mandatory.
if (not args.values) and (not args.values_file):
    raise ValueError("Neither -v/--values nor -f/--values_file was set")
# Inline values win; otherwise read them from the file as an IdList.
values = args.values.split(",") if args.values else IdList(filename=args.values_file)

FileRoutines.tsv_remove_by_column_value(args.input_file, args.column_number,
                                        values,
                                        separator=args.separator,
                                        header=args.header,
                                        outfile_prefix=args.output_prefix)
default=sys.stdin, help="Input file") parser.add_argument("-o", "--output", action="store", dest="output", default=sys.stdout, help="Output file") parser.add_argument("-k", "--column_number", action="store", dest="column_number", type=int, required=True, help="Column to be get values from (0-based).") parser.add_argument("-s", "--separator", action="store", dest="separator", default="\t", help="Separator used in input file. Default: TAB") args = parser.parse_args() FileRoutines.count_column_values_from_file(args.input, args.column_number, output_file=args.output, separator=args.separator, comments_prefix="#", verbose=False)
"--output", action="store", dest="output", required=True, help="File to write clusters with single-copy clusters") parser.add_argument( "-p", "--label position", action="store", dest="label_position", default="first", help="Position of label. Allowed - first, last. Default - first") parser.add_argument("-s", "--separator", action="store", dest="separator", default="@", help="Separator to use. default - '@'") args = parser.parse_args() list_of_cluster_files = FileRoutines.make_list_of_path_to_files(args.input) single_copy_clusters = SequenceClusterRoutines.extract_single_copy_clusters_from_files( list_of_cluster_files, args.output, label_elements=args.label, separator=args.separator, label_position=args.label_position) print("Was found %i single-copy clusters" % len(single_copy_clusters))
action="store", dest="max_memory_per_thread", default="1G", help="Maximum memory per thread. Default - 1G") args = parser.parse_args() if args.prepare_bam and ((not args.prepared_bam_prefix) or (not args.temp_dir)): raise ValueError( "Options -e/--prepared_bam_prefix and -m/--temp_dir must be set if -p/--prepare_bam option is used" ) SamtoolsV1.threads = args.threads if args.prepare_bam or args.mix_ends: FileRoutines.safe_mkdir(FileRoutines.check_path(args.temp_dir)) prepared_pe_bam_file = "%s.bam" % args.prepared_bam_prefix prepared_unpaired_bam_file = ( "%s.unpaired.bam" % args.prepared_bam_prefix) if args.mix_ends else None """ SamtoolsV1.prepare_bam_for_read_extraction(args.input, args.prepared_bam, temp_file_prefix=args.temp_dir, max_memory_per_thread=args.max_memory_per_thread) """ SamtoolsV1.prepare_bam_for_read_extraction( args.input, prepared_pe_bam_file, temp_file_prefix=args.temp_dir, max_memory_per_thread=args.max_memory_per_thread, bam_file_to_write_unpaired_reads=prepared_unpaired_bam_file) if args.paired:
# Script prologue: imports and (partial) CLI definition for converting .scn
# image files; the argument handling continues beyond this chunk.
__author__ = 'Sergei F. Kliver'
import os
import shutil
import argparse
from RouToolPa.Tools.ImageMagick import Convert
from RouToolPa.Tools.BioFormats import BioConvert
from RouToolPa.Routines import FileRoutines

parser = argparse.ArgumentParser()
# -i expands a comma-separated mix of files/directories into a flat file list.
parser.add_argument("-i", "--input", action="store", dest="input",
                    type=lambda x: FileRoutines.make_list_of_path_to_files(x.split(",")),
                    help="Comma-separated list of scn files")
parser.add_argument("-t", "--threads", action="store", dest="threads",
                    type=int, default=1,
                    help="Number of threads")
# check_path normalizes the directory path (e.g. ensures trailing slash).
parser.add_argument("-o", "--output_directory", action="store", dest="output",
                    type=FileRoutines.check_path,
                    help="Output directory")
STAR.path = args.star_dir if args.genome_fasta: STAR.index(args.genome_dir, args.genome_fasta, annotation_gtf=args.annotation_gtf, junction_tab_file=args.junction_tab_file, sjdboverhang=None, genomeSAindexNbases=None, genomeChrBinNbits=None, genome_size=args.genome_size) sample_list = args.samples if args.samples else Pipeline.get_sample_list( args.samples_dir) FileRoutines.safe_mkdir(args.output_dir) for sample in sample_list: print("Handling %s" % sample) sample_dir = "%s/%s/" % (args.samples_dir, sample) alignment_sample_dir = "%s/%s/" % (args.output_dir, sample) FileRoutines.safe_mkdir(alignment_sample_dir) filetypes, forward_files, reverse_files, se_files = FileRoutines.make_lists_forward_and_reverse_files( sample_dir) print("\tAligning reads...") STAR.align_miRNA( args.genome_dir, se_files, output_dir=alignment_sample_dir,
type=float, help="Maximum number of iterations") parser.add_argument("-f", "--offset", action="store", dest="offset", type=float, help="Offset (works like gap extension penalty)") parser.add_argument("-g", "--gap_open_penalty", action="store", dest="gap_open_penalty", type=float, help="Gap open penalty") args = parser.parse_args() FileRoutines.safe_mkdir(args.output) MAFFT.threads = args.threads MAFFT.parallel_align(FileRoutines.make_list_of_path_to_files(args.input), args.output, output_suffix="alignment", gap_open_penalty=args.gap_open_penalty, offset=args.offset, maxiterate=args.maxiterate, quiet=args.quiet, mode=args.mode, number_of_processes=args.processes, anysymbol=True)
) parser.add_argument("-e", "--header", action="store_true", dest="header", help="Files contain headers. Default - False") parser.add_argument( "-n", "--column_names_list", action="store", dest="column_names_list", type=lambda s: s.split(","), help="Comma-separated list of new names for columns used for file merging." "Necessary if -h/--header option is specified, otherwise it is ignored") parser.add_argument("-o", "--output", action="store", dest="output", default=sys.stdout, help="Output file with allowed ids. Default - stdout") args = parser.parse_args() FileRoutines.merge_files_by_columns(args.file_list, args.column_index_list, args.output, separator="\t", column_names_list=args.column_names_list, comment_prefix="#", header=args.header)
__author__ = 'Sergei F. Kliver' import argparse from collections import OrderedDict from RouToolPa.Routines import MultipleAlignmentRoutines, FileRoutines, MatplotlibRoutines from RouToolPa.Collections.General import TwoLvlDict parser = argparse.ArgumentParser() parser.add_argument( "-i", "--input", action="store", dest="input", required=True, type=lambda s: FileRoutines.make_list_of_path_to_files(s.split(",")), help="Comma-separated list of files/directories with alignments") parser.add_argument( "-o", "--output_directory", action="store", dest="output_dir", default="./", help="Output directory to write count files. Default - current directory") parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignments") parser.add_argument("-g",
# CLI options selecting which chunk files to merge: either a numeric range
# (-a/-b) or an explicit list (-n).
parser.add_argument("-a", "--starting_chunks_number", action="store",
                    dest="starting_chunks_number", type=int,
                    help="Starting chunk number")
parser.add_argument("-b", "--ending_chunks_number", action="store",
                    dest="ending_chunks_number", type=int,
                    help="Ending chunk number")
parser.add_argument("-n", "--chunks_number_list", action="store",
                    dest="chunks_number_list", type=lambda s: s.split(","),
                    help="Comma-separated list of chunk numbers")
parser.add_argument("-o", "--output", action="store", dest="output",
                    default=sys.stdout,
                    help="Output merged file")
parser.add_argument("-e", "--header_prefix", action="store", dest="header_prefix",
                    default="#",
                    help="Header prefix")
parser.add_argument("-s", "--sorting_options", action="store", dest="sorting_options",
                    help="Sorting options for sort utility. Default: not set, i.e. no sort")
parser.add_argument("-r", "--separator", action="store", dest="separator",
                    default="_",
                    help="Separator between prefix and chunk number in chunk filename. Default: '_'")
args = parser.parse_args()

# Merge the chunk files (header written once), optionally piping the body
# through the external `sort` utility when sorting options are given.
FileRoutines.combine_chunks_with_header(args.chunks_dir, args.chunks_prefix,
                                        args.output,
                                        starting_chunk=args.starting_chunks_number,
                                        end_chunk=args.ending_chunks_number,
                                        chunk_number_list=args.chunks_number_list,
                                        chunks_suffix=args.chunks_suffix,
                                        header_prefix=args.header_prefix,
                                        sorting_options=args.sorting_options,
                                        separator=args.separator)
def make_list_of_path_to_files_from_comma_sep_string(string):
    """Split *string* on commas and expand the resulting paths into a flat
    list of file paths via FileRoutines.make_list_of_path_to_files."""
    path_entries = string.split(",")
    return FileRoutines.make_list_of_path_to_files(path_entries)
action="store", dest="input", required=True, type=FileRoutines.make_list_of_path_to_files_from_string, help="Comma-separated list of files/directories") parser.add_argument("-o", "--output", action="store", dest="output", default=sys.stdout, help="Output merged file") parser.add_argument("-e", "--header_prefix", action="store", dest="header_prefix", default="#", help="Header prefix") parser.add_argument( "-s", "--sorting_options", action="store", dest="sorting_options", help="Sorting options for sort utility. Default: not set, i.e. no sort") args = parser.parse_args() FileRoutines.combine_files_with_header(args.input, args.output, header_prefix=args.header_prefix, sorting_options=args.sorting_options)
type=FileRoutines.check_path, help="Directory to write fam files named by species names") parser.add_argument("-d", "--syn_file", action="store", dest="syn_file", required=True, help="File with taxa ids and species names") parser.add_argument("-k", "--key_index", action="store", dest="key_index", type=int, default=0, help="Key column in file with synonyms(0-based). Default: 0") parser.add_argument("-v", "--value_index", action="store", dest="value_index", type=int, default=1, help="Value column in file with synonyms(0-based). Default: 1") parser.add_argument("-c", "--comments_prefix", action="store", dest="comments_prefix", default="#", help="Prefix of comments in synonyms file. Default - '#'") parser.add_argument("-m", "--columns_separator", action="store", dest="separator", default="\t", help="Column separator in file with synonyms") parser.add_argument("-e", "--header", action="store_true", dest="header", default=False, help="Header is present in synonyms file. Default - False") args = parser.parse_args() syn_dict = SynDict() syn_dict.read(args.syn_file, header=args.header, separator=args.separator, key_index=args.key_index, value_index=args.value_index, comments_prefix=args.comments_prefix) FileRoutines.safe_mkdir(args.output_files_dir) input_files = os.listdir(args.input_files_dir) for filename in input_files: directory, taxon_id, extension = FileRoutines.split_filename(filename) if taxon_id not in syn_dict: print("Species name was not found for taxon %s" % taxon_id) continue shutil.copy("%s%s" % (args.input_files_dir, filename), "%s%s%s" % (args.output_files_dir, syn_dict[taxon_id], extension))
action="store", dest="syn_file", required=True, help="Synonym file") parser.add_argument("-e", "--separator", action="store", dest="separator", default="\t", help="Separator used in input file") parser.add_argument("-o", "--output", action="store", dest="output", default=None, help="Output file") args = parser.parse_args() FileRoutines.replace_column_value_by_syn(args.input, args.syn_file, args.output, column=args.column_number, comment_prefix=None, separator=args.separator, syn_header=False, syn_separator="\t", syn_key_index=0, syn_value_index=1, syn_comment_prefix=None)
#!/usr/bin/env python import os from Bio import SeqIO from RouToolPa.Routines import FileRoutines workdir = "/home/mahajrod/Genetics/Projects/nxf/nxf_arthropoda/" data_dir = "/home/mahajrod/Genetics/Projects/nxf/nxf_arthropoda/data/" os.chdir(workdir) data_files = FileRoutines.make_list_of_path_to_files([data_dir]) record_dict = SeqIO.index_db("tmp.idx", data_files, format="genbank") print("#organism\ttaxonomy\tregion_id\ttranscript_id\tproduct\texon_len") for record_id in record_dict: for feature in record_dict[record_id].features: if feature.type == "mRNA": mRNA_string = "" mRNA_string += "%s" % record_dict[record_id].annotations["organism"] mRNA_string += "\t%s" % (";".join( record_dict[record_id].annotations["taxonomy"])) mRNA_string += "\t%s" % record_id mRNA_string += "\t%s" % (feature.qualifiers["transcript_id"][0] if "transcript_id" in feature.qualifiers else ".") mRNA_string += "\t%s" % (feature.qualifiers["product"][0] if "product" in feature.qualifiers else ".") location_lenths = []
#!/usr/bin/env python __author__ = 'Sergei F. Kliver' import argparse from RouToolPa.Routines import MultipleAlignmentRoutines, FileRoutines parser = argparse.ArgumentParser() parser.add_argument( "-i", "--input", action="store", dest="input", required=True, type=lambda s: FileRoutines.make_list_of_path_to_files(s.split(",")), help="Comma-separated list of files/directories with alignments") parser.add_argument( "-o", "--output_directory", action="store", dest="output_dir", default="./", help= "Output directory to write resulting files. Default - current directory") parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignments") parser.add_argument("-g",
dest="separator", default="@", help="Separator to use. default - '@'") parser.add_argument("-k", "--key_column_index", action="store", dest="key_column_index", type=int, default=0, help="Index of key column in synonym file. Default: 0") parser.add_argument("-v", "--value_column_index", action="store", dest="value_column_index", type=int, default=1, help="Index of value column in synonym file.Default: 1") args = parser.parse_args() label = args.label if args.label else FileRoutines.split_filename( args.cluster_file)[1] SequenceClusterRoutines.label_cluster_elements_from_file( args.cluster_file, label, args.output, separator=args.separator, label_position=args.label_position, key_index=args.key_column_index, value_index=args.value_column_index)
"--min_species_number", action="store", dest="min_species_number", default=1, type=int, help="Minimum number of species with family to retain family. Default: 1") parser.add_argument("-f", "--filtered_families_directory", action="store", dest="filtered_family_dir", default="filtered_fam", type=FileRoutines.check_path, help="Directory to write filtered_families") args = parser.parse_args() FileRoutines.safe_mkdir(args.filtered_family_dir) species_list = sorted(args.species_set) if args.white_list_file and args.black_list_file: raise ValueError("Black list and white list cant be set simultaneously") black_list = IdList() white_list = IdList() if args.black_list_file: black_list.read(args.black_list_file) if args.white_list_file: white_list.read(args.white_list_file) out_fd = open(args.cafe_file, "w") filtered_fd = open("%sfiltered_families.cafe" % args.filtered_family_dir, "w") out_fd.write("FAMILYDESC\tFAMILY\t%s\n" % ("\t".join(species_list))) filtered_fd.write("FAMILYDESC\tFAMILY\t%s\n" % ("\t".join(species_list))) species_filtered_fd_list = OrderedDict()
#!/usr/bin/env python __author__ = 'Sergei F. Kliver' import argparse from RouToolPa.Tools.MultipleAlignment import GUIDANCE2 from RouToolPa.Routines import FileRoutines parser = argparse.ArgumentParser() parser.add_argument("-i", "--input", action="store", dest="input", required=True, type=lambda x: FileRoutines.make_list_of_path_to_files(x.split(",")), help="Comma-separated list of files or directory with files containing sequences to be aligned") parser.add_argument("-p", "--processes", action="store", dest="processes", type=int, default=1, help="Number of simultaneously running alignments") parser.add_argument("-o", "--output_directory", action="store", dest="output", type=FileRoutines.check_path, required=True, help="Output directory") parser.add_argument("-d", "--handling_mode", action="store", dest="handling_mode", default="local", help="Handling mode. Allowed: local(default), slurm") parser.add_argument("-j", "--slurm_job_name", action="store", dest="slurm_job_name", default="JOB", help="Slurm job name. Default: JOB") parser.add_argument("-m", "--slurm_max_jobs", action="store", dest="slurm_max_jobs", default=1000, type=int, help="Slurm max jobs. Default: 1000") parser.add_argument("-y", "--slurm_log_prefix", action="store", dest="slurm_log_prefix", help="Slurm log prefix. ") parser.add_argument("-z", "--slurm_cmd_log_file", action="store", dest="slurm_cmd_log_file", help="Slurm cmd logfile") parser.add_argument("-l", "--slurm_error_log_prefix", action="store", dest="slurm_error_log_prefix", help="Slurm error log prefix") parser.add_argument("-e", "--max_memory_per_task", action="store", dest="max_memory_per_task", default="5000",
dest="indel_ReadPosRankSum", type=float, default=-20.0, help="Indel ReadPosRankSum threshold. Default - -20.0") #parser.add_argument("--indel_InbreedingCoeff", action="store", dest="indel_InbreedingCoeff", type=float, default=-0.8, # help="Indel InbreedingCoeff threshold. Default - -0.8") parser.add_argument("--indel_FS", action="store", dest="indel_FS", type=float, default=200.0, help="Indel FS threshold. Default - 200.0") args = parser.parse_args() VariantFiltration.jar_path = FileRoutines.check_path(args.gatk_dir) VariantFiltration.filter_bad_variants( args.reference, args.input_vcf, args.output_prefix, snp_filter_name=args.snp_filter_name, snp_QD=args.snp_QD, snp_FS=args.snp_FS, snp_MQ=args.snp_MQ, #snp_HaplotypeScore=args.snp_HaplotypeScore, snp_MappingQualityRankSum=args.snp_MappingQualityRankSum, snp_ReadPosRankSum=args.snp_ReadPosRankSum, indel_filter_name=args.indel_filter_name, indel_QD=args.indel_QD, indel_ReadPosRankSum=args.indel_ReadPosRankSum,
# CLI wrapper: append a new column to a delimited file by looking up each
# line's key column in a synonym file; missing keys get "." as the value.
from RouToolPa.Routines import FileRoutines

parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input", action="store", dest="input", required=True,
                    help="Input file")
parser.add_argument("-o", "--output", action="store", dest="output",
                    default=sys.stdout,
                    help="Output file")
parser.add_argument("-k", "--key_column", action="store", dest="key_column",
                    type=int, required=True,
                    help="Column to be used as key(0-based). ")
parser.add_argument("-y", "--syn_file", action="store", dest="syn_file",
                    required=True,
                    help="Synonym file with new column values")
parser.add_argument("-s", "--separator", action="store", dest="separator",
                    default="\t",
                    help="Separator used in input file. Default: TAB")
parser.add_argument("-n", "--new_column_name", action="store", dest="new_column_name",
                    required=True,
                    help="Name of new column")
args = parser.parse_args()

# NOTE(review): the doubled "add_add" is the library's actual function name.
FileRoutines.add_add_new_column_by_key_column(args.input, args.syn_file,
                                              args.key_column, args.output,
                                              new_column_name=args.new_column_name,
                                              separator=args.separator,
                                              absent_value=".")
#!/usr/bin/env python __author__ = 'Sergei F. Kliver' import os import argparse from Pipelines.TenX import TenXAlignmentPipeline from RouToolPa.Routines import FileRoutines parser = argparse.ArgumentParser() parser.add_argument("-d", "--sample_directory", action="store", dest="samples_dir", required=True, type=lambda s: FileRoutines.check_path(os.path.abspath(s)), help="Directory with samples") parser.add_argument( "-s", "--samples", action="store", dest="samples", type=lambda s: s.split(","), help="Comma-separated list of subdirectories(one per sample) to handle. " "If not set all subdirectories will be considered as containing samples") parser.add_argument( "-o", "--output_dir", action="store", dest="output_dir", type=lambda s: FileRoutines.check_path(os.path.abspath(s)), default="./",