action="store", dest="output_cluster_file", required=True, help="File to write clusters with renamed elements") parser.add_argument( "-a", "--elements_without_synonyms_file", action="store", dest="elements_without_synonyms_file", help="File to write cluster elements without synonyms. Default: don't write" ) parser.add_argument( "-r", "--remove_clusters_with_not_renamed_elements", action="store_true", dest="remove_clusters_with_not_renamed_elements", help="Remove clusters with not renamed elements. Default: false ") args = parser.parse_args() SequenceClusterRoutines.rename_elements_in_clusters( args.input_cluster_file, args.syn_file, args.output_cluster_file, remove_clusters_with_not_renamed_elements=args. remove_clusters_with_not_renamed_elements, syn_file_key_column_index=args.key_column_index, syn_file_value_column_index=args.value_column_index, syn_file_column_separator=args.column_separator, elements_with_absent_synonyms_file=args.elements_without_synonyms_file)
dest="separator", default="@", help="Separator to use. default - '@'") parser.add_argument("-k", "--key_column_index", action="store", dest="key_column_index", type=int, default=0, help="Index of key column in synonym file. Default: 0") parser.add_argument("-v", "--value_column_index", action="store", dest="value_column_index", type=int, default=1, help="Index of value column in synonym file.Default: 1") args = parser.parse_args() label = args.label if args.label else FileRoutines.split_filename( args.cluster_file)[1] SequenceClusterRoutines.label_cluster_elements_from_file( args.cluster_file, label, args.output, separator=args.separator, label_position=args.label_position, key_index=args.key_column_index, value_index=args.value_column_index)
dest="label_ids", help="Label ids by species names. Default - don't label") parser.add_argument("-g", "--separator_for_labeling", action="store", dest="separator_for_labeling", default="@", help="Separator to use for labeling. Default - '@'") parser.add_argument("-r", "--label_last", action="store_false", dest="label_first", default=True, help="Place label at the end of id") args = parser.parse_args() FileRoutines.safe_mkdir(args.output_dir) SequenceClusterRoutines.extract_sequences_by_clusters( args.input_cluster_dir, args.input_seq_dir, args.output_dir, file_with_white_list_cluster_ids=args.white_list_ids, mode=args.mode, sequence_file_extension=args.seq_extension, sequence_file_format=args.format, label_species=args.label_ids, separator_for_labeling=args.separator_for_labeling, species_label_first=args.label_first)
# Output-side label settings.
# The long option used to be spelled "--output_label position" (with a space),
# which cannot be typed as a normal command-line token. It is now
# "--output_label_position"; the short flag and dest are unchanged, and the
# previously working abbreviation "--output_label" still resolves to it
# (unless another option defined outside this excerpt shares that prefix).
parser.add_argument(
    "-a",
    "--output_label_position",
    action="store",
    dest="output_label_position",
    default="first",
    help="Position of label in output file. Allowed - first, last. Default - first",
)
parser.add_argument(
    "-r",
    "--output_separator",
    action="store",
    dest="output_separator",
    default="@",
    help="Separator between label and element id in output file. Default - '@'",
)

args = parser.parse_args()

# NOTE(review): an unused `label = args.label if args.label else ...` assignment
# was removed here. Its value was never passed on, and it read args.label and
# args.cluster_file, which the call below does not use - confirm that no later
# code (outside this excerpt) relied on `label`.

SequenceClusterRoutines.replace_label_from_file(
    args.input_file,
    args.output_file,
    args.syn_file,
    old_separator=args.input_separator,
    old_label_position=args.input_label_position,
    new_separator=args.output_separator,
    new_label_position=args.output_label_position,
)
"--output", action="store", dest="output", required=True, help="File to write extracted clusters") parser.add_argument("-c", "--id_column_index", action="store", dest="id_column_index", type=int, help="Index(0-based) of id column in id file. ") parser.add_argument( "-m", "--mode", action="store", dest="mode", default="w", help= "extraction mode. Allowed - 'w' - if elements from element_id_list are present " "in cluster extracts only that elements; 'a' - if elements from element_id_list are present " "in cluster extracts all elements. Default - 'w'") args = parser.parse_args() SequenceClusterRoutines.extract_clusters_by_element_ids_from_file( args.cluster_file, args.element_file, args.output, mode=args.mode, id_column=args.id_column_index)
"--output", action="store", dest="output", required=True, help="File to write clusters with single-copy clusters") parser.add_argument( "-p", "--label position", action="store", dest="label_position", default="first", help="Position of label. Allowed - first, last. Default - first") parser.add_argument("-s", "--separator", action="store", dest="separator", default="@", help="Separator to use. default - '@'") args = parser.parse_args() list_of_cluster_files = FileRoutines.make_list_of_path_to_files(args.input) single_copy_clusters = SequenceClusterRoutines.extract_single_copy_clusters_from_files( list_of_cluster_files, args.output, label_elements=args.label, separator=args.separator, label_position=args.label_position) print("Was found %i single-copy clusters" % len(single_copy_clusters))
action="store", dest="cluster_file", required=True, help="File with clusters") parser.add_argument("-d", "--black_list_file", action="store", dest="black_list_file", required=True, help="File with ids of elements") parser.add_argument("-o", "--output", action="store", dest="output", required=True, help="File to write extracted clusters") parser.add_argument( "-m", "--mode", action="store", dest="mode", default="full", help="Element id comparison mode. Allowed: partial, full(comparison)") args = parser.parse_args() SequenceClusterRoutines.remove_elements_by_ids_from_files(args.cluster_file, args.output, args.black_list_file, mode=args.mode)
# Required input tables.
parser.add_argument(
    "-s",
    "--species-gene_fam_file",
    action="store",
    dest="species_gene_fam_file",
    required=True,
    help="Input species-gene fam file",
)
parser.add_argument(
    "-g",
    "--gene-GO_fam_file",
    action="store",
    dest="gene_GO_fam_file",
    required=True,
    help="Input gene-GO fam file",
)
# NOTE: options for cluster column index (-c), element column index (-v) and
# column separator (-e) were commented out here; none of them is registered
# and no such values are passed to the routine below.
parser.add_argument(
    "-o",
    "--output_directory",
    action="store",
    dest="output_dir",
    required=True,
    help="Output directory",
)

args = parser.parse_args()

SequenceClusterRoutines.create_gvf_files_from_species_gene_fam_and_gene_GO_fam(
    args.species_gene_fam_file,
    args.gene_GO_fam_file,
    args.output_dir,
)
action="store", dest="label_position", default="first", help="Position of label. Allowed - first, last. Default - first") parser.add_argument("-s", "--separator", action="store", dest="separator", default="@", help="Separator to use. default - '@'") parser.add_argument("-l", "--label_list", action="store", dest="label_list", type=lambda s: s.split(","), help="Comma-separated list of element labels to extract") args = parser.parse_args() if args.label_file and args.label_list: raise ValueError("Both --label_file and --label_list were set") elif (not args.label_file) and (not args.label_list): raise ValueError("Neither --label_file or --label_list was set") SequenceClusterRoutines.extract_clusters_and_elements_by_labels_from_files( args.input, args.label_file if args.label_file else args.label_list, args.output, separator=args.separator, label_position=args.label_position)
import argparse

from RouToolPa.Routines import SequenceClusterRoutines, FileRoutines

# Command-line front end for SequenceClusterRoutines.extract_monocluster_ids_from_file.
parser = argparse.ArgumentParser()

# The input path is passed through FileRoutines.check_path while parsing.
parser.add_argument(
    "-i",
    "--input_cluster_dir",
    action="store",
    dest="input_cluster_dir",
    required=True,
    type=FileRoutines.check_path,
    help="Directory with files with clusters",
)
parser.add_argument(
    "-o",
    "--output",
    action="store",
    dest="output",
    required=True,
    help="File to write ids of monoclusters",
)
# Optional; None when the flag is omitted.
parser.add_argument(
    "-w",
    "--white_list_ids",
    action="store",
    dest="white_list_ids",
    help="File with ids from white list. ",
)

args = parser.parse_args()

SequenceClusterRoutines.extract_monocluster_ids_from_file(
    args.input_cluster_dir,
    args.output,
    file_with_white_list_ids=args.white_list_ids,
)
# --- Inputs ---------------------------------------------------------------
parser.add_argument(
    "-i",
    "--cluster_id_file",
    action="store",
    dest="cluster_id_file",
    help="File with ids of clusters to extract. Extract all clusters if not set",
)
parser.add_argument(
    "-f",
    "--cluster_file",
    action="store",
    dest="cluster_file",
    required=True,
    help="File with clusters",
)
# The comma-separated value is split and handed to
# FileRoutines.make_list_of_path_to_files during parsing.
parser.add_argument(
    "-p",
    "--seq_file",
    action="store",
    dest="seq_file",
    required=True,
    type=lambda s: FileRoutines.make_list_of_path_to_files(s.split(",")),
    help="List of comma-separated files/directories with sequences",
)
parser.add_argument(
    "-r",
    "--seq_file_format",
    action="store",
    dest="seq_file_format",
    default="fasta",
    help="Format of file with sequences",
)

# --- Outputs --------------------------------------------------------------
parser.add_argument(
    "-c",
    "--create_dir_for_each_cluster",
    action="store_true",
    dest="create_dir_for_each_cluster",
    help="Create separate directory for each cluster",
)
parser.add_argument(
    "-o",
    "--output_prefix",
    action="store",
    dest="output",
    help="Output prefix to use",
)
parser.add_argument(
    "-d",
    "--output_directory",
    action="store",
    dest="out_dir",
    default="./",
    help="Directory to write output",
)

# --- Behaviour ------------------------------------------------------------
parser.add_argument(
    "-n",
    "--dont_skip_cluster_if_absent_element",
    action="store_true",
    dest="dont_skip_cluster_if_absent_element",
    default=False,
    help="Don't skip cluster with absent sequences",
)

args = parser.parse_args()

# The CLI flag is phrased negatively ("don't skip"); the routine takes the
# positive form, so the value is inverted here.
skip_incomplete_clusters = not args.dont_skip_cluster_if_absent_element

SequenceClusterRoutines.extract_sequences_from_selected_clusters(
    args.cluster_id_file,
    args.cluster_file,
    args.seq_file,
    output_dir=args.out_dir,
    seq_format=args.seq_file_format,
    out_prefix=args.output,
    create_dir_for_each_cluster=args.create_dir_for_each_cluster,
    skip_cluster_if_no_sequence_for_element=skip_incomplete_clusters,
    parsing_mode="parse",
)
# Column layout of the input file.
parser.add_argument(
    "-v",
    "--element_column_index",
    action="store",
    dest="element_column_index",
    type=int,
    default=1,
    help="Index of element column in synonym file.Default: 1",
)
parser.add_argument(
    "-e",
    "--separator",
    action="store",
    dest="column_separator",
    default='\t',
    help="Column separator in synonym file. Default: \\t",
)
parser.add_argument(
    "-o",
    "--output_directory",
    action="store",
    dest="output_dir",
    required=True,
    help="Output directory",
)

args = parser.parse_args()

# Parsing options forwarded to the routine; the element separator is fixed
# to a comma and is not configurable from the command line.
layout_options = {
    "cluster_column": args.cluster_column_index,
    "element_column": args.element_column_index,
    "column_separator": args.column_separator,
    "element_separator": ",",
}

SequenceClusterRoutines.create_per_cluster_element_id_files_from_file(
    args.input_cluster_file,
    args.output_dir,
    **layout_options
)