Ejemplo n.º 1
0
                    action="store",
                    dest="output_cluster_file",
                    required=True,
                    help="File to write clusters with renamed elements")
# Optional output file for cluster elements without synonyms (None unless given).
parser.add_argument("-a", "--elements_without_synonyms_file",
                    action="store",
                    dest="elements_without_synonyms_file",
                    help="File to write cluster elements without synonyms. Default: don't write")
# Boolean switch: stays False unless -r is passed on the command line.
parser.add_argument("-r", "--remove_clusters_with_not_renamed_elements",
                    action="store_true",
                    dest="remove_clusters_with_not_renamed_elements",
                    help="Remove clusters with not renamed elements. Default: false ")

args = parser.parse_args()

# Forward the parsed options to the renaming routine. The input/synonym/output
# paths and the synonym-file column options are read from `args`; their
# declarations are presumably located above this part of the script.
SequenceClusterRoutines.rename_elements_in_clusters(
    args.input_cluster_file, args.syn_file, args.output_cluster_file,
    remove_clusters_with_not_renamed_elements=args.remove_clusters_with_not_renamed_elements,
    syn_file_key_column_index=args.key_column_index,
    syn_file_value_column_index=args.value_column_index,
    syn_file_column_separator=args.column_separator,
    elements_with_absent_synonyms_file=args.elements_without_synonyms_file)
Ejemplo n.º 2
0
                    dest="separator",
                    default="@",
                    help="Separator to use. default - '@'")
# Integer column indexes into the synonym file (defaults: key 0, value 1).
parser.add_argument("-k", "--key_column_index",
                    action="store", dest="key_column_index",
                    type=int, default=0,
                    help="Index of key column in synonym file. Default: 0")
parser.add_argument("-v", "--value_column_index",
                    action="store", dest="value_column_index",
                    type=int, default=1,
                    help="Index of value column in synonym file.Default: 1")
args = parser.parse_args()

# Use the explicit label when one was supplied; otherwise fall back to item [1]
# of what FileRoutines.split_filename returns for the cluster file path.
label = args.label or FileRoutines.split_filename(args.cluster_file)[1]

SequenceClusterRoutines.label_cluster_elements_from_file(args.cluster_file,
                                                         label,
                                                         args.output,
                                                         separator=args.separator,
                                                         label_position=args.label_position,
                                                         key_index=args.key_column_index,
                                                         value_index=args.value_column_index)
Ejemplo n.º 3
0
                    dest="label_ids",
                    help="Label ids by species names. Default - don't label")

parser.add_argument("-g", "--separator_for_labeling",
                    action="store", dest="separator_for_labeling", default="@",
                    help="Separator to use for labeling. Default - '@'")
# Inverted flag: passing -r stores False into args.label_first (default True).
parser.add_argument("-r", "--label_last",
                    action="store_false", dest="label_first", default=True,
                    help="Place label at the end of id")

args = parser.parse_args()

# Prepare the output directory (via safe_mkdir) before running the extraction.
FileRoutines.safe_mkdir(args.output_dir)

# Hand every parsed option over to the extraction routine.
SequenceClusterRoutines.extract_sequences_by_clusters(
    args.input_cluster_dir, args.input_seq_dir, args.output_dir,
    file_with_white_list_cluster_ids=args.white_list_ids,
    mode=args.mode,
    sequence_file_extension=args.seq_extension,
    sequence_file_format=args.format,
    label_species=args.label_ids,
    separator_for_labeling=args.separator_for_labeling,
    species_label_first=args.label_first)
Ejemplo n.º 4
0
# Position of the label in the output file. The long option is spelled with
# underscores only, so it can be typed as a single unquoted command-line token.
parser.add_argument(
    "-a",
    "--output_label_position",
    action="store",
    dest="output_label_position",
    default="first",
    help=
    "Position of label in output file. Allowed - first, last. Default - first")
# Separator placed between the label and the element id in the output file.
parser.add_argument(
    "-r",
    "--output_separator",
    action="store",
    dest="output_separator",
    default="@",
    help="Separator between label and element id in output file. Default - '@'"
)

args = parser.parse_args()

# Replace labels using the synonym file. The "old" settings describe the input
# file and the "new" settings describe the output file; the input-side options
# are read from `args` and are presumably declared above this part of the script.
SequenceClusterRoutines.replace_label_from_file(
    args.input_file,
    args.output_file,
    args.syn_file,
    old_separator=args.input_separator,
    old_label_position=args.input_label_position,
    new_separator=args.output_separator,
    new_label_position=args.output_label_position)
Ejemplo n.º 5
0
                    "--output",
                    action="store",
                    dest="output",
                    required=True,
                    help="File to write extracted clusters")
# Integer index of the id column in the id file; None when -c is not given.
parser.add_argument("-c", "--id_column_index",
                    action="store", dest="id_column_index", type=int,
                    help="Index(0-based) of id column in id file. ")

# Help text for --mode, assembled from adjacent literals.
mode_help = ("extraction mode. Allowed - 'w' - if elements from element_id_list are present "
             "in cluster extracts only that elements; 'a' - if elements from element_id_list are present "
             "in cluster extracts all elements. Default - 'w'")
parser.add_argument("-m", "--mode",
                    action="store", dest="mode", default="w",
                    help=mode_help)

args = parser.parse_args()

# Run the extraction with the parsed cluster file, element id file and output.
SequenceClusterRoutines.extract_clusters_by_element_ids_from_file(args.cluster_file,
                                                                  args.element_file,
                                                                  args.output,
                                                                  mode=args.mode,
                                                                  id_column=args.id_column_index)
Ejemplo n.º 6
0
                    "--output",
                    action="store",
                    dest="output",
                    required=True,
                    help="File to write clusters with single-copy clusters")
# Position of the label relative to the element id. The long option is spelled
# with an underscore (no space) so it can be typed as one command-line token.
parser.add_argument(
    "-p",
    "--label_position",
    action="store",
    dest="label_position",
    default="first",
    help="Position of label. Allowed - first, last. Default - first")
# Separator used together with the label (default "@").
parser.add_argument("-s",
                    "--separator",
                    action="store",
                    dest="separator",
                    default="@",
                    help="Separator to use. default - '@'")

args = parser.parse_args()

# Expand the --input value into a list of paths to cluster files.
list_of_cluster_files = FileRoutines.make_list_of_path_to_files(args.input)

# The routine's return value is only used here for its length (reported below).
single_copy_clusters = SequenceClusterRoutines.extract_single_copy_clusters_from_files(
    list_of_cluster_files,
    args.output,
    label_elements=args.label,
    separator=args.separator,
    label_position=args.label_position)

print("Was found %i single-copy clusters" % len(single_copy_clusters))
Ejemplo n.º 7
0
                    action="store",
                    dest="cluster_file",
                    required=True,
                    help="File with clusters")
# Required: file listing the ids of elements (the "black list").
parser.add_argument("-d", "--black_list_file",
                    action="store", dest="black_list_file", required=True,
                    help="File with ids of elements")
# Required: destination file.
parser.add_argument("-o", "--output",
                    action="store", dest="output", required=True,
                    help="File to write extracted clusters")
# How element ids are compared; "full" unless overridden.
parser.add_argument("-m", "--mode",
                    action="store", dest="mode", default="full",
                    help="Element id comparison mode. Allowed: partial, full(comparison)")

args = parser.parse_args()

# Note the argument order expected by the routine: cluster file, output, black list.
SequenceClusterRoutines.remove_elements_by_ids_from_files(
    args.cluster_file,
    args.output,
    args.black_list_file,
    mode=args.mode)
Ejemplo n.º 8
0
# Both input fam files are mandatory.
parser.add_argument("-s", "--species-gene_fam_file",
                    action="store", dest="species_gene_fam_file", required=True,
                    help="Input species-gene fam file")
parser.add_argument("-g", "--gene-GO_fam_file",
                    action="store", dest="gene_GO_fam_file", required=True,
                    help="Input gene-GO fam file")
# NOTE: the column-index and separator options below are disabled; the routine
# at the bottom is called without them.
#parser.add_argument("-c", "--cluster_column_index", action="store", dest="cluster_column_index", type=int, default=0,
#                    help="Index of cluster column in synonym file. Default: 0")
#parser.add_argument("-v", "--element_column_index", action="store", dest="element_column_index", type=int, default=1,
#                    help="Index of element column in synonym file.Default: 1")
#parser.add_argument("-e", "--separator", action="store", dest="column_separator", default='\t',
#                    help="Column separator in synonym file. Default: \\t")

parser.add_argument("-o", "--output_directory",
                    action="store", dest="output_dir", required=True,
                    help="Output directory")

args = parser.parse_args()

# Pass the two fam files and the output directory positionally.
SequenceClusterRoutines.create_gvf_files_from_species_gene_fam_and_gene_GO_fam(
    args.species_gene_fam_file,
    args.gene_GO_fam_file,
    args.output_dir)
Ejemplo n.º 9
0
    action="store",
    dest="label_position",
    default="first",
    help="Position of label. Allowed - first, last. Default - first")
parser.add_argument("-s", "--separator",
                    action="store", dest="separator", default="@",
                    help="Separator to use. default - '@'")
# The raw option value is split on commas, so args.label_list is a list of strings.
parser.add_argument("-l", "--label_list",
                    action="store", dest="label_list",
                    type=lambda s: s.split(","),
                    help="Comma-separated list of element labels to extract")

args = parser.parse_args()

# Exactly one source of labels must be provided: a file or an inline list.
if args.label_file and args.label_list:
    raise ValueError("Both --label_file and --label_list were set")
if not (args.label_file or args.label_list):
    raise ValueError("Neither --label_file or --label_list was set")

# After the checks above exactly one of the two is truthy; take that one.
label_source = args.label_file or args.label_list

SequenceClusterRoutines.extract_clusters_and_elements_by_labels_from_files(
    args.input, label_source, args.output,
    separator=args.separator,
    label_position=args.label_position)
Ejemplo n.º 10
0
import argparse
from RouToolPa.Routines import SequenceClusterRoutines, FileRoutines

# Command-line interface: writes ids of monoclusters found in a directory of
# cluster files, optionally using a white list of ids.
parser = argparse.ArgumentParser()

# Required input; the raw value is passed through FileRoutines.check_path.
parser.add_argument("-i", "--input_cluster_dir",
                    action="store", dest="input_cluster_dir", required=True,
                    type=FileRoutines.check_path,
                    help="Directory with files with clusters")
# Required output file.
parser.add_argument("-o", "--output",
                    action="store", dest="output", required=True,
                    help="File to write ids of monoclusters")
# Optional white list; None when the option is omitted.
parser.add_argument("-w", "--white_list_ids",
                    action="store", dest="white_list_ids",
                    help="File with ids from white list. ")

args = parser.parse_args()

SequenceClusterRoutines.extract_monocluster_ids_from_file(args.input_cluster_dir,
                                                          args.output,
                                                          file_with_white_list_ids=args.white_list_ids)
Ejemplo n.º 11
0
# Optional file with the ids of clusters to extract (None when omitted).
parser.add_argument(
    "-i",
    "--cluster_id_file",
    action="store",
    dest="cluster_id_file",
    help="File with ids of clusters to extract. Extract all clusters if not set")
# Required file with clusters.
parser.add_argument(
    "-f",
    "--cluster_file",
    action="store",
    dest="cluster_file",
    required=True,
    help="File with clusters")
# The comma-separated value is split and then expanded by
# FileRoutines.make_list_of_path_to_files.
parser.add_argument(
    "-p",
    "--seq_file",
    action="store",
    dest="seq_file",
    required=True,
    type=lambda s: FileRoutines.make_list_of_path_to_files(s.split(",")),
    help="List of comma-separated files/directories with sequences")
parser.add_argument(
    "-r",
    "--seq_file_format",
    action="store",
    dest="seq_file_format",
    default="fasta",
    help="Format of file with sequences")
parser.add_argument(
    "-c",
    "--create_dir_for_each_cluster",
    action="store_true",
    dest="create_dir_for_each_cluster",
    help="Create separate directory for each cluster")
parser.add_argument(
    "-o",
    "--output_prefix",
    action="store",
    dest="output",
    help="Output prefix to use")
parser.add_argument(
    "-d",
    "--output_directory",
    action="store",
    dest="out_dir",
    default="./",
    help="Directory to write output")
# Opt-out switch: False by default, True when -n is given.
parser.add_argument(
    "-n",
    "--dont_skip_cluster_if_absent_element",
    action="store_true",
    dest="dont_skip_cluster_if_absent_element",
    default=False,
    help="Don't skip cluster with absent sequences")


args = parser.parse_args()

# The routine takes a "skip" flag, so the "don't skip" switch is negated here.
SequenceClusterRoutines.extract_sequences_from_selected_clusters(
    args.cluster_id_file,
    args.cluster_file,
    args.seq_file,
    output_dir=args.out_dir,
    seq_format=args.seq_file_format,
    out_prefix=args.output,
    create_dir_for_each_cluster=args.create_dir_for_each_cluster,
    skip_cluster_if_no_sequence_for_element=not args.dont_skip_cluster_if_absent_element,
    parsing_mode="parse")
Ejemplo n.º 12
0
# Integer index of the element column (default 1).
parser.add_argument("-v", "--element_column_index",
                    action="store", dest="element_column_index",
                    type=int, default=1,
                    help="Index of element column in synonym file.Default: 1")
# Column separator of the input file; a tab character unless overridden.
parser.add_argument("-e", "--separator",
                    action="store", dest="column_separator", default='\t',
                    help="Column separator in synonym file. Default: \\t")

parser.add_argument("-o", "--output_directory",
                    action="store", dest="output_dir", required=True,
                    help="Output directory")

args = parser.parse_args()

# Elements inside a cluster are always split on "," (not configurable here).
SequenceClusterRoutines.create_per_cluster_element_id_files_from_file(args.input_cluster_file,
                                                                      args.output_dir,
                                                                      cluster_column=args.cluster_column_index,
                                                                      element_column=args.element_column_index,
                                                                      column_separator=args.column_separator,
                                                                      element_separator=",")