예제 #1
0
def handle_input(filename):
    """Report the top hit of every query found in a search-output file.

    The file is indexed with ``SearchIO.index_db`` using a temporary index
    named ``<prefix>.tmp.idx``. For each query whose first hit has
    ``is_included`` set, one tab-separated line (query, hit id, e-value,
    bitscore) is written either to stdout (when ``args.output == "stdout"``)
    or to ``<args.top_hits_dir><prefix>.top_hits``; the header line is
    written only in the file case.

    Reads the module-level ``args`` namespace (``format``, ``output``,
    ``top_hits_dir``).

    :param filename: path to the search-output file to process.
    :return: tuple ``(not_significant_ids, not_found_ids)`` of ``IdList``
        objects: queries whose first hit is not included, and queries
        without any hits.
    """
    sys.stdout.write("Handling %s\n" % filename)
    not_significant_ids = IdList()
    not_found_ids = IdList()

    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    write_to_stdout = args.output == "stdout"
    out_fd = None

    try:
        hmm_dict = SearchIO.index_db(index_file, filename, args.format)
        if write_to_stdout:
            out_fd = sys.stdout
        else:
            out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
            out_fd.write("#query\thit\tevalue\tbitscore\n")

        for query in hmm_dict:
            # Fetch the record once per query: every subscript on the indexed
            # dictionary is a separate lookup (presumably parsed on demand).
            query_result = hmm_dict[query]
            if not query_result.hits:
                not_found_ids.append(query)
                continue

            top_hit = query_result[0]
            if top_hit.is_included:
                out_fd.write("%s\t%s\t%s\t%s\n" %
                             (query, top_hit.id, top_hit.evalue,
                              top_hit.bitscore))
            else:
                not_significant_ids.append(query)
    finally:
        # Release the output file and the temporary index even when parsing
        # or writing fails; never close sys.stdout.
        if out_fd is not None and not write_to_stdout:
            out_fd.close()
        if os.path.exists(index_file):
            os.remove(index_file)

    return not_significant_ids, not_found_ids
예제 #2
0
def handle_input(filename):
    """Write every included hit of every family in a search-output file.

    The file is indexed with ``SearchIO.index_db`` using a temporary index
    named ``<prefix>.tmp.idx``. For each hit with ``is_included`` set, one
    tab-separated line (family, hit id, e-value, bitscore) is written either
    to stdout (when ``args.output == "stdout"``) or to
    ``<args.top_hits_dir><prefix>.top_hits``; the header line is written only
    in the file case.

    Reads the module-level ``args`` namespace (``format``, ``output``,
    ``top_hits_dir``).

    :param filename: path to the search-output file to process.
    :return: None
    """
    sys.stdout.write("Handling %s\n" % filename)
    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    write_to_stdout = args.output == "stdout"
    out_fd = None

    try:
        hmm_dict = SearchIO.index_db(index_file, filename, args.format)
        if write_to_stdout:
            out_fd = sys.stdout
        else:
            out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
            out_fd.write("#query\thit\tevalue\tbitscore\n")

        for family in hmm_dict:
            for hit in hmm_dict[family]:
                if hit.is_included:
                    out_fd.write("%s\t%s\t%s\t%s\n" %
                                 (family, hit.id, hit.evalue, hit.bitscore))
    finally:
        # Release the output file and the temporary index even when parsing
        # or writing fails; never close sys.stdout.
        if out_fd is not None and not write_to_stdout:
            out_fd.close()
        if os.path.exists(index_file):
            os.remove(index_file)
예제 #3
0
                    action="store_true",
                    dest="header",
                    default=False,
                    help="Set if header is present in input file")
# Boolean switches: "store_true" already defaults to False.
parser.add_argument("-u", "--use_column_value_as_prefix",
                    dest="use_column_value_as_prefix", action="store_true",
                    help="Use column value as prefix for output files")
parser.add_argument("-r", "--sorted_input",
                    dest="sorted_input", action="store_true",
                    help="Input file is sorted. Do it to reduce number of "
                         "simultaneously opened files")

args = parser.parse_args()

# Keyword options forwarded unchanged from the command line.
split_options = {
    "separator": args.separator,
    "header": args.header,
    "outfile_prefix": args.output_prefix,
    "use_column_value_as_prefix": args.use_column_value_as_prefix,
    "sorted_input": args.sorted_input,
}

FileRoutines.split_by_column(args.input_file, args.column_number,
                             **split_options)
예제 #4
0
print("Drawing histograms...")

# Every statistics file gets its own histogram; the file name is passed as
# both the first and the second positional argument.
stat_files = (
    output_evidence_stats,
    output_supported_stats,
    output_swissprot_pfam_or_hints_supported_transcripts_longest_pep_evidence,
    output_swissprot_pfam_and_hints_supported_transcripts_longest_pep_evidence,
    output_swissprot_pfam_or_hints_supported_transcripts_evidence,
    output_swissprot_pfam_and_hints_supported_transcripts_evidence,
)

for stat_file in stat_files:
    MatplotlibRoutines.percent_histogram_from_file(
        stat_file,
        stat_file,
        data_type=None,
        column_list=(2, ),
        comments="#",
        n_bins=20,
        title="Transcript support by hints",
        extensions=("png", "svg"),
        legend_location="upper center",
        stats_as_legend=True)

print("Creating final directories...")
if args.pfam_db and args.swissprot_db:
    # Local imports keep this block self-contained; they are only needed
    # for moving the result files.
    import glob
    import shutil

    db_or_hints_dir = "supported_by_db_or_hints/"
    db_and_hints_dir = "supported_by_db_and_hints/"
    for directory in db_and_hints_dir, db_or_hints_dir:
        FileRoutines.safe_mkdir(directory)

    # Move the files in Python instead of "mv" through a shell, so an output
    # prefix containing spaces or shell metacharacters cannot break or
    # hijack the command.
    move_plan = (
        ("swissprot_or_pfam_or_hints", db_or_hints_dir),
        ("swissprot_or_pfam_and_hints", db_and_hints_dir),
    )
    for name_part, target_dir in move_plan:
        pattern = "%s.supported.transcripts.%s*" % (args.output, name_part)
        for path in glob.glob(pattern):
            # An explicit destination file name lets an existing file be
            # replaced, as "mv" would do.
            shutil.move(path,
                        os.path.join(target_dir, os.path.basename(path)))
예제 #5
0
                    dest="label_ids",
                    help="Label ids by species names. Default - don't label")

parser.add_argument("-g", "--separator_for_labeling",
                    dest="separator_for_labeling", default="@",
                    help="Separator to use for labeling. Default - '@'")
# "store_false" defaults to True: label_first stays True unless -r is given.
parser.add_argument("-r", "--label_last",
                    dest="label_first", action="store_false",
                    help="Place label at the end of id")

args = parser.parse_args()

FileRoutines.safe_mkdir(args.output_dir)

# Keyword options forwarded unchanged from the command line.
extraction_options = {
    "file_with_white_list_cluster_ids": args.white_list_ids,
    "mode": args.mode,
    "sequence_file_extension": args.seq_extension,
    "sequence_file_format": args.format,
    "label_species": args.label_ids,
    "separator_for_labeling": args.separator_for_labeling,
    "species_label_first": args.label_first,
}

SequenceClusterRoutines.extract_sequences_by_clusters(
    args.input_cluster_dir,
    args.input_seq_dir,
    args.output_dir,
    **extraction_options)
예제 #6
0
import os
import argparse
from RouToolPa.Tools.Filter import FaCut

#from RouToolPa.Tools.Filter import FastQC

from RouToolPa.Routines import FileRoutines

parser = argparse.ArgumentParser()

# Directory with samples: made absolute and then passed through
# FileRoutines.check_path before being stored.
parser.add_argument("-d",
                    "--sample_directory",
                    action="store",
                    dest="samples_dir",
                    required=True,
                    type=lambda s: FileRoutines.check_path(os.path.abspath(s)),
                    help="Directory with samples")
# The help text is assembled from adjacent string literals, so every fragment
# must end with a space; otherwise sentences run together in --help output.
parser.add_argument(
    "-s",
    "--samples",
    action="store",
    dest="samples",
    help="Comma-separated list of subdirectories(one per sample) to handle. "
    "If not set all subdirectories will be considered as containing samples. "
    "In sample directory should be one(in case SE reads) or two(in case PE reads) files. "
    "Filenames should contain '_1.fq' or '_1.fastq' for forward(left) reads, "
    "'_2.fq' or '_2.fastq' for reverse(right) reads and '.fq' or '.fastq' for SE reads"
)
parser.add_argument(
    "-o",
    "--output_dir",
예제 #7
0
import sys
import argparse

from RouToolPa.Routines import FileRoutines


parser = argparse.ArgumentParser()

# Required options.
parser.add_argument("-i", "--input", dest="input", required=True,
                    help="Input file")
parser.add_argument("-o", "--output", dest="output", required=True,
                    help="Output file")
parser.add_argument("-c", "--column_index", dest="column_index", type=int,
                    required=True,
                    help="Column to label")
parser.add_argument("-l", "--label", dest="label", required=True,
                    help="Label to use")

# Optional tuning of separators and label placement.
parser.add_argument("-s", "--column_separator", dest="column_separator",
                    default="\t",
                    help="Separator used in input file. Default: TAB")
parser.add_argument("-a", "--label_separator", dest="label_separator",
                    default="@",
                    help="Label separator. Default: @")
parser.add_argument("-p", "--label_position", dest="label_position",
                    default="first",
                    help="Label position. Allowed: first(default), last")

args = parser.parse_args()

labeling_options = {
    "column_separator": args.column_separator,
    "label_position": args.label_position,
    "label_separator": args.label_separator,
    "comments_prefix": "#",
}

FileRoutines.label_column_in_file(args.input, args.label, args.column_index,
                                  args.output, **labeling_options)
예제 #8
0
# Directory for per-file output; the value (including the string default)
# is passed through FileRoutines.check_path by argparse.
parser.add_argument("-d", "--top_hits_dir",
                    dest="top_hits_dir",
                    type=FileRoutines.check_path,
                    default="top_hits_dir/",
                    help="Directory to write intermediate(splited) output")
parser.add_argument("-r", "--retain_splited_output",
                    dest="retain", action="store_true",
                    help="Retain splited output")

args = parser.parse_args()

# Make sure the output directory exists before any input is handled.
FileRoutines.safe_mkdir(args.top_hits_dir)


def handle_input(filename):
    sys.stdout.write("Handling %s\n" % filename)
    not_significant_ids = IdList()
    not_found_ids = IdList()

    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    hmm_dict = SearchIO.index_db(index_file, filename, args.format)
    if args.output == "stdout":
        out_fd = sys.stdout
    else:
        out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
        out_fd.write("#query\thit\tevalue\tbitscore\n")
예제 #9
0


parser = argparse.ArgumentParser()

parser.add_argument("-i", "--input_file", action="store", dest="input_file",
                    help="Input file")
parser.add_argument("-c", "--column_number", action="store", dest="column_number", type=int,
                    help="Column number(0-based)")
# Adjacent help literals are concatenated, so the first one must end with a
# space to keep the two sentences apart in --help output.
parser.add_argument("-v", "--values", action="store", dest="values",
                    help="Lines with these values in corresponding column will be removed. "
                         "Values should be separated by commas")
parser.add_argument("-f", "--values_file", action="store", dest="values_file",
                    help="File with values. Ignored if -v/--values is set")
# Spell the default out as "TAB": a literal tab character is invisible in
# the rendered help.
parser.add_argument("-s", "--separator", action="store", dest="separator", default="\t",
                    help="Separator used in input file. Default: TAB")
parser.add_argument("-o", "--output_prefix", action="store", dest="output_prefix", default=None,
                    help="Output_prefix")
parser.add_argument("-e", "--header", action="store_true", dest="header", default=False,
                    help="Set if header is present in input file")

args = parser.parse_args()

# At least one source of values is required.
if (not args.values) and (not args.values_file):
    raise ValueError("Neither -v/--values nor -f/--values_file was set")

# Values given on the command line take precedence over the values file.
values = args.values.split(",") if args.values else IdList(filename=args.values_file)

FileRoutines.tsv_remove_by_column_value(args.input_file, args.column_number, values, separator=args.separator,
                                        header=args.header, outfile_prefix=args.output_prefix)
예제 #10
0
                    default=sys.stdin,
                    help="Input file")
# Output defaults to the stdout stream object itself, not a file name.
parser.add_argument("-o", "--output",
                    dest="output", default=sys.stdout,
                    help="Output file")
parser.add_argument("-k", "--column_number",
                    dest="column_number", type=int, required=True,
                    help="Column to be get values from (0-based).")
parser.add_argument("-s", "--separator",
                    dest="separator", default="\t",
                    help="Separator used in input file. Default: TAB")

args = parser.parse_args()

counting_options = {
    "output_file": args.output,
    "separator": args.separator,
    "comments_prefix": "#",
    "verbose": False,
}

FileRoutines.count_column_values_from_file(args.input, args.column_number,
                                           **counting_options)
예제 #11
0
                    "--output",
                    action="store",
                    dest="output",
                    required=True,
                    help="File to write clusters with single-copy clusters")
# The long option must not contain a space: "--label position" could only be
# given as one quoted shell word, so it was unusable in practice.
parser.add_argument(
    "-p",
    "--label_position",
    action="store",
    dest="label_position",
    default="first",
    help="Position of label. Allowed - first, last. Default - first")
parser.add_argument("-s",
                    "--separator",
                    action="store",
                    dest="separator",
                    default="@",
                    help="Separator to use. default - '@'")

args = parser.parse_args()

list_of_cluster_files = FileRoutines.make_list_of_path_to_files(args.input)

# The result is used below only for its length.
single_copy_clusters = SequenceClusterRoutines.extract_single_copy_clusters_from_files(
    list_of_cluster_files,
    args.output,
    label_elements=args.label,
    separator=args.separator,
    label_position=args.label_position)

print("Was found %i single-copy clusters" % len(single_copy_clusters))
예제 #12
0
                    action="store",
                    dest="max_memory_per_thread",
                    default="1G",
                    help="Maximum memory per thread. Default - 1G")
args = parser.parse_args()

# BAM preparation runs when either prepare_bam or mix_ends is set (see the
# branch below), and it derives file names from prepared_bam_prefix and a
# directory from temp_dir. Validate both options for both triggers so that
# a missing prefix cannot silently produce "None.bam".
bam_preparation_requested = args.prepare_bam or args.mix_ends
if bam_preparation_requested and not (args.prepared_bam_prefix
                                      and args.temp_dir):
    raise ValueError(
        "Options -e/--prepared_bam_prefix and -m/--temp_dir must be set if "
        "-p/--prepare_bam option or mix_ends mode is used")

SamtoolsV1.threads = args.threads

if bam_preparation_requested:
    FileRoutines.safe_mkdir(FileRoutines.check_path(args.temp_dir))
    prepared_pe_bam_file = "%s.bam" % args.prepared_bam_prefix
    # A separate BAM for unpaired reads is requested only when ends are mixed.
    prepared_unpaired_bam_file = (
        "%s.unpaired.bam" %
        args.prepared_bam_prefix) if args.mix_ends else None

    SamtoolsV1.prepare_bam_for_read_extraction(
        args.input,
        prepared_pe_bam_file,
        temp_file_prefix=args.temp_dir,
        max_memory_per_thread=args.max_memory_per_thread,
        bam_file_to_write_unpaired_reads=prepared_unpaired_bam_file)
if args.paired:
예제 #13
0
__author__ = 'Sergei F. Kliver'
import os
import shutil
import argparse
from RouToolPa.Tools.ImageMagick import Convert
from RouToolPa.Tools.BioFormats import BioConvert
from RouToolPa.Routines import FileRoutines

parser = argparse.ArgumentParser()

# The comma-separated input string is split and handed to
# FileRoutines.make_list_of_path_to_files at parse time.
parser.add_argument(
    "-i", "--input",
    dest="input",
    type=lambda value: FileRoutines.make_list_of_path_to_files(
        value.split(",")),
    help="Comma-separated list of scn files")
parser.add_argument("-t", "--threads",
                    dest="threads", type=int, default=1,
                    help="Number of threads")

# The output directory is passed through FileRoutines.check_path.
parser.add_argument("-o", "--output_directory",
                    dest="output",
                    type=FileRoutines.check_path,
                    help="Output directory")
예제 #14
0
STAR.path = args.star_dir

# An index is built only when a genome fasta file was supplied.
if args.genome_fasta:
    index_options = {
        "annotation_gtf": args.annotation_gtf,
        "junction_tab_file": args.junction_tab_file,
        "sjdboverhang": None,
        "genomeSAindexNbases": None,
        "genomeChrBinNbits": None,
        "genome_size": args.genome_size,
    }
    STAR.index(args.genome_dir, args.genome_fasta, **index_options)

# Explicitly requested samples win; otherwise the samples directory is
# queried through Pipeline.get_sample_list.
sample_list = args.samples or Pipeline.get_sample_list(args.samples_dir)

FileRoutines.safe_mkdir(args.output_dir)

for sample in sample_list:
    print("Handling %s" % sample)
    sample_dir = "%s/%s/" % (args.samples_dir, sample)
    alignment_sample_dir = "%s/%s/" % (args.output_dir, sample)
    FileRoutines.safe_mkdir(alignment_sample_dir)
    filetypes, forward_files, reverse_files, se_files = FileRoutines.make_lists_forward_and_reverse_files(
        sample_dir)

    print("\tAligning reads...")

    STAR.align_miRNA(
        args.genome_dir,
        se_files,
        output_dir=alignment_sample_dir,
예제 #15
0
                    type=float,
                    help="Maximum number of iterations")
parser.add_argument("-f", "--offset",
                    dest="offset", type=float,
                    help="Offset (works like gap extension penalty)")
parser.add_argument("-g", "--gap_open_penalty",
                    dest="gap_open_penalty", type=float,
                    help="Gap open penalty")

args = parser.parse_args()

FileRoutines.safe_mkdir(args.output)

MAFFT.threads = args.threads

# Expand the input into a list of paths, then align all files.
input_files = FileRoutines.make_list_of_path_to_files(args.input)
alignment_options = {
    "output_suffix": "alignment",
    "gap_open_penalty": args.gap_open_penalty,
    "offset": args.offset,
    "maxiterate": args.maxiterate,
    "quiet": args.quiet,
    "mode": args.mode,
    "number_of_processes": args.processes,
    "anysymbol": True,
}
MAFFT.parallel_align(input_files, args.output, **alignment_options)
예제 #16
0
)
parser.add_argument("-e",
                    "--header",
                    action="store_true",
                    dest="header",
                    help="Files contain headers. Default - False")
# The help text must name the real header flag (-e/--header); "-h" is
# argparse's built-in help option. The first literal ends with a space so
# the two sentences do not run together.
parser.add_argument(
    "-n",
    "--column_names_list",
    action="store",
    dest="column_names_list",
    type=lambda s: s.split(","),
    help="Comma-separated list of new names for columns used for file merging. "
    "Necessary if -e/--header option is specified, otherwise it is ignored")
parser.add_argument("-o",
                    "--output",
                    action="store",
                    dest="output",
                    default=sys.stdout,
                    help="Output file with allowed ids. Default - stdout")

args = parser.parse_args()

FileRoutines.merge_files_by_columns(args.file_list,
                                    args.column_index_list,
                                    args.output,
                                    separator="\t",
                                    column_names_list=args.column_names_list,
                                    comment_prefix="#",
                                    header=args.header)
예제 #17
0
__author__ = 'Sergei F. Kliver'

import argparse
from collections import OrderedDict
from RouToolPa.Routines import MultipleAlignmentRoutines, FileRoutines, MatplotlibRoutines
from RouToolPa.Collections.General import TwoLvlDict

parser = argparse.ArgumentParser()

# The comma-separated input string is split and handed to
# FileRoutines.make_list_of_path_to_files at parse time.
parser.add_argument(
    "-i", "--input",
    dest="input",
    required=True,
    type=lambda value: FileRoutines.make_list_of_path_to_files(
        value.split(",")),
    help="Comma-separated list of files/directories with alignments")
parser.add_argument(
    "-o", "--output_directory",
    dest="output_dir",
    default="./",
    help="Output directory to write count files. "
         "Default - current directory")
parser.add_argument("-f", "--format",
                    dest="format", default="fasta",
                    help="Format of alignments")
parser.add_argument("-g",
예제 #18
0
# Chunk selection: either a numeric range or an explicit list.
parser.add_argument("-a", "--starting_chunks_number",
                    dest="starting_chunks_number", type=int,
                    help="Starting chunk number")
parser.add_argument("-b", "--ending_chunks_number",
                    dest="ending_chunks_number", type=int,
                    help="Ending chunk number")
parser.add_argument("-n", "--chunks_number_list",
                    dest="chunks_number_list",
                    type=lambda value: value.split(","),
                    help="Comma-separated list of  chunk numbers")

# Output and formatting options.
parser.add_argument("-o", "--output",
                    dest="output", default=sys.stdout,
                    help="Output merged file")
parser.add_argument("-e", "--header_prefix",
                    dest="header_prefix", default="#",
                    help="Header prefix")
parser.add_argument("-s", "--sorting_options",
                    dest="sorting_options",
                    help="Sorting options for sort utility. "
                         "Default: not set, i.e. no sort")
parser.add_argument("-r", "--separator",
                    dest="separator", default="_",
                    help="Separator between prefix and chunk number in "
                         "chunk filename. Default: '_'")

args = parser.parse_args()

combine_options = {
    "starting_chunk": args.starting_chunks_number,
    "end_chunk": args.ending_chunks_number,
    "chunk_number_list": args.chunks_number_list,
    "chunks_suffix": args.chunks_suffix,
    "header_prefix": args.header_prefix,
    "sorting_options": args.sorting_options,
    "separator": args.separator,
}

FileRoutines.combine_chunks_with_header(args.chunks_dir, args.chunks_prefix,
                                        args.output, **combine_options)
예제 #19
0
def make_list_of_path_to_files_from_comma_sep_string(string):
    """Split *string* on commas and expand the pieces into file paths.

    :param string: comma-separated entries.
    :return: whatever ``FileRoutines.make_list_of_path_to_files`` returns for
        the list of split entries.
    """
    path_entries = string.split(",")
    return FileRoutines.make_list_of_path_to_files(path_entries)
예제 #20
0
                    action="store",
                    dest="input",
                    required=True,
                    type=FileRoutines.make_list_of_path_to_files_from_string,
                    help="Comma-separated list of files/directories")
# Output defaults to the stdout stream object itself, not a file name.
parser.add_argument("-o", "--output",
                    dest="output", default=sys.stdout,
                    help="Output merged file")
parser.add_argument("-e", "--header_prefix",
                    dest="header_prefix", default="#",
                    help="Header prefix")
parser.add_argument("-s", "--sorting_options",
                    dest="sorting_options",
                    help="Sorting options for sort utility. "
                         "Default: not set, i.e. no sort")

args = parser.parse_args()

combine_options = {
    "header_prefix": args.header_prefix,
    "sorting_options": args.sorting_options,
}

FileRoutines.combine_files_with_header(args.input, args.output,
                                       **combine_options)
예제 #21
0
                    type=FileRoutines.check_path,
                    help="Directory to write fam files named by species names")
parser.add_argument("-d", "--syn_file", action="store", dest="syn_file", required=True,
                    help="File with taxa ids and species names")
parser.add_argument("-k", "--key_index", action="store", dest="key_index", type=int, default=0,
                    help="Key column in file with synonyms(0-based). Default: 0")
parser.add_argument("-v", "--value_index", action="store", dest="value_index", type=int, default=1,
                    help="Value column in file with synonyms(0-based). Default: 1")
parser.add_argument("-c", "--comments_prefix", action="store", dest="comments_prefix", default="#",
                    help="Prefix of comments in synonyms file. Default - '#'")
parser.add_argument("-m", "--columns_separator", action="store", dest="separator", default="\t",
                    help="Column separator in file with synonyms")
parser.add_argument("-e", "--header", action="store_true", dest="header", default=False,
                    help="Header is present in synonyms file. Default - False")

args = parser.parse_args()

# Mapping read from the synonyms file: taxon id -> species name.
syn_dict = SynDict()
syn_dict.read(args.syn_file, header=args.header, separator=args.separator, key_index=args.key_index,
              value_index=args.value_index, comments_prefix=args.comments_prefix)

FileRoutines.safe_mkdir(args.output_files_dir)
input_files = os.listdir(args.input_files_dir)
for filename in input_files:
    # Only the base name and the extension are needed; the directory part
    # returned by split_filename is ignored.
    _directory, taxon_id, extension = FileRoutines.split_filename(filename)
    if taxon_id not in syn_dict:
        print("Species name was not found for taxon %s" % taxon_id)
        continue
    # os.path.join gives the same path as plain concatenation when the
    # directory ends with a separator, and still a valid path when it does not.
    source_path = os.path.join(args.input_files_dir, filename)
    target_path = os.path.join(args.output_files_dir,
                               "%s%s" % (syn_dict[taxon_id], extension))
    shutil.copy(source_path, target_path)
예제 #22
0
                    action="store",
                    dest="syn_file",
                    required=True,
                    help="Synonym file")
parser.add_argument("-e", "--separator",
                    dest="separator", default="\t",
                    help="Separator used in input file")
parser.add_argument("-o", "--output",
                    dest="output",
                    help="Output file")

args = parser.parse_args()

# Only the column and the input separator come from the command line; the
# synonym-file layout is fixed here.
replacement_options = {
    "column": args.column_number,
    "comment_prefix": None,
    "separator": args.separator,
    "syn_header": False,
    "syn_separator": "\t",
    "syn_key_index": 0,
    "syn_value_index": 1,
    "syn_comment_prefix": None,
}

FileRoutines.replace_column_value_by_syn(args.input, args.syn_file,
                                         args.output, **replacement_options)
예제 #23
0
#!/usr/bin/env python
import os
from Bio import SeqIO
from RouToolPa.Routines import FileRoutines

# Hard-coded project locations; the script changes into workdir, so the
# relative index file "tmp.idx" below is created there.
workdir = "/home/mahajrod/Genetics/Projects/nxf/nxf_arthropoda/"
data_dir = "/home/mahajrod/Genetics/Projects/nxf/nxf_arthropoda/data/"

os.chdir(workdir)

# Collect input file paths from the data directory.
data_files = FileRoutines.make_list_of_path_to_files([data_dir])

# Index all input files as GenBank records, backed by the "tmp.idx" index.
record_dict = SeqIO.index_db("tmp.idx", data_files, format="genbank")

# Header of the tab-separated report written to stdout.
print("#organism\ttaxonomy\tregion_id\ttranscript_id\tproduct\texon_len")
for record_id in record_dict:
    # NOTE(review): record_dict[record_id] is subscripted several times per
    # record below; with an index-backed dictionary each access is presumably
    # a fresh lookup - consider fetching the record once.
    for feature in record_dict[record_id].features:
        # Only mRNA features contribute a report line.
        if feature.type == "mRNA":
            # Build the report line column by column, in header order.
            mRNA_string = ""
            mRNA_string += "%s" % record_dict[record_id].annotations["organism"]
            mRNA_string += "\t%s" % (";".join(
                record_dict[record_id].annotations["taxonomy"]))
            mRNA_string += "\t%s" % record_id
            # Missing qualifiers are reported as ".".
            mRNA_string += "\t%s" % (feature.qualifiers["transcript_id"][0]
                                     if "transcript_id" in feature.qualifiers
                                     else ".")
            mRNA_string += "\t%s" % (feature.qualifiers["product"][0] if
                                     "product" in feature.qualifiers else ".")

            location_lenths = []
예제 #24
0
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'

import argparse
from RouToolPa.Routines import MultipleAlignmentRoutines, FileRoutines

parser = argparse.ArgumentParser()

# The comma-separated input string is split and handed to
# FileRoutines.make_list_of_path_to_files at parse time.
parser.add_argument(
    "-i", "--input",
    dest="input",
    required=True,
    type=lambda value: FileRoutines.make_list_of_path_to_files(
        value.split(",")),
    help="Comma-separated list of files/directories with alignments")
parser.add_argument(
    "-o", "--output_directory",
    dest="output_dir",
    default="./",
    help="Output directory to write resulting files. "
         "Default - current directory")
parser.add_argument("-f", "--format",
                    dest="format", default="fasta",
                    help="Format of alignments")
parser.add_argument("-g",
예제 #25
0
                    dest="separator",
                    default="@",
                    help="Separator to use. default - '@'")
# Column layout of the synonym file.
parser.add_argument("-k", "--key_column_index",
                    dest="key_column_index", type=int, default=0,
                    help="Index of key column in synonym file. Default: 0")
parser.add_argument("-v", "--value_column_index",
                    dest="value_column_index", type=int, default=1,
                    help="Index of value column in synonym file.Default: 1")
args = parser.parse_args()

# Without an explicit label, fall back to the second element returned by
# FileRoutines.split_filename for the cluster file.
label = args.label or FileRoutines.split_filename(args.cluster_file)[1]

labeling_options = {
    "separator": args.separator,
    "label_position": args.label_position,
    "key_index": args.key_column_index,
    "value_index": args.value_column_index,
}

SequenceClusterRoutines.label_cluster_elements_from_file(
    args.cluster_file, label, args.output, **labeling_options)
예제 #26
0
    "--min_species_number",
    action="store",
    dest="min_species_number",
    default=1,
    type=int,
    help="Minimum number of species with family to retain family. Default: 1")
# The directory value (including the string default) is passed through
# FileRoutines.check_path by argparse.
parser.add_argument("-f", "--filtered_families_directory",
                    dest="filtered_family_dir",
                    type=FileRoutines.check_path,
                    default="filtered_fam",
                    help="Directory to write filtered_families")
args = parser.parse_args()

FileRoutines.safe_mkdir(args.filtered_family_dir)
species_list = sorted(args.species_set)
if args.white_list_file and args.black_list_file:
    raise ValueError("Black list and white list cant be set simultaneously")

# Both lists start empty; at most one of them is filled from a file.
black_list, white_list = IdList(), IdList()
if args.black_list_file:
    black_list.read(args.black_list_file)
if args.white_list_file:
    white_list.read(args.white_list_file)

# The same header line goes to the main and to the filtered output.
header_line = "FAMILYDESC\tFAMILY\t%s\n" % ("\t".join(species_list))
out_fd = open(args.cafe_file, "w")
filtered_fd = open("%sfiltered_families.cafe" % args.filtered_family_dir, "w")
out_fd.write(header_line)
filtered_fd.write(header_line)
species_filtered_fd_list = OrderedDict()
예제 #27
0
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'
import argparse
from RouToolPa.Tools.MultipleAlignment import GUIDANCE2
from RouToolPa.Routines import FileRoutines


parser = argparse.ArgumentParser()

# Input / output.
parser.add_argument("-i", "--input", dest="input", required=True,
                    type=lambda value: FileRoutines.make_list_of_path_to_files(
                        value.split(",")),
                    help="Comma-separated list of files or directory with "
                         "files containing sequences to be aligned")
parser.add_argument("-p", "--processes", dest="processes", type=int,
                    default=1,
                    help="Number of simultaneously running alignments")
parser.add_argument("-o", "--output_directory", dest="output",
                    required=True,
                    type=FileRoutines.check_path,
                    help="Output directory")

# Execution backend and slurm-related settings.
parser.add_argument("-d", "--handling_mode", dest="handling_mode",
                    default="local",
                    help="Handling mode. Allowed: local(default), slurm")
parser.add_argument("-j", "--slurm_job_name", dest="slurm_job_name",
                    default="JOB",
                    help="Slurm job name. Default: JOB")
parser.add_argument("-m", "--slurm_max_jobs", dest="slurm_max_jobs",
                    type=int, default=1000,
                    help="Slurm max jobs. Default: 1000")
parser.add_argument("-y", "--slurm_log_prefix", dest="slurm_log_prefix",
                    help="Slurm log prefix. ")
parser.add_argument("-z", "--slurm_cmd_log_file", dest="slurm_cmd_log_file",
                    help="Slurm cmd logfile")
parser.add_argument("-l", "--slurm_error_log_prefix",
                    dest="slurm_error_log_prefix",
                    help="Slurm error log prefix")
parser.add_argument("-e", "--max_memory_per_task", action="store", dest="max_memory_per_task", default="5000",
예제 #28
0
                    dest="indel_ReadPosRankSum",
                    type=float,
                    default=-20.0,
                    help="Indel ReadPosRankSum threshold. Default -   -20.0")
#parser.add_argument("--indel_InbreedingCoeff", action="store", dest="indel_InbreedingCoeff", type=float, default=-0.8,
#                    help="Indel InbreedingCoeff threshold. Default -   -0.8")
parser.add_argument("--indel_FS",
                    dest="indel_FS", type=float, default=200.0,
                    help="Indel FS threshold. Default - 200.0")

args = parser.parse_args()

# The GATK directory is passed through FileRoutines.check_path before it is
# stored as the tool's jar path.
gatk_dir = FileRoutines.check_path(args.gatk_dir)
VariantFiltration.jar_path = gatk_dir

VariantFiltration.filter_bad_variants(
    args.reference,
    args.input_vcf,
    args.output_prefix,
    snp_filter_name=args.snp_filter_name,
    snp_QD=args.snp_QD,
    snp_FS=args.snp_FS,
    snp_MQ=args.snp_MQ,
    #snp_HaplotypeScore=args.snp_HaplotypeScore,
    snp_MappingQualityRankSum=args.snp_MappingQualityRankSum,
    snp_ReadPosRankSum=args.snp_ReadPosRankSum,
    indel_filter_name=args.indel_filter_name,
    indel_QD=args.indel_QD,
    indel_ReadPosRankSum=args.indel_ReadPosRankSum,
예제 #29
0
from RouToolPa.Routines import FileRoutines



parser = argparse.ArgumentParser()

parser.add_argument("-i", "--input", dest="input", required=True,
                    help="Input file")
# Output defaults to the stdout stream object itself, not a file name.
parser.add_argument("-o", "--output", dest="output", default=sys.stdout,
                    help="Output file")
parser.add_argument("-k", "--key_column", dest="key_column", type=int,
                    required=True,
                    help="Column to be used as key(0-based). ")
parser.add_argument("-y", "--syn_file", dest="syn_file", required=True,
                    help="Synonym file with new column values")
parser.add_argument("-s", "--separator", dest="separator", default="\t",
                    help="Separator used in input file. Default: TAB")

parser.add_argument("-n", "--new_column_name", dest="new_column_name",
                    required=True,
                    help="Name of new column")

args = parser.parse_args()

# "." is passed as absent_value.
column_options = {
    "new_column_name": args.new_column_name,
    "separator": args.separator,
    "absent_value": ".",
}

FileRoutines.add_add_new_column_by_key_column(args.input, args.syn_file,
                                              args.key_column, args.output,
                                              **column_options)
예제 #30
0
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'
import os
import argparse
from Pipelines.TenX import TenXAlignmentPipeline
from RouToolPa.Routines import FileRoutines

parser = argparse.ArgumentParser()

# The samples directory is made absolute and then passed through
# FileRoutines.check_path at parse time.
parser.add_argument("-d", "--sample_directory",
                    dest="samples_dir",
                    required=True,
                    type=lambda path: FileRoutines.check_path(
                        os.path.abspath(path)),
                    help="Directory with samples")
# The comma-separated value is split into a list of sample names.
parser.add_argument(
    "-s", "--samples",
    dest="samples",
    type=lambda value: value.split(","),
    help="Comma-separated list of subdirectories(one per sample) to handle. "
         "If not set all subdirectories will be considered as containing "
         "samples")
parser.add_argument(
    "-o",
    "--output_dir",
    action="store",
    dest="output_dir",
    type=lambda s: FileRoutines.check_path(os.path.abspath(s)),
    default="./",