コード例 #1
0
                    default="./",
                    help="Directory to write output")
# -m/--min_len: integer threshold, 1 unless given on the command line.
parser.add_argument("-m", "--min_len", action="store", dest="min_len", type=int, default=1,
                    help="Minimum length of read to output")

args = parser.parse_args()

# Matches a run of one or more "N" characters anchored at the end of a string.
n_regexp = re.compile("N+$")

# Single-end mode: entries read from args.input_se are written to
# "<out_dir><prefix>.filtered<extension>", where prefix and extension come
# from splitting the input file name.
if args.input_se:

    se_directory, se_prefix, se_extension = split_filename(args.input_se)
    se_in_fd = open(args.input_se, "r")
    # NOTE(review): check_path is defined elsewhere; presumably it normalises
    # the directory so the file name can be appended directly - confirm.
    se_out_file = "%s%s.filtered%s" % (check_path(
        args.out_dir), se_prefix, se_extension)
    se_out_fd = open(se_out_file, "w")

    while True:
        # read_entry (defined elsewhere) yields the four fields of one entry;
        # a None name stops the loop (presumably end of input - confirm).
        name, sequence, separator, quality = read_entry(se_in_fd)
        if name is None:
            break
        # Look for a run of "N" at the very end of the sequence.
        match = n_regexp.search(sequence)
        if match is None:
            # No trailing run of "N": write the entry out unchanged.
            se_out_fd.write("%s\n%s\n%s\n%s\n" %
                            (name, sequence, separator, quality))
        elif match.start() >= args.min_len:
            se_out_fd.write("%s\n%s\n%s\n%s\n" %
コード例 #2
0
                    help="Remove nucleotide substitutions from output(preserve only AA substitutions)")
# -c/--convert_aa_to_single_letter: boolean flag stored as convert_to_single_letter.
parser.add_argument("-c",
                    "--convert_aa_to_single_letter",
                    action="store_true",
                    dest="convert_to_single_letter",
                    help="Convert aminoacids to single letters")

args = parser.parse_args()

# Replace the raw input option with the list built by make_list_of_path_to_files.
args.input = make_list_of_path_to_files(args.input)

# Gene aliases stay empty unless an alias file was supplied.
gene_alias_dict = SynDict()
if args.gene_alias_file:
    gene_alias_dict.read(args.gene_alias_file, split_values=False)

# The literal value "stdout" selects standard output; anything else is a file path.
if args.output == "stdout":
    out_fd = sys.stdout
else:
    out_fd = open(args.output, "w")

summary_dict = TwoLvlDict()
for filename in args.input:
    directory, prefix, extension = split_filename(filename)

    # Choose the key under which this file is recorded in summary_dict:
    #   dir path + extension -> the file name exactly as given
    #   dir path only        -> directory + prefix (prefix alone if no directory)
    #   extension only       -> prefix + extension
    #   neither              -> prefix with every occurrence of suffix_to_remove dropped
    if args.write_dir_path:
        if args.write_ext:
            name = filename
        elif directory:
            name = directory + prefix
        else:
            name = prefix
    elif args.write_ext:
        name = prefix + extension
    else:
        name = prefix
        if args.suffix_to_remove in name:
            name = name.replace(args.suffix_to_remove, "")
    summary_dict[name] = OrderedDict()
    with open(filename, "r") as file_fd:
        file_fd.readline()
        for line in file_fd:
コード例 #3
0
if args.threads == 1:
    # Single-threaded run: call TRF directly, then convert the .dat report it
    # is expected to leave behind.
    # NOTE(review): disable_html_output receives args.enable_html_output - the
    # names suggest opposite meanings; confirm against the option definition.
    TRF.search_tandem_repeats(args.input_file,
                              matching_weight=args.matching_weight,
                              mismatching_penalty=args.mismatching_penalty,
                              indel_penalty=args.indel_penalty,
                              match_probability=args.matching_probability,
                              indel_probability=args.indel_probability,
                              min_alignment_score=args.min_score,
                              max_period=args.max_period_size,
                              report_flanking_sequences=args.report_flanking_sequences,
                              make_dat_file=True,
                              disable_html_output=args.enable_html_output)

    # Report name: input file name (prefix + extension) followed by the seven
    # numeric search parameters, dot-separated, with a ".dat" ending.
    input_name_parts = split_filename(args.input_file)
    report_fields = (input_name_parts[1] + input_name_parts[2],
                     args.matching_weight,
                     args.mismatching_penalty,
                     args.indel_penalty,
                     args.matching_probability,
                     args.indel_probability,
                     args.min_score,
                     args.max_period_size)
    trf_report = "%s.%i.%i.%i.%i.%i.%i.%i.dat" % report_fields
    TRF.convert_trf_report(trf_report, args.output_prefix)

else:

    TRF.parallel_search_tandem_repeat(
        args.input_file,
        args.output_prefix,
        matching_weight=args.matching_weight,
        mismatching_penalty=args.mismatching_penalty,
        indel_penalty=args.indel_penalty,
        match_probability=args.matching_probability,
コード例 #4
0
# Command-line interface: a required directory of tree files, the tree format
# code handed to the tree parser, and an output destination.
parser = argparse.ArgumentParser()

parser.add_argument("-i",
                    "--tree_dir",
                    action="store",
                    dest="tree_dir",
                    required=True,
                    type=check_path,
                    help="Directory with trees")
parser.add_argument("-f",
                    "--tree_format",
                    action="store",
                    dest="tree_format",
                    default=1,
                    type=int,
                    help="Format of input trees")
parser.add_argument("-o",
                    "--output_file",
                    action="store",
                    dest="output_file",
                    default="stdout",
                    help="Output file with leaves of trees. Default: stdout")

args = parser.parse_args()

# Destination for the leaf table: standard output when the option holds the
# literal "stdout", otherwise a freshly opened file.
out_fd = sys.stdout if args.output_file == "stdout" else open(args.output_file, "w")

tree_files_list = os.listdir(args.tree_dir)

# Maps tree name (file name prefix) -> list of leaf names of that tree.
names_dict = SynDict()

for tree_file in tree_files_list:
    tree_name = split_filename(tree_file)[1]
    # The path is built by plain concatenation, so args.tree_dir has to end
    # with a path separator (presumably ensured by check_path - confirm).
    # Only the first line of each file is parsed as a tree.
    with open("%s%s" % (args.tree_dir, tree_file), "r") as tree_fd:
        tree = Tree(tree_fd.readline().strip(), format=args.tree_format)
    leaves_list = [node.name for node in tree.traverse() if node.is_leaf()]
    names_dict[tree_name] = leaves_list

# Write through the handle opened above. The parser defines no "outp_fd"
# attribute, so the former args.outp_fd raised AttributeError here.
names_dict.write(out_fd, splited_values=True)
if args.output_file != "stdout":
    out_fd.close()
コード例 #5
0
        abs_path_source_reads.append("%s/%s" % (working_dir, filename))
"""

# Per-iteration set-up. Note that range(1, N) runs iterations 1 .. N - 1.
for iteration_index in range(1, args.number_of_iterations):

    # Each iteration starts from the common working directory.
    os.chdir(working_dir)

    # Paths and file names derived from the iteration label.
    iteration = "iteration_%i" % iteration_index
    iteration_dir = "%s/%s" % (working_dir, iteration)
    iteration_ref = "%s/%s_reference.fasta" % (iteration_dir, iteration)
    iteration_ref_index = "%s/%s_reference.idx" % (iteration_dir, iteration)
    base_prefix = "%s/%s_reference_with_rev_com" % (iteration_dir, iteration)
    iteration_ref_with_rev_com = "%s/%s_reference_with_rev_com.fasta" % (iteration_dir, iteration)
    kmer_file = "%s_%i_mer.kmer" % (base_prefix, args.kmer_length)
    masurca_config_file = "masurca_%s.config" % iteration
    left_reads_prefix = split_filename(abs_path_left_source_reads)[1]
    right_reads_prefix = split_filename(abs_path_right_source_reads)[1]

    left_reads_se = "%s.se.fastq" % left_reads_prefix
    right_reads_se = "%s.se.fastq" % right_reads_prefix
    left_reads_filtered = "%s.filtered.fastq" % left_reads_prefix
    right_reads_filtered = "%s.filtered.fastq" % right_reads_prefix

    try:
        os.mkdir(iteration_dir)
    except OSError:
        # An already existing directory is fine (e.g. a re-run); any other
        # failure (permissions, missing parent, ...) must not be hidden.
        if not os.path.isdir(iteration_dir):
            raise

    # Copy the current reference into the iteration directory and index it there.
    shutil.copyfile(iteration_reference_file, iteration_ref)
    os.chdir(iteration_dir)
    iteration_reference_dict = SeqIO.index_db(iteration_ref_index, iteration_ref, format="fasta")
コード例 #6
0
                    help="Suffix of fam files. Default: .fam")
# -o/--output: destination of the summary table; the literal "stdout"
# (the default) selects standard output. The help text previously repeated
# the description of the suffix option.
parser.add_argument("-o",
                    "--output",
                    action="store",
                    dest="output",
                    default="stdout",
                    help="Output file. Default: stdout")

args = parser.parse_args()

# The literal "stdout" selects standard output; anything else is a file path.
out_fd = sys.stdout if args.output == "stdout" else open(args.output, "w")

# Parallel lists: species names and the file suffix to append for each of them.
species_list = []
suffix_list = []
if args.use_basename:
    # Every entry of the input directory is taken as one family file:
    # its base name is the species, its extension the suffix.
    for filename in sorted(os.listdir(args.input)):
        # The directory part is unused; it is no longer bound to "dir" so the
        # builtin of that name is not shadowed.
        _directory, basename, ext = split_filename(filename)
        species_list.append(basename)
        suffix_list.append("%s" % ext)
else:
    # Species come from the command line and all share one suffix.
    species_list = sorted(args.species_set)
    suffix_list = [args.suffix] * len(species_list)

out_fd.write("#species\tnumber_of_families\tnumber_of_proteins\n")
for species, suffix in zip(species_list, suffix_list):
    # The file path is args.input + species + suffix, joined without a separator.
    # Read as a tab-separated table: column 0 is the key, column 1 holds
    # comma-separated values.
    fam_dict = SynDict()
    fam_dict.read("%s%s%s" % (args.input, species, suffix),
                  separator="\t",
                  split_values=True,
                  values_separator=",",
                  key_index=0,
                  value_index=1)