Beispiel #1
0
    def extract_proteins_from_alignments(self, dir_with_alignments, output_dir):
        """
        Run sequence extraction on every alignment file and store the results in output_dir.

        :param dir_with_alignments: a single path given as str, or a collection of paths;
                                    it is expanded to a list of files by
                                    self.make_list_of_path_to_files
        :param output_dir: directory for the resulting files; it is passed through
                           self.check_path and then handed to self.safe_mkdir
        :return: None
        """
        target_dir = self.check_path(output_dir)

        # A lone string is wrapped into a list, anything else is forwarded untouched
        if isinstance(dir_with_alignments, str):
            sources = [dir_with_alignments]
        else:
            sources = dir_with_alignments
        alignment_files = self.make_list_of_path_to_files(sources)

        self.safe_mkdir(target_dir)
        # NOTE(review): imported locally, presumably to avoid a circular import - confirm
        from RouToolPa.Routines import MultipleAlignmentRoutines

        for alignment_path in alignment_files:
            name_parts = self.split_filename(alignment_path)
            # Elements 1 and 2 of split_filename() are appended directly to the directory
            # (presumably base name and extension - verify against split_filename)
            extracted_path = "%s%s%s" % (target_dir, name_parts[1], name_parts[2])
            MultipleAlignmentRoutines.extract_sequences_from_alignment(alignment_path, extracted_path)
Beispiel #2
0
args = parser.parse_args()

# Indexed below first by alignment name, then by species
unique_position_dict = TwoLvlDict()

FileRoutines.safe_mkdir(args.output_dir)

for alignment_file in args.input:
    alignment_name_list = FileRoutines.split_filename(alignment_file)
    alignment_name = alignment_name_list[1]
    output_prefix = "%s/%s.unique_positions" % (args.output_dir, alignment_name)

    # The value returned for the alignment is stored under the alignment name
    unique_position_dict[alignment_name] = \
        MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(
            alignment_file, output_prefix,
            format=args.format, gap_symbol="-",
            return_mode="relative", verbose=False)

species_list = unique_position_dict.sl_keys()

# Regroup the values: for every species collect its value from each alignment,
# keeping the iteration order of unique_position_dict
data_dict = OrderedDict()

for species in species_list:
    data_dict[species] = [unique_position_dict[alignment][species]
                          for alignment in unique_position_dict]

# One list per species, in the order the species were inserted
data_list = list(data_dict.values())
Beispiel #3
0
                    required=True,
                    help="Output file with protein alignment")
# NOTE: action="store" is argparse's default action, so it is not spelled out below.

# Format of the alignment files
parser.add_argument("-f", "--format", dest="format", default="fasta",
                    help="Format of alignments. Default: fasta")
# Symbol treated as a gap
parser.add_argument("-g", "--gap_symbol", dest="gap_symbol", default="-",
                    help="Gap symbol in alignment. Default: '-'")
# Number of the genetic code table, converted to int by argparse
parser.add_argument("-t", "--genetic_code", dest="genetic_code", type=int, default=1,
                    help="Genetic code to use(NCBI tables) . Default: 1(standart)")

args = parser.parse_args()

# Hand the parsed command line options over to the library routine
MultipleAlignmentRoutines.translate_codon_alignment(
    args.codon_alignment,
    args.protein_alignment,
    format=args.format,
    gap_symbol=args.gap_symbol,
    table=args.genetic_code,
)
Beispiel #4
0
# NOTE: action="store" is argparse's default action, so it is not spelled out below.

# Kind of alignment; forwarded as alignment_type
parser.add_argument("-t", "--type", dest="type", default="nucleotide",
                    help="Alignment type. Allowed: nucleotide(default), codon, protein")
# Length of the flanks, converted to int by argparse
parser.add_argument("-l", "--flank_length", dest="flank_length", type=int, default=0,
                    help="Flank length. Default: 0, i.e no flanks will be included in the output file")

args = parser.parse_args()

# Positional arguments first, then the tuning options; verbose output is always on
MultipleAlignmentRoutines.get_specific_positions(args.input,
                                                 args.reference_sequence_id,
                                                 args.position_list,
                                                 args.output_prefix,
                                                 format=args.format,
                                                 gap_symbol=args.gap_symbol,
                                                 verbose=True,
                                                 alignment_type=args.type,
                                                 flank_length=args.flank_length)
Beispiel #5
0
__author__ = 'Sergei F. Kliver'

import argparse
from RouToolPa.Routines import MultipleAlignmentRoutines

parser = argparse.ArgumentParser()

# All three options are mandatory.
# NOTE: action="store" is argparse's default action, so it is not spelled out below.
parser.add_argument("-i", "--input", dest="input", required=True,
                    help="File with alignment")
parser.add_argument("-c", "--coordinates", dest="coordinates", required=True,
                    help="File with coordinates of gene alignments")
parser.add_argument("-o", "--output", dest="output", required=True,
                    help="File to write alignment prepared for Codeml")

args = parser.parse_args()

# The alignment format is fixed to fasta; there is no command line option for it
MultipleAlignmentRoutines.prepare_multigene_alignment_for_codeml(args.input,
                                                                 args.coordinates,
                                                                 args.output,
                                                                 format="fasta")
Beispiel #6
0
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'

import argparse
from RouToolPa.Routines import MultipleAlignmentRoutines


parser = argparse.ArgumentParser()

# NOTE: action="store" is argparse's default action, so it is not spelled out below.
parser.add_argument(
    "-i", "--input",
    dest="input",
    required=True,
    help="Input file with alignment",
)
parser.add_argument(
    "-o", "--output_prefix",
    dest="output_prefix",
    required=True,
    help="Prefix of output files",
)
parser.add_argument(
    "-f", "--format",
    dest="format",
    default="fasta",
    help="Alignment format. Default: fasta",
)
# Table number is converted to int by argparse
parser.add_argument(
    "-g", "--genetic_code_table",
    dest="genetic_code_table",
    type=int,
    default=1,
    help="Genetic code table number",
)
# Plain flag: True only when given on the command line
parser.add_argument(
    "-r", "--remove_Ns",
    dest="remove_Ns",
    action="store_true",
    default=False,
    help="Remove codon columns with Ns. Default:False",
)

args = parser.parse_args()

# Forward the parsed options to the library routine
MultipleAlignmentRoutines.extract_degenerate_sites_from_codon_alignment_from_file(
    args.input,
    args.output_prefix,
    genetic_code_table=args.genetic_code_table,
    format=args.format,
    remove_codon_columns_with_Ns=args.remove_Ns,
)
Beispiel #7
0
from RouToolPa.Routines import MultipleAlignmentRoutines

parser = argparse.ArgumentParser()

# NOTE: action="store" is argparse's default action, so it is not spelled out below.
# Input and output prefix are mandatory, the format falls back to fasta
parser.add_argument("-i", "--input", dest="input", required=True,
                    help="Input file with codon alignment")
parser.add_argument("-o", "--output_prefix", dest="output_prefix", required=True,
                    help="Prefix of output files")
parser.add_argument("-f", "--format", dest="format", default="fasta",
                    help="Format of thr input alignment. Default: fasta")

args = parser.parse_args()

# Forward the parsed options to the library routine
MultipleAlignmentRoutines.extract_codon_positions_from_file(
    args.input,
    args.output_prefix,
    format=args.format,
)
Beispiel #8
0
                    type=lambda x: FileRoutines.make_list_of_path_to_files(x.split(",")),
                    help="Comma-separated list of files or directory with files "
                         "containing alignments(one alignment per file)")
parser.add_argument("-n", "--max_gap_number", action="store", dest="max_gap_number", default=0, type=int,
                    help="Maximum number of gaps to retain column")
# Without a default, omitting -o left args.output as None, which was then passed to
# safe_mkdir() and formatted into the output file names as the text "None".
# argparse runs a string default through `type`, so "./" is handled by check_path
# exactly like a value given on the command line.
parser.add_argument("-o", "--output_directory", action="store", dest="output", type=FileRoutines.check_path,
                    default="./",
                    help="Output directory. Default: current directory")
parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-",
                    help="Gap symbol")
parser.add_argument("-s", "--suffix", action="store", dest="suffix", default=".gaps_removed",
                    help="Suffix to use in output files. Default: '.gaps_removed'")
parser.add_argument("-f", "--format", action="store", dest="format", default="fasta",
                    help="Format of alignment")
# The flag only controls the "Handling ..." progress message printed in the loop below
parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                    help="Print name of each alignment file while it is handled. Default - no")

args = parser.parse_args()

FileRoutines.safe_mkdir(args.output)

# Filter every alignment independently and write the result next to the others
for alignment_file in args.input:
    splited_filename = FileRoutines.split_filename(alignment_file)
    if args.verbose:
        print ("Handling %s ..." % alignment_file)
    # Directory, element 1 of the split name, suffix and element 2 are joined without
    # separators (presumably check_path() returns the directory with a trailing
    # separator - verify against FileRoutines.check_path)
    output_filename = "%s%s%s%s" % (args.output, splited_filename[1], args.suffix, splited_filename[2])
    alignment = AlignIO.read(alignment_file, args.format)
    filtered_alignment = MultipleAlignmentRoutines.remove_columns_with_gaps(alignment, args.max_gap_number,
                                                                            gap_symbol=args.gap_symbol)
    AlignIO.write(filtered_alignment, output_filename, args.format)

Beispiel #9
0
    "--output_directory",
    action="store",
    dest="output_dir",
    default="./",
    help=
    "Output directory to write resulting files. Default - current directory")
parser.add_argument("-f",
                    "--format",
                    action="store",
                    dest="format",
                    default="fasta",
                    help="Format of alignments")
parser.add_argument("-g",
                    "--gap_symbol",
                    action="store",
                    dest="gap_symbol",
                    default="-",
                    help="Gap symbol. Default - '-'")

args = parser.parse_args()

# Result files are written into the output directory, so make sure it is there
# before the first file is produced (safe_mkdir is expected to create a missing
# directory and to tolerate an existing one - verify against FileRoutines.safe_mkdir)
FileRoutines.safe_mkdir(args.output_dir)

# One position matrix file per input alignment, named after element 1 of the split filename
for alignment_file in args.input:
    alignment_name_list = FileRoutines.split_filename(alignment_file)
    output_file = "%s/%s.position_matrix" % (args.output_dir,
                                             alignment_name_list[1])
    MultipleAlignmentRoutines.get_position_presence_matrix_fom_file(
        alignment_file,
        output_file,
        format=args.format,
        gap_symbol=args.gap_symbol)
Beispiel #10
0
                    dest="format",
                    default="fasta",
                    help="Alignment file format. Default: fasta")
# Flag: True only when given on the command line
parser.add_argument(
    "-a",
    "--align_variants",
    action="store_true",
    dest="align_variants",
    help="Align variants between species by coordinate. Default: False")
# Optional: stays None when the option is not given
parser.add_argument(
    "-t",
    "--target_sequence_id",
    action="store",
    dest="target_sequence_id",
    help="Target sequence id. Variants specific for this sequence will be "
    "extracted into separated file. Default: not set")
args = parser.parse_args()

# The alignment format comes from the --format option (default "fasta") instead of
# being hard-coded, so that the option is no longer silently ignored.
# Output type, variant separator and absent symbol are fixed for this script.
MultipleAlignmentRoutines.call_variants_from_multiple_alignment_from_file(
    args.input,
    args.output_prefix,
    args.reference_sequence_id,
    gap_symbol=args.gap_symbol,
    verbose=True,
    format=args.format,
    align_variants=args.align_variants,
    output_type="hgvs",
    variant_separator=",",
    target_sequence_id=args.target_sequence_id,
    absent_symbol="")
Beispiel #11
0
    "-t",
    "--type",
    action="store",
    dest="type",
    default="nucleotide",
    help="Alignment type. Allowed: nucleotide(default), codon, protein")
# Length of the flanks, converted to int by argparse.
# NOTE: action="store" is argparse's default action, so it is not spelled out.
parser.add_argument("-l", "--flank_length", dest="flank_length", type=int, default=0,
                    help="Flank length. Default: 0, i.e no flanks will be included in the output file")

args = parser.parse_args()

# Positional arguments first, then the tuning options; verbose output is always on
MultipleAlignmentRoutines.get_specific_positions_for_multiple_files(args.input_dir,
                                                                    args.position_file,
                                                                    args.reference_sequence_id,
                                                                    args.output_dir,
                                                                    alignment_file_suffix=args.alignment_file_suffix,
                                                                    format=args.format,
                                                                    gap_symbol=args.gap_symbol,
                                                                    verbose=True,
                                                                    alignment_type=args.type,
                                                                    flank_length=args.flank_length)
Beispiel #12
0
                    help="Format of alignments. Default: fasta")
# NOTE: action="store" is argparse's default action, so it is not spelled out below.

# Format of the files with cds sequences
parser.add_argument("-n", "--cds_seqs_format", dest="cds_format", default="fasta",
                    help="Format of cds sequences. Default: fasta")
# Optional: stays None when the option is not given
parser.add_argument("-i", "--cds_index_file", dest="cds_index",
                    help="Biopython index of cds files. Default - construct new")
# Flag: True only when given on the command line
parser.add_argument("-r", "--retain_cds_index", dest="retain_cds_index", action="store_true",
                    help="Retain constructed index after analysis. Default - False")
args = parser.parse_args()

# Forward the parsed options to the library routine
MultipleAlignmentRoutines.get_codon_alignment_from_files(args.pep_alignment,
                                                         args.cds_seqs,
                                                         args.output,
                                                         cds2protein_accordance_file=args.accordance_file,
                                                         alignment_format=args.alignment_format,
                                                         nucleotide_sequence_format=args.cds_format,
                                                         cds_index_file=args.cds_index,
                                                         retain_cds_index=args.retain_cds_index)
Beispiel #13
0
parser.add_argument(
    "-o",
    "--output_directory",
    action="store",
    dest="output_dir",
    default="./",
    help=
    "Output directory to write resulting files. Default - current directory")
parser.add_argument("-f",
                    "--format",
                    action="store",
                    dest="format",
                    default="fasta",
                    help="Format of alignments")
parser.add_argument("-g",
                    "--gap_symbol",
                    action="store",
                    dest="gap_symbol",
                    default="-",
                    help="Gap symbol. Default - '-'")

args = parser.parse_args()

# Result files are written into the output directory, so make sure it is there
# before the first file is produced (safe_mkdir is expected to create a missing
# directory and to tolerate an existing one - verify against FileRoutines.safe_mkdir)
FileRoutines.safe_mkdir(args.output_dir)

# One set of output files per input alignment, prefixed with element 1 of the split filename
for alignment_file in args.input:
    alignment_name_list = FileRoutines.split_filename(alignment_file)
    output_prefix = "%s/%s.unique_positions" % (args.output_dir,
                                                alignment_name_list[1])

    # The gap symbol is taken from the --gap_symbol option (default "-") rather than
    # a hard-coded "-", which made the option have no effect
    MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(
        alignment_file,
        output_prefix,
        format=args.format,
        gap_symbol=args.gap_symbol)
Beispiel #14
0
                    "--genetic_code_table",
                    action="store",
                    dest="genetic_code_table",
                    type=int,
                    default=1,
                    help="Genetic code table number")
# The comma-separated value is split into a list; the default is already a list
parser.add_argument("-a",
                    "--gap_symbol_list",
                    action="store",
                    dest="gap_symbol_list",
                    type=lambda s: s.split(","),
                    default=["-"],
                    help="Comma-separated list of gap symbols. Default: '-'")
# Flag: stored on the namespace as args.use_ambiguous_table
parser.add_argument("-b",
                    "--use_ambiguous_table",
                    action="store_true",
                    dest="use_ambiguous_table",
                    default=False,
                    help="Use ambiguous codon table. Default:False")

args = parser.parse_args()

# The flag is read from args.use_ambiguous_table, i.e. the dest declared above.
# The attribute args.ambigious_table used previously does not exist on the namespace
# and raised AttributeError. The keyword name use_ambigious_table belongs to the
# called routine and is left as it is.
MultipleAlignmentRoutines.count_dNdS_by_reference_seq_in_codon_alignment_from_file(
    args.input,
    args.ref_seq_id,
    genetic_code_table=args.genetic_code_table,
    gap_symbol_list=args.gap_symbol_list,
    use_ambigious_table=args.use_ambiguous_table,
    output_file=args.output,
    format="fasta")
Beispiel #15
0
"""
def expression_hsp(hsp):
    # hit_span - length of hit for single-fragment HSP(blast etc). DO NOT work with exonerate
    return (hsp.evalue <= args.max_e_value) and (hsp.hit_span >= args.min_alignment_length)


def iterator(blast_results):
    for query_id in blast_results:
        filtered_query = blast_results[query_id].hsp_filter(func=expression_hsp)
        if filtered_query:
            yield filtered_query
"""
print("Parsing input file...")
blast_results = SearchIO.index(args.input, args.format)

# Second "|"-separated field of every database id.
# Built as a real list: on Python 3 map() returns a one-shot iterator that would be
# empty for any use after the first one (e.g. a debug print before the join below).
gi_ids_list = [db_id.split("|")[1]
               for db_id in MultipleAlignmentRoutines.get_db_ids(blast_results)]
print("Downloading sequence summaries...")
handle = Entrez.esummary(db=args.db, id=",".join(gi_ids_list))
try:
    summaries_list = Entrez.read(handle)
finally:
    # Release the connection even if parsing of the response fails
    handle.close()
# Unique taxonomy ids; the .taxid file gets one line per summary record that has a TaxId
tax_id_list = set()
with open(args.out_prefix + ".taxid", "w") as out_fd:
    for record in summaries_list:
        if "TaxId" in record:
            tax_id = str(record["TaxId"])
            tax_id_list.add(tax_id)
            out_fd.write(tax_id + "\n")
print("Downloading species names...")
taxa_handle = Entrez.esummary(db="taxonomy", id=",".join(tax_id_list))
try:
    taxa_list = Entrez.read(taxa_handle)
finally:
    # Release the connection even if parsing of the response fails
    taxa_handle.close()
with open(args.out_prefix + ".sciname", "w") as taxa_fd:
    with open(args.out_prefix + ".commonname", "w") as com_fd:
Beispiel #16
0
                    required=True,
                    help="Output file")
parser.add_argument("-f",
                    "--format",
                    action="store",
                    dest="format",
                    default="fasta",
                    help="Alignment format. Default: fasta")
# Flag: True only when given on the command line
parser.add_argument("-r",
                    "--remove_Ns",
                    action="store_true",
                    dest="remove_Ns",
                    default=False,
                    help="Remove columns with Ns. Default:False")
# Flag: True only when given on the command line.
# The help text used to be cut off after "Remove columns with a."
parser.add_argument("-a",
                    "--remove_columns_with_ambigous_nucleotides",
                    action="store_true",
                    dest="remove_columns_with_ambigous_nucleotides",
                    default=False,
                    help="Remove columns with ambiguous nucleotides. Default:False")

args = parser.parse_args()

# Forward the parsed options to the library routine
MultipleAlignmentRoutines.extract_variable_sites_from_alignment_from_file(
    args.input,
    args.output,
    format=args.format,
    remove_columns_with_Ns=args.remove_Ns,
    remove_columns_with_ambigous_nucleotides=args.remove_columns_with_ambigous_nucleotides)
Beispiel #17
0
    required=True,
    type=lambda s: FileRoutines.make_list_of_path_to_files(s.split(",")),
    help="Comma-separated list of files/directories with alignments")
# NOTE: action="store" is argparse's default action, so it is not spelled out below.

# Both output files are mandatory
parser.add_argument("-o", "--output", dest="output", required=True,
                    help="File to write merged alignment")
parser.add_argument("-c", "--coordinates_file", dest="coords_file", required=True,
                    help="File to write file with coordinates of alignments in merged alignment")
parser.add_argument("-f", "--format", dest="format", default="fasta",
                    help="Format of alignments")

args = parser.parse_args()

# Forward the parsed options to the library routine
MultipleAlignmentRoutines.merge_alignment(
    args.input,
    args.output,
    args.coords_file,
    format=args.format,
)