Ejemplo n.º 1
0
# Command-line options for extracting AUGUSTUS evidence records by id.
# NOTE(review): `parser` is created above this fragment - presumably an
# argparse.ArgumentParser; confirm against the full script.
parser.add_argument("-i", "--input", action="store", dest="input", required=True,
                    help="Input file with AUGUSTUS evidence")
parser.add_argument("-o", "--output", action="store", dest="output", required=True,
                    help="Output file")
parser.add_argument("-d", "--id_file", action="store", dest="id_file", required=True,
                    help="File with ids to extract")
# This option is not an output prefix: its value is forwarded as the `mode`
# keyword of the extraction call below, so the help text says exactly that.
parser.add_argument("-m", "--mode", action="store", dest="mode", default="transcript",
                    help="Mode of evidence extraction. Default - transcript")
args = parser.parse_args()

# Positional order expected by the call is: input, id file, output.
AUGUSTUS.extract_evidence_by_ids(args.input, args.id_file, args.output,
                                 mode=args.mode)
Ejemplo n.º 2
0
# Parse the command line (the options are declared earlier in the script).
args = parser.parse_args()

# Every output name shares the user-supplied prefix: the top-hit and
# secondary-hit sets each get a plain gff and a hints gff.
top_hits_gff = "{0}.target.top_hits.gff".format(args.output_prefix)
secondary_hits_gff = "{0}.target.secondary_hits.gff".format(args.output_prefix)
top_hits_gff_hints = "{0}.target.top_hits.hints.gff".format(args.output_prefix)
secondary_hits_gff_hints = "{0}.target.secondary_hits.hints.gff".format(args.output_prefix)

# Fill the two hit files from the input gff (going by the callee's name;
# its body is not visible here).
Exonerate.extract_top_hits_from_target_gff(args.input, top_hits_gff, secondary_hits_gff,
                                           id_white_list_file=args.white_id_file,
                                           max_hits_per_query=args.max_hits_per_query)

# Point the AUGUSTUS wrapper at the script directory given on the command line.
AUGUSTUS.path = args.augustus_script_dir

# Convert both hit sets to hints. The two calls share the intron length limits
# and differ in files, priority, CDS part cutoff and source.
AUGUSTUS.exonerate_to_hints(
    top_hits_gff,
    top_hits_gff_hints,
    priority=args.top_hits_priority,
    min_intron_len=args.min_intron_len,
    max_intron_len=args.max_intron_len,
    CDS_part_cutoff=args.top_hits_CDS_part_cutoff,
    source=args.source_for_top_hits)

AUGUSTUS.exonerate_to_hints(secondary_hits_gff,
                            secondary_hits_gff_hints,
                            priority=args.secondary_hits_priority,
                            min_intron_len=args.min_intron_len,
                            max_intron_len=args.max_intron_len,
                            CDS_part_cutoff=args.secondary_hits_CDS_part_cutoff,
                            source=args.source_for_secondary_hits)
Ejemplo n.º 3
0
# File names used by the pipeline, all built from the output prefix.
# CDS annotation files:
CDS_gff = "{0}.CDS.gff".format(args.output)
CDS_masked_gff = "{0}.CDS.masked.gff".format(args.output)
# Gene id lists:
all_annotated_genes_ids = "{0}.genes.all.ids".format(args.output)
genes_masked_ids = "{0}.genes.masked.ids".format(args.output)
genes_not_masked_ids = "{0}.genes.not.masked.ids".format(args.output)
final_genes_ids = "{0}.genes.final.ids".format(args.output)

# Final annotation files:
final_gff = "{0}.final.gff".format(args.output)
final_CDS_gff = "{0}.final.CDS.gff".format(args.output)

# Configure the AUGUSTUS wrapper from the command-line options.
AUGUSTUS.path = args.augustus_dir
AUGUSTUS.threads = args.threads

print("Annotating genes...")
"""
AUGUSTUS.parallel_predict(args.species, args.input, output_raw_gff, strand=args.strand, gene_model=args.gene_model,
                          output_gff3=True, other_options=args.other_options, config_dir=args.config_dir,
                          use_softmasking=args.softmasking, hints_file=args.hintsfile,
                          extrinsicCfgFile=args.extrinsicCfgFile, predict_UTR=args.predict_UTR)

AUGUSTUS.replace_augustus_ids(output_raw_gff, args.output, species_prefix=args.species_prefix,
                              number_of_digits_in_id=8)

Gffread.extract_transcript_sequences(output_gff, args.input, args.output)

SequenceRoutines.trim_cds_and_remove_terminal_stop_codons("%s.cds" % args.output, "%s.trimmed.cds" % args.output,
                                                          stop_codons_list=("TGA", "TAA", "TAG")) # using default stop_codons(from universal genetic_code)/ Note that this will affect mtDNA proteins
SequenceRoutines.translate_sequences_from_file("%s.trimmed.cds" % args.output, "%s.trimmed.pep" % args.output,
                                               format="fasta", id_expression=None,
                                               genetic_code_table=1, translate_to_stop=False,
Ejemplo n.º 4
0
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'
import argparse
from Pantera.Tools.Annotation import AUGUSTUS

# Command-line interface for AUGUSTUS.replace_augustus_ids.
# `action` is left at argparse's default ("store") for every option.
parser = argparse.ArgumentParser()

parser.add_argument(
    "-i", "--input_gff",
    dest="input_gff",
    required=True,
    help="Input gff from AUGUSTUS",
)
parser.add_argument(
    "-o", "--output_prefix",
    dest="output_prefix",
    required=True,
    help="Prefix of output files",
)
parser.add_argument(
    "-s", "--species_prefix",
    dest="species_prefix",
    required=True,
    help="Species prefix for ids",
)
parser.add_argument(
    "-d", "--number_of_digits_in_id",
    dest="number_of_digits_in_id",
    type=int,
    default=8,
    help="Number of digits in ids. Default - 8",
)

args = parser.parse_args()

# Forward the parsed options unchanged to the id replacement routine.
AUGUSTUS.replace_augustus_ids(
    args.input_gff,
    args.output_prefix,
    species_prefix=args.species_prefix,
    number_of_digits_in_id=args.number_of_digits_in_id,
)
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'

import argparse
from Pantera.Tools.Annotation import AUGUSTUS

# Command-line interface for AUGUSTUS.extract_proteins_from_output.
# `action` is left at argparse's default ("store") for every option.
parser = argparse.ArgumentParser()

# Required input and output, plus an optional id prefix (empty by default).
parser.add_argument(
    "-i", "--input",
    dest="input",
    required=True,
    help="Gtf or gff file with augustus output",
)
parser.add_argument(
    "-o", "--output",
    dest="output",
    required=True,
    help="File to write proteins in fasta format",
)
parser.add_argument(
    "-d", "--id_prefix",
    dest="id_prefix",
    default="",
    help="Prefix to use for protein ids",
)

# Optional report files; each stays None when its option is omitted.
# Note that the dest names of -u and -c differ from their long option names.
parser.add_argument(
    "-s", "--stat_file",
    dest="stat_file",
    help="File to write statistics about annotations",
)
parser.add_argument(
    "-u", "--supported_stat_file",
    dest="supp_stat_file",
    help="File to write statistics about annotations supported by hints",
)
parser.add_argument(
    "-c", "--complete_protein_ids",
    dest="complete_protein_id_file",
    help="File to write ids of complete proteins",
)

args = parser.parse_args()

AUGUSTUS.extract_proteins_from_output(
    args.input,
    args.output,
    id_prefix=args.id_prefix,
    evidence_stats_file=args.stat_file,
    supported_by_hints_file=args.supp_stat_file,
    complete_proteins_id_file=args.complete_protein_id_file,
)
                    "--output_gff",
                    action="store",
                    dest="output_gff",
                    required=True,
                    help="Output gff with replaced ids")
# Synonym file options. Gene and transcript synonyms are mandatory; CDS
# synonyms are optional (None when omitted).
# `action` is left at argparse's default ("store").
parser.add_argument("-g", "--gene_syn_file", dest="gene_syn_file", required=True,
                    help="File with gene synonyms")
parser.add_argument("-t", "--transcript_syn_file", dest="transcript_syn_file", required=True,
                    help="File with transcript synonyms")
parser.add_argument("-c", "--cds_syn_file", dest="cds_syn_file",
                    help="File with CDS synonyms")

args = parser.parse_args()

# args.input_gff comes from an option declared above this fragment.
AUGUSTUS.replace_augustus_ids_by_syn(args.input_gff, args.output_gff,
                                     args.gene_syn_file, args.transcript_syn_file,
                                     cds_syn_file=args.cds_syn_file)
    "--min_supporting_uniquely_mapped_reads",
    action="store",
    dest="min_supporting_uniquely_mapped_reads",
    default=1,
    type=int,
    help=
    "Minimum number of uniquely mapped reads supporting reads to retain junction. "
    "Default: 1, i.e. only junctions supported by at least one uniquely mapped read will be "
    ".To retain all junctions set to 0")
# Options describing the hints to be written: their source label and priority.
parser.add_argument("-s",
                    "--source",
                    action="store",
                    dest="source",
                    default="RNASEQ",
                    help="Source of hints. Default: RNASEQ")
parser.add_argument("-p",
                    "--priority",
                    action="store",
                    dest="priority",
                    default=100,
                    type=int,
                    help="Priority of hints. Default: 100")
args = parser.parse_args()

# Pass the parsed --priority value through rather than a literal 100;
# with a hard-coded value the command-line option would have no effect.
AUGUSTUS.convert_star_junctions_to_intron_hints(
    args.input,
    args.output,
    min_supporting_uniquely_mapped_reads=args.min_supporting_uniquely_mapped_reads,
    source=args.source,
    priority=args.priority)
Ejemplo n.º 8
0
import argparse
from Pantera.Tools.Annotation import AUGUSTUS

# Command-line interface for AUGUSTUS.extract_longest_isoforms.
# `action` is left at argparse's default ("store") for every option.
parser = argparse.ArgumentParser()

parser.add_argument("-i", "--input", dest="input", required=True,
                    help="Input evidence file")
parser.add_argument("-o", "--output", dest="output", required=True,
                    help="File to write filtered evidence file")
# A default of 0 is converted to float only when the option is given explicitly;
# otherwise the int 0 is passed on as-is.
parser.add_argument("-m", "--min_fraction", dest="min_fraction", default=0, type=float,
                    help="Minimum fraction of transcript supported by hints")

args = parser.parse_args()

AUGUSTUS.extract_longest_isoforms(args.input, args.output,
                                  minimum_supported_fraction=args.min_fraction)
Ejemplo n.º 9
0
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'

import argparse

from Pantera.Tools.Annotation import AUGUSTUS

# Command-line interface for AUGUSTUS.draw_evidence_figures.
# `action` is left at argparse's default ("store") for both options.
parser = argparse.ArgumentParser()

parser.add_argument(
    "-i", "--input",
    dest="input",
    required=True,
    help="Input evidence file",
)
parser.add_argument(
    "-o", "--output_prefix",
    dest="output_prefix",
    required=True,
    help="Prefix of output files",
)

args = parser.parse_args()

# Both options are mandatory and are passed positionally.
AUGUSTUS.draw_evidence_figures(
    args.input,
    args.output_prefix,
)
                    help="Output file with synonyms")
# Options controlling how synonym ids are built and which features get them.
# `action` is left at argparse's default ("store").
parser.add_argument("-p", "--id_prefix", dest="id_prefix", required=True,
                    help="Prefix of id")
parser.add_argument("-n", "--number_of_digits_in_number", dest="number_of_digits_in_number",
                    type=int, default=8,
                    help="Number of digits in id. Default - 8")
parser.add_argument("-f", "--feature_type", dest="feature_type", default="gene",
                    help="Type of feature to assign synonyms. Default - 'gene'")

args = parser.parse_args()

# args.input_gff comes from an option declared above this fragment.
AUGUSTUS.assign_synonyms_to_features_from_augustus_gff(args.input_gff,
                                                       args.output,
                                                       args.id_prefix,
                                                       number_of_digits_in_number=args.number_of_digits_in_number,
                                                       feature_type=args.feature_type)