Example #1
0
#!/usr/bin/env python

__author__ = 'Chris Mitchell'

import sys
from pythomics.templates import CustomParser
import pythomics.parsers.fasta as fasta
import pythomics.genomics.parsers as gp

# Usage text handed to CustomParser as the command-line description.
description = (
    "\n"
    "This script will incorporate the variants in a given VCF file into a specified\n"
    "fasta file.\n"
)

# Build the command-line interface: the fasta input, the output target and
# whatever options CustomParser.add_vcf() registers.
parser = CustomParser(description=description)
parser.add_fasta(
    help="The fasta file to incorporate changes into.",
)
parser.add_out(
    help="The file to write resulting fasta file to.",
)
parser.add_vcf()

def main():
    args = parser.parse_args()
    file_name = args.file
    vcf = args.vcf
    snps = not args.no_snps
    dels = args.dels
    ins = args.ins
    homs = not args.no_homozygous
    hets = args.heterozygous
    individual = args.individual-1
    fasta_file = fasta.FastaIterator(file_name)
    vcf_file = gp.VCFIterator( vcf )
Example #2
0
from pythomics.templates import CustomParser
import sys
import argparse
import operator
import pythomics.parsers.fasta as fasta
import pythomics.genomics.parsers as gp

# Usage text handed to CustomParser as the command-line description.
description = """
This script will incorporate a given GFF file into a specified
fasta file. It can also incorporate variants given in a VCF file
while generating this fasta file.
"""

parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to reference.")
parser.add_out(help="The file to write resulting fasta file to.")

# GFF input and the options controlling how its entries are grouped/selected.
gff_group = parser.add_argument_group('GFF file related options')
gff_group.add_argument(
    '--gff',
    help="The GFF file to use.",
    type=argparse.FileType('r'),
    required=True,
)
gff_group.add_argument(
    '--group-on',
    help="The key to group entries together by (such as transcript_id)",
    type=str,
    default='ID',
)
gff_group.add_argument(
    '--feature',
    help="The feature to use for fetching coordinates (such as CDS, does not apply with cufflinks flag).",
    type=str,
    default='',
)
gff_group.add_argument(
    '--cufflinks',
    help="If the gff file is in the standard cufflinks output",
    action='store_true',
    default=False,
)

# VCF input and variant filtering.
vcf_group = parser.add_argument_group('VCF file related options')
# NOTE(review): a stock argparse argument group has no add_vcf(); this call
# relies on CustomParser.add_argument_group() returning a group type that
# provides it -- confirm against pythomics.templates.
vcf_group.add_vcf()
vcf_group.add_argument(
    '--variants-only',
    help="Only output transcripts with variants.",
    action='store_true',
    default=False,
)

# Handling of variants that straddle an exon-exon junction.
splice_group = parser.add_argument_group(
    'Splice Junction Options (if a variant falls over an exon-exon junction. '
    'Default is to ignore.)'
)
splice_group.add_argument(
    '--splice-partial',
    help="Partially splice variants (only include exonic portions of variant)",
    action='store_true',
    default=False,
)


def main():
    args = parser.parse_args()
Example #3
0
#!/usr/bin/env python

# Usage text handed to CustomParser as the command-line description.
description = """
This script will trim N's from the ends of a fasta/fastq file so it can be
aligned by tophat (which pukes if there are >5 N's. We remove them from the read
ends only)
"""

import sys, re, os, gzip
from itertools import izip
from multiprocessing import Pool
from pythomics.templates import CustomParser
# Command-line interface: the shared pythomics inputs/outputs first, then the
# trimming-specific options.
parser = CustomParser(description=description)
parser.add_fasta()
parser.add_read_pair()
parser.add_out()

# Reads shorter than this after trimming are subject to the length cutoff.
parser.add_argument(
    '--min-len',
    type=int,
    default=25,
    help="The minimum read length reads must be after trimming.",
)
parser.add_argument(
    '--prefix',
    type=str,
    help="If using paired reads, this is the filename prefix.",
)
# The default of 0 leaves quality filtering off unless a score is supplied.
parser.add_argument(
    '--quality',
    type=int,
    default=0,
    help='If provided, remove qualities below a given score.',
)
parser.add_argument('--chunk',
                    help='How many reads to submit to each core.',
                    type=int,
Example #4
0
import argparse, sys, csv, copy, decimal, itertools, os, operator
try:
    import re2 as re
except ImportError:
    import re
from multiprocessing import Pool, Value
from collections import Counter
from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
from pythomics.utils import ColumnFunctions

# Command-line interface for mapping peptides back to a fasta file.
parser = CustomParser(description=description)
parser.add_fasta(
    help="The fasta file to match peptides against.",
)
parser.add_out(
    help="The name of the file you wish to create with results appended.",
)

# Optional side outputs. Assuming CustomParser keeps argparse semantics, the
# string default is passed through `type`, so an omitted option writes to the
# null device.
parser.add_argument(
    '--peptide-out',
    nargs='?',
    type=argparse.FileType('w'),
    default=os.devnull,
    help="The file to write digested products to.",
)
parser.add_argument(
    '--protein-out',
    nargs='?',
    type=argparse.FileType('w'),
    default=os.devnull,
    help="The file to write grouped products to.",
)
parser.add_argument(
    '--strict',
    action='store_true',
    help='For numeric operations, fail if types are incorrect (converting NA to a float for instance).',
)
parser.add_delimited_file(
    cols=['--peptide-col'],
    col_default='Peptide',
)

# Header parsing and protein inference behaviour.
parser.add_argument(
    '-r', '--regex',
    type=str,
    help="A perl regular expression determining which parts of the header to capture.",
)
parser.add_argument(
    '--inferred-name',
    type=str,
    default='Proteins',
    help="The name you want to assign for protein inference (in case you are regexing for gene names or something).",
)
parser.add_argument(
    '--no-inference',
    action='store_true',
    help="Do not append proteins inferred from sequences.",
)
parser.add_argument(
    '--no-equality',
    action='store_true',
    help="Do not consider Leucine and Isoleucine equal for peptide mapping.",
)

# iBAQ quantification options live in their own help section.
ibaq_group = parser.add_argument_group('iBAQ related options')
ibaq_group.add_argument(
    '--ibaq',
    action='store_true',
    help="Provide to append iBAQ values as well (requires protein inference).",
)
ibaq_group.add_argument(
    '--precursors',
    type=str,
    help="The column with precursor area (defaults to header lines containing 'Precursor').",
)
parser.add_column_function(
    '',
    col_argument='--ibaq-function',
    group=ibaq_group,
    col_help="The function to apply to groups of iBAQ values (for multiple peptide matches).",
    parent=False,
)
ibaq_group.add_argument(
    '--non-redundant',
    action='store_true',
    help="Use only non-redundant theoretical tryptic peptides for the iBAQ denominator.",
)

parser.add_enzyme(
    help="The enzyme used to digest the sample.",
)
Example #5
0
#!/usr/bin/env python

# Usage text handed to CustomParser as the command-line description.
description = """
This script will digest a given fasta file with the specified enzymes. 
Both protein and nucleotide fasta files are valid inputs, and when
digesting fasta files, it is possible to create 6 frame as well as 
3 frame translations.
"""

import argparse, sys, itertools
from pythomics.templates import CustomParser
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta

# Command-line interface for the fasta digestion script.
parser = CustomParser(description=description)
parser.add_fasta()

# What kind of sequences the fasta holds and how nucleotides are translated.
parser.add_argument(
    '-t', '--type',
    type=str,
    choices=['prot', 'nt'],
    default='prot',
    help="The type of fasta file (default protein).",
)
parser.add_argument(
    '--frame',
    type=int,
    choices=[1, 3, 6],
    help="If using a nucleotide file, translate in how many frames?",
)
parser.add_argument(
    '--genome',
    action='store_true',
    default=False,
    help="Are we translating a genome? This will keep chromosome positions in the header.",
)

parser.add_out()
parser.add_enzyme()
parser.add_argument(
    '--unique',
    action='store_true',
    default=False,
    help="Only return unique peptides per cleavage",
)

def main():
    args = parser.parse_args()
    file_name = args.fasta
    enzyme_choice = args.enzyme
    digest_type = args.type
    digest_frame = args.frame
    digest_negative = False
    if digest_frame == 6:
Example #6
0
import argparse, sys, csv, copy, decimal, itertools, os, operator
try:
    import re2 as re
except ImportError:
    import re
from multiprocessing import Pool, Value
from collections import Counter
from pythomics.templates import CustomParser
import pythomics.proteomics.config as config
import pythomics.proteomics.digest as digest
import pythomics.parsers.fasta as fasta
from pythomics.utils import ColumnFunctions

# Command-line interface for mapping peptides back to a fasta file.
parser = CustomParser(description=description)
parser.add_fasta(help="The fasta file to match peptides against.")
parser.add_out(help="The name of the file you wish to create with results appended.")

# Optional side outputs. Assuming CustomParser keeps argparse semantics, the
# string default is passed through `type`, so an omitted option writes to the
# null device.
parser.add_argument(
    '--peptide-out',
    nargs='?',
    type=argparse.FileType('w'),
    default=os.devnull,
    help="The file to write digested products to.",
)
parser.add_argument(
    '--protein-out',
    nargs='?',
    type=argparse.FileType('w'),
    default=os.devnull,
    help="The file to write grouped products to.",
)
parser.add_argument(
    '--strict',
    help=
Example #7
0
#!/usr/bin/env python

__author__ = 'Chris Mitchell'

import sys
from pythomics.templates import CustomParser
import pythomics.parsers.fasta as fasta
import pythomics.genomics.parsers as gp

# Usage text handed to CustomParser as the command-line description.
description = (
    "\n"
    "This script will incorporate the variants in a given VCF file into a specified\n"
    "fasta file.\n"
)

# Command-line interface: fasta input, output target, then the options that
# CustomParser.add_vcf() contributes.
parser = CustomParser(description=description)
parser.add_fasta(
    help="The fasta file to incorporate changes into.",
)
parser.add_out(
    help="The file to write resulting fasta file to.",
)
parser.add_vcf()

def main():
    args = parser.parse_args()
    file_name = args.fasta
    vcf = args.vcf
    snps = args.no_snps
    dels = args.dels
    ins = args.ins
    homs = args.no_homozygous
    hets = args.heterozygous
    individual = args.individual-1
    fasta_file = fasta.FastaIterator(file_name)
    vcf_file = gp.VCFIterator( vcf )