Python Seq.translate Examples, Bio.Seq.translate Python Examples

Example #1

0

Show file

File: ORF.py Project: Jome0169/RosalindProblems

def ReadingFrameFinder(DNASTRING):
    """Translate every AUG-to-stop stretch found in a DNA string.

    The trailing newline is stripped and T is replaced by U.  From each
    position where "AUG" occurs the sequence is walked codon by codon until
    the first stop codon (UAA, UAG or UGA); the stretch from the start codon
    through that stop codon is kept.  Each kept stretch is translated and the
    protein string is appended to ``FinalizedProt``.

    NOTE(review): ``FinalizedProt`` is not created in this function; it is
    presumably a module-level list -- confirm against the rest of the file.
    """
    stop_codons = ("UAA", "UAG", "UGA")
    rna = DNASTRING.rstrip("\n").replace("T", 'U')
    total = len(rna)

    candidates = []
    for start in xrange(0, total):
        if rna[start:start + 3] != "AUG":
            continue
        # Step through the frame opened by this start codon.
        for pos in xrange(start, total, 3):
            if rna[pos:pos + 3] in stop_codons:
                candidates.append(rna[start:pos + 3])
                break

    for orf in candidates:
        # Trim any trailing partial codon before translating.
        leftover = len(orf) % 3
        if leftover:
            orf = orf[:-leftover]
        FinalizedProt.append(str(Seq(orf, generic_rna).translate()))

Example #2

0

Show file

File: seq_util.py Project: cdl238/pyHCA

def itercodon(seq, frame, offset, table, reverse=False):
    """Yield ``(index, amino_acid)`` pairs, translating ``seq`` codon by codon.

    ``seq`` must expose a ``.seq`` attribute (its string form is sliced) and
    support ``len()``.  ``table`` is passed straight to ``Seq.translate``.

    Forward mode walks from ``frame`` up to ``len(seq) - offset`` in steps of
    three; reverse mode walks from ``len(seq)`` down to ``offset`` and
    translates the reverse complement of each codon.  In both modes a final,
    "N"-padded codon may be yielded for the leftover bases.
    """
    # NOTE(review): `stop` is assigned but never read in this function.
    stop = 0
    if not reverse:
        for i in xrange(frame, len(seq) - offset, 3):
            subseq = str(seq.seq)[i:i + 3]
            assert (len(subseq) % 3 == 0), (str(seq))
            aa = Seq.translate(subseq, table)
            yield i, aa
        # Relies on `i` leaking from the loop above.
        # NOTE(review): `i` is unbound if the loop body never ran -- confirm
        # callers always supply at least one full codon.
        if i + 3 != len(seq):
            # NOTE(review): this slices `seq` itself rather than
            # str(seq.seq) as the loop does -- confirm this is intended.
            subseq = seq[i + 3:] + "N" * (3 - offset)
            assert (len(subseq) % 3 == 0)
            aa = Seq.translate(subseq, table)
            # The index yielded here is that of the last full codon.
            yield i, aa
    else:
        for i in xrange(len(seq), offset, -3):
            # the reverse complement
            subseq = Seq.reverse_complement(str(seq.seq)[i - 3:i])
            assert (len(subseq) % 3 == 0)
            aa = Seq.translate(subseq, table)
            yield i, aa
        if offset:
            # Pad the first `offset` bases with leading Ns to form a codon.
            subseq = Seq.reverse_complement("N" * (3 - offset) +
                                            str(seq.seq)[:offset])
            assert (len(subseq) % 3 == 0)
            aa = Seq.translate(subseq, table)
            # `i` is again the value left over from the loop above.
            yield i, aa

Example #3

0

Show file

def aa_table(codonfile, outfile):
    """Tabulate amino-acid counts per position and write them to Excel.

    ``codonfile`` is read as a tab-separated table indexed by its ``hxb2``
    column, with ``codon`` and ``count`` columns.  For every region in the
    module-level ``ranges`` mapping a sub-table is built with one row per
    index value of that region.  An empty codon adds its count to "del"; a
    codon longer than three bases adds its count to "ins" and is then
    counted under the amino acid of its first three bases; any other codon
    is counted under its translated amino acid.  "coverage" is the row sum
    over the ``aa_header`` columns.  The stacked sub-tables are written to
    ``outfile``.
    """
    codon_counts = pd.read_csv(codonfile, sep="\t", index_col="hxb2").fillna("")
    leading_columns = ["region", "position", "coverage"]
    per_region = []

    for region in ranges:
        table = pd.DataFrame(columns=leading_columns + aa_header,
                             index=ranges[region])
        table = table.fillna(0)
        table["region"] = region
        table["position"] = np.arange(1, len(table) + 1)
        for hxb2 in table.index:
            if hxb2 not in codon_counts.index:
                continue
            matches = codon_counts.loc[[hxb2]]
            for codon, count in zip(matches["codon"], matches["count"]):
                if codon == "":
                    table.loc[hxb2, "del"] += count
                    continue
                if len(codon) > 3:
                    table.loc[hxb2, "ins"] += count
                # For codons of three bases or fewer the slice is a no-op.
                amino = str(Seq.translate(codon[:3]))
                table.loc[hxb2, amino] += count
        table["coverage"] = table[aa_header].sum(axis=1)
        per_region.append(table)

    pd.concat(per_region).to_excel(outfile, index_label="hxb2")

Example #4

0

Show file

File: translate.py Project: kantorlab/hivmmer

def translate(filename, out=sys.stdout, log=sys.stderr):
    """
    Translate nucleotide sequences in FASTA file `filename` to all six possible
    frames.

    Write amino acid sequences to FASTA file `out`, with the frame number
    appended to the sequence header (reverse-complement frames carry a
    trailing apostrophe).

    Records containing 'N' are skipped.

    Log summary statistics to file `log`: the number of records read and the
    number skipped.
    """

    # Initialised up front so the summary is correct for an empty input file.
    nreads = 0
    nskipped = 0

    # Counting from 1 makes `nreads` the number of records, not the last index.
    for nreads, record in enumerate(SeqIO.parse(filename, "fasta"), start=1):

        forward = str(record.seq)

        if 'N' in forward:
            nskipped += 1
            continue

        reverse = str(record.seq.reverse_complement())

        for seq, suffix in ((forward, ""), (reverse, "'")):
            for i in range(3):
                # Largest end index that leaves whole codons from offset i.
                j = 3 * ((len(seq) - i) // 3) + i
                print(">%s-%d%s" % (record.id, i, suffix), file=out)
                print(Seq.translate(seq[i:j]), file=out)

    print("nreads", nreads, file=log)
    print("nskipped (N)", nskipped, file=log)

Example #5

0

Show file

File: test_seq.py Project: tulw4r/biopython

 def test_stops(self):
     """Check translation of ``self.misc_stops`` under several codon tables.

     The same expectations are checked for the plain value and for ``Seq``
     objects built from it with different alphabets.
     """
     inputs = [
         self.misc_stops,
         Seq.Seq(self.misc_stops),
         Seq.Seq(self.misc_stops, Alphabet.generic_nucleotide),
         Seq.Seq(self.misc_stops, Alphabet.DNAAlphabet()),
         Seq.Seq(self.misc_stops, IUPAC.unambiguous_dna),
     ]
     # (expected protein, keyword arguments for Seq.translate), in check order
     expectations = [
         ("***RR", {}),
         ("***RR", {"table": 1}),
         ("***RR", {"table": "SGC0"}),
         ("**W**", {"table": 2}),
         ("**WRR", {"table": "Yeast Mitochondrial"}),
         ("**WSS", {"table": 5}),
         ("**WSS", {"table": 9}),
         ("**CRR", {"table": "Euplotid Nuclear"}),
         ("***RR", {"table": 11}),
         ("***RR", {"table": "Bacterial"}),
     ]
     for nucleotide_seq in inputs:
         for expected, options in expectations:
             translated = Seq.translate(nucleotide_seq, **options)
             self.assertEqual(expected, str(translated))

Example #6

0

Show file

 def add_translations(self):
     '''
     Translate each node's nucleotide sequence into the proteins listed in
     self.proteins (each value must provide an ``extract`` method) and
     record, per protein, the amino-acid differences to the parent node as
     (parent state, position, node state) tuples.  Nodes without a parent
     get an empty mutation list.
     '''
     from Bio import Seq
     for node in self.tree.find_clades(order='preorder'):
         if not hasattr(node, "translations"):
             node.translations = {}
             node.aa_mutations = {}
         is_root = node.up is None
         for prot in self.proteins:
             nucleotides = Seq.Seq("".join(node.sequence))
             coding = str(self.proteins[prot].extract(nucleotides))
             # Gaps are turned into N before translating.
             node.translations[prot] = Seq.translate(coding.replace('-', 'N'))
             if is_root:
                 node.aa_mutations[prot] = []
             else:
                 paired = zip(node.up.translations[prot],
                              node.translations[prot])
                 node.aa_mutations[prot] = [
                     (anc, pos, der)
                     for pos, (anc, der) in enumerate(paired)
                     if anc != der
                 ]
     self.dump_attr.append('translations')

Example #7

0

Show file

def assign_fitness(nodes):
	'''
	Translate the sequence of every node and store the value returned by
	calc_fitness_tolerance on it: under key 'tol' when nodes is a list of
	dict-like items, as attribute .tol when nodes is a dendropy.Tree.
	Any other type of nodes is left untouched.
	'''
	aa, sites, wt_aa, aa_prob = load_mutational_tolerance()
	aln = AlignIO.read('source-data/H1_H3.fasta', 'fasta')
	# True for alignment columns in which no sequence has a gap
	gap_free = (np.array(aln)!='-').min(axis=0)
	# per aligned sequence: the ungapped position of each gap-free column
	indices = {
		rec.name: (np.cumsum(np.fromstring(str(rec.seq), dtype='S1')!='-')-1)[gap_free]
		for rec in aln
	}

	# keep only the probability rows at the gap-free H1 positions
	aa_prob = aa_prob[indices['H1'],:]
	# append one extra column filled with 1e-5 for non-canonical amino acids
	aa_prob = np.hstack((aa_prob, 1e-5*np.ones((aa_prob.shape[0],1))))

	def tolerance(nuc_seq):
		return calc_fitness_tolerance(Seq.translate(nuc_seq),
									aa_prob, aa, indices['H3'])

	if isinstance(nodes, list):
		for node in nodes:
			node['tol'] = tolerance(node['seq'])
	elif isinstance(nodes, dendropy.Tree):
		for node in nodes.postorder_node_iter():
			node.tol = tolerance(node.seq)

Example #8

0

Show file

    def test_translation_on_proteins(self):
        """Translating a protein must raise TranslationError.

        Both the module-level function and the sequence's own method are
        checked for every entry of ``protein_seqs``.
        """
        for protein in protein_seqs:
            self.assertRaises(TranslationError, Seq.translate, protein)
            self.assertRaises(TranslationError, protein.translate)

Example #9

0

Show file

    def test_translation_on_proteins(self):
        """Translating a protein must raise ValueError.

        The method form is only checked for ``Seq.Seq`` instances.
        """
        for protein in protein_seqs:
            self.assertRaises(ValueError, Seq.translate, protein)
            if not isinstance(protein, Seq.Seq):
                continue
            self.assertRaises(ValueError, protein.translate)

Example #10

0

Show file

File: test_seq.py Project: hardhary/biopython

    def test_translation_to_stop(self):
        """``to_stop=True`` must match the full translation cut at the first "*"."""
        for candidate in self.test_seqs:
            # Drop any trailing partial codon.
            whole_codons = 3 * (len(candidate) // 3)
            trimmed = candidate[:whole_codons]
            if "X" in trimmed:
                continue
            up_to_stop = Seq.translate(trimmed, to_stop=True)
            before_first_stop = Seq.translate(trimmed).split("*")[0]
            self.assertEqual(up_to_stop, before_first_stop)

        fixed_dna = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG"
        fixed_protein = Seq.translate(fixed_dna, table=2, to_stop=True)
        self.assertEqual("VAIVMGRWKGAR", fixed_protein)

Example #11

0

Show file

File: test_seq.py Project: BrianLinSu/rop

    def test_translation_to_stop(self):
        """``to_stop=True`` must match the full translation cut at the first '*'.

        Only ``Seq.Seq`` inputs without an 'X' are compared.
        """
        for candidate in self.test_seqs:
            # Drop any trailing partial codon.
            whole_codons = 3 * (len(candidate) // 3)
            trimmed = candidate[:whole_codons]
            if not isinstance(trimmed, Seq.Seq) or 'X' in str(trimmed):
                continue
            up_to_stop = Seq.translate(trimmed, to_stop=True)
            before_first_stop = Seq.translate(trimmed).split('*')[0]
            self.assertEqual(str(up_to_stop), str(before_first_stop))

        fixed_dna = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG"
        fixed_protein = Seq.translate(fixed_dna, table=2, to_stop=True)
        self.assertEqual("VAIVMGRWKGAR", fixed_protein)

Example #12

0

Show file

File: test_seq.py Project: BrianLinSu/rop

    def test_translation_on_proteins(self):
        """Translating a protein must raise ValueError.

        The method form is only checked for ``Seq.Seq`` instances.
        """
        for protein in protein_seqs:
            self.assertRaises(ValueError, Seq.translate, protein)
            if not isinstance(protein, Seq.Seq):
                continue
            self.assertRaises(ValueError, protein.translate)

Example #13

0

Show file

File: phylo_dnds.py Project: cfe-lab/Umberjack

def calc_total_subst(start_codon, end_codon):
    """
    Classify the single-base steps on every mutational pathway between two codons.

    Every ordering of the differing positions is one pathway; each pathway
    starts at start_codon and changes one position per step until end_codon
    is reached.  A step is synonymous when the translated amino acid is the
    same before and after it, nonsynonymous otherwise.

    :param Bio.Seq.Seq start_codon:  3bp codon
    :param Bio.Seq.Seq end_codon:  3bp codon
    :return tuple (float, float):  (fraction of all steps over all pathways
        that are synonymous, fraction that are nonsynonymous);
        (0.0, 0.0) when the codons are identical
    :raises ValueError: if a pathway does not end at end_codon
    """
    total_syn = 0.0
    total_nonsyn = 0.0
    total_subs = 0.0

    upper_start_codon = start_codon.upper()
    upper_end_codon = end_codon.upper()

    # find positions where the codons differ
    diff_pos = [pos for pos, nucstr1 in enumerate(str(upper_start_codon))
                if nucstr1 != str(upper_end_codon[pos])]

    # Traverse all possible pathways from start_codon to end_codon where
    # each stage of a pathway mutates by 1 base.
    start_aa = Seq.translate(upper_start_codon)
    for pathway in itertools.permutations(diff_pos):
        print(str(upper_start_codon) + " " + str(upper_end_codon) + " " + ",".join([str(x) for x in pathway]))

        # Every pathway must begin at the start codon again; otherwise all
        # pathways after the first would begin at the end codon and count
        # only no-op "synonymous" steps.
        last_codon = upper_start_codon
        last_aa = start_aa

        for mut_pos in pathway:
            mut_nuc = upper_end_codon[mut_pos]
            mut_codon = last_codon[:mut_pos] + mut_nuc + last_codon[mut_pos+1:]
            mut_aa = Seq.translate(mut_codon)

            total_subs += 1
            if str(last_aa) == str(mut_aa):
                total_syn += 1
            else:
                total_nonsyn += 1

            last_codon = mut_codon
            last_aa = mut_aa

        if str(last_codon) != str(upper_end_codon):
            raise ValueError("Pathway does not yield end codon " + str(last_codon))

    if total_subs:
        ave_syn = total_syn/total_subs
        ave_nonsyn = total_nonsyn/total_subs
    else:
        ave_syn = 0.0
        ave_nonsyn = 0.0
    return ave_syn, ave_nonsyn

Example #14

0

Show file

File: lab.py Project: rotifyld/2019l-computational-biology-intro

def ex4():
    """Translate both sequence sets up to the first stop and compare them.

    Runs ``compute_all_with_all`` with ``compute_pair_ex4`` on histones
    alone, on bzips alone, and on histones against bzips.
    """
    def to_proteins(dna_seqs):
        return [Seq.translate(dna, to_stop=True) for dna in dna_seqs]

    raw_histones, raw_bzips = read_sequences()
    histone_proteins = to_proteins(raw_histones)
    bzip_proteins = to_proteins(raw_bzips)

    print("histones:")
    compute_all_with_all(histone_proteins, function=compute_pair_ex4)
    print("bzips:")
    compute_all_with_all(bzip_proteins, function=compute_pair_ex4)
    print("bzips x histones")
    compute_all_with_all(histone_proteins, bzip_proteins,
                         function=compute_pair_ex4)

Example #15

0

Show file

    def get_syn_mutations(self, region, mask_constrained=True):
        """Return a boolean array marking synonymous mutations in ``region``.

        ``region`` must be a key of ``self.annotation`` whose ``type`` is
        'gene' or 'protein'; otherwise a message is printed and ``None`` is
        returned.

        The result has the shape of one allele-frequency entry
        (``aft.shape[1:]``): entry ``[ni, pos]`` is true when replacing the
        base at ``pos`` of the initial sequence by nucleotide ``ni`` leaves
        the translated codon unchanged and ``no_substitution[pos]`` is true.
        With ``mask_constrained`` set, the columns returned by
        ``self.get_constrained(region)`` are forced to False.

        NOTE(review): any exception raised while computing drops into an
        ``ipdb`` debugger session, after which the method returns ``None``.
        """

        if region in self.annotation and self.annotation[region].type in [
                'gene', 'protein'
        ]:
            try:
                aft = self.get_allele_frequency_trajectories(region)
                # Validity flags per (entry of aft, position): all True when
                # the mask has a zero-length shape, otherwise the negated
                # per-entry mask sums.
                if len(aft.mask.shape) == 0:
                    aft_valid = np.ones((aft.shape[0], aft.shape[-1]),
                                        dtype=bool)
                else:
                    aft_valid = ~np.array([af.mask.sum(axis=0) for af in aft],
                                          dtype=bool)
                gaps = self.get_gaps_by_codon(region)
                initial_seq = self.get_initial_sequence(region)
                # One consensus per entry of aft, with gap positions set to N.
                consensi = []
                for af in aft:
                    tmp = consensus(af)
                    tmp[gaps] = 'N'
                    consensi.append(tmp)

                # Translate every consensus into an array of single characters.
                cons_aa = np.array([
                    np.fromstring(Seq.translate(''.join(cons.astype('U'))),
                                  dtype='S1') for cons in consensi
                ])
                # Per codon: True when the valid consensus amino acids are all
                # the same; repeated three times to get one flag per base.
                no_substitution = np.repeat(
                    np.array([
                        len(np.unique(col[ind])) == 1
                        for ind, col in zip(aft_valid.T[::3], cons_aa.T)
                    ],
                             dtype=bool), 3)

                syn_muts = np.zeros(aft.shape[1:], dtype=bool)
                for pos in range(aft.shape[-1]):
                    ci = pos // 3  # codon index
                    rf = pos % 3  # position within the codon
                    codon = ''.join(initial_seq[ci * 3:(ci + 1) *
                                                3].astype("U"))
                    # Try each of the first four symbols of `alpha` at this
                    # position (presumably the four nucleotides -- `alpha` is
                    # defined elsewhere).
                    for ni, nuc in enumerate(alpha[:4].astype("U")):
                        mod_codon = codon[:rf] + nuc + codon[rf + 1:]
                        try:
                            syn_muts[ni,pos] = (Seq.translate(codon)==Seq.translate(mod_codon))\
                                                *no_substitution[pos]
                        # NOTE(review): bare except -- any failure here is
                        # recorded as "not synonymous".
                        except:
                            syn_muts[ni, pos] = False
                if mask_constrained:
                    syn_muts[:, self.get_constrained(region)] = False
                return syn_muts
            # NOTE(review): bare except that opens a debugger; this looks like
            # leftover debugging code.
            except:
                import ipdb
                ipdb.set_trace()
        else:
            print(region, "is not a valid protein or gene")
            return None

Example #16

0

Show file

    def test_translation_to_stop(self):
        """``to_stop=True`` must match the full translation cut at the first '*'.

        Only ``Seq.Seq`` inputs without an 'X' are compared.
        """
        for candidate in self.test_seqs:
            # Drop any trailing partial codon.
            whole_codons = 3 * (len(candidate) // 3)
            trimmed = candidate[:whole_codons]
            if not isinstance(trimmed, Seq.Seq) or 'X' in str(trimmed):
                continue
            up_to_stop = Seq.translate(trimmed, to_stop=True)
            before_first_stop = Seq.translate(trimmed).split('*')[0]
            self.assertEqual(str(up_to_stop), str(before_first_stop))

        fixed_dna = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG"
        fixed_protein = Seq.translate(fixed_dna, table=2, to_stop=True)
        self.assertEqual("VAIVMGRWKGAR", fixed_protein)

Example #17

0

Show file

File: solution2_str_partition.py Project: alscherer/biofx_python

def main() -> None:
    """Collect ORFs from the first sequence of the input FASTA file.

    The first record is converted to RNA; it and its reverse complement are
    each translated in three frames (without stopping at stop codons), and
    everything ``find_orfs`` returns for a non-empty translation is gathered
    into a set.
    """

    args = get_args()
    seqs = [str(rec.seq) for rec in SeqIO.parse(args.file, 'fasta')]
    if not seqs:
        return

    rna = seqs[0].replace('T', 'U')
    orfs = set()

    for strand in (rna, Seq.reverse_complement(rna)):
        for offset in range(3):
            prot = Seq.translate(truncate(strand[offset:], 3), to_stop=False)
            if prot:
                orfs.update(find_orfs(prot))

Example #18

0

Show file

def translateSeq(cds):
    """Translate ``cds`` as a complete CDS, falling back to its reverse complement.

    The sequence is first translated as given ("sense").  If that raises
    TranslationError, the reverse complement is tried ("anti").  If both
    attempts fail a message is printed.

    NOTE(review): nothing is returned in the code visible here, and
    ``translated`` stays unbound when both attempts fail.
    """
    senseOrAnti = 'sense'
    finalCDS = cds
    try:
        translated = Seq.translate(cds, cds=True)
    except TranslationError:
        try:
            reverseCDS = Seq.reverse_complement(cds)
            translated = Seq.translate(reverseCDS, cds=True)
            finalCDS = reverseCDS
            senseOrAnti = 'anti'
        except TranslationError:
            # Single-argument print() behaves the same on Python 2 and 3.
            print('Translation failed in %s' % cds)

Example #19

0

Show file

def parseFeatureBed(bedFile,regionSeqs):
    """Build a Feature for every non-blank line of a BED file.

    Each feature's ``cds`` is taken from the sequence of its region in
    ``regionSeqs``.  The CDS is translated as a complete CDS; when that
    raises TranslationError it is translated again without the CDS checks
    and a warning is appended to ``WARNINGS``.  Features are collected in a
    dict keyed by feature name.

    NOTE(review): the dict is not returned in the code visible here.
    """
    print('VIVAN: Parsing %s' % bedFile)
    features = {}
    # The context manager closes the handle even if a line fails to parse.
    with open(bedFile, 'r') as bedHandle:
        for line in bedHandle:
            if not line.strip():
                continue
            feature = Feature(line)
            regionSeq = regionSeqs[feature.region]
            feature.cds = feature.getCDS(regionSeq)
            try:
                translated = Seq.translate(feature.cds, cds=True)
            except TranslationError as e:
                translated = Seq.translate(feature.cds)
                WARNINGS.append('Translation error in feature : %s\nCDS : %s\nProtein : %s\n%s\n'%(feature.featureLine,feature.cds,translated,e))
            features[feature.name] = feature

Example #20

0

Show file

def group_by_protein(fasta_file):
    """ Groups DNA sequences based on the protein they code for.

        Sequences that cannot be translated as a complete CDS with
        translation table 11 are skipped.

        Args:
            fasta_file (str): path to the FASTA file with DNA
            sequences for a gene.

        Returns:
            protein_diversity (dict): dictionary with the base name
            of `fasta_file` as its only key and another dictionary
            as value. The nested dictionary has protein sequences as
            keys; each value is a one-element list whose single item
            is the list of (allele identifier, DNA sequence) tuples
            that code for that protein. The allele identifier is the
            part of the record id after the last underscore.
    """

    basename = os.path.basename(fasta_file)
    by_protein = {}
    for record in SeqIO.parse(fasta_file, 'fasta'):
        allele_id = record.id.split('_')[-1]
        sequence = str(record.seq)
        try:
            protein = Seq.translate(sequence, table=11, cds=True)
        except Exception:
            continue

        by_protein.setdefault(protein, [[]])[0].append((allele_id, sequence))

    return {basename: by_protein}

Example #21

0

Show file

File: mutation_summary.py Project: blab/ncov-ny

def read_reference(fname, genemap):
    """Load a reference sequence and translate the genes listed in a gene map.

    ``fname`` is read as a single-record FASTA file; if that fails the file
    is read as plain text and its stripped lines are joined.

    ``genemap`` is a tab-separated annotation file.  Lines starting with '#'
    and blank lines are skipped.  Columns 4 and 5 hold the 1-based start and
    end, column 7 the strand, and column 9 ';'-separated attributes of the
    form ``key value``.  The translation of each feature is stored under its
    ``gene_name`` attribute (surrounding double quotes removed), or under
    ``None`` when that attribute is missing.

    Returns a dict with the nucleotide reference under "nuc" and the
    name-to-protein mapping under "translations".
    """
    try:
        ref = str(SeqIO.read(fname, 'fasta').seq)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        with open(fname, 'r') as fh:
            ref = "".join([x.strip() for x in fh])

    translations = {}
    with open(genemap, 'r') as fh:
        for line in fh:
            if line[0] == '#' or not line.strip():
                continue
            entries = [x.strip() for x in line.strip().split('\t')]
            start = int(entries[3])
            end = int(entries[4])
            strand = entries[6]
            # Skip empty chunks, e.g. the one produced by a trailing ';'.
            attributes = {
                x.split()[0]: ' '.join(x.split()[1:])
                for x in entries[8].split(';') if x.strip()
            }
            if 'gene_name' in attributes:
                name = attributes['gene_name'].strip('"')
            else:
                name = None
            # Annotation coordinates are 1-based and inclusive; the feature
            # location is 0-based and end-exclusive.
            translation = Seq.translate(
                SeqFeature.SeqFeature(
                    SeqFeature.FeatureLocation(
                        start - 1, end,
                        strand=-1 if strand == '-' else 1)).extract(ref))
            translations[name] = str(translation)

    return {"nuc": ref, "translations": translations}

Example #22

0

Show file

File: translationSimple.py Project: adeshpande/pybiosys

def translationBio(data):
    '''Translate every item of data with Biopython and print the result.

    Each item is translated with the 'Standard' table, without stopping at
    stop codons and with stop codons rendered as an empty string; the
    translations are concatenated in order and printed.
    '''
    proteinSeq = ''
    for line in data:
        proteinSeq += Seq.translate(line, table='Standard', stop_symbol='', to_stop=False)
    # Single-argument print() behaves the same on Python 2 and 3.
    print(proteinSeq)

Example #23

0

Show file

    def export(self, path = '', extra_attr = ['aa_muts']):
        """Write the tree and its sequences as two JSON files.

        ``path + 'tree.json'`` receives the output of ``tree_to_json`` for
        the root.  ``path + 'sequences.json'`` receives a dict holding the
        full root nucleotide sequence and root protein translations under
        'root', and, for every node with a ``clade`` attribute, only the
        positions whose state differs from the root.
        """
        from Bio import Seq
        from itertools import izip
        timetree_fname = path+'tree.json'
        sequence_fname = path+'sequences.json'
        tree_json = tree_to_json(self.tree.root, extra_attr=extra_attr)
        write_json(tree_json, timetree_fname, indent=None)

        root_nuc = "".join(self.tree.root.sequence)
        root_entry = {'nuc': root_nuc}
        elems = {'root': root_entry}
        for prot in self.proteins:
            coding = str(self.proteins[prot].extract(Seq.Seq(root_nuc)))
            # Gaps become N before translation; the resulting X is shown as '-'.
            protein = str(Seq.translate(coding.replace('-', 'N')))
            root_entry[prot] = protein.replace('X', '-')

        # Nucleotide differences to the root, per node.
        for node in self.tree.find_clades():
            if not (hasattr(node, "clade") and hasattr(node, "sequence")):
                continue
            nuc_diffs = {}
            for pos, (state, ancstate) in enumerate(
                    izip(node.sequence, self.tree.root.sequence)):
                if state != ancstate:
                    nuc_diffs[pos] = state
            elems[node.clade] = {'nuc': nuc_diffs}

        # Amino-acid differences to the root, per node and protein.
        for node in self.tree.find_clades():
            if not (hasattr(node, "clade") and hasattr(node, "translations")):
                continue
            for prot in self.proteins:
                aa_diffs = {}
                for pos, (state, ancstate) in enumerate(
                        izip(node.translations[prot], elems['root'][prot])):
                    if state != ancstate:
                        aa_diffs[pos] = state
                elems[node.clade][prot] = aa_diffs

        write_json(elems, sequence_fname, indent=None)

Example #24

0

Show file

File: tree.py Project: alliblk/augur

    def add_translations(self):
        '''
        Translate each node's nucleotide sequence into the proteins given in
        self.proteins (values are expected to be SeqFeatures) and record the
        amino-acid differences to the parent node as
        (parent state, position, node state) tuples.
        '''
        from Bio import Seq

        # Process proteins in order of the start of their SeqFeature.
        by_start = sorted(self.proteins.items(), key=lambda item: item[1].start)

        for node in self.tree.find_clades(order='preorder'):
            if not hasattr(node, "translations"):
                # An ordered mapping keeps translations in genomic order for
                # downstream assembly.
                node.translations = OrderedDict()
                node.aa_mutations = {}

            for prot, feature in by_start:
                coding = str(feature.extract(Seq.Seq("".join(node.sequence))))
                # Gaps are turned into N before translating.
                translation = Seq.translate(coding.replace('-', 'N'))
                node.translations[prot] = translation

                if node.up is None:
                    mutations = []
                else:
                    parent_translation = node.up.translations[prot]
                    mutations = [
                        (anc, pos, der)
                        for pos, (anc, der) in enumerate(
                            zip(parent_translation, translation))
                        if anc != der
                    ]
                node.aa_mutations[prot] = mutations

        self.dump_attr.append('translations')

Example #25

0

Show file

File: CpnClassiPhyR.py Project: kevmu/CpnClassiPhyR

    def RFLP_digests(self, fasta_infile):
        """Run ``self.RFLP_digest`` on every record of a FASTA file.

        The metadata returned for each record is extended with the record
        id, description, nucleotide sequence and its length, and the peptide
        translation (standard table, stops shown as '*') and its length.
        Each nucleotide sequence is also printed.

        Returns a dict mapping record id to that metadata.
        """
        digests_by_id = {}
        for fasta_record in SeqIO.parse(fasta_infile, "fasta"):
            record_id = str(fasta_record.id)
            nucleotides = str(fasta_record.seq)
            description = str(fasta_record.description)
            print(nucleotides)

            digest_metadata = self.RFLP_digest(nucleotides)
            digest_metadata['ID'] = record_id
            digest_metadata['Description'] = description
            digest_metadata['Nucleotide UT Sequence'] = nucleotides
            digest_metadata['Nucleotide UT Sequence Length'] = len(nucleotides)

            peptide = str(Seq.translate(fasta_record.seq,
                                        table='Standard',
                                        stop_symbol='*',
                                        to_stop=False,
                                        cds=False,
                                        gap=None))
            digest_metadata['Peptide UT Sequence'] = peptide
            digest_metadata['Peptide UT Sequence Length'] = len(peptide)

            digests_by_id[record_id] = digest_metadata
        return digests_by_id

Example #26

0

Show file

 def add_translations(self):
     """Store, on every tree node, the translation of each protein in self.proteins.

     Gap characters in the extracted nucleotide sequence are replaced by N
     before translating.
     """
     from Bio import Seq
     for node in self.tree.find_clades():
         if not hasattr(node, "translations"):
             node.translations = {}
         for prot in self.proteins:
             nucleotides = Seq.Seq("".join(node.sequence))
             coding = str(self.proteins[prot].extract(nucleotides))
             node.translations[prot] = Seq.translate(coding.replace('-', 'N'))

Example #27

0

Show file

File: test_seq.py Project: hardhary/biopython

 def test_stops(self):
     """Check translation of ``self.misc_stops`` under several codon tables.

     The same expectations are checked for the plain value and for a
     ``Seq`` object built from it.
     """
     # (expected protein, keyword arguments for Seq.translate), in check order
     expectations = [
         ("***RR", {}),
         ("***RR", {"table": 1}),
         ("***RR", {"table": "SGC0"}),
         ("**W**", {"table": 2}),
         ("**WRR", {"table": "Yeast Mitochondrial"}),
         ("**WSS", {"table": 5}),
         ("**WSS", {"table": 9}),
         ("**CRR", {"table": "Euplotid Nuclear"}),
         ("***RR", {"table": 11}),
         ("***RR", {"table": "Bacterial"}),
     ]
     for nucleotide_seq in [self.misc_stops, Seq.Seq(self.misc_stops)]:
         for expected, options in expectations:
             self.assertEqual(expected, Seq.translate(nucleotide_seq, **options))

Example #28

0

Show file

File: parsing.py Project: NProfileAnalysisComputationalTool/npact

def translate(config, rc=False):
    """Translate the configured base range of the DNA file.

    The file returned by ``ddna(config)`` is memory-mapped read-only and the
    bases from ``config['startBase']`` to ``config['endBase']`` (1-based,
    inclusive) are translated, optionally after reverse complementing.
    Translation table 4 is used when ``mycoplasma(config)`` is true, table 1
    otherwise.

    Returns a dict with the nucleotide string under 'seq' and the protein
    string under 'trans'.  The mapping and the file descriptor are always
    released, also when an error occurs.
    """
    table = 1
    if mycoplasma(config):
        # table 4 is for mycoplasma ala:
        # http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
        table = 4
    fd, fmap = None, None
    try:
        log.debug("Doing translation with table %d, rc: %s", table, rc)
        fd = os.open(ddna(config), os.O_RDONLY)
        fmap = mmap.mmap(fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
        # By convention (e.g. from the C or NCBI) the DNA is 1
        # indexed; our DDNA is a c style array that is 0 indexed
        startIdx = config['startBase'] - 1
        # The end index here is inclusive but array.slice isn't so we
        # don't need to subtract 1
        endIdx = config['endBase']
        seq = Seq.Seq(fmap[startIdx:endIdx])

        if rc:
            seq = seq.reverse_complement()
        return {
            'seq': str(seq),
            'trans': str(Seq.translate(seq, table))
        }
    finally:
        # Compare against None: descriptor 0 is a valid descriptor, and the
        # mapping must actually be closed (close needs to be called).
        if fmap is not None:
            fmap.close()
        if fd is not None:
            os.close(fd)

Example #29

0

Show file

File: tree.py Project: blab/nextstrain-augur

    def add_translations(self):
        '''
        Translate each node's nucleotide sequence into the proteins given in
        self.proteins (values are expected to be SeqFeatures) and record the
        amino-acid differences to the parent node as
        (parent state, position, node state) tuples.
        '''
        from Bio import Seq

        # Process proteins in order of the start of their SeqFeature.
        by_start = sorted(self.proteins.items(), key=lambda item: item[1].start)

        for node in self.tree.find_clades(order='preorder'):
            if not hasattr(node, "translations"):
                # An ordered mapping keeps translations in genomic order for
                # downstream assembly.
                node.translations = OrderedDict()
                node.aa_mutations = {}

            for prot, feature in by_start:
                coding = str(feature.extract(Seq.Seq("".join(node.sequence))))
                # Gaps are turned into N before translating.
                translation = Seq.translate(coding.replace('-', 'N'))
                node.translations[prot] = translation

                if node.up is None:
                    mutations = []
                else:
                    parent_translation = node.up.translations[prot]
                    mutations = [
                        (anc, pos, der)
                        for pos, (anc, der) in enumerate(
                            zip(parent_translation, translation))
                        if anc != der
                    ]
                node.aa_mutations[prot] = mutations

        self.dump_attr.append('translations')

Example #30

0

Show file

File: fluvinput.py Project: lialmonacid/FluVar

def getCodonTableInfo(codon_table_dict,ref_cds_coordinates_dict,proteins_pos_list,strain,nucleotide_pos,nucleotide,segment):
  """Add one nucleotide to the codon table of every protein position it belongs to.

  Each item of proteins_pos_list is a "protein_name:codon_number" string.
  The entry codon_table_dict[strain][protein_name][codon_number] is a list
  [codon, pos1, pos2, pos3, amino_acid]: the first call for a codon stores
  the nucleotide and its position, the second and third calls append to the
  codon and fill the next position, and the third call also stores the
  translated amino acid.

  A fourth nucleotide for the same codon writes an error message to stderr
  and exits with status 1.

  codon_table_dict is modified in place and also returned.
  ref_cds_coordinates_dict and segment are not used.
  """
  for protein_pos in proteins_pos_list:
    protein_fields = protein_pos.split(":")
    protein_name = protein_fields[0]
    protein_codon_number = int(protein_fields[1])
    # Create the per-protein table on first use.
    protein_codons = codon_table_dict[strain].setdefault(protein_name, {})
    if protein_codon_number not in protein_codons:
      # First position of the codon.
      protein_codons[protein_codon_number] = [nucleotide, str(nucleotide_pos), None, None, None]
      continue
    entry = protein_codons[protein_codon_number]
    if entry[2] is None:
      # Second position of the codon.
      entry[2] = str(nucleotide_pos)
      entry[0] = entry[0] + nucleotide
    elif entry[3] is None:
      # Third position: the codon is complete and can be translated.
      entry[3] = str(nucleotide_pos)
      entry[0] = entry[0] + nucleotide
      entry[4] = Seq.translate(entry[0], to_stop=False, stop_symbol='*')
    else:
      # The entry is a list, so it must be converted before concatenation.
      sys.stderr.write("\n[ERROR]: The codon \""+str(protein_codon_number)+"\" is already set in the codon table as, "+str(entry)+". Contact the author because this is a major issue.\n\n")
      sys.exit(1)
  return codon_table_dict

Example #31

0

Show file

def getProtein(dna, protein):
    """Return the first codon-table id under which ``dna`` translates to ``protein``.

    Table ids 1-6 and 9-15 are tried in ascending order (7 and 8 are
    skipped).  Stop codons are rendered as the empty string, so they do not
    appear in the translation that is compared with ``protein``.

    Returns ``None`` (implicitly) when no tried table gives a match.
    """
    candidate_tables = [table_id for table_id in range(1, 16)
                        if table_id not in (7, 8)]
    for table_id in candidate_tables:
        translated = Seq.translate(dna, stop_symbol='', table=table_id)
        if translated == protein:
            return table_id

Example #32

0

Show file

File: GenerateAlignment.py Project: xji3/Joel756FinalProject

def translateDNAtoAA(input_fasta, output_fasta, remove_lower_case = False):
    """Translate a one-sequence-per-line nucleotide FASTA file to protein.

    Header lines (starting with ``>``) are copied unchanged.  Every other
    line is treated as one complete nucleotide sequence, translated up to the
    first stop codon and written followed by a newline.

    Args:
        input_fasta: path of the nucleotide FASTA file to read.
        output_fasta: path of the protein FASTA file to write (truncated).
        remove_lower_case: when true, ASCII lowercase characters are removed
            from each sequence line before it is translated.

    Raises:
        AssertionError: if a sequence line (without its line terminator) is
            not a whole number of codons long.
    """
    with open(input_fasta, 'r') as f:
        with open(output_fasta, 'w+') as g:
            for line in f:
                if line.startswith('>'):
                    g.write(line)
                    continue
                # Strip the actual terminator instead of blindly dropping one
                # character: a last line without a newline keeps its final
                # base, and a CRLF line does not leak '\r' into the sequence.
                seq = line.rstrip('\r\n')
                assert len(seq) % 3 == 0
                if remove_lower_case:
                    # Portable replacement for the Python-2-only
                    # str.translate(None, deletechars) form.
                    seq = ''.join(c for c in seq
                                  if c not in string.ascii_lowercase)
                g.write(Seq.translate(seq, to_stop = True) + '\n')

Example #33

0

Show file

File: test_indels.py Project: nathanielknight/validate-nucamino

 def insertion_is_synonymous_match(self, insertion, mtn):
     """Assert the insertion rebuilt from ``mtn`` translates like ``insertion``.

     An ``indels.Insertion`` is rebuilt from the ``"NAPosition"`` and
     ``"InsertedCodonsText"`` entries of ``mtn`` (the position is shifted by
     ``+ 3 - 1``) using the gene and genotype of ``insertion``.  The test
     requires a non-empty inserted text and equal translations of the two
     mutated genes.
     """
     rebuilt_pos = mtn["NAPosition"] + 3 - 1
     inserted_text = mtn["InsertedCodonsText"]
     rebuilt = indels.Insertion(
         nt_ins=inserted_text,
         nt_pos=rebuilt_pos,
         gene=insertion.gene,
         genotype=insertion.genotype,
     )
     self.assertGreater(len(inserted_text), 0)
     original_gene, rebuilt_gene = insertion.mutated_gene, rebuilt.mutated_gene
     original_protein = bioseq.translate(original_gene)
     rebuilt_protein = bioseq.translate(rebuilt_gene)
     self.assertEqual(original_protein, rebuilt_protein)

Example #34

0

Show file

def extract_proteome():
	"""Write the proteome FASTA: translated CDS features plus contaminants.

	Every ``CDS`` feature of every GenBank record in the files listed by
	``Arguments.genome`` is extracted, translated and given a FASTA header
	built from a running index, the scaffold id, the locus tag and the
	product.  The proteins of the fixed ``crap.fasta`` file are appended,
	and, when ``record[8]`` is truthy, those of the job's
	``contaminants.fasta`` as well.  The result is written to
	``ProteomeFastaPath``.

	Returns:
		True once the file has been written.
	"""
	print('EXTRAINDO PROTEOMA ...')
	# These names are only read here; the declarations are kept as in the
	# original.
	global Arguments
	global BIN_PATHS
	global CWD
	global ProteomeFastaPath
	global ProteomeFastaDecoyPath
	global record
	# Entries are collected in a list and joined once, instead of growing one
	# string with repeated "+=".
	entries = []
	cds_index = 0
	# The output is opened first (as before) so an unwritable path fails
	# early; "with" guarantees every handle is closed even on error.
	with open(ProteomeFastaPath, 'w') as proteome_handle:
		for FilePath in SplitFilePathString(Arguments.genome):
			with open(FilePath) as genome_handle:
				for Scaffold in SeqIO.parse(genome_handle, 'genbank'):
					print(Scaffold.id)
					for Feature in Scaffold.features:
						if Feature.type != 'CDS':
							continue
						cds_index += 1
						CDSSeq = Feature.location.extract(Scaffold)
						CDSProtSeq = Seq.translate(CDSSeq)
						# Qualifier values are lists; fall back to a
						# one-element placeholder list when a key is absent.
						locus_tag = Feature.qualifiers.get(
							'locus_tag', ['MISSING_LOCUS_TAG'])[0]
						product = Feature.qualifiers.get(
							'product', ['MISSING_PRODUCT'])[0]
						entries.append('>{0}|{0} {1} {2} {3}\n{4}\n'.format(
							str(cds_index), Scaffold.id,
							locus_tag, product.replace("'", ""),
							str(CDSProtSeq)))

		# add crap contaminant proteins (hard-coded path)
		with open('/home/cdtec/Frederico/ms6/bin/crap.fasta') as crap_handle:
			for crap_index, crap_protein in enumerate(SeqIO.parse(crap_handle, 'fasta')):
				cds_index += 1
				entries.append('>CONTAMINANT_CRAP_{0}|{0} {1} {2}\n{3}\n'.format(
					str(cds_index), crap_index,
					crap_protein.description.replace("'", ""),
					str(crap_protein.seq)))

		# add custom contaminant proteins
		if record[8]:
			custom_path = '/home/cdtec/Frederico/ms6/jobs_data/%s/contaminants.fasta'%Arguments.job_id
			with open(custom_path) as custom_handle:
				for custom_index, custom_protein in enumerate(SeqIO.parse(custom_handle, 'fasta')):
					cds_index += 1
					entries.append('>CONTAMINANT_CUSTOM_{0}|{0} {1} {2}\n{3}\n'.format(
						str(cds_index), custom_index,
						custom_protein.description.replace("'", ""),
						str(custom_protein.seq)))

		proteome_handle.write(''.join(entries))
	return True

Example #35

0

Show file

File: test_seq.py Project: BrianLinSu/rop

 def test_stops(self):
     """Translate ``self.misc_stops`` under several codon tables.

     The same sequence is supplied as a plain value and as ``Seq.Seq``
     objects with different alphabets; each form must give the expected
     protein for every listed table.
     """
     stops = self.misc_stops
     inputs = [
         stops,
         Seq.Seq(stops),
         Seq.Seq(stops, Alphabet.generic_nucleotide),
         Seq.Seq(stops, Alphabet.DNAAlphabet()),
         Seq.Seq(stops, IUPAC.unambiguous_dna),
     ]
     # (expected protein, keyword arguments for Seq.translate), in check order
     cases = [
         ("***RR", {}),
         ("***RR", {"table": 1}),
         ("***RR", {"table": "SGC0"}),
         ("**W**", {"table": 2}),
         ("**WRR", {"table": 'Yeast Mitochondrial'}),
         ("**WSS", {"table": 5}),
         ("**WSS", {"table": 9}),
         ("**CRR", {"table": 'Euplotid Nuclear'}),
         ("***RR", {"table": 11}),
         ("***RR", {"table": 'Bacterial'}),
     ]
     for nucleotide_seq in inputs:
         for expected, translate_kwargs in cases:
             actual = str(Seq.translate(nucleotide_seq, **translate_kwargs))
             self.assertEqual(expected, actual)

Example #36

0

Show file

 def test_translation(self):
     """``Seq.translate(seq)`` and ``seq.translate()`` must give the same repr.

     Each entry of ``self.test_seqs`` is first trimmed to a whole number of
     codons; only ``Seq.Seq`` instances without an ``"X"`` are compared.
     """
     for candidate in self.test_seqs:
         whole_codon_length = 3 * (len(candidate) // 3)
         trimmed = candidate[:whole_codon_length]
         if not isinstance(trimmed, Seq.Seq):
             continue
         if "X" in str(trimmed):
             continue
         via_function = Seq.translate(trimmed)
         self.assertEqual(repr(via_function), repr(trimmed.translate()))

Example #37

0

Show file

File: countMutations.py Project: bochaozhang/countMutations

def mutationType(single_mutations):
    """Label every single mutation as silent, replacement or unknown.

    Each entry is a list whose item 0 is the germline sequence and item 2
    the mutated sequence.  One label is appended to each entry in place:
    'unknown' if either sequence contains '-' or 'N', 'silent' if both
    translate to the same protein, 'replacement' otherwise.  The number of
    entries is printed first.

    Returns the same (now extended) ``single_mutations`` list.
    """
    from Bio import Seq

    print(len(single_mutations))
    for entry in single_mutations:
        germline, mutated = entry[0], entry[2]
        untranslatable = any(marker in sequence
                             for sequence in (germline, mutated)
                             for marker in ('-', 'N'))
        if untranslatable:
            label = 'unknown'
        elif Seq.translate(germline) == Seq.translate(mutated):
            label = 'silent'
        else:
            label = 'replacement'
        entry.append(label)

    return single_mutations

Example #38

0

Show file

    def test_translation_using_tables_with_ambiguous_stop_codons(self):
        """Check the error and the warning raised for table 28.

        An 'ambiguous stop codon' is a codon with an unambiguous sequence
        that is read, depending on context, either as STOP or as an amino
        acid.  Such codons are listed both in the forward table and among
        the stop codons of the codon table.
        """
        ambiguous_table = 28
        seq = "ATGGGCTGA"
        # to_stop=True cannot be honoured with such a table.
        with self.assertRaises(ValueError):
            Seq.translate(seq, table=ambiguous_table, to_stop=True)
        # Without to_stop the translation only warns; inspect the last warning.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            Seq.translate(seq, table=ambiguous_table)
            message = str(caught[-1].message)
        self.assertTrue(message.startswith("This table contains"))
        self.assertTrue(message.endswith("be translated as amino acid."))

Example #39

0

Show file

File: test_seq.py Project: hardhary/biopython

    def test_translation_on_proteins(self):
        """Translating a protein sequence must raise TranslationError.

        Both ``Seq.translate(s)`` and ``s.translate()`` are checked, in that
        order.  When the length is not a multiple of three a
        BiopythonWarning is expected as well.
        """
        for s in protein_seqs:
            has_partial_codon = len(s) % 3 != 0
            attempts = (lambda: Seq.translate(s), lambda: s.translate())
            for attempt in attempts:
                if has_partial_codon:
                    with self.assertWarns(BiopythonWarning):
                        with self.assertRaises(TranslationError):
                            attempt()
                else:
                    with self.assertRaises(TranslationError):
                        attempt()

Example #40

0

Show file

File: __init__.py Project: ecolell/amphipathic

 def __init__(self, string):
     """Initialise from a nucleotide or protein string.

     The input is lower-cased.  If ``is_nucleotide`` accepts it, it is kept
     as ``self.nucleotide`` and replaced by its lower-cased translation.
     ``self.primary`` holds the pieces of the resulting string split on
     ``'*'``; ``self.secondary`` and ``self.structures`` start empty.
     """
     string = string.lower()
     if is_nucleotide(string):
         self.nucleotide = string
         # Scope the 'ignore' filter to this one call so constructing an
         # object does not silence BiopythonWarning for the whole process.
         with warnings.catch_warnings():
             warnings.simplefilter('ignore', BiopythonWarning)
             string = Seq.translate(string).lower()
     self.primary = string.split('*')
     self.secondary = []
     self.structures = []

Example #41

0

Show file

File: test_indels.py Project: nathanielknight/validate-nucamino

 def deletion_is_synonymous_match(self, deletion, mtn):
     """Assert the deletion rebuilt from ``mtn`` translates like ``deletion``.

     An ``indels.Deletion`` is rebuilt from ``mtn["NAPosition"] - 1`` and the
     number of ``'-'`` characters in ``mtn["Control"]``, using the gene and
     genotype of ``deletion``.  The test requires a positive gap count and
     equal translations of the two mutated genes.
     """
     rebuilt_pos = mtn["NAPosition"] - 1
     gap_count = mtn["Control"].count('-')
     rebuilt = indels.Deletion(
         nt_pos=rebuilt_pos,
         gene=deletion.gene,
         genotype=deletion.genotype,
         nt_count=gap_count,
         orig_nt=None,
     )
     self.assertGreater(gap_count, 0)
     original_gene, rebuilt_gene = deletion.mutated_gene, rebuilt.mutated_gene
     # Compare proteins rather than nucleotides so that synonymous
     # differences between the two genes are tolerated.
     original_protein = bioseq.translate(original_gene)
     rebuilt_protein = bioseq.translate(rebuilt_gene)
     self.assertEqual(original_protein, rebuilt_protein)

Example #42

0

Show file

File: phylo_dnds.py Project: cfe-lab/Umberjack

def calc_total_poss_subst(codon):
    """Count the single-base substitutions of ``codon`` by their effect.

    Each of the ``Utility.NUC_PER_CODON`` positions is replaced in turn by
    every base of A, C, T, G that differs (case-insensitively) from the
    current one.  A substitution is synonymous when the translation is
    unchanged, ignoring case.

    Returns:
        (synonymous count, non-synonymous count), both floats.
    """
    syn_total = 0.0
    nonsyn_total = 0.0
    reference_aa = str(Seq.translate(codon)).upper()
    for position in range(Utility.NUC_PER_CODON):
        current_base = str(codon[position]).upper()
        for base in ("A", "C", "T", "G"):
            if base == current_base:
                continue
            variant = codon[:position] + Seq.Seq(base) + codon[position+1:]
            variant_aa = str(Seq.translate(variant)).upper()
            if variant_aa == reference_aa:
                syn_total += 1
            else:
                nonsyn_total += 1

    return syn_total, nonsyn_total

Example #43

0

Show file

File: GenerateAlignment.py Project: xji3/JGT_MBE_2016

def translateDNAtoAA(input_fasta, output_fasta):
    """Translate a one-sequence-per-line nucleotide FASTA file to protein.

    Header lines (starting with ``>``) are copied unchanged.  Every other
    line is treated as one complete nucleotide sequence, translated up to the
    first stop codon and written followed by a newline.

    Args:
        input_fasta: path of the nucleotide FASTA file to read.
        output_fasta: path of the protein FASTA file to write (truncated).

    Raises:
        AssertionError: if a sequence line (without its line terminator) is
            not a whole number of codons long.
    """
    with open(input_fasta, 'r') as f:
        with open(output_fasta, 'w+') as g:
            for line in f:
                if line.startswith('>'):
                    g.write(line)
                    continue
                # Validate and translate the sequence itself, not the raw
                # line: the old "len(line) % 3 == 1" / "line[:-1]" pair
                # assumed exactly one trailing '\n', so it rejected CRLF
                # files and mis-handled a last line without a newline.
                seq = line.rstrip('\r\n')
                assert len(seq) % 3 == 0
                g.write(Seq.translate(seq, to_stop = True) + '\n')

Example #44

0

Show file

File: check_oligo_script.py Project: raman-lab/biosensor_design

def check_fragments(oligo_file, design_fasta):
    """Cross-check translated oligo fragments against designed protein blocks.

    Both files are read as consecutive (header line, sequence line) pairs.
    The header must contain one of the scaffold tags ``4AC0`` / ``2uxo`` and
    one of the block tags ``B0`` / ``B1``; the pair selects which slice of
    the design sequence, or which flanking primers of the oligo, is used.

    Every oligo fragment (the lower-cased sequence between its two flanks)
    is translated and matched against the design blocks, each block being
    consumed by at most one fragment.  Unmatched fragments and unmatched
    blocks are reported on stderr; ``done`` is written to stdout at the end.

    Args:
        oligo_file: path of the oligo nucleotide file.
        design_fasta: path of the design amino-acid file.

    Raises:
        Exception: if a header matches no known scaffold/block combination.
    """
    # (scaffold tag, block tag, slice start, slice stop) in the design sequence
    design_windows = (
        ('4AC0', 'B0', 77, 117),
        ('4AC0', 'B1', 99, 138),
        ('2uxo', 'B0', 62, 100),
        ('2uxo', 'B1', 136, 176),
    )
    # (scaffold tag, block tag, 5' flank, 3' flank) around the oligo fragment
    oligo_flanks = (
        ('4AC0', 'B0', 'gtgacccgtccctgggtctcaagat', 'gccttgagaccgggcagaggtcgac'),
        ('4AC0', 'B1', 'tgcccgctgtcttcaggtctcaagta', 'catttgagacctgtagcccggcagtg'),
        ('2uxo', 'B0', 'cgatcgtgcccacctggtctccactg', 'gttctgagaccagttggagcccgcac'),
        ('2uxo', 'B1', 'ctggtgcgtcgtctggtctctggat', 'cgttggagaccggcgaacacttccc'),
    )

    def _select(header, table):
        """Return the payload of the first row whose two tags both occur in header."""
        # Both tags are tested explicitly.  The former
        # "'4AC0' and 'B0' in pdb" only tested the block tag, so every 2uxo
        # entry was handled with the 4AC0 parameters.
        for scaffold_tag, block_tag, first, second in table:
            if scaffold_tag in header and block_tag in header:
                return first, second
        return None

    design_aa_list = []
    with open(design_fasta, 'r') as f:
        for header, seq in izip_longest(f, f, fillvalue=None):
            window = _select(header, design_windows)
            if window is None:
                raise Exception('Unrecognized design name')
            start, stop = window
            design_aa_list.append(seq[start:stop])

    fragment_list = []
    with open(oligo_file, 'r') as o:
        for header, seq in izip_longest(o, o, fillvalue=None):
            flanks = _select(header, oligo_flanks)
            if flanks is None:
                raise Exception('Unrecognized oligo name')
            five_prime, three_prime = flanks
            seq_no_5p = seq.lower().split(five_prime)[1]
            fragment_list.append(seq_no_5p.split(three_prime)[0])

    missing_list = []
    for item in fragment_list:
        aa_fragment = Seq.translate(item)
        if aa_fragment in design_aa_list:
            # Consume the block so duplicated oligos are reported as missing.
            design_aa_list.remove(aa_fragment)
        else:
            missing_list.append(aa_fragment)
    if missing_list:
        sys.stderr.write('Error: The following oligo sequences do not match a design amino acid sequence\n')
        for miss in missing_list:
            sys.stderr.write('{0}\n'.format(miss))
    if design_aa_list:
        sys.stderr.write('Error: The following design sequences do not match an oligo sequence\n')
        for design in design_aa_list:
            sys.stderr.write('{0}\n'.format(design))
    sys.stdout.write('done\n')

Example #45

0

Show file

File: patients.py Project: vpuller/HIVEVO_access

    def get_syn_mutations(self, region, mask_constrained = True):
        from itertools import izip
        if region in self.annotation and self.annotation[region].type in ['gene', 'protein']:
            try:
                aft = self.get_allele_frequency_trajectories(region)
                if len(aft.mask.shape) == 0:
                    aft_valid = np.ones((aft.shape[0], aft.shape[-1]), dtype=bool)
                else:
                    aft_valid = -np.array([af.mask.sum(axis=0) for af in aft], dtype=bool)
                gaps = self.get_gaps_by_codon(region)
                initial_seq = self.get_initial_sequence(region)
                consensi = []
                for af in aft:
                    tmp = consensus(af)
                    tmp[gaps]='N'
                    consensi.append(tmp)

                cons_aa = np.array([np.fromstring(Seq.translate(''.join(cons)), 
                                   dtype='|S1') for cons in consensi])
                no_substitution = np.repeat(np.array([len(np.unique(col[ind]))==1 
                                for ind, col in izip(aft_valid.T[::3], cons_aa.T)], dtype=bool), 3)

                syn_muts = np.zeros(aft.shape[1:], dtype=bool)
                for pos in xrange(aft.shape[-1]):
                    ci = pos//3
                    rf = pos%3
                    codon = ''.join(initial_seq[ci*3:(ci+1)*3])
                    for ni,nuc in enumerate(alpha[:4]):
                        mod_codon = codon[:rf] + nuc + codon[rf+1:]
                        try:
                            syn_muts[ni,pos] = (Seq.translate(codon)==Seq.translate(mod_codon))\
                                                *no_substitution[pos]
                        except:
                            syn_muts[ni,pos] = False
                if mask_constrained:
                    syn_muts[:,self.get_constrained(region)] = False
                return syn_muts
            except:
                import pdb; pdb.set_trace()
        else:
            print region,"is not a valid protein or gene"
            return None

Example #46

0

Show file

File: Gaf.py Project: jcambry/oncotator

    def get_protein_seq(self, transcript_id):
        """Return the translated CDS of a transcript without a trailing stop.

        The CDS is the slice of the transcript sequence from the record's
        1-based ``cds_start`` up to and including ``cds_stop``.

        Args:
            transcript_id: id passed to ``get_transcript`` and
                ``get_transcript_seq``.

        Returns:
            The protein sequence with a final ``*`` removed, or None when
            the record or its sequence is missing/empty or the record has no
            usable ``cds_start``.
        """
        gaf_record = self.get_transcript(transcript_id)
        tx_seq = self.get_transcript_seq(transcript_id)
        if not gaf_record or not tx_seq:
            return None

        if "cds_start" not in gaf_record or not gaf_record["cds_start"]:
            return None

        prot_seq = Seq.translate(tx_seq[gaf_record["cds_start"] - 1 : gaf_record["cds_stop"]])
        # Guard against an empty translation (empty CDS slice), for which
        # prot_seq[-1] would raise IndexError.
        if prot_seq and prot_seq[-1] == "*":
            prot_seq = prot_seq[:-1]

        return prot_seq

Example #47

0

Show file

File: createUniprotProteinSeqsAlignments.py Project: alexramos/oncotator

def create_sequence_dbs_for_GAF(gaf, transcripts_file, output_dir):
    """Store GAF transcript and protein sequences in two Shove databases.

    ``gaf`` is indexed by transcript id, then every FASTA record of
    ``transcripts_file`` whose name is a known transcript id is stored in
    the transcript database.  If its GAF record has a ``cds_start``, the CDS
    is translated and stored in the protein database without a trailing
    ``*``.  Transcripts whose protein contains a ``*`` elsewhere are removed
    from the transcript database again.

    Returns:
        (list of removed transcript names, URL of the protein database).
    """
    from Bio import SeqIO
    from Bio import Seq
    import os

    print("Indexing GAF db by transcript id...\n")
    gaf_transcript_idx = dict()
    for gene in gaf:
        for key in gaf[gene].keys():
            for position, entry in enumerate(gaf[gene][key]):
                gaf_transcript_idx[entry['transcript_id']] = (position, gene, key)

    fh_transcripts = SeqIO.parse(transcripts_file, 'fasta')
    transcripts_url = "file://" + os.path.join(output_dir, 'GAF_transcript_seqs.fa.shove')
    transcripts_shlv = Shove(transcripts_url)
    protein_seqs_url = "file://" + os.path.join(output_dir, 'GAF_protein_seqs.fa.shove')
    proteins_shlv = Shove(protein_seqs_url)

    print("Writing transcript and protein shove dbs...")
    transcripts_to_remove = list()
    for j, transcript in enumerate(fh_transcripts):
        # progress report every 1000 FASTA records
        if j % 1000 == 0:
            print(j)
        name = transcript.name
        if name not in gaf_transcript_idx:
            continue
        position, gene, key = gaf_transcript_idx[name]
        gaf_record = gaf[gene][key][position]
        raw_seq = str(transcript.seq)
        transcripts_shlv[name] = raw_seq
        if 'cds_start' not in gaf_record or not gaf_record['cds_start']:
            continue
        cds = raw_seq[gaf_record['cds_start']-1:gaf_record['cds_stop']]
        prot_seq = Seq.translate(cds)
        if prot_seq[-1] == '*':
            prot_seq = prot_seq[:-1]
        elif '*' in prot_seq:
            # an internal stop means the CDS coordinates are wrong (a small
            # number of transcripts, n=12 per the original note): skip them
            transcripts_to_remove.append(name)
            continue

        proteins_shlv[name] = prot_seq

    for name in transcripts_to_remove:
        del transcripts_shlv[name]

    transcripts_shlv.close()
    proteins_shlv.close()

    return transcripts_to_remove,protein_seqs_url

Example #48

0

Show file

File: countMutations.py Project: bochaozhang/countMutations

def applyBias( multiple_mutations,multiple_group,bias ):
    """Determine types for multi mutations using pre-set bias.

    Each entry of ``multiple_mutations`` is a list whose item 0 is the
    germline sequence and item 2 the mutated sequence.  The positions where
    the two differ are applied one at a time, in every possible order, and
    each step is classified as 'silent', 'replacement' or 'unkown'.  The
    first ordering with the most steps of type ``bias`` is kept, and its
    step types are appended, one each, to the entries that share this
    entry's value in ``multiple_group``.

    Args:
        multiple_mutations: list of mutation entries (extended in place).
        multiple_group: group label of each entry, indexed like
            ``multiple_mutations``.
        bias: the step type whose count selects the ordering.

    Returns:
        The same ``multiple_mutations`` list.
    """
    from itertools import permutations
    from Bio import Seq
    from collections import Counter
    
    # counted[k] becomes 1 once entry k has received its type.
    counted = [0]*len(multiple_mutations)
    for mutation in multiple_mutations:
        # NOTE(review): .index() returns the first entry that compares equal,
        # so equal entries share one flag -- confirm duplicates cannot occur.
        if counted[multiple_mutations.index(mutation)] != 1:
            mismatch_positions = [i for i in range(len(mutation[0])) if mutation[0][i]!=mutation[2][i]]
            # Every order in which the mismatching positions can be applied.
            p = list(permutations(mismatch_positions))
            type_list = []   # per ordering: list of step types
            type_count = []  # per ordering: number of steps of type `bias`
            for i in range(len(p)):
                types = []
                germline = mutation[0]
                for j in range(len(p[i])):
                    # Apply one more substitution on top of the previous steps.
                    mutated = germline[:p[i][j]] + mutation[2][p[i][j]] + germline[p[i][j]+1:]
                    if '-' not in germline and 'N' not in germline and '-' not in mutated and 'N' not in mutated:
                        if Seq.translate(germline) == Seq.translate(mutated):
                            types.append('silent')
                        else:
                            types.append('replacement')
                    else:
                        # NOTE(review): spelled 'unkown' here -- check whether
                        # consumers expect 'unknown' instead.
                        types.append('unkown')   
                    # The next step starts from the sequence just produced.
                    germline = mutated
                type_list.append(types)
                type_frequency = Counter(types)
                type_count.append(type_frequency[bias])
            # Keep the step types of the first ordering with the highest count.
            type_list = type_list[type_count.index(max(type_count))]
                         
            # All entries that carry the same group label as this mutation.
            indices = [i for i, x in enumerate(multiple_group) if x == multiple_group[multiple_mutations.index(mutation)]]
            for idx in indices:
                counted[idx] = 1
                # The k-th member of the group receives the k-th step type.
                # NOTE(review): raises IndexError if the group has more
                # members than there are mismatching positions.
                multiple_mutations[idx].append(type_list[indices.index(idx)])
    return multiple_mutations

Example #49

0

Show file

File: seq_util.py Project: neherlab/treetool

def translate(nuc):
	"""Translate a nucleotide sequence to amino acids, keeping codon gaps.

	Gap characters ('-') are replaced by 'N' for the translation itself.  A
	codon that is exactly '---' in ``nuc`` is reported as '-' in the result.
	If the translation raises, a message is printed and every codon is
	reported as 'X' (still with '-' for all-gap codons).

	Returns:
		The amino-acid sequence as a string.
	"""
	from Bio import Seq
	try:
		tmp_aa = Seq.translate(nuc.replace('-','N')) #returns string when argument is a string, Bio.Seq otherwise
	except Exception:
		print("translation failed",nuc)
		# One 'X' per codon.  The former 'X'*len(nuc)//3 parsed as
		# ('X'*len(nuc))//3 and raised TypeError inside this handler.
		tmp_aa = 'X'*(len(nuc)//3)
	return ''.join(
		'-' if nuc[i*3:(i+1)*3]=='---' else aa
		for i,aa in enumerate(tmp_aa)
	)

Example #50

0

Show file

File: sequence.py Project: mauriceling/cynote

def translate(seq):
    """Return the six-frame translation of ``seq`` as a dict.

    The three forward frames start at offsets 0, 1 and 2 of ``seq``; the
    three 'Complement' frames start at the same offsets of its reverse
    complement.
    """
    forward_keys = ('First Frame', 'Second Frame', 'Third Frame')
    reverse_keys = ('Complement First Frame', 'Complement Second Frame',
                    'Complement Third Frame')
    frames = {}
    for offset, key in enumerate(forward_keys):
        frames[key] = Seq.translate(seq[offset:])
    reverse_strand = Seq.reverse_complement(seq)
    for offset, key in enumerate(reverse_keys):
        frames[key] = Seq.translate(reverse_strand[offset:])
    return frames

Example #51

0

Show file

File: MutUtils.py Project: Tmacme/oncotator

 def translate_sequence(input_seq):
     """Translate ``input_seq`` without Biopython's partial-codon warning.

     Bio.Seq.translate complains when the input length is not a multiple
     of 3.  Such input is padded with 'N' up to a full codon before the
     call, and the residue produced by the padded codon is dropped again.
     """
     remainder = len(input_seq) % 3
     was_padded = bool(remainder)

     if was_padded:
         padding = 'NN' if remainder == 1 else 'N'
         input_seq = ''.join([input_seq, padding])

     protein = Seq.translate(input_seq)

     if was_padded:
         # the last residue only exists because of the padding
         return protein[:-1]
     return protein

Example #52

0

Show file

File: test_seq.py Project: BrianLinSu/rop

    def test_translation_using_cds(self):
        """``cds=True`` translates a complete CDS and rejects incomplete ones."""
        complete_cds = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG"
        self.assertEqual("MAIVMGRWKGAR",
                         Seq.translate(complete_cds, table=2, cds=True))

        invalid_cds = (
            "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCG",  # not multiple of three
            "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA",  # no stop codon
            "GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG",  # no start codon
        )
        for seq in invalid_cds:
            with self.assertRaises(TranslationError):
                Seq.translate(seq, table=2, cds=True)

Example #53

0

Show file

File: nucleotide_to_acid_amino.py Project: Raphael-De-Wang/Semestre02

def translate_en_orf6():
    """Write the six-frame translation of every record of each target file.

    For each name in ``tgt_list`` the FASTA file ``path_base + name`` is
    read, and a file with the same name minus its last extension plus
    ``_translate.fasta`` is written.  Every input record yields six
    records: frames ORF1-ORF3 are read from offsets 0-2 of the sequence,
    ORF4-ORF6 from offsets 0-2 of its reverse complement.  The frame tag is
    appended to the record id and name.
    """
    for tgt in tgt_list:
        # "with" closes both handles, including on error; the input handle
        # used to be left open.
        with open(path_base+tgt) as input_handle:
            output_fname  = path_base+'.'.join(tgt.split('.')[:-1])+'_translate.fasta'
            print(output_fname)
            with open(output_fname,'w') as output_handle:
                for record in SeqIO.parse(input_handle,'fasta'):
                    # The reverse complement is computed once per record
                    # instead of once per frame.
                    strands = (record.seq, record.reverse_complement().seq)
                    frames = []
                    for strand_index, strand in enumerate(strands):
                        for offset in range(3):
                            tag = 'ORF%d|' % (3*strand_index + offset + 1)
                            frames.append(SeqRecord(
                                Seq.translate(strand[offset:]),
                                id=record.id+tag,
                                name=record.name+tag,
                                description=record.description))
                    SeqIO.write(frames, output_handle, "fasta")

Example #54

0

Show file

File: test_seq.py Project: frankkl/biopython

        print s.complement()
        assert False, "Complement shouldn't work on a protein!"
    except ValueError :
        pass
    try :
        print s.reverse_complement()
        assert False, "Reverse complement shouldn't work on a protein!"
    except ValueError :
        pass
   
print
print "Translating"
print "==========="
for nucleotide_seq in test_seqs:
    try :
        expected = Seq.translate(nucleotide_seq)
        print "%s\n-> %s" \
        % (repr(nucleotide_seq) , repr(expected))
    except (ValueError, TranslationError), e :
        expected = None
        print "%s\n-> %s" \
        % (repr(nucleotide_seq) , str(e))
    #Now test the Seq object's method
    if isinstance(nucleotide_seq, Seq.Seq) :
        try :
            assert repr(expected) == repr(nucleotide_seq.translate())
        except (ValueError, TranslationError) :
            assert expected is None
    #Now check translate(..., to_stop=True)
    try :
        short = Seq.translate(nucleotide_seq, to_stop=True)

Example #55

0

Show file

File: seq_util.py Project: zysong/nextflu

def translate(nuc, to_stop=False):
    """Return the amino-acid translation of the nucleotide sequence ``nuc``.

    ``to_stop`` is forwarded unchanged to ``Bio.Seq.translate``.
    """
    from Bio import Seq

    # the result is a string for string input, a Bio.Seq object otherwise
    protein = Seq.translate(nuc, to_stop=to_stop)
    return protein

Example #56

0

Show file

File: ctleptop.py Project: demis001/bio_pieces

def access_mixed_aa(file_name):
    """Translate a multi-FASTA nucleotide file codon by codon.

    Codons containing an ambiguity letter (a key of ``AMBICODON``) are
    expanded with ``nearbyPermutations`` and translated with ``getaalist``;
    the distinct amino acids are joined with '/'.  All other codons are
    translated directly.  Gap characters ('-') are treated as 'N'.

    Args:
        file_name: path of the FASTA file to read.

    Returns:
        A 4-tuple of lists:
        ``aa`` -- one amino-acid string per processed codon, over all records;
        ``nucleotide_idx`` -- 1-based nucleotide position recorded for each
        ambiguous codon that yields more than one amino acid;
        ``nucl_codon`` -- the corresponding codons;
        ``seqids`` -- the corresponding record ids.
    """
    from Bio import SeqIO
    aa = []
    nucleotide_idx = []
    nucl_codon = []
    seqids = []
    for seq_record in SeqIO.parse(file_name, 'fasta'):
        seq_id = seq_record.id
        # seq_len, header and my_seq are not used further in this function.
        seq_len = len(seq_record)
        header, seqline = seq_record.id, str(seq_record.seq)
        my_seq = Seq(str(seqline), IUPAC.ambiguous_dna)
        seqline = seqline.replace("-", "N")
        n = 3
        # Key is the 1-based position of the codon's last nucleotide
        # (3, 6, 9, ...); value is the codon text.
        codon_list = {i + n: seqline[i:i + n] for i in range(0, len(seqline), n)}
        ambi_nucl = AMBICODON.keys()
        # Process the codons in sequence order.
        for key, codon in sorted(codon_list.iteritems()):
            if list_overlap(codon, ambi_nucl):
                # Unpacking requires exactly three characters, so a shorter
                # trailing codon reaching this branch raises ValueError.
                d, e, f = codon
                m = [d, e, f]
                # Distinct ambiguity letters present in this codon.
                items = set(m).intersection(ambi_nucl)
                # Codon offset (0-2) of one of them.
                # NOTE(review): with several distinct letters the choice
                # depends on set ordering -- confirm this is acceptable.
                indexm = m.index(list(items)[0])
                items = list(items)      # e.g. ['R']
                # One pass per distinct ambiguity letter; the loop variables
                # are not read, `val` is recomputed from the codon each time.
                for idx, val in enumerate(items):
                    codonlist = list(nearbyPermutations(codon))
                    val = getaalist(codonlist)
                    # Collapse identical amino acids, e.g. ['D', 'D'] -> ['D'].
                    val = list(set(val))
                    val = "/".join(val)   # yields e.g. 'I/L'
                    val = str(val)
                    # A '/' means more than one amino acid is possible; only
                    # then are the position, codon and record id recorded.
                    # The offset turns the codon-end position into the
                    # position of the nucleotide at offset `indexm`.
                    # NOTE(review): `key` is overwritten here, so a second
                    # pass of this loop starts from the already shifted value.
                    if "/" in val and indexm == 2:
                        key = key
                        nucleotide_idx.append(key)
                        nucl_codon.append(codon)
                        seqids.append(seq_id)
                    elif "/" in val and indexm == 1:
                        key = key - 1
                        nucleotide_idx.append(key)
                        nucl_codon.append(codon)
                        seqids.append(seq_id)
                    elif "/" in val and indexm == 0:
                        key = key - 2
                        nucleotide_idx.append(key)
                        nucl_codon.append(codon)
                        seqids.append(seq_id)
                    else:
                        pass
                    # The amino-acid string is appended on every pass,
                    # whether or not a position was recorded.
                    aa.append(val)

            else:
                # No ambiguity letter: translate the codon directly.
                aa1 = Seq(codon, IUPAC.unambiguous_dna)
                aa1 = aa1.translate()
                aa1 = str(aa1)
                aa.append(aa1)
    return aa, nucleotide_idx, nucl_codon, seqids

Example #57

0

Show file

File: translate.py Project: Heanthor/rosalind

from pileup_user import read_fa_file
from Bio import Seq

reference = read_fa_file("data/reference.fa")

# Amino acid at index 274 of the translated reference.
print("Original sequence: " + Seq.translate(reference)[274])

# Copy of the reference with a "T" at nucleotide index 822 (= 3 * 274, the
# first base of the codon inspected above).
mutated_reference = "".join(
    "T" if i == 822 else x for i, x in enumerate(reference)
)

print("Read sequence: " + Seq.translate(mutated_reference)[274])