Example #1
0
def cutOligos(GeneName, cutsite, DNA):
    """Prompt for a CRISPR cut site and print the oligos needed to clone and screen it.

    The three arguments are overwritten immediately: the gene name, the 20-mer
    cut sequence and the locus sequence are all read interactively with input().

    The current cut site architecture is:
        >>>>> YtRNAp-HDV ribozyme- >20nt< -gRNA <<<<<

    Printed oligos:
      * "cut<name>"     - the 20-mer flanked by the two fixed cloning arms
      * "Lcolony<name>" - screening primer ("gacttt" + 20-mer)
      * "Lup<name>"     - 30-mer taken 520..490 nt upstream of the cut position

    The cut position is the start of the 20-mer plus 16, i.e. 3 nt before the
    last base of the 20-mer. If the 20-mer is not found on the given strand the
    locus is reverse-complemented and searched again. When the 20-mer is on
    neither strand, or fewer than 520 nt precede the cut position, a warning is
    printed instead of an Lup primer, because the slice would otherwise wrap
    around and silently produce an unrelated sequence.

    Returns None.
    """
    GeneName=input("Name, using quotes: ")
    cutsite=input("20-mer cut sequence, using quotes: ")
    DNA=input("Locus sequence +/- a few kb, using quotes: ")

    # These two oligos depend only on the guide, not on the locus.
    cutSequence=Seq("cgggtggcgaatgggacttt")+cutsite+Seq("gttttagagctagaaatagc")
    seqprimer=Seq("gacttt")+cutsite

    print("cut" + GeneName + "  " + cutSequence)
    print("Lcolony" + GeneName + "  " + seqprimer)

    start=DNA.find(cutsite)
    if start==-1:
        # Cut site may be on the antisense strand; note DNA becomes a Seq here.
        DNA=Seq(DNA).reverse_complement()
        start=DNA.find(cutsite)

    if start==-1:
        print("WARNING: 20-mer cut sequence not found on either strand of the locus; no Lup primer generated.")
        return

    # start is the first base of the 20-mer, so start+16 is the cut position.
    index=start+16

    if index<520:
        print("WARNING: fewer than 520 nt upstream of the cut site; no Lup primer generated.")
        return

    Lup=DNA[index-520:index-490]                # 30-mer ~500bp upstream of cut site
    print("Lup" + GeneName + "  " + Lup)
Example #2
0
def ReadingFrameFinder(DNASTRING):
    """Translate every AUG-initiated open reading frame found in DNASTRING.

    Trailing newlines are stripped and every "T" is replaced by "U". For each
    "AUG" in the resulting string the sequence is scanned in frame, codon by
    codon, up to the first stop codon (UAA, UAG or UGA). The span from the
    AUG through that stop codon is translated and the protein string is
    appended to ``FinalizedProt``, a name this function does not define and
    therefore expects from the enclosing scope. An AUG without an in-frame
    stop codon contributes nothing. The function returns None.
    """
    stop_codons = ("UAA", "UAG", "UGA")
    rna = DNASTRING.rstrip("\n").replace("T", 'U')
    rna_length = len(rna)

    candidates = []
    for start in xrange(0, rna_length):
        if rna[start:start+3] != "AUG":
            continue
        for pos in xrange(start, rna_length, 3):
            if rna[pos:pos+3] in stop_codons:
                candidates.append(rna[start:pos+3])
                break

    for orf in candidates:
        # Keep whole codons only; any trailing partial codon is dropped.
        whole_codons = len(orf) - len(orf) % 3
        protein = Seq(orf[:whole_codons], generic_rna).translate()
        FinalizedProt.append(str(protein))
def assign_fitness(nodes):
	'''
	Translate the sequence of every virus and store its tolerance-based fitness.

	nodes is either a list of dict-like viruses (reads node['seq'], writes
	node['tol']) or a dendropy.Tree (reads node.seq, writes node.tol on every
	node in postorder). Any other type is silently ignored.
	'''
	aa, sites, wt_aa, aa_prob = load_mutational_tolerance()
	alignment = AlignIO.read('source-data/H1_H3.fasta', 'fasta')
	# True for the alignment columns in which no sequence has a gap
	gap_free = (np.array(alignment)!='-').min(axis=0)
	# per sequence name: ungapped sequence position of every gap-free column
	positions = {}
	for record in alignment:
		is_residue = np.fromstring(str(record.seq), dtype='S1')!='-'
		positions[record.name] = (np.cumsum(is_residue)-1)[gap_free]

	# keep only the probability rows that correspond to aligned H1 positions
	aa_prob=aa_prob[positions['H1'],:]
	# extra column with a small constant for non-canonical amino acids
	filler = 1e-5*np.ones((aa_prob.shape[0],1))
	aa_prob = np.hstack((aa_prob, filler))

	if isinstance(nodes, list):
		for virus in nodes:
			protein = Seq.translate(virus['seq'])
			virus['tol'] = calc_fitness_tolerance(protein, aa_prob, aa, positions['H3'])
	elif isinstance(nodes, dendropy.Tree):
		for tree_node in nodes.postorder_node_iter():
			protein = Seq.translate(tree_node.seq)
			tree_node.tol = calc_fitness_tolerance(protein, aa_prob, aa, positions['H3'])
Example #4
0
    def test_reverse_complement_on_proteins(self):
        """Reverse-complementing a protein sequence must raise ValueError."""
        for protein in protein_seqs:
            # Module-level function.
            self.assertRaises(ValueError, Seq.reverse_complement, protein)

            # Method on the sequence object itself.
            with self.assertRaises(ValueError):
                protein.reverse_complement()
Example #5
0
    def test_translation_on_proteins(self):
        """Translating a protein sequence must raise ValueError."""
        for protein in protein_seqs:
            # Module-level function accepts any input type.
            self.assertRaises(ValueError, Seq.translate, protein)

            # Only Seq objects are checked through the method.
            if not isinstance(protein, Seq.Seq):
                continue
            with self.assertRaises(ValueError):
                protein.translate()
Example #6
0
    def test_translation_to_stop(self):
        """to_stop=True must equal a full translation cut at the first '*'."""
        for candidate in self.test_seqs:
            # Trim to a whole number of codons before translating.
            whole_codons = 3 * (len(candidate) // 3)
            trimmed = candidate[:whole_codons]
            if not isinstance(trimmed, Seq.Seq):
                continue
            if 'X' in str(trimmed):
                continue
            truncated = Seq.translate(trimmed, to_stop=True)
            complete = Seq.translate(trimmed)
            self.assertEqual(str(truncated), str(complete.split('*')[0]))

        # Plain string input with a non-default codon table.
        seq = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG"
        self.assertEqual("VAIVMGRWKGAR", Seq.translate(seq, table=2, to_stop=True))
Example #7
0
    def test_back_transcription_of_proteins(self):
        """Back-transcribing a protein sequence must raise ValueError."""
        for protein in protein_seqs:
            # Module-level function accepts any input type.
            self.assertRaises(ValueError, Seq.back_transcribe, protein)

            # Only Seq objects are checked through the method.
            if not isinstance(protein, Seq.Seq):
                continue
            with self.assertRaises(ValueError):
                protein.back_transcribe()
Example #8
0
 def rc_kmers(self, kmers):
     """Group k-mers with their reverse complements.

     Each k-mer is processed in input order. If its reverse complement has
     already been kept as a key, the k-mer is mapped to that reverse
     complement; otherwise the k-mer itself is kept as a key and mapped to
     itself.

     :param kmers: iterable of hashable k-mers accepted by Seq.reverse_complement
     :return: (keys, res) where keys is the list of kept k-mers in first-seen
         order and res maps every k-mer to its representative
     """
     res={}
     keys=[]
     kept=set()     # same members as keys, for O(1) membership tests
     for s in kmers:
         rc=Seq.reverse_complement(s)    # computed once per k-mer
         if rc in kept:
             res[s]=rc
         else:
             keys.append(s)
             kept.add(s)
             res[s]=s
     return keys,res
Example #9
0
def delGene(geneName, cutsite):
    """Design the oligos for a CRISPR-mediated deletion of a chromosomal region.

    The locus and the region to delete are read from the module-level
    ``genomicData`` mapping (items 0 and 1 of ``genomicData[geneName]``).
    The function prints oligos for cloning the guide into a pL308 Cas9-gRNA
    vector and for generating the donor DNA that deletes the unwanted region.
    Primers Lup+Rdown produce a 1kb band if the deletion was successful.

    part of yCRISPRv3 by [email protected]

    :param geneName: key into genomicData; also used to label the printed oligos
    :param cutsite: 20-mer guide sequence
    :return: tuple (Ldown, Rup) of donor-DNA oligos
    :raises ValueError: if the deletion sequence does not occur in the locus
    """
    locus = Seq(genomicData[geneName][0])
    deletion = Seq(genomicData[geneName][1])

    # The guide should target the region being removed, on either strand.
    if deletion.find(cutsite)==-1 and deletion.reverse_complement().find(cutsite)==-1:
        print ("WARNING: Guide 20-mer sequence not found in deletion region.")

    # index is the start position of the deletion within the locus.
    index=locus.find(deletion)
    if index==-1:
        # Slicing with -1 would silently build oligos from unrelated sequence.
        raise ValueError("Deletion region not found in locus for " + str(geneName))

    # Remove the deletion: keep everything before index and everything from
    # index+len(deletion) onwards. In newlocus, index then points at the first
    # nt after the deletion junction.
    newlocus=locus[0:index]+locus[index+len(deletion):]

    # 30-mers roughly 500 nt either side of the junction.
    Lup=newlocus[index-500:index-470]
    Rdown=newlocus[index+469:index+499].reverse_complement()

    upstream = newlocus[:index]
    downstream = newlocus[index:]
    Rtemp1 = upstream.reverse_complement()
    Rtemp2 = downstream.reverse_complement()

    rPrimer, rLength = getPrimer(Rtemp1)
    lPrimer, lLength = getPrimer(downstream)

    Rup = getOverhang(Rtemp2, rLength) + rPrimer
    Ldown = getOverhang(upstream, lLength) + lPrimer

    cutSequence=Seq("cgggtggcgaatgggacttt")+cutsite+Seq("gttttagagctagaaatagc")
    seqprimer=Seq("gacttt")+cutsite

    # Labels use the geneName parameter (the original referenced an undefined GeneName).
    print("cut" + geneName + "  " + cutSequence)
    print("seq" + geneName + "  " + seqprimer)
    print("Lup" + geneName + "del" + " " + Lup)
    print("Rup" + geneName + "del" + " " + Rup)
    print("Ldown" + geneName + "del" + " " + Ldown)
    print("Rdown" + geneName + "del" + " " + Rdown)

    return Ldown, Rup
Example #10
0
def calc_total_subst(start_codon, end_codon):
    """
    Returns the synonymous and nonsynonymous share of the point mutations needed to turn start_codon into end_codon.
    If several positions differ between the codons, every ordering of the single-base changes (pathway) is walked,
    always starting again from start_codon, and the single-base steps of all pathways are pooled.
    :param Bio.Seq.Seq start_codon:  3bp codon
    :param Bio.Seq.Seq end_codon:  3bp codon
    :return tuple (float, float):  (synonymous steps / all steps, nonsynonymous steps / all steps) over all pathways;
        (0.0, 0.0) when the codons are identical
    :raises ValueError: if a pathway does not end at end_codon
    """
    total_syn = 0.0
    total_nonsyn = 0.0
    total_subs = 0.0

    upper_start_codon = start_codon.upper()
    upper_end_codon = end_codon.upper()

    # find positions where the codons differ
    diff_pos = [pos for pos, nucstr1 in enumerate(str(upper_start_codon))
                if nucstr1 != str(upper_end_codon[pos])]

    start_aa = Seq.translate(upper_start_codon)

    # Traverse all possible pathways from start_codon to end_codon where
    # each stage of a pathway mutates by 1 base.
    for pathway in itertools.permutations(diff_pos):
        print(str(upper_start_codon) + " " + str(upper_end_codon) + " " + ",".join([str(x) for x in pathway]))

        # Every pathway begins at the start codon. Without this reset, all
        # pathways after the first would begin at the end codon and count
        # only synonymous (no-op) steps.
        last_codon = upper_start_codon
        last_aa = start_aa

        for mut_pos in pathway:
            mut_nuc = upper_end_codon[mut_pos]
            mut_codon =  last_codon[:mut_pos] + mut_nuc + last_codon[mut_pos+1:]
            mut_aa = Seq.translate(mut_codon)

            total_subs += 1
            if str(last_aa) == str(mut_aa):
                total_syn += 1
            else:
                total_nonsyn += 1

            last_codon = mut_codon
            last_aa = mut_aa

        if str(last_codon) != str(upper_end_codon):
            raise ValueError("Pathway does not yield end codon " + str(last_codon))

    if total_subs:
        ave_syn = total_syn/total_subs
        ave_nonsyn = total_nonsyn/total_subs
    else:
        ave_syn = 0.0
        ave_nonsyn = 0.0
    return ave_syn, ave_nonsyn
Example #11
0
    def test_reverse_complement(self):
        """Function and method forms of (reverse) complement must agree."""
        candidates = copy.copy(test_seqs)
        candidates.pop(21)

        for candidate in candidates:
            # Skip proteins and anything that is not a Seq object.
            if isinstance(candidate.alphabet, Alphabet.ProteinAlphabet):
                continue
            if not isinstance(candidate, Seq.Seq):
                continue

            expected = Seq.reverse_complement(candidate)
            self.assertEqual(repr(expected),
                             repr(candidate.reverse_complement()))
            self.assertEqual(repr(expected[::-1]),
                             repr(candidate.complement()))

            reversed_text = str(Seq.reverse_complement(candidate))[::-1]
            self.assertEqual(str(candidate.complement()), reversed_text)

            function_text = str(Seq.reverse_complement(candidate))
            self.assertEqual(str(candidate.reverse_complement()), function_text)
Example #12
0
    def export(self, path = '', extra_attr = ['aa_muts']):
        """Write the tree and the per-node sequence differences as JSON.

        Two files are written, both named by prefixing ``path``:
        'tree.json' (output of tree_to_json for the root) and
        'sequences.json'. The latter holds the full root nucleotide sequence
        and root protein translations under 'root', and for every clade a
        position -> state mapping of the sites that differ from the root.
        """
        from Bio import Seq
        from itertools import izip
        tree_file = path+'tree.json'
        sequence_file = path+'sequences.json'
        write_json(tree_to_json(self.tree.root, extra_attr=extra_attr),
                   tree_file, indent=None)

        # Reference (root) sequences: nucleotides plus one translation per protein.
        root_entry = {'nuc': "".join(self.tree.root.sequence)}
        for prot in self.proteins:
            cds = str(self.proteins[prot].extract(Seq.Seq(root_entry['nuc'])))
            # gaps are translated as N, and the resulting X is shown as a gap
            root_entry[prot] = str(Seq.translate(cds.replace('-', 'N'))).replace('X','-')
        elems = {'root': root_entry}

        # Nucleotide differences of every clade relative to the root.
        for node in self.tree.find_clades():
            if not (hasattr(node, "clade") and hasattr(node, "sequence")):
                continue
            nuc_diff = {}
            for pos, (state, ancstate) in enumerate(izip(node.sequence, self.tree.root.sequence)):
                if state!=ancstate:
                    nuc_diff[pos] = state
            elems[node.clade] = {'nuc': nuc_diff}

        # Amino-acid differences of every clade relative to the root.
        for node in self.tree.find_clades():
            if not (hasattr(node, "clade") and hasattr(node, "translations")):
                continue
            for prot in self.proteins:
                aa_diff = {}
                for pos, (state, ancstate) in enumerate(izip(node.translations[prot], elems['root'][prot])):
                    if state!=ancstate:
                        aa_diff[pos] = state
                elems[node.clade][prot] = aa_diff

        write_json(elems, sequence_file, indent=None)
Example #13
0
def translationBio(data):
    '''Translate every entry of data with Biopython and print the joined protein.'''
    # Standard table, stop codons rendered as nothing, no early termination.
    options = dict(table='Standard', stop_symbol='', to_stop=False)
    protein = ''
    for dna in data:
        protein = protein + Seq.translate(dna, **options)
    print(protein)
def translateDNAtoAA(input_fasta, output_fasta, remove_lower_case = False):
    """Translate a one-sequence-per-line FASTA file from DNA to protein.

    Header lines (starting with '>') are copied unchanged. Every other line
    is stripped of its line ending, translated up to the first stop codon
    and written followed by a newline.

    The line ending is removed with rstrip, so CRLF files do not leave a
    stray carriage return in the sequence and a final line without a newline
    no longer loses its last base.

    :param input_fasta: path of the DNA FASTA file to read
    :param output_fasta: path of the protein FASTA file to write
    :param remove_lower_case: if true, lower-case letters are removed from
        each sequence before translation
    :raises AssertionError: if a sequence line (before lower-case removal)
        is not a whole number of codons long
    """
    with open(input_fasta, 'r') as f, open(output_fasta, 'w+') as g:
        for line in f:
            if line[0] == '>':
                g.write(line)
                continue
            dna = line.rstrip('\r\n')
            assert len(dna) % 3 == 0, 'sequence length is not a multiple of 3'
            if remove_lower_case:
                # Works on both Python 2 and 3, unlike str.translate(None, ...).
                dna = ''.join(base for base in dna if base not in string.ascii_lowercase)
            g.write(Seq.translate(dna, to_stop = True) + '\n')
def translate(config, rc=False):
    """Read a base range from the DDNA file and translate it.

    The file returned by ddna(config) is memory-mapped read-only, the bases
    config['startBase']..config['endBase'] (1-indexed, inclusive) are
    extracted, optionally reverse-complemented, and translated with codon
    table 1, or table 4 when mycoplasma(config) is true.

    :param config: mapping providing 'startBase' and 'endBase'; also passed
        to mycoplasma() and ddna()
    :param rc: reverse-complement the extracted sequence before translating
    :return: dict with 'seq' (nucleotides) and 'trans' (translation), both str
    """
    table = 1
    if mycoplasma(config):
        # table 4 is for mycoplasma ala:
        # http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
        table = 4
    fd, fmap = None, None
    try:
        log.debug("Doing translation with table %d, rc: %s", table, rc)
        fd = os.open(ddna(config), os.O_RDONLY)
        fmap = mmap.mmap(fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
        # By convention (e.g. from the C or NCBI) the DNA is 1
        # indexed; our DDNA is a c style array that is 0 indexed
        startIdx = config['startBase'] - 1
        # The end index here is inclusive but array.slice isn't so we
        # don't need to subtract 1
        endIdx = config['endBase']
        seq = Seq.Seq(fmap[startIdx:endIdx])

        if rc:
            seq = seq.reverse_complement()
        return {
            'seq': str(seq),
            'trans': str(Seq.translate(seq, table))
        }
    finally:
        # Actually call close(); the original referenced the method without
        # calling it, so the mapping was never released.
        if fmap is not None:
            fmap.close()
        # Compare against None so that descriptor 0 is closed as well.
        if fd is not None:
            os.close(fd)
Example #16
0
    def add_translations(self):
        '''
        Translate the nucleotide sequence of every node into the proteins
        listed in self.proteins (expected to be SeqFeatures) and record, per
        protein, the amino-acid differences to the parent node as
        (parent state, position, node state) tuples.
        '''
        from Bio import Seq

        # Proteins in genomic order, i.e. sorted by the start of their SeqFeature.
        ordered = sorted(self.proteins.items(), key=lambda item: item[1].start)

        for node in self.tree.find_clades(order='preorder'):
            if not hasattr(node, "translations"):
                # An ordered mapping keeps the genomic order of the
                # translations for downstream assembly.
                node.translations=OrderedDict()
                node.aa_mutations = {}

            for prot, feature in ordered:
                cds = str(feature.extract(Seq.Seq("".join(node.sequence))))
                node.translations[prot] = Seq.translate(cds.replace('-', 'N'))

                parent = node.up
                if parent is None:
                    # The root has nothing to be compared against.
                    node.aa_mutations[prot] = []
                    continue

                changes = []
                paired = zip(parent.translations[prot], node.translations[prot])
                for pos, (a, d) in enumerate(paired):
                    if a!=d:
                        changes.append((a,pos,d))
                node.aa_mutations[prot] = changes

        self.dump_attr.append('translations')
Example #17
0
 def add_translations(self):
     """Store the translation of every protein in self.proteins on every tree node.

     Gap characters in the extracted nucleotide sequence are replaced by 'N'
     before translation. Results go into node.translations, keyed by protein.
     """
     from Bio import Seq
     for node in self.tree.find_clades():
         if not hasattr(node, "translations"):
             node.translations={}
         for prot in self.proteins:
             feature = self.proteins[prot]
             cds = str(feature.extract(Seq.Seq("".join(node.sequence))))
             node.translations[prot] = Seq.translate(cds.replace('-', 'N'))
def mutationType(single_mutations):
    "Label each single mutation as 'silent', 'replacement' or 'unknown'."
    from Bio import Seq

    print(len(single_mutations))
    for record in single_mutations:
        germline = record[0]
        mutated = record[2]

        # Gaps or N in either sequence make the mutation type undecidable.
        undecidable = ('-' in germline or 'N' in germline
                       or '-' in mutated or 'N' in mutated)
        if undecidable:
            label = 'unknown'
        elif Seq.translate(germline) == Seq.translate(mutated):
            label = 'silent'
        else:
            label = 'replacement'
        record.append(label)

    return single_mutations
Example #19
0
 def test_stops(self):
     """Stop codons must translate as expected under several codon tables."""
     raw = self.misc_stops
     inputs = [
         raw,
         Seq.Seq(raw),
         Seq.Seq(raw, Alphabet.generic_nucleotide),
         Seq.Seq(raw, Alphabet.DNAAlphabet()),
         Seq.Seq(raw, IUPAC.unambiguous_dna),
     ]
     # (expected protein, keyword arguments for Seq.translate), in check order
     expectations = [
         ("***RR", {}),
         ("***RR", {'table': 1}),
         ("***RR", {'table': "SGC0"}),
         ("**W**", {'table': 2}),
         ("**WRR", {'table': 'Yeast Mitochondrial'}),
         ("**WSS", {'table': 5}),
         ("**WSS", {'table': 9}),
         ("**CRR", {'table': 'Euplotid Nuclear'}),
         ("***RR", {'table': 11}),
         ("***RR", {'table': 'Bacterial'}),
     ]
     for nucleotide_seq in inputs:
         for protein, options in expectations:
             self.assertEqual(protein, str(Seq.translate(nucleotide_seq, **options)))
Example #20
0
def calc_total_poss_subst(codon):
    """Count the possible single-base substitutions of a codon by effect.

    Every position (Utility.NUC_PER_CODON of them) is changed to each of
    the other bases among A, C, T and G, and the mutated codon is
    translated.

    :return: tuple of floats (substitutions that keep the amino acid,
        substitutions that change it)
    """
    syn_count = 0.0
    nonsyn_count = 0.0
    reference_aa = str(Seq.translate(codon)).upper()
    for site in range(0, Utility.NUC_PER_CODON):
        current_base = str(codon[site]).upper()
        for base in ("A", "C", "T", "G"):
            if base == current_base:
                # Not a substitution.
                continue
            variant = codon[:site] + Seq.Seq(base) + codon[site+1:]
            variant_aa = str(Seq.translate(variant)).upper()
            if variant_aa == reference_aa:
                syn_count += 1
            else:
                nonsyn_count += 1

    return syn_count, nonsyn_count
Example #21
0
def reverse_complement(sequence):
    """
    Return the reverse complement of a unicode sequence as unicode.

    Convenience wrapper: BioPython's reverse_complement doesn't work on
    unicode strings, so the input is converted with str() first and the
    result is converted back with unicode().
    """
    plain = str(sequence)
    complemented = Seq.reverse_complement(plain)
    return unicode(complemented)
Example #22
0
 def __init__(self, string):
     """Store a lower-cased protein, translating nucleotide input first.

     If is_nucleotide() accepts the lower-cased input, it is kept in
     self.nucleotide and translated (BiopythonWarning is set to be ignored
     before translating). self.primary holds the protein split at '*'.
     """
     sequence = string.lower()
     if is_nucleotide(sequence):
         self.nucleotide = sequence
         warnings.simplefilter('ignore', BiopythonWarning)
         sequence = Seq.translate(sequence).lower()
     self.primary = sequence.split('*')
     self.secondary = []
     self.structures = []
Example #23
0
File: lox.py Project: bh0085/zhang
def oligos_1():
    """Print the upper-cased forward and reverse oligos.

    Both oligos share the insert loxp + barcode + kozak + start + lox71.
    The forward oligo carries the first overhang in front of it; the
    reverse oligo is the reverse complement of the insert followed by the
    second overhang.
    """
    overhangs = ['CATG', 'ACAA']
    bc =  'GATGATTGA'
    kozak =  'gccacc'
    start = 'atg'
    insert = loxp + bc + kozak + start + lox71

    fwd = overhangs[0] + insert
    rev = seq.reverse_complement(insert + overhangs[1])

    print(fwd.upper())
    print(rev.upper())
Example #24
0
def translateDNAtoAA(input_fasta, output_fasta):
    """Translate a one-sequence-per-line FASTA file from DNA to protein.

    Header lines (starting with '>') are copied unchanged. Every other line
    is stripped of its line ending, translated up to the first stop codon
    and written followed by a newline.

    The line ending is removed with rstrip, so a final line without a
    newline no longer loses its last base and CRLF endings are handled.

    :param input_fasta: path of the DNA FASTA file to read
    :param output_fasta: path of the protein FASTA file to write
    :raises AssertionError: if a sequence line is not a whole number of
        codons long
    """
    with open(input_fasta, 'r') as f, open(output_fasta, 'w+') as g:
        for line in f:
            if line[0] == '>':
                g.write(line)
                continue
            dna = line.rstrip('\r\n')
            assert len(dna) % 3 == 0, 'sequence length is not a multiple of 3'
            g.write(Seq.translate(dna, to_stop = True) + '\n')
def check_fragments(oligo_file, design_fasta):
    """Check that every ordered oligo encodes one of the designed blocks.

    Both files are read as alternating name / sequence lines. For each
    design, the amino-acid block selected by its scaffold ('4AC0' or '2uxo')
    and block tag ('B0' or 'B1') is collected. For each oligo, the coding
    fragment between the scaffold- and block-specific flanking sequences is
    extracted (case-insensitively) and translated. Fragments are then matched
    one-to-one against the design blocks; unmatched items on either side are
    reported on stderr, and 'done' is written to stdout.

    The name is matched on scaffold AND block tag. The original expression
    ``'4AC0' and 'B0' in pdb`` only tested the block tag, so '2uxo' entries
    were handled with the '4AC0' coordinates and flanks.

    :param oligo_file: path of the oligo file (name line, sequence line, ...)
    :param design_fasta: path of the design file (name line, sequence line, ...)
    :raises Exception: if a name matches none of the known scaffold/block pairs
    """
    # (scaffold, block tag, region of the design sequence covered by the block)
    design_regions = (
        ('4AC0', 'B0', slice(77, 117)),
        ('4AC0', 'B1', slice(99, 138)),
        ('2uxo', 'B0', slice(62, 100)),
        ('2uxo', 'B1', slice(136, 176)),
    )
    # (scaffold, block tag, 5' flank, 3' flank) of the ordered oligos
    oligo_flanks = (
        ('4AC0', 'B0', 'gtgacccgtccctgggtctcaagat', 'gccttgagaccgggcagaggtcgac'),
        ('4AC0', 'B1', 'tgcccgctgtcttcaggtctcaagta', 'catttgagacctgtagcccggcagtg'),
        ('2uxo', 'B0', 'cgatcgtgcccacctggtctccactg', 'gttctgagaccagttggagcccgcac'),
        ('2uxo', 'B1', 'ctggtgcgtcgtctggtctctggat', 'cgttggagaccggcgaacacttccc'),
    )

    design_aa_list = []
    with open(design_fasta, 'r') as f:
        for pdb, seq in izip_longest(f, f, fillvalue=None):
            for scaffold, tag, region in design_regions:
                if scaffold in pdb and tag in pdb:
                    design_aa_list.append(seq[region])
                    break
            else:
                raise Exception('Unrecognized design name')

    fragment_list = []
    with open(oligo_file, 'r') as o:
        for pdb, seq in izip_longest(o, o, fillvalue=None):
            for scaffold, tag, flank_5p, flank_3p in oligo_flanks:
                if scaffold in pdb and tag in pdb:
                    seq_no_5p = seq.lower().split(flank_5p)[1]
                    fragment_list.append(seq_no_5p.split(flank_3p)[0])
                    break
            else:
                raise Exception('Unrecognized oligo name')

    # Match each translated fragment against a not-yet-used design block.
    missing_list = []
    for item in fragment_list:
        aa_fragment = Seq.translate(item)
        if aa_fragment in design_aa_list:
            design_aa_list.remove(aa_fragment)
        else:
            missing_list.append(aa_fragment)
    if missing_list:
        sys.stderr.write('Error: The following oligo sequences do not match a design amino acid sequence\n')
        for miss in missing_list:
            sys.stderr.write('{0}\n'.format(miss))
    if design_aa_list:
        sys.stderr.write('Error: The following design sequences do not match an oligo sequence\n')
        for design in design_aa_list:
            sys.stderr.write('{0}\n'.format(design))
    sys.stdout.write('done\n')
Example #26
0
 def get_sgrna(self):
     """Return a DataFrame of the possible sgRNAs, computing it on first use.

     Every sequence in self.genome is scanned for a 20-mer followed by NGG
     and for CCN followed by a 20-mer; the latter hits are reported as their
     reverse complement. The table is cached on self.sgrna and has the
     columns seqname, start, cut, end, sgrna and pam.
     """
     if hasattr(self, 'sgrna'):
         return self.sgrna

     forward_site = re.compile('([atgcATGC]{20})([atgcATGC](GG|gg|Gg|gG))')
     reverse_site = re.compile('((CC|cc|Cc|cC)[atgcATGC])([atgcATGC]{20})')
     columns = ['seqname', 'start', 'cut', 'end', 'sgrna', 'pam']

     tables = []
     for chromosome in self.genome:
         records = []
         # 20-mer + NGG as written in the sequence
         for hit in forward_site.finditer(str(chromosome.seq)):
             records.append({
                 'seqname': chromosome.id,
                 'start': hit.start(),
                 'cut': hit.end() - 6,
                 'end': hit.end() - 3,
                 'sgrna': hit.group(1),
                 'pam': hit.group(2)
             })
         # CCN + 20-mer, reported as reverse complement
         for hit in reverse_site.finditer(str(chromosome.seq)):
             records.append({
                 'seqname': chromosome.id,
                 'start': hit.start() + 3,
                 'cut': hit.start() + 6,
                 'end': hit.end(),
                 'sgrna': Seq.reverse_complement(hit.group(3)),
                 'pam': Seq.reverse_complement(hit.group(1))
             })
         tables.append(pd.DataFrame(records, columns = columns))

     self.sgrna = pd.concat(tables, axis = 0, ignore_index = True)
     return self.sgrna
Example #27
0
    def get_syn_mutations(self, region, mask_constrained = True):
        from itertools import izip
        if region in self.annotation and self.annotation[region].type in ['gene', 'protein']:
            try:
                aft = self.get_allele_frequency_trajectories(region)
                if len(aft.mask.shape) == 0:
                    aft_valid = np.ones((aft.shape[0], aft.shape[-1]), dtype=bool)
                else:
                    aft_valid = -np.array([af.mask.sum(axis=0) for af in aft], dtype=bool)
                gaps = self.get_gaps_by_codon(region)
                initial_seq = self.get_initial_sequence(region)
                consensi = []
                for af in aft:
                    tmp = consensus(af)
                    tmp[gaps]='N'
                    consensi.append(tmp)

                cons_aa = np.array([np.fromstring(Seq.translate(''.join(cons)), 
                                   dtype='|S1') for cons in consensi])
                no_substitution = np.repeat(np.array([len(np.unique(col[ind]))==1 
                                for ind, col in izip(aft_valid.T[::3], cons_aa.T)], dtype=bool), 3)

                syn_muts = np.zeros(aft.shape[1:], dtype=bool)
                for pos in xrange(aft.shape[-1]):
                    ci = pos//3
                    rf = pos%3
                    codon = ''.join(initial_seq[ci*3:(ci+1)*3])
                    for ni,nuc in enumerate(alpha[:4]):
                        mod_codon = codon[:rf] + nuc + codon[rf+1:]
                        try:
                            syn_muts[ni,pos] = (Seq.translate(codon)==Seq.translate(mod_codon))\
                                                *no_substitution[pos]
                        except:
                            syn_muts[ni,pos] = False
                if mask_constrained:
                    syn_muts[:,self.get_constrained(region)] = False
                return syn_muts
            except:
                import pdb; pdb.set_trace()
        else:
            print region,"is not a valid protein or gene"
            return None
Example #28
0
def getSequences(geneName):
    """Fetch a gene sequence and its surrounding locus from the Gene_GenomicDNA template.

    Only the first row returned for the gene is used. The locus sequence is
    reduced to the gene plus up to 1 kbp on either side; if the gene sequence
    is not found inside the locus sequence, the locus is returned untrimmed.

    NOTE(review): ``service`` is not defined in this function; it is presumably
    a module-level intermine Service instance - confirm.

    :param geneName: gene identifier used for the LOOKUP constraint
    :return: tuple (geneSeq, locusSeq) of Seq objects
    :raises ValueError: if the query returns no rows
    """
    from intermine.webservice import Service

    template = service.get_template('Gene_GenomicDNA')

    rows = template.rows(
        E = {"op": "LOOKUP", "value": geneName, "extra_value": "S. cerevisiae"}
    )

    flank = 1000    # bases kept on each side of the gene

    for row in rows:
        geneSeq = Seq(row["sequence.residues"])
        locusSeq = Seq(row["chromosome.residues.locus"])

        # Reduce locusSeq so it is only +/- 1 kbp of geneSeq
        index = locusSeq.find(geneSeq)
        if index != -1:
            # Clamp at 0: a negative start would wrap around to the end.
            start = max(index - flank, 0)
            end = index + len(geneSeq) + flank
            locusSeq = locusSeq[start:end]

        return geneSeq, locusSeq

    raise ValueError("No sequence found for gene " + str(geneName))
def orf_reader(infile):
	"""Read a tab-separated ORF table into a dict.

	Lines starting with '#' and blank lines are skipped. For every other
	line, column 0 is the key, column 1 is read as an integer and column 4
	is the sequence. The value is a two-item list: the reverse complement
	of the sequence when column 1 is negative, otherwise the sequence
	itself, followed by the sequence as given in the file.

	:param infile: path of the table to read
	:return: dict mapping column 0 to [oriented sequence, original sequence]
	"""
	orfs = {}
	# The context manager closes the file even if a line fails to parse.
	with open(infile,"r") as handle:
		for line in handle:
			if not line.strip() or line[0] == "#":
				continue
			line_array = line.split("\t")
			if int(line_array[1]) < 0:
				orfs[line_array[0]] = [Seq.reverse_complement(line_array[4]),line_array[4]]
			else:
				orfs[line_array[0]] = [line_array[4],line_array[4]]
	return orfs
Example #30
0
def translate(seq):
    """Translate seq in all six reading frames.

    :return: dict with the keys 'First Frame', 'Second Frame' and
        'Third Frame' for the given strand and the same names prefixed with
        'Complement ' for the reverse complement
    """
    frame_names = ('First Frame', 'Second Frame', 'Third Frame')
    r = {}
    for offset, name in enumerate(frame_names):
        r[name] = Seq.translate(seq[offset:])

    revcomp = Seq.reverse_complement(seq)
    for offset, name in enumerate(frame_names):
        r['Complement ' + name] = Seq.translate(revcomp[offset:])
    return r
Example #31
0
 def setUp(self):
     """Create the nucleotide sequences shared by the tests of this case."""
     immutable = [
         "TCAAAAGGATGCATCATG",
         "ATGAAACTG",
         "ATGAARCTG",
         "AWGAARCKG",  # Note no U or T
         "".join(ambiguous_rna_values),
         "".join(ambiguous_dna_values),
         "AUGAAACUG",
         "ATGAAACTGWN",
         "AUGAAACUGWN",
     ]
     mutable = [
         "ATGAAACTG",
         "AUGaaaCUG",
     ]
     # Seq objects first, MutableSeq objects last.
     self.test_seqs = ([Seq.Seq(text) for text in immutable]
                       + [Seq.MutableSeq(text) for text in mutable])
Example #32
0
    def setUp(self):
        """Build the fixtures used by the tests: a BED row, its sequence and an index.

        Sets self.bed_row (tab-joined BED fields), self.sequence (multi-line
        nucleotide text), self.seq (a SeqRecord of the sequence without
        newlines) and self.index (dict mapping the record id to self.seq).
        """

        # Split on any whitespace and re-join with tabs, so the row is
        # tab-separated regardless of how the literal below is spaced.
        self.bed_row = "\t".join(
            "TRIAE_CS42_1AL_TGACv1_000002_AA0000030.1	0	3539	TRIAE_CS42_1AL_TGACv1_000002_AA0000030.1|m.13	0	+	2	2969	0	1	3539	0"
            .split())
        self.sequence = """ATCGAGCAGATTGGCCGCAACCTACAACTCCCACGGCCCAAGCACTCTCTCTCTCTCTTTCCCTCTCACC
CTCGCCTCCGCTCCCCCATTTCCGAAGTACTCGCGAGCCAGCGGCCTCCAGCTCACCACCGTTTCCGCCG
CGCGCAGATCCGCCCAATCCGTGCAGCCTCAGGCCACCGCTCTGGTTCCGTGACATGTGGCGAGGTGGTG
GCGCAGACGCTGATGCAGGAGGCGCTCGCGAGGCTGAGGAGCACAACAATGTCGAGGAAGAGGAAGGGAG
TGAGGATGGAGATCGGGACCTGCAGAATAAACGTCCTAAAGTGGGTGCTTTTGGCGAAGAAAGCTCTGGT
GTTAATGCATCCTTCTTTGGATATGAAGCACCACATTTGCATGCTTTTGCTGAACATGACCATTTGAAGC
TGTCACATGGTCCAGAAAATGAATTGGATTTTGGTTTGTCGCTTATCTCAAATGATGGTGGGAATGATAT
TCCAAGGGAGACCAACAGTCATGGTGTCTGTGATGTAGAAAGATCAGGTGGAACAAATGCAGAAGATCTT
GAAATAAGAATGGACCTATCTGATGATCTCTTGCACCTGATATTCTCCTTCTTATGCCAGAAGGATTTAT
GTAGAGCAGGGGCTGCCTGCAAACAGTGGCAGTCTGCTAGTATGCATGAGGATTTCTGGAAATATTTGAA
GTTTGAGAACACCAGAATATCTCTGCAGAACTTTGTTAATATTTGCCACCGTTATCAGAATGTGACAAAT
CTCAATTTGTCTGGTGTCTTAAGTGCAGAAAGCCTAGTGATTGAAGCAATAACATTCTTAAGGCATCTTA
AGACCTTGATAATGGGCAAGGGACAACTGGGAGAAACATTTTTTCAGGCTTTGGCTGAATGCCCATTGTT
AAATACTTTAACAGTCAGTGATGCATCCCTTGGTAGTGGCATTCAAGAGGTAACTGTTAATCATGATGGA
TTGCATGAACTTCAAATTGTGAAGTGTCGTGCACTCAGAGTATCTATCAGATGCCACCAACTTCGAATAC
TGTCTCTGAGGAGAACTGGCATGGCTCATGTATCACTCAATTGTCCTCAGTTGCTTGAATTGGATTTTCA
GTCCTGCCATAAGCTTTCTGACACTGCAATTCGTCAAGCAGCGACAGCCTGTCCACTGTTAGCGTCACTA
GATATGTCATCCTGCTCGTGTGTTACTGATGAGACATTGCGTGAGATAGCTAATGCATGTCAAAATCTTT
CTGTTCTTGATGCATCTAACTGCCCCAACATTTCTTTCGAGTCGGTAAAGCTTCCAATGTTGGTAGACTT
GAGACTATCAAGTTGTGAGGGAATCACATCTGCTTCAATGGGTGCAGTATGTTTTAGTCGTATACTTGAG
GCGTTGCAACTTGATAATTGTAGCCTGTTGACATCTGTGTCTTTGGATCTGCCACATCTCAAGAATATTA
GTCTTGTACACCTCCGCAAGTTTGCTGATTTAAATCTGCGAAGCCCTGTGCTTTCTTACATAAAAGTTTC
CAGATGCTCAGCACTTCGTTGTGTTACCATAACATCAAATGCTCTTAAGAAACTGGTGCTTCAAAAACAA
GAGAGCCTATGTAATTTATCATTGCAATGCCACAATTTAATTGATGTTGATCTTAGTGATTGCGAGTCAT
TGACAAATGAGATCTGCAAAGTTCTCAGTGACGGAGGGGGTTGCCCCATGCTCAGGTCATTAATTCTTGA
TAATTGTGAGAGTTTGAGTGTCGTGGAACTGAATAATAGTTCTTTGGTTAATCTCTCACTTGCTGGTTGC
CGTTCCATGACATTCCTGAAACTTGCATGCCCAAAGCTTCAAGTGGTGATTCTTGATGGTTGTGATCATC
TTGAAAGAGCATCATTTTGCCCGGTTGGTCTTGAATCCCTAAACCTTGGAATTTGTCCAAAGTTGAGTGT
TCTACGCATAGAGGCCCCAAATATGTCTATATTGGAGCTGAAGGGCTGTGGTGTCCTTTCTGAGGCTTCA
ATTAATTGTCCTTGCTTGATATCTTTAGATGCCTCTTTCTGCAGACAGTTTATGGATGATTCGCTGTCCC
AAACAGCAGAAGCATGCCCTCTTATTGAACATCTTATATTGTCTTCATGTTTATCCATTGACGTCCGTGG
ATTGTCTTCTCTGCATTGCCTTCAGAAGCTGGCCTTGCTTGACCTATCATATACATTTTTGATGAACTTG
AAGCCGGTTTTTGACAGTTGTCTGCAGTTGAAGGTCTTGAAACTTTCAGCTTGCAAGTATCTCAGTGATT
CATCTTTGGAACCACTCTACAGAGAGGGTGCTCTACCGATGCTCGTTGAGCTAGATCTGTCCTACTCGTC
CATTGGGCAGACTGCAATAGAAGAGCTTCTCGCGTGCTGTACAAATTTGGTTAATGTGAACCTAAACGGA
TGTACGAACTTGCATGAATTGGTATGTGGATCAGACTATTGCCGGTCCGGTGACATGCCAATTGATGCTT
TCCCCCCTGATTCTGCACCAGACAAGACCAAAGAGATCAGGGAGAGTTCGGATTGTCAGCTTGAAGTTCT
CAGTTGTACTGGCTGTCCAAATATTAAGAAAGTTGTTATTCCTTCAACGGCCAACTATCTGAATTTGTCT
AAGATCAACCTTAATTTGTCTGCAAACTTGAAGGAAGTAGATTTGAAGTGCTCCAATCTTTACAATTTAA
ATTTGAGCAATTGTAACTCACTGGAGATTCTGAAGCTTGATTGCCCAAGATTGGCTAACCTCCAACTTTT
GGCATGCACAATGTTGCAAGAGGATGAACTGAAATCTGCACTATCCTTTTGCGGTGCATTGGAGATCCTC
AATGTGCACTCTTGTCCACAAATAAACACGCTGGATTTTGGCAGGCTACAGGCTGTTTGCCCAACTCTTA
AGCGCATCCAGAGCAGCCCCATCGCATAGTATGAAGGATTCTGGTCTTCTTAATGGACTCGAGTAAATAG
TCCAGATTTGAAACAGAAAAGGCCATGTCGTACTCTTGTACATATGCAGCACCGCCAATATATTGTATGG
CTGCATGTATTAGGGAGCCAGGGCTGACATGAAACCTGTTCTTCCAATCGATTTCTTGTGTTGAATCTAG
TTGAAACATGGAAACCGCACTTCCTAGTTTGTATTTGCTTTTGAGGTGCAGTGATGGAGTAAGCAGATCT
GTATTTATATGAATGAATAACCATCTTGTTTGGATCGTCGATGTTGTATGCTTCATTGATGACATGGGGT
GCTAAGTTTGACTGAAATTACACCAGGTTCTATGGTTCTCTCATAAGGTGCAGTGATTCTGCGGTCTTTA
TTAATCTGTCTCAACTGTGACGATGCAACTGAGACGTTTCCATCTGCCGGCTGCTGATGCTGTGAACTCT
TGGTAAAAAACCTGGTGTACTTGATCCAAGAGCATTCGTTGGGTCACTTGTATCCTTGAAAATTGAGTAA
CTAATAAATGCTGTTGTGTAAAAAAAAGGGGCTTTCTTT"""

        # The record holds the sequence as a single line, under the same id
        # as the first BED field.
        self.seq = SeqRecord.SeqRecord(
            Seq.Seq(self.sequence.replace("\n", "")),
            id="TRIAE_CS42_1AL_TGACv1_000002_AA0000030.1")

        # Minimal stand-in for a sequence index: record id -> SeqRecord.
        self.index = dict()
        self.index["TRIAE_CS42_1AL_TGACv1_000002_AA0000030.1"] = self.seq
def Tm_NN(seq, check=True, strict=True, c_seq=None, shift=0, nn_table=DNA_NN3,
          tmm_table=DNA_TMM1, imm_table=DNA_IMM1, de_table=DNA_DE1,
          dnac1=25, dnac2=25, selfcomp=False, Na=50, K=0, Tris=0, Mg=0,
          dNTPs=0, saltcorr=5):
    """Return the Tm using nearest neighbor thermodynamics.

    The enthalpy and entropy contributions of dangling ends, terminal
    mismatches, duplex initiation and every nearest-neighbor pair are summed
    and converted into a melting temperature; 273.15 is subtracted from the
    result before it is returned (i.e. the value is in degrees Celsius).

    Arguments:
     - seq: The primer/probe sequence as string or Biopython sequence object.
       For RNA/DNA hybridizations seq must be the RNA sequence.
     - check: If true (default), seq and c_seq are passed through the
       module's ``_check`` helper before any calculation.
     - strict: Forwarded to the module's ``_key_error`` helper whenever a
       dangling-end or nearest-neighbor key is missing from the tables.
       Default=True.
     - c_seq: Complementary sequence. The sequence of the template/target in
       3'->5' direction. c_seq is necessary for mismatch correction and
       dangling-ends correction. Both corrections will automatically be
       applied if mismatches or dangling ends are present. Default=None.
     - shift: Shift of the primer/probe sequence on the template/target
       sequence, e.g.::

                           shift=0       shift=1        shift= -1
        Primer (seq):      5' ATGC...    5'  ATGC...    5' ATGC...
        Template (c_seq):  3' TACG...    3' CTACG...    3'  ACG...

       The shift parameter is necessary to align seq and c_seq if they have
       different lengths or if they should have dangling ends. Default=0
     - nn_table: Thermodynamic NN values, eight tables are implemented:
       For DNA/DNA hybridizations:

        - DNA_NN1: values from Breslauer et al. (1986)
        - DNA_NN2: values from Sugimoto et al. (1996)
        - DNA_NN3: values from Allawi & SantaLucia (1997) (default)
        - DNA_NN4: values from SantaLucia & Hicks (2004)

       For RNA/RNA hybridizations:

        - RNA_NN1: values from Freier et al. (1986)
        - RNA_NN2: values from Xia et al. (1998)
        - RNA_NN3: values from Chen et al. (2012)

       For RNA/DNA hybridizations:

        - R_DNA_NN1: values from Sugimoto et al. (1995)

       Use the module's maketable method to make a new table or to update
       one of the implemented tables.
     - tmm_table: Thermodynamic values for terminal mismatches.
       Default: DNA_TMM1 (SantaLucia & Peyret, 2001)
     - imm_table: Thermodynamic values for internal mismatches, may include
       inosine mismatches. Default: DNA_IMM1 (Allawi & SantaLucia, 1997-1998;
       Peyret et al., 1999; Watkins & SantaLucia, 2005)
     - de_table: Thermodynamic values for dangling ends:

        - DNA_DE1: for DNA. Values from Bommarito et al. (2000). Default
        - RNA_DE1: for RNA. Values from Turner & Mathews (2010)

     - dnac1: Concentration of the higher concentrated strand [nM]. Typically
       this will be the primer (for PCR) or the probe. Default=25.
     - dnac2: Concentration of the lower concentrated strand [nM]. In PCR this
       is the template strand which concentration is typically very low and may
       be ignored (dnac2=0). In oligo/oligo hybridization experiments, dnac1
       equals dnac2. Default=25.
       MELTING and Primer3Plus use k = [Oligo(Total)]/4 by default. To mimic
       this behaviour, you have to divide [Oligo(Total)] by 2 and assign this
       concentration to dnac1 and dnac2. E.g., Total oligo concentration of
       50 nM in Primer3Plus means dnac1=25, dnac2=25.
     - selfcomp: Is the sequence self-complementary? Default=False. If 'True'
       the primer is thought binding to itself, thus dnac2 is not considered.
     - Na, K, Tris, Mg, dNTPs: See method 'Tm_GC' for details. Defaults: Na=50,
       K=0, Tris=0, Mg=0, dNTPs=0.
     - saltcorr: See method 'Tm_GC'. Default=5. 0 means no salt correction.

    """
    seq = str(seq)
    if not c_seq:
        # c_seq must be provided by user if dangling ends or mismatches should
        # be taken into account. Otherwise take perfect complement.
        c_seq = Seq.Seq(seq).complement()
    c_seq = str(c_seq)
    if check:
        seq = _check(seq, 'Tm_NN')
        c_seq = _check(c_seq, 'Tm_NN')
    # Working copies: these get trimmed as end effects are consumed, while
    # seq itself is kept intact for the initiation terms and salt correction.
    tmp_seq = seq
    tmp_cseq = c_seq
    delta_h = 0
    delta_s = 0
    d_h = 0  # Names for indexes
    d_s = 1  # 0 and 1

    # Dangling ends?
    if shift or len(seq) != len(c_seq):
        # Align both sequences using the shift parameter
        # ('.' is used as padding for positions without a partner base)
        if shift > 0:
            tmp_seq = '.' * shift + seq
        if shift < 0:
            tmp_cseq = '.' * abs(shift) + c_seq
        # Pad the shorter strand on the right so both have equal length
        if len(tmp_cseq) > len(tmp_seq):
            tmp_seq += (len(tmp_cseq) - len(tmp_seq)) * '.'
        if len(tmp_cseq) < len(tmp_seq):
            tmp_cseq += (len(tmp_seq) - len(tmp_cseq)) * '.'
        # Remove 'over-dangling' ends
        # (only a single unpaired position per end is kept)
        while tmp_seq.startswith('..') or tmp_cseq.startswith('..'):
            tmp_seq = tmp_seq[1:]
            tmp_cseq = tmp_cseq[1:]
        while tmp_seq.endswith('..') or tmp_cseq.endswith('..'):
            tmp_seq = tmp_seq[:-1]
            tmp_cseq = tmp_cseq[:-1]
        # Now for the dangling ends
        if tmp_seq.startswith('.') or tmp_cseq.startswith('.'):
            left_de = tmp_seq[:2] + '/' + tmp_cseq[:2]
            try:
                delta_h += de_table[left_de][d_h]
                delta_s += de_table[left_de][d_s]
            except KeyError:
                _key_error(left_de, strict)
            # Drop the dangling position now that it is accounted for
            tmp_seq = tmp_seq[1:]
            tmp_cseq = tmp_cseq[1:]
        if tmp_seq.endswith('.') or tmp_cseq.endswith('.'):
            # The right-hand key is built from the reversed last two
            # positions, with the complementary strand written first.
            right_de = tmp_cseq[-2:][::-1] + '/' + tmp_seq[-2:][::-1]
            try:
                delta_h += de_table[right_de][d_h]
                delta_s += de_table[right_de][d_s]
            except KeyError:
                _key_error(right_de, strict)
            tmp_seq = tmp_seq[:-1]
            tmp_cseq = tmp_cseq[:-1]

    # Now for terminal mismatches
    # A terminal pair is only consumed (and trimmed) when its key is present
    # in tmm_table; otherwise it is left for the zipping loop below.
    left_tmm = tmp_cseq[:2][::-1] + '/' + tmp_seq[:2][::-1]
    if left_tmm in tmm_table:
        delta_h += tmm_table[left_tmm][d_h]
        delta_s += tmm_table[left_tmm][d_s]
        tmp_seq = tmp_seq[1:]
        tmp_cseq = tmp_cseq[1:]
    right_tmm = tmp_seq[-2:] + '/' + tmp_cseq[-2:]
    if right_tmm in tmm_table:
        delta_h += tmm_table[right_tmm][d_h]
        delta_s += tmm_table[right_tmm][d_s]
        tmp_seq = tmp_seq[:-1]
        tmp_cseq = tmp_cseq[:-1]

    # Now everything 'unusual' at the ends is handled and removed and we can
    # look at the initiation.
    # One or several of the following initiation types may apply:

    # Type: General initiation value
    delta_h += nn_table['init'][d_h]
    delta_s += nn_table['init'][d_s]

    # Type: Duplex with no (allA/T) or at least one (oneG/C) GC pair
    if SeqUtils.GC(seq) == 0:
        delta_h += nn_table['init_allA/T'][d_h]
        delta_s += nn_table['init_allA/T'][d_s]
    else:
        delta_h += nn_table['init_oneG/C'][d_h]
        delta_s += nn_table['init_oneG/C'][d_s]

    # Type: Penalty if 5' end is T
    # The same 'init_5T/A' term is added once for seq starting with T and
    # once for seq ending with A (presumably the latter corresponds to a
    # 5' T on the complementary strand).
    if seq.startswith('T'):
        delta_h += nn_table['init_5T/A'][d_h]
        delta_s += nn_table['init_5T/A'][d_s]
    if seq.endswith('A'):
        delta_h += nn_table['init_5T/A'][d_h]
        delta_s += nn_table['init_5T/A'][d_s]

    # Type: Different values for G/C or A/T terminal basepairs
    # AT and GC count (0, 1 or 2) how many of the two terminal bases of seq
    # fall into each class; the table value is scaled by that count.
    ends = seq[0] + seq[-1]
    AT = ends.count('A') + ends.count('T')
    GC = ends.count('G') + ends.count('C')
    delta_h += nn_table['init_A/T'][d_h] * AT
    delta_s += nn_table['init_A/T'][d_s] * AT
    delta_h += nn_table['init_G/C'][d_h] * GC
    delta_s += nn_table['init_G/C'][d_s] * GC

    # Finally, the 'zipping'
    # Each overlapping pair of positions forms a key 'XY/X'Y''. Lookup order:
    # internal-mismatch table (key, then reversed key), then the
    # nearest-neighbor table (key, then reversed key).
    for basenumber in range(len(tmp_seq) - 1):
        neighbors = tmp_seq[basenumber:basenumber + 2] + '/' + \
            tmp_cseq[basenumber:basenumber + 2]
        if neighbors in imm_table:
            delta_h += imm_table[neighbors][d_h]
            delta_s += imm_table[neighbors][d_s]
        elif neighbors[::-1] in imm_table:
            delta_h += imm_table[neighbors[::-1]][d_h]
            delta_s += imm_table[neighbors[::-1]][d_s]
        elif neighbors in nn_table:
            delta_h += nn_table[neighbors][d_h]
            delta_s += nn_table[neighbors][d_s]
        elif neighbors[::-1] in nn_table:
            delta_h += nn_table[neighbors[::-1]][d_h]
            delta_s += nn_table[neighbors[::-1]][d_s]
        else:
            # We haven't found the key...
            _key_error(neighbors, strict)

    # Concentration term; the 1e-9 factor converts the nM inputs to M.
    k = (dnac1 - (dnac2 / 2.0)) * 1e-9
    if selfcomp:
        # Self-complementary duplex: dnac2 is ignored and the symmetry
        # correction from the table is added.
        k = dnac1 * 1e-9
        delta_h += nn_table['sym'][d_h]
        delta_s += nn_table['sym'][d_s]
    R = 1.987  # universal gas constant in Cal/degrees C*Mol
    if saltcorr:
        corr = salt_correction(Na=Na, K=K, Tris=Tris, Mg=Mg, dNTPs=dNTPs,
                               method=saltcorr, seq=seq)
    # How corr is applied depends on the method number:
    # 5 -> added to delta_s before Tm is computed,
    # 1-4 -> added to the computed Tm,
    # 6-7 -> applied to 1/Tm (Tm in Kelvin).
    if saltcorr == 5:
        delta_s += corr
    # delta_h is scaled by 1000 to match the units of delta_s and R
    # (presumably kcal -> cal); the result is shifted by -273.15.
    melting_temp = (1000 * delta_h) / (delta_s + (R * (math.log(k)))) - 273.15
    if saltcorr in (1, 2, 3, 4):
        melting_temp += corr
    if saltcorr in (6, 7):
        # Tm = 1/(1/Tm + corr)
        melting_temp = (1 / (1 / (melting_temp + 273.15) + corr) - 273.15)

    return melting_temp
Example #34
0
        if "AO=" in t[0:3]:
            ao = int(t.split(",")[0][3:])

    if (ao + ro > 0 and float(ao) / (ao + ro) >= args.minab
            and "," not in vals[3] and "," not in vals[4]):
        if vals[0] not in vcf:
            vcf[vals[0]] = []
        vcf[vals[0]].append(vals)

chroms = refFile.references

for chrom in chroms:
    chromSeq = refFile.fetch(chrom)

    if chrom not in vcf:
        rec = SeqRecord.SeqRecord(Seq.Seq(chromSeq),
                                  id=chrom,
                                  name="",
                                  description="")
        SeqIO.write(rec, outFile, "fasta")
        continue
    sys.stderr.write(chrom + "\n")
    var = vcf[chrom]
    varPos = [0] + [int(v[1]) - 1 for v in var]
    varRefLen = [0] + [len(v[3]) for v in var]
    refSeqs = [
        chromSeq[varPos[i - 1] + varRefLen[i - 1]:varPos[i]] + var[i - 1][4]
        for i in range(1, len(varPos))
    ] + [chromSeq[varPos[-1] + varRefLen[-1]:]]
    newSeq = "".join(refSeqs)
    rec = SeqRecord.SeqRecord(Seq.Seq(newSeq),
Example #35
0
import copy
import unittest
import warnings

from Bio import BiopythonWarning
from Bio import Seq
from Bio.Data.IUPACData import (
    ambiguous_dna_complement,
    ambiguous_rna_complement,
    ambiguous_dna_values,
    ambiguous_rna_values,
)
from Bio.Data.CodonTable import TranslationError, standard_dna_table

test_seqs = [
    Seq.Seq("TCAAAAGGATGCATCATG"),
    Seq.Seq("T"),
    Seq.Seq("ATGAAACTG"),
    Seq.Seq("ATGAARCTG"),
    Seq.Seq("AWGAARCKG"),  # Note no U or T
    Seq.Seq("".join(ambiguous_rna_values)),
    Seq.Seq("".join(ambiguous_dna_values)),
    Seq.Seq("AWGAARCKG"),
    Seq.Seq("AUGAAACUG"),
    Seq.Seq("ATGAAA-CTG"),
    Seq.Seq("ATGAAACTGWN"),
    Seq.Seq("AUGAAA==CUG"),
    Seq.Seq("AUGAAACUGWN"),
    Seq.Seq("AUGAAACTG"),  # U and T
    Seq.MutableSeq("ATGAAACTG"),
    Seq.MutableSeq("AUGaaaCUG"),
Example #36
0
 def test_translation_wrong_type(self):
     """Passing ``ambiguous_dna_complement`` as ``table`` must raise ValueError."""
     dna = Seq.Seq("ATCGTA")
     self.assertRaises(
         ValueError, dna.translate, table=ambiguous_dna_complement
     )
Example #37
0
 def test_translation_of_gapped_string_with_gap_char_given(self):
     """Translate a gapped string while naming the gap character explicitly.

     With ``gap="-"`` the gap codon shows up as ``-`` in the protein; a list
     as ``gap`` must raise TypeError and the string ``"-*"`` ValueError.
     """
     gapped_dna = "GTG---GCCATTGTAATGGGCCGC"
     protein = Seq.translate(gapped_dna, gap="-")
     self.assertEqual("V-AIVMGR", protein)
     with self.assertRaises(TypeError):
         Seq.translate(gapped_dna, gap=[])
     with self.assertRaises(ValueError):
         Seq.translate(gapped_dna, gap="-*")
Example #38
0
 def test_append_nucleotides(self):
     """Appending one Seq to ``test_chars`` must bring its length to 5."""
     chars = self.test_chars
     chars.append(Seq.Seq("A"))
     self.assertEqual(5, len(chars))
Example #39
0
 def test_not_equal_comparsion(self):
     """Two Seq objects with different content must compare unequal."""
     five_bases = Seq.Seq("TCAAA")
     six_bases = Seq.Seq("TCAAAA")
     self.assertNotEqual(five_bases, six_bases)
Example #40
0
 def setUp(self):
     """Create the DNA, RNA, nucleotide, protein and gap-character fixtures."""
     # Short local aliases for the two sequence constructors used below.
     seq, mutable = Seq.Seq, Seq.MutableSeq

     self.s = seq("TCAAAAGGATGCATCATG")
     self.dna = [seq("ATCG"), seq("gtca"), mutable("GGTCA"), seq("CTG-CA")]
     self.rna = [
         seq("AUUUCG"),
         mutable("AUUCG"),
         seq("uCAg"),
         mutable("UC-AG"),
         seq("U.CAG"),
     ]
     self.nuc = [seq("ATCG")]
     self.protein = [
         seq("ATCGPK"),
         seq("atcGPK"),
         seq("T.CGPK"),
         seq("T-CGPK"),
         seq("MEDG-KRXR*"),
         mutable("ME-K-DRXR*XU"),
         seq("MEDG-KRXR@"),
         seq("ME-KR@"),
         seq("MEDG.KRXR@"),
     ]
     # Mix of plain strings and Seq objects.
     self.test_chars = ["-", seq("-"), seq("*"), "-X@"]
Example #41
0
 def test_concatenation_of_seq(self):
     """Seq + Seq must compare equal to the concatenated plain string."""
     suffix = Seq.Seq("T")
     joined = self.s + suffix
     expected_text = str(self.s) + "T"
     self.assertEqual(expected_text, joined)
     rebuilt = self.s + Seq.Seq("T")
     self.assertEqual(rebuilt, "TCAAAAGGATGCATCATGT")
Example #42
0
    def test_translation_of_stops(self):
        """Check single-codon translations in upper, mixed and lower case."""
        # (codon, expected one-letter result), checked in this exact order.
        cases = (
            ("TAT", "Y"),
            ("TAR", "*"),
            ("TAN", "X"),
            ("NNN", "X"),
            ("TAt", "Y"),
            ("TaR", "*"),
            ("TaN", "X"),
            ("nnN", "X"),
            ("tat", "Y"),
            ("tar", "*"),
            ("tan", "X"),
            ("nnn", "X"),
        )
        for codon, amino_acid in cases:
            self.assertEqual(Seq.translate(codon), amino_acid)
Example #43
0
 def test_stops(self):
     """Translate ``misc_stops`` (as str and as Seq) under several tables."""
     # (expected protein, keyword arguments for Seq.translate); the first
     # entry deliberately passes no ``table`` at all.
     expectations = (
         ("***RR", {}),
         ("***RR", {"table": 1}),
         ("***RR", {"table": "SGC0"}),
         ("**W**", {"table": 2}),
         ("**WRR", {"table": "Yeast Mitochondrial"}),
         ("**WSS", {"table": 5}),
         ("**WSS", {"table": 9}),
         ("**CRR", {"table": "Euplotid Nuclear"}),
         ("***RR", {"table": 11}),
         ("***RR", {"table": "Bacterial"}),
     )
     for nucleotide_seq in (self.misc_stops, Seq.Seq(self.misc_stops)):
         for protein, options in expectations:
             self.assertEqual(
                 protein, Seq.translate(nucleotide_seq, **options)
             )
Example #44
0
 def test_translation_with_codon_table_as_table_argument(self):
     """Translate a string while passing ``standard_dna_table`` as ``table``."""
     protein = Seq.translate(
         "GTGGCCATTGTAATGGGCCGC", table=standard_dna_table
     )
     self.assertEqual("VAIVMGR", protein)
Example #45
0
 def test_translation_of_invalid_codon(self):
     """Each of the listed codons must make translate raise TranslationError."""
     bad_codons = ("TA?", "N-N", "AC_", "Ac_")
     for triplet in bad_codons:
         self.assertRaises(TranslationError, Seq.translate, triplet)
Example #46
0
    def test_append_proteins(self):
        """Appending three Seq objects must bring ``test_chars`` to length 7."""
        for residues in ("K", "K-", "K@"):
            self.test_chars.append(Seq.Seq(residues))

        self.assertEqual(7, len(self.test_chars))
Example #47
0
 def test_translation_of_string(self):
     """``Seq.translate`` applied to a plain str returns the expected protein."""
     protein = Seq.translate("GTGGCCATTGTAATGGGCCGC")
     self.assertEqual("VAIVMGR", protein)
Example #48
0
 def setUp(self):
     """Create DNA, RNA, nucleotide and protein fixtures.

     Every list mixes Seq / MutableSeq objects with one plain string.
     """
     # Short local aliases for the two sequence constructors used below.
     seq, mutable = Seq.Seq, Seq.MutableSeq

     self.dna = [
         seq("ATCG"),
         seq("gtca"),
         mutable("GGTCA"),
         seq("CTG-CA"),
         "TGGTCA",
     ]
     self.rna = [
         seq("AUUUCG"),
         mutable("AUUCG"),
         seq("uCAg"),
         mutable("UC-AG"),
         seq("U.CAG"),
         "UGCAU",
     ]
     self.nuc = [seq("ATCG"), "UUUTTTACG"]
     self.protein = [
         seq("ATCGPK"),
         seq("atcGPK"),
         seq("T.CGPK"),
         seq("T-CGPK"),
         seq("MEDG-KRXR*"),
         mutable("ME-K-DRXR*XU"),
         "TEDDF",
     ]
Example #49
0
 def test_gapped_seq_no_gap_char_given(self):
     """Translating a gapped Seq with ``gap=None`` must raise TranslationError."""
     gapped = Seq.Seq("ATG---AAACTG")
     with self.assertRaises(TranslationError):
         gapped.translate(gap=None)
Example #50
0
 def setUp(self):
     """Build an immutable and a mutable sequence from the same bytes literal."""
     raw = b"TCAAAAGGATGCATCATG"
     self.s = Seq.Seq(raw)
     self.mutable_s = Seq.MutableSeq(raw)
Example #51
0
 def add_sequence(self, key, sequence):
     """Wrap ``sequence`` in ``sq.Seq`` and store it under ``key``.

     The wrapped object is written to ``self.chain_sequences[key]``; any
     existing entry for ``key`` is replaced by the assignment.
     """
     wrapped = sq.Seq(sequence)
     self.chain_sequences[key] = wrapped
Example #52
0
 def setUp(self):
     """Provide ``self.s``, the sequence used by the tests of this case."""
     fixture = Seq.Seq("TCAAAAGGATGCATCATG")
     self.s = fixture
                        if min(int(r[1]), int(r[2])) < interval_start:
                            warnings += "NOUPSTREAM,"
                            interval_end = interval_start - 1
                        else:
                            # In case there are multiple called genes in the interval, and since we replaced
                            # interval_end in a previous iteration, we need to make sure we allow
                            # the replaced one to still be the lowest.
                            interval_end = min(interval_end, int(r[1]) - 1)
                            interval_end = min(interval_end, int(r[2]) - 1)
                    else:
                        if max(int(r[1]), int(r[2])) > interval_end:
                            warnings += "NOUPSTREAM,"
                            interval_start = interval_end + 1
                        else:
                            interval_start = max(interval_start, int(r[1]) + 1)
                            interval_start = max(interval_start, int(r[2]) + 1)
        # Get the actual sequence from what is left.
        # BUT: Note that the start and stop locations start at 1, while the
        # array indexes start at 0!
        startidx = interval_start - 1
        stopidx = interval_end - 1
        seq = contigseq[startidx:stopidx + 1]
        if seq.lower().count("n") > options.gapwarn:
            warnings += "CONTAINSGAP"
        # We need to do the reverse complement if we are on the "-" strand...
        if strand == "-":
            seq = str(Seq.Seq(seq).reverse_complement())
        print("%s\t%s\t%s" % (gene, warnings, seq))

con.close()
Example #54
0
 def test_translation_with_bad_table_argument(self):
     """An empty dict passed as ``table`` must raise ValueError."""
     self.assertRaises(
         ValueError, Seq.translate, "GTGGCCATTGTAATGGGCCGC", table={}
     )
Example #55
0
 def test_translation_of_leucine(self):
     """Each listed ambiguous codon must translate to ``J``."""
     ambiguous_codons = (
         "WTA", "MTY", "MTT", "MTW", "MTM", "MTH", "MTA", "MTC", "HTA",
     )
     for triplet in ambiguous_codons:
         self.assertEqual("J", Seq.translate(triplet))
Example #56
0
 def test_translation_of_asparagine(self):
     """Each listed ambiguous codon must translate to ``B``."""
     ambiguous_codons = ("RAY", "RAT", "RAC")
     for triplet in ambiguous_codons:
         self.assertEqual("B", Seq.translate(triplet))
Example #57
0
    def setUp(self):
        """Build four SeqRecord fixtures, an id -> record index and four BED lines.

        Each sequence is written as a multi-line literal; the newlines are
        stripped before the text is wrapped in ``Seq.Seq`` / ``SeqRecord``.
        """

        # First record ("CLASS_2.159").
        seq1 = """CCGAAGAAGAACAAATTCCTTGCTGAATCATGGCGAAGTTGAAGCTCTACTCTTACTGGA
GAAGCTCATGTGCTCATCGCGTCCGTATCGCCCTCACTTTAAAAGGGCTTGATTATGAAT
ATATACCGGTTAATTTGCTCAAAGGGGATCAATCCGATTCAGATTTCAAGAAGATCAATC
CAATGGGCACTGTACCAGCGCTTGTTGATGGTGATGTTGTGATTAATGACTCTTTCGCAA
TAATAATGTACCTGGATGATAAGTATCCGGAGCCACCGCTGTTACCAAGTGACTACCATA
AACGGGCGGTAAATTACCAGGCGACGAGTATTGTCATGTCTGGTATACAGCCTCATCAAA
ATATGGCTCTTTTTGTGAGAAGATGAGATTAATAGGTATCTCGAGGACAAGATAAATGCT
GAGGAGAAAACTGCTTGGATTACTAATGCTATCACAAAAGGATTCACAGGTTTATAACGA
CCTGTCTGATAATGTCTCATATGTCCTTCAGCTCTCGAGAAACTGTTGGTGAGTTGCGCT
GGAAAATACGCGACTGGTGATGAAGTTTACTTGGCTGATCTTTTCCTAGCACCACAGATC
CACGCAGCATTCAACAGATTCCATATTAACATGGAACCATTCCCGACTCTTGCAAGGTTT
TACGAGTCATACAACGAACTGCCTGCATTTCAAAATGCAGTCCCGGAGAAGCAACCAGAT
ACTCCTTCCACCATCTGATTCTGTGAACCGTAAGCTTCTCTCAGTCTCAGCTCAATAAAA
TCTC"""
        self.seq1 = SeqRecord.SeqRecord(Seq.Seq(seq1.replace("\n", "")),
                                        id="CLASS_2.159")

        # Second record ("CLASS_2.160").
        seq2 = """ACAAAACAAAGTAATCGCGAAAACACACAACAATCGCTGGACTCTGCTACTGCGAAGAAC
AACAAATTCCTTGTTTATCATGGCGAATTCCGGCGAAGAGAAGTTGAAGCTCTACTCTTA
CTGGAGAAGCTCGTGTGCTCATCGTGTCCGTATCGCCCTCGCTTTGAAAGGGCTTGATTA
TGAGTATATACCAGTGAATTTGCTCAAGGGTGATCAATTCGATTCAGTTTATCGTTTTGA
TCTTCAAGATTTCAAGAAGATCAATCCAATGGGAACTGTACCAGCTCTGGTGGATGGAGA
TGTTGTGATTAATGATTCTTTTGCGATAATAATGTATCTGGATGAGAAGTACCCTGAGCC
ACCTTTGTTACCTCGTGACCTCCATAAACGAGCTGTGAATTACCAGGCAATGAGTATTGT
CTTGTCTGGCATACAGCCTCATCAAAATCTGGCTGTTATTAGGTATATCGAGGAAAAGAT
AAATGTGGAGGAGAAGACTGCCTGGGTTAATAATGCTATCACAAAAGGATTTACAGCTCT
CGAGAAACTGTTGGTGAATTGCGCTGGGAAACATGCGACTGGTGATGAAATTTACCTGGC
TGATCTCTTTCTAGCACCACAGATCCACGGAGCAATCAACAGATTCCAGATTAACATGGA
ACCGTACCCAACTCTTGCAAAATGTTACGAATCATACAACGAACTGCCTGCGTTTCAAAA
TGCACTACCGGAAAAGCAGCCAGATGCTCCTTCTTCCACCATCTGATTCTGTGAACCCAT
AAGCTACTCTCACTTTAATAAAACCTCAG"""

        self.seq2 = SeqRecord.SeqRecord(Seq.Seq(seq2.replace("\n", "")),
                                        id="CLASS_2.160")

        # Third record ("PRJEB7093_DN.7194.1").
        seq3 = """GATGCCCTTAGTTTCTCTACTTGTATCATACAATAAAGGTCACAGATTTTGAAATTTGCAAAGATATATC
ATACATTCTCAGAGGAAGCCTTTGTCTCTAAGACTCTGGACCGTCTCCTTAACCGCATCTTCAACCGCAG
TAAAAACCAGCCCGAGCTCAATCAATCGCTTAGCCGCATCATTACACGACGTAAGCCCAGGTTGAGTCTC
TTTATCAAACTTGTGAACAGCAAACTCAGGGAAGAGCTTAGACACAAGAGCAGCAAACTCACTGAACTGA
TAAATCCCATTAGTGCATAAAAACCGTCCAGAAGCGTCAGGTGTTTCGAATAGCATCACATGACCTTTAG
CCACATCTTTCACGTGAACCACACCGAGCCAGTGATGCTCTTGCGTCTCGGTCGAGCCTTGTAAAAGCTG
TAGCAGAACAGCACAGCTTGCGTTTAGGTTCGGTTGCAGAAGCGGTCCGAGACATGTTGATGGATGAATC
GTCACAATGTTGGTTCCATGCTTCTCCGAAAATTCCCAAGCTGCTTTCTCAGCTAATGTCTTCGAAATTG
GATACCATTTCTAAAATCAAAATTATATACCAATCGTTACAAATATCATATAAATCATCACTAACCATTT
TAAACCGAAATTTAACGAACCTGCCTCGACTTGCAAAAATCAAGATCGGACCACGACGACTCATCGACGG
GAACTTTTTCCGGCCAATTAGGGTTAGGAACCAATGCGGAGATAGATGACGTGATCACCACGCGTCTCAC
ATTAAACCTCTTAGCAGCTTCCAACACATTGATCGTTCCCTTAACCGCCGGTTCGACCAGCTCCTTCTCT
GGATCTACCGGTGGATCCAACGTACAAGGTGACGCCACGTGGAACACTCCCGCACATCCATCAATAGCTC
TGGAGATTGCATCAGAGTCTAAGAGATCCGCTTCAAAGATCTTGATCTTAGAATCGGATCCGGGTAGTTG
CAGAAGATGAGTCGGGTCGGATCCTGGGTAAATCGAAGCGTGGATTTTAGTATATCCTTTCTCAATTAAC
GTTCGGATTATCCAAGATCCGATGAAACCATTAGCTCCGGTTACACACACTGTCTCTTTCGCCATTGTTG
ATCAATAAGCGCTCACTGAGAATTTTTTTGTTCTCTCTCTCTATCGCAATTTATCTCAGAAGATAAGAAA
AAAAAAACATCTTTCCAGTAAAAAAGGATCCTTTGTTTTTTTCTTACACGTAAAAAATGGATTTTTTTTT
CTCTCTTAAAGATATAATGCGTTGATACAAAAGCGTAACGTTGACATGATATTATCCACTAGTTTTATAG
ACTTTTCAAAAAAAGGAGAGAATTTTCAATTCTTCAGTAGTCAAATAGATGAAGACCGCCGGAGCGCCGC
CGCAGAGAGGTGGTTCCTCTTCCTCCTCCGCCGTATACTTTAACTGGTCTTCATCATCTTGTTCTTACGA
TAGCTGTAGAGTTTTGGTGGTGAAGATGGGAGGAAAAAGCAAGAAGCCTCATCAATCTTCTTCTTTTAAG
GAGTCAGAGCCAGAACCACCGAGAATCAAATCCAATGTTAAGCATAACTTGCAGCTTCTCAAGTTATGGA
AGGAGTTTCAGAGCAGAGGATCTGGCATGGCTAAGCCAGCGACTAGTTACAGGAAGAAGAAAGTAGAGAA
AGACGAGTTACCGGATGATAGCGAGCTCTACCGGGATCCTACAAATACGCTTTACTACACGAACCAAGGT
CTATTGGATGACGCAGTTCCGGTTTTGCTTGTTGATGGTTATAATGTGTGTGGATATTGGATGAAGTTAA
AGAAACATTTCATGAAAGGAAGGCTTGACGTTGCTCGGCAGAAGTTAGTTGATGAACTTGTGTCCTTCAG
TATGGTTAAAGAGGTTAAGGTAGTGGTTGTGTTTGATGCTCTCATGTCTGGTCTTCCTACTCACAAGGAA
GACTTTGCAGGTGTTGATGTGATTTTCTCAGGAGAAACTTGTGCTGACGCTTGGATTGAAAAGGAGGTGG
TTGCATTGAGAGAAGATGGATGCCCCAAGGTTTGGGTTGTAACATCTGATGTCTGTCAACAACAAGCAGC
ACATGGAGCGGTATTGGGGCATCATATCGATGTTATAAACTCGTTATGTTCATATCTTGTTTTTGATTTT
GGTGACTGATTCTTGACAGGGAGCTTATATTTGGAGTAGCAAGGCATTGGTTTCTGAGATTAAATCGATG
CATAAGGAGGTTGAGAAAATGATGCAAGAAACAAGGTCAACATCTTTCCAAGGGAGATTGCTTAAACACA
ATCTTGATTCTGAAGTCGTTGATGCTCTTAAAGATCTTAGAGACAAATTATCAGAAAACGAAACAAAGAG
ATGACAAAAAGACCAATCCGGATTATATAAACAATTAACAAGGCTTGGTCTCTCCATGTAACTTCTGTCC
CAAGTAAGTAAGCTAATCTGACTTGTAAAAAACAGAGGCTGCAGAGGAAACGAGGGAGATAGAGAGAGAG
AGAGCTCAAATGCTTTGTTATTGTTGTATTTGTGTCTGAATTCTTTTTGACTAATCTATATATAGATTCG
TTTTCTTTGGTCCAAACATATGGTTAAAAGATAGTTCTGAATTTTTCTTTTAGCTTCATGCATAAGAATC
ATCTTAACCTAATAACCTATGTTTATTATTTTACAATAATGTAAAAATGTAAATTTTTAGTTGAATAATG
AACCAAATTTTTATGTAAAAAAACTTGGATGTTTATTTTCAAACACAAACATCAGTAACACTTGAAGCAG
TAGAGAGAATTGGAGGCAGAGCAAGTCTACAAATTTGCAGATAGTTCCAGGGTTTGAGCTGTTTGTTCTG
GTCAGTCTCCAATCAATCAAAGCATATGGTTTATCGAGAATGGATAGAGATTCAAGAGAAGATTGAAGAA
CTGAGTTTGCAAAGGCTTATCAATGCCTTCGACTTCGAGTTGAGATTGAAGAAAAGGTAAAGAAATAGCA
AGTGATCTTTTGAAAATAGATCTCATATATTAATGACTTTCCATGTCTGTATTTGCTGAAGTTGATCTGA
ATTTGCATATTGTTCATGTCAATGGATTGTCTGCTGTTACTAAATTTAACTTTGTGTCAGCACTCTTTAC
GTTTTGAATTGTCGAACCATTCACTTGTTCAGTTATTATTTGGTCTATCCATCCTTATATGTTGTTCTCT
GTTTAGATAAGGACAAAGAATAGACACCAGAGGAACTGAACCAAACAGCTGAGGCAGTTGGATATGGTGC
GGTGAAGTAAGTATACGTATCATCTCTATTCTACTGGTCACATGTCATGAGCAGGGAAATTACAGCCGTT
TATCAGAAAGTCTGGCAAAGACATAGATGAGCTGAAACAGACGGTTGAGGAAGCTTACACCAACTTGTTA
CCGAGCGTACTGTGCGAGTACCTCTACAGATTATCTGAACACTACACGGACTAGCGTACCATGAAATTTG
TGGATTGGCCTCTGCAGCTTTGTTTGAAATTCACTATAGCTTAGATGGCGAATTGGATTTAGACATGGAC
TTCCGGATTGTATGTTGTCTTTGAGTCTCAAGGGATTGATTAATGTGATGATATTTATACACCATAGCTG
AAATGAAATTTGTACTTAAAACTGATGGATAATTAATAACAGA"""

        self.seq3 = SeqRecord.SeqRecord(Seq.Seq(seq3.replace("\n", "")),
                                        id="PRJEB7093_DN.7194.1")

        # Fourth record ("PRJEB7093_DN.7194.2").
        seq4 = """GATGCCCTTAGTTTCTCTACTTGTATCATACAATAAAGGTCACAGATTTTGAAATTTGCA
AAGATATATCATACATTCTCAGAGGAAGCCTTTGTCTCTAAGACTCTGGACCGTCTCCTT
AACCGCATCTTCAACCGCAGTAAAAACCAGCCCGAGCTCAATCAATCGCTTAGCCGCATC
ATTACACGACGTAAGCCCAGGTTGAGTCTCTTTATCAAACTTGTGAACAGCAAACTCAGG
GAAGAGCTTAGACACAAGAGCAGCAAACTCACTGAACTGATAAATCCCATTAGTGCATAA
AAACCGTCCAGAAGCGTCAGGTGTTTCGAATAGCATCACATGACCTTTAGCCACATCTTT
CACGTGAACCACACCGAGCCAGTGATGCTCTTGCGTCTCGGTCGAGCCTTGTAAAAGCTG
TAGCAGAACAGCACAGCTTGCGTTTAGGTTCGGTTGCAGAAGCGGTCCGAGACATGTTGA
TGGATGAATCGTCACAATGTTGGTTCCATGCTTCTCCGAAAATTCCCAAGCTGCTTTCTC
AGCTAATGTCTTCGAAATTGGATACCATTTCTAAAATCAAAATTATATACCAATCGTTAC
AAATATCATATAAATCATCACTAACCATTTTAAACCGAAATTTAACGAACCTGCCTCGAC
TTGCAAAAATCAAGATCGGACCACGACGACTCATCGACGGGAACTTTTTCCGGCCAATTA
GGGTTAGGAACCAATGCGGAGATAGATGACGTGATCACCACGCGTCTCACATTAAACCTC
TTAGCAGCTTCCAACACATTGATCGTTCCCTTAACCGCCGGTTCGACCAGCTCCTTCTCT
GGATCTACCGGTGGATCCAACGTACAAGGTGACGCCACGTGGAACACTCCCGCACATCCA
TCAATAGCTCTGGAGATTGCATCAGAGTCTAAGAGATCCGCTTCAAAGATCTTGATCTTA
GAATCGGATCCGGGTAGTTGCAGAAGATGAGTCGGGTCGGATCCTGGGTAAATCGAAGCG
TGGATTTTAGTATATCCTTTCTCAATTAACGTTCGGATTATCCAAGATCCGATGAAACCA
TTAGCTCCGGTTACACACACTGTCTCTTTCGCCATTGTTGATCAATAAGCGCTCACTGAG
AATTTTTTTGTTCTCTCTCTCTATCGCAATTTATCTCAGAAGATAAGAAAAAAAAAACAT
CTTTCCAGTAAAAAAGGATCCTTTGTTTTTTTCTTACACGTAAAAAATGGATTTTTTTTT
CTCTCTTAAAGATATAATGCGTTGATACAAAAGCGTAACGTTGACATGATATTATCCACT
AGTTTTATAGACTTTTCAAAAAAAGGAGAGAATTTTCAATTCTTCAGTAGTCAAATAGAT
GAAGACCGCCGGAGCGCCGCCGCAGAGAGGTGGTTCCTCTTCCTCCTCCGCCGTATACTT
TAACTGGTCTTCATCATCTTGTTCTTACGATAGCTGTAGAGTTTTGGTGGTGAAGATGGG
AGGAAAAAGCAAGAAGCCTCATCAATCTTCTTCTTTTAAGGAGTCAGAGCCAGAACCACC
GAGAATCAAATCCAATGTTAAGCATAACTTGCAGCTTCTCAAGTTATGGAAGGAGTTTCA
GAGCAGAGGATCTGGCATGGCTAAGCCAGCGACTAGTTACAGGAAGAAGAAAGTAGAGAA
AGACGAGTTACCGGATGATAGCGAGCTCTACCGGGATCCTACAAATACGCTTTACTACAC
GAACCAAGGTCTATTGGATGACGCAGTTCCGGTTTTGCTTGTTGATGGTTATAATGTGTG
TGGATATTGGATGAAGTTAAAGAAACATTTCATGAAAGGAAGGCTTGACGTTGCTCGGCA
GAAGTTAGTTGATGAACTTGTGTCCTTCAGTATGGTTAAAGAGGTTAAGGTAGTGGTTGT
GTTTGATGCTCTCATGTCTGGTCTTCCTACTCACAAGGAAGACTTTGCAGGTGTTGATGT
GATTTTCTCAGGAGAAACTTGTGCTGACGCTTGGATTGAAAAGGAGGTGGTTGCATTGAG
AGAAGATGGATGCCCCAAGGTTTGGGTTGTAACATCTGATGTCTGTCAACAACAAGCAGC
ACATGGAGCGGGAGCTTATATTTGGAGTAGCAAGGCATTGGTTTCTGAGATTAAATCGAT
GCATAAGGAGGTTGAGAAAATGATGCAAGAAACAAGGTCAACATCTTTCCAAGGGAGATT
GCTTAAACACAATCTTGATTCTGAAGTCGTTGATGCTCTTAAAGATCTTAGAGACAAATT
ATCAGAAAACGAAACAAAGAGATGACAAAAAGACCAATCCGGATTATATAAACAATTAAC
AAGGCTTGGTCTCTCCATGTAACTTCTGTCCCAAGTAAGTAAGCTAATCTGACTTGTAAA
AAACAGAGGCTGCAGAGGAAACGAGGGAGATAGAGAGAGAGAGAGCTCAAATGCTTTGTT
ATTGTTGTATTTGTGTCTGAATTCTTTTTGACTAATCTATATATAGATTCGTTTTCTTTG
GTCCAAACATATGGTTAAAAGATAGTTCTGAATTTTTCTTTTAGCTTCATGCATAAGAAT
CATCTTAACCTAATAACCTATGTTTATTATTTTACAATAATGTAAAAATGTAAATTTTTA
GTTGAATAATGAACCAAATTTTTATGTAAAAAAACTTGGATGTTTATTTTCAAACACAAA
CATCAGTAACACTTGAAGCAGTAGAGAGAATTGGAGGCAGAGCAAGTCTACAAATTTGCA
GATAGTTCCAGGGTTTGAGCTGTTTGTTCTGGTCAGTCTCCAATCAATCAAAGCATATGG
TTTATCGAGAATGGATAGAGATTCAAGAGAAGATTGAAGAACTGAGTTTGCAAAGGCTTA
TCAATGCCTTCGACTTCGAGTTGAGATTGAAGAAAAGGTAAAGAAATAGCAAGTGATCTT
TTGAAAATAGATCTCATATATTAATGACTTTCCATGTCTGTATTTGCTGAAGTTGATCTG
AATTTGCATATTGTTCATGTCAATGGATTGTCTGCTGTTACTAAATTTAACTTTGTGTCA
GCACTCTTTACGTTTTGAATTGTCGAACCATTCACTTGTTCAGTTATTATTTGGTCTATC
CATCCTTATATGTTGTTCTCTGTTTAGATAAGGACAAAGAATAGACACCAGAGGAACTGA
ACCAAACAGCTGAGGCAGTTGGATATGGTGCGGTGAAGTAAGTATACGTATCATCTCTAT
TCTACTGGTCACATGTCATGAGCAGGGAAATTACAGCCGTTTATCAGAAAGTCTGGCAAA
GACATAGATGAGCTGAAACAGACGGTTGAGGAAGCTTACACCAACTTGTTACCGAGCGTA
CTGTGCGAGTACCTCTACAGATTATCTGAACACTACACGGACTAGCGTACCATGAAATTT
GTGGATTGGCCTCTGCAGCTTTGTTTGAAATTCACTATAGCTTAGATGGCGAATTGGATT
TAGACATGGACTTCCGGATTGTATGTTGTCTTTGAGTCTCAAGGGATTGATTAATGTGAT
GATATTTATACACCATAGCTGAAATGAAATTTGTACTTAAAACTGATGGATAATTAATAA
CAGA"""

        self.seq4 = SeqRecord.SeqRecord(Seq.Seq(seq4.replace("\n", "")),
                                        id="PRJEB7093_DN.7194.2")

        # Lookup table from record id to the record itself.
        self.index = dict()
        self.index[self.seq1.id] = self.seq1
        self.index[self.seq2.id] = self.seq2
        self.index[self.seq3.id] = self.seq3
        self.index[self.seq4.id] = self.seq4

        # One tab-separated line per record: split() collapses the arbitrary
        # spacing used in the literals and "\t".join() re-joins the twelve
        # fields with single tabs (presumably BED12 records - TODO confirm).
        self.bed1 = "\t".join(
            """CLASS_2.159    0    784    ID=CLASS_2.159|m.24650  0    +    29    386    0    1    784    0"""
            .split())
        self.bed2 = "\t".join(
            "CLASS_2.160    0    809    ID=CLASS_2.160|m.34763 0    +    1    766    0    1    809    0"
            .split())
        self.bed3 = "\t".join(
            "PRJEB7093_DN.7194.1  0 3683 ID=PRJEB7093_DN.7194.1|m.16659 0  -  641    1115  0  1    3683    0"
            .split())
        self.bed4 = "\t".join(
            "PRJEB7093_DN.7194.2  0  3604 ID=PRJEB7093_DN.7194.2|m.16657 0 - 641    1115  0    1    3604    0"
            .split())
Example #58
0
 def test_translation_of_glutamine(self):
     """Each listed ambiguous codon must translate to ``Z``."""
     ambiguous_codons = ("SAR", "SAG", "SAA")
     for triplet in ambiguous_codons:
         self.assertEqual("Z", Seq.translate(triplet))
Example #59
0
 def test_translation_extra_stop_codon(self):
     """Translating this sequence with ``table=2, cds=True`` must fail."""
     cds_text = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAGTAG"
     self.assertRaises(
         TranslationError, Seq.translate, cds_text, table=2, cds=True
     )
Example #60
0
 def test_translation_incomplete_codon(self):
     """Translating a 20-character string must emit a BiopythonWarning."""
     partial_codon_text = "GTGGCCATTGTAATGGGCCG"
     self.assertWarns(BiopythonWarning, Seq.translate, partial_codon_text)