Esempio n. 1
0
 def test_assemble_seq(self):
     """_assemble_seq should pad the regions between fragments with N's"""
     pieces = ["AAAAA", "CCCCC", "GGG"]
     joined = ''.join(pieces)
     # each case: (fragments, start, end, fragment positions, expected)
     cases = [
         # N padding required before the first and after the last fragment
         (pieces, 10, 31, [(11, 16), (18, 23), (25, 28)],
          "NAAAAANNCCCCCNNGGGNNN"),
         (pieces, 0, 21, [(1, 6), (8, 13), (15, 18)],
          "NAAAAANNCCCCCNNGGGNNN"),
         # start matches first frag start
         (pieces, 0, 20, [(0, 5), (7, 12), (14, 17)],
          "AAAAANNCCCCCNNGGGNNN"),
         # end matches last frag end
         (pieces, 10, 28, [(11, 16), (18, 23), (25, 28)],
          "NAAAAANNCCCCCNNGGG"),
         # both start and end matched
         (pieces, 10, 27, [(10, 15), (17, 22), (24, 27)],
          "AAAAANNCCCCCNNGGG"),
         # one frag covering the whole region
         ([joined], 10, 23, [(10, 23)], joined),
     ]
     for fragments, start, end, coords, wanted in cases:
         got = _assemble_seq(fragments, start, end, coords)
         self.assertEqual(got, DNA.makeSequence(wanted))
Esempio n. 2
0
 def test_gaps_at_both_ends(self):
     """two overlapping substrings should give 6 matches over 10 columns"""
     full = 'aaaccggttt'
     # "A" lacks the last two characters, "B" lacks the first two
     left = DNA.makeSequence(full[:-2], Name="A")
     right = DNA.makeSequence(full[2:], Name="B")
     alignments = self._aligned_both_ways(left, right, local=False)
     for aln in alignments:
         self.assertEqual(matchedColumns(aln), 6)
         self.assertEqual(len(aln), 10)
Esempio n. 3
0
 def test_gaps_at_both_ends(self):
     """non-local alignment of two offset substrings: 6 matches, length 10"""
     source = 'aaaccggttt'
     trimmed_end = DNA.makeSequence(source[:-2], Name="A")
     trimmed_start = DNA.makeSequence(source[2:], Name="B")
     for alignment in self._aligned_both_ways(trimmed_end, trimmed_start,
                                              local=False):
         # each sequence is missing two characters the other one has
         self.assertEqual(matchedColumns(alignment), 6)
         self.assertEqual(len(alignment), 10)
Esempio n. 4
0
 def test_assemble_seq(self):
     """should correctly fill in a sequence with N's"""
     frags = ["AAAAA", "CCCCC", "GGG"]

     def check(fragments, start, end, positions, expected):
         # compare the assembled result with the expected DNA sequence
         observed = _assemble_seq(fragments, start, end, positions)
         self.assertEqual(observed, DNA.makeSequence(expected))

     padded_both_ends = "NAAAAANNCCCCCNNGGGNNN"
     check(frags, 10, 31, [(11, 16), (18, 23), (25, 28)], padded_both_ends)
     check(frags, 0, 21, [(1, 6), (8, 13), (15, 18)], padded_both_ends)
     # should work with:
     # start matches first frag start
     check(frags, 0, 20, [(0, 5), (7, 12), (14, 17)],
           "AAAAANNCCCCCNNGGGNNN")
     # end matches last frag_end
     check(frags, 10, 28, [(11, 16), (18, 23), (25, 28)],
           "NAAAAANNCCCCCNNGGG")
     # both start and end matched
     check(frags, 10, 27, [(10, 15), (17, 22), (24, 27)],
           "AAAAANNCCCCCNNGGG")
     # one frag
     whole = ''.join(frags)
     check([whole], 10, 23, [(10, 23)], whole)
Esempio n. 5
0
 def test_local_tiebreak(self):
     """Should pick the first best-equal hit rather than the last one"""
     # so that the Pyrex and Python versions give the same result.
     scores = make_dna_scoring_dict(
         match=1, transition=-1, transversion=-1)
     query = DNA.makeSequence('cwc', Name='pattern')
     subject = DNA.makeSequence('cactc', Name='target')
     result = local_pairwise(query, subject, scores, 5, 2)
     # the aligned part of the target is expected to be its first 3 bases
     matched = str(result.NamedSeqs['target'])
     self.assertEqual(matched.lower(), 'cac')
Esempio n. 6
0
 def setUp(self):
     """Create the gapped sequences, cigar strings and alignment fixtures"""
     gapped = DNA.makeSequence('---AA---GCTTAG-A--CCT-')
     gapped1 = DNA.makeSequence('CCAAAAAA---TAGT-GGC--G')
     seq_map, ungapped = gapped.parseOutGaps()
     seq_map1, ungapped1 = gapped1.parseOutGaps()

     # hand-written cigar for the first gapped sequence
     self.cigar_text = '3D2M3D6MDM2D3MD'
     self.aln_seq = gapped
     self.aln_seq1 = gapped1
     self.map, self.seq = seq_map, ungapped
     self.map1, self.seq1 = seq_map1, ungapped1
     self.slices = [(1, 4), (0, 8), (7, 12), (0, 1), (3, 5)]
     self.aln = LoadSeqs(data={"FAKE01": gapped, "FAKE02": gapped1})
     self.cigars = {"FAKE01": self.cigar_text,
                    "FAKE02": map_to_cigar(seq_map1)}
     self.seqs = {"FAKE01": str(ungapped), "FAKE02": str(ungapped1)}
Esempio n. 7
0
 def setUp(self):
     """Prepare aligned sequences plus their maps, cigars and raw strings"""
     first_name, second_name = "FAKE01", "FAKE02"
     self.cigar_text = '3D2M3D6MDM2D3MD'

     first = DNA.makeSequence('---AA---GCTTAG-A--CCT-')
     second = DNA.makeSequence('CCAAAAAA---TAGT-GGC--G')
     self.aln_seq, self.aln_seq1 = first, second

     # split each gapped sequence into its map and ungapped sequence
     first_map, first_raw = first.parseOutGaps()
     second_map, second_raw = second.parseOutGaps()
     self.map, self.seq = first_map, first_raw
     self.map1, self.seq1 = second_map, second_raw

     self.slices = [(1, 4), (0, 8), (7, 12), (0, 1), (3, 5)]
     self.aln = LoadSeqs(data={first_name: first, second_name: second})
     self.cigars = {first_name: self.cigar_text,
                    second_name: map_to_cigar(second_map)}
     self.seqs = {first_name: str(first_raw), second_name: str(second_raw)}
Esempio n. 8
0
 def test_local_tiebreak(self):
     """Should pick the first best-equal hit rather than the last one"""
     # so that the Pyrex and Python versions give the same result.
     scoring = dict(match=1, transition=-1, transversion=-1)
     score_matrix = make_dna_scoring_dict(**scoring)
     pattern_seq = DNA.makeSequence('cwc', Name='pattern')
     target_seq = DNA.makeSequence('cactc', Name='target')
     alignment = local_pairwise(pattern_seq, target_seq, score_matrix, 5, 2)
     aligned_target = alignment.NamedSeqs['target']
     self.assertEqual(str(aligned_target).lower(), 'cac')
Esempio n. 9
0
 def _make_utr_seq(self):
     """Cache the concatenated 5' and 3' untranslated exon sequences.

     When both UntranslatedExons5 and UntranslatedExons3 are None, NULL_VALUE
     is stored under both "Utr5" and "Utr3" in self._cached. Otherwise each
     entry is the concatenation of exon.Seq over the corresponding exons,
     starting from an empty DNA sequence.
     """
     exons5 = self.UntranslatedExons5
     exons3 = self.UntranslatedExons3
     if exons5 is None and exons3 is None:
         self._cached["Utr5"] = self.NULL_VALUE
         self._cached["Utr3"] = self.NULL_VALUE
         return
     Utr5_seq, Utr3_seq = DNA.makeSequence(""), DNA.makeSequence("")
     # Only one side may be None here; treat it as having no exons instead of
     # failing with a TypeError when iterating over None.
     # NOTE(review): a lone None side is cached as an empty sequence, not
     # NULL_VALUE -- confirm that is the desired value.
     for exon in (() if exons5 is None else exons5):
         Utr5_seq += exon.Seq
     for exon in (() if exons3 is None else exons3):
         Utr3_seq += exon.Seq
     self._cached["Utr5"] = Utr5_seq
     self._cached["Utr3"] = Utr3_seq
Esempio n. 10
0
def process_uclust_pw_alignment_results(fasta_pairs_lines, uc_lines):
    """ Process results of uclust search and align

    fasta_pairs_lines: lines from which consecutive pairs of fasta records
     (aligned query, aligned target) are read
    uc_lines: lines from which the 'H' records are read

    Each 'H' record is paired, in order, with the next two fasta records.
    Yields tuples of
     (query_id, target_id, aligned_query, aligned_target, percent_id).
    For matches on the '-' strand both aligned sequences are reverse
    complemented and ' RC' is appended to the query id.

    Raises UclustParseError for an unknown strand field or when the ids in
    the fasta records do not agree with those in the uc record.
    """
    alignments = get_next_two_fasta_records(fasta_pairs_lines)
    for hit in get_next_record_type(uc_lines, 'H'):
        matching_strand = hit[4]
        if matching_strand == '-':
            strand_id = '-'
            target_rev_match = True
        elif matching_strand == '+':
            strand_id = '+'
            target_rev_match = False
        elif matching_strand == '.':
            # protein sequence, so no strand information
            strand_id = ''
            target_rev_match = False
        else:
            raise UclustParseError(
                "Unknown strand type: %s" % matching_strand)
        uc_query_id = hit[8]
        uc_target_id = hit[9]
        percent_id = float(hit[3])

        # next() works on both Python 2.6+ and Python 3 iterators
        fasta_pair = next(alignments)

        fasta_query_id = fasta_pair[0][0]
        aligned_query = fasta_pair[0][1]

        if fasta_query_id != uc_query_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match." +
                " Got query %s but expected %s." %
                (fasta_query_id, uc_query_id))

        fasta_target_id = fasta_pair[1][0]
        aligned_target = fasta_pair[1][1]

        # the fasta target label carries the strand symbol as a suffix
        if fasta_target_id != uc_target_id + strand_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match." +
                " Got target %s but expected %s." %
                (fasta_target_id, uc_target_id + strand_id))

        query_id = uc_query_id
        target_id = uc_target_id
        if target_rev_match:
            query_id = uc_query_id + ' RC'
            aligned_query = DNA.rc(aligned_query)
            aligned_target = DNA.rc(aligned_target)

        yield (query_id, target_id, aligned_query, aligned_target, percent_id)
Esempio n. 11
0
 def test_picklability(self):
     """Pickle an alignment containing an annotated sequence"""
     # This depends on alignments, sequences, features, maps and spans
     # Doesn't test round trip result is correct, which should possibly
     # be done for maps/spans, but seqs/alignments are just simple
     # python classes without __getstate__ etc.
     try:
         import cPickle as pickle
     except ImportError:
         # cPickle does not exist on Python 3; pickle is the replacement
         import pickle
     seq1 = DNA.makeSequence("aagaagaagaccccca")
     seq2 = DNA.makeSequence("aagaagaagaccccct")
     seq2.addFeature('exon', 'fred', [(10, 15)])
     aln = LoadSeqs(data={'a': seq1, 'b': seq2})
     # only checks that dumping and loading completes without raising
     pickle.loads(pickle.dumps(aln))
Esempio n. 12
0
def test(r=1, **kw):
    """Time one non-local pairwise alignment and return its throughput.

    r: number of times each 10 character seed sequence is repeated
    kw: extra keyword arguments passed on to classic_align_pairwise

    Returns len(seq1) * len(seq2) divided by the elapsed time.time()
    seconds of the alignment call.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)

    t0 = time.time()
    # the alignment itself is not needed, only how long it takes
    classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, **kw)
    t = time.time() - t0
    return (len(seq1) * len(seq2)) / t
Esempio n. 13
0
def process_uclust_pw_alignment_results(fasta_pairs_lines,uc_lines):
    """ Process results of uclust search and align

    fasta_pairs_lines: lines from which consecutive pairs of fasta records
     (aligned query, aligned target) are read
    uc_lines: lines from which the 'H' records are read

    Each 'H' record is paired, in order, with the next two fasta records.
    Yields tuples of
     (query_id, target_id, aligned_query, aligned_target, percent_id).
    For matches on the '-' strand both aligned sequences are reverse
    complemented and ' RC' is appended to the query id.

    Raises UclustParseError for an unknown strand field or when the ids in
    the fasta records do not agree with those in the uc record.
    """
    alignments = get_next_two_fasta_records(fasta_pairs_lines)
    for hit in get_next_record_type(uc_lines,'H'):
        matching_strand = hit[4]
        if matching_strand == '-':
            strand_id = '-'
            target_rev_match = True
        elif matching_strand == '+':
            strand_id = '+'
            target_rev_match = False
        elif matching_strand == '.':
            # protein sequence, so no strand information
            strand_id = ''
            target_rev_match = False
        else:
            raise UclustParseError(
                "Unknown strand type: %s" % matching_strand)
        uc_query_id = hit[8]
        uc_target_id = hit[9]
        percent_id = float(hit[3])

        # next() works on both Python 2.6+ and Python 3 iterators
        fasta_pair = next(alignments)

        fasta_query_id = fasta_pair[0][0]
        aligned_query = fasta_pair[0][1]

        if fasta_query_id != uc_query_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match." +
                " Got query %s but expected %s." %
                (fasta_query_id, uc_query_id))

        fasta_target_id = fasta_pair[1][0]
        aligned_target = fasta_pair[1][1]

        # the fasta target label carries the strand symbol as a suffix
        if fasta_target_id != uc_target_id + strand_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match." +
                " Got target %s but expected %s." %
                (fasta_target_id, uc_target_id + strand_id))

        query_id = uc_query_id
        target_id = uc_target_id
        if target_rev_match:
            query_id = uc_query_id + ' RC'
            aligned_query = DNA.rc(aligned_query)
            aligned_target = DNA.rc(aligned_target)

        yield (query_id, target_id, aligned_query, aligned_target, percent_id)
Esempio n. 14
0
def test(r=1, **kw):
    """Time one non-local pairwise alignment and return its throughput.

    r: number of times each 10 character seed sequence is repeated
    kw: extra keyword arguments passed on to classic_align_pairwise

    Returns len(seq1) * len(seq2) divided by the elapsed time.time()
    seconds of the alignment call.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence("AAAATGCTTA" * r)
    seq1 = DNA.makeSequence("AATTTTGCTG" * r)

    t0 = time.time()
    # the alignment itself is not needed, only how long it takes
    classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, **kw)
    t = time.time() - t0
    return (len(seq1) * len(seq2)) / t
Esempio n. 15
0
 def test_picklability(self):
     """Pickle an alignment containing an annotated sequence"""
     # This depends on alignments, sequences, features, maps and spans
     # Doesn't test round trip result is correct, which should possibly
     # be done for maps/spans, but seqs/alignments are just simple
     # python classes without __getstate__ etc.
     import pickle
     plain = DNA.makeSequence("aagaagaagaccccca")
     annotated = DNA.makeSequence("aagaagaagaccccct")
     annotated.addFeature('exon', 'fred', [(10, 15)])
     alignment = LoadSeqs(data={'a': plain, 'b': annotated})
     # dump, then load; passing means neither step raised
     serialised = pickle.dumps(alignment)
     pickle.loads(serialised)
Esempio n. 16
0
 def test_codon(self):
     """codon model Viterbi alignment: 6 matched columns, 9 columns long"""
     seq_a = DNA.makeSequence('tacgccgta', Name="A")
     seq_b = DNA.makeSequence('tacgta', Name="B")
     model = cogent.evolve.substitution_model.Codon(
         model_gaps=False,
         equal_motif_probs=True,
         mprob_model='conditional')
     two_tip_tree = cogent.LoadTree(tip_names=['A', 'B'])
     lf = model.makeLikelihoodFunction(two_tip_tree, aligned=False)
     lf.setSequences({'A': seq_a, 'B': seq_b})
     viterbi_path = lf.getLogLikelihood().edge.getViterbiPath()
     alignment = viterbi_path.getAlignment()
     self.assertEqual(matchedColumns(alignment), 6)
     self.assertEqual(len(alignment), 9)
Esempio n. 17
0
 def test_codon(self):
     """codon model Viterbi alignment: 6 matched columns, 9 columns long"""
     seq_a = DNA.makeSequence('tacgccgta', Name="A")
     seq_b = DNA.makeSequence('tacgta', Name="B")
     model_options = dict(model_gaps=False,
                          equal_motif_probs=True,
                          mprob_model='conditional')
     model = cogent.evolve.substitution_model.Codon(**model_options)
     two_tip_tree = cogent.LoadTree(tip_names=['A', 'B'])
     lf = model.makeLikelihoodFunction(two_tip_tree, aligned=False)
     lf.setSequences({'A': seq_a, 'B': seq_b})
     edge = lf.getLogLikelihood().edge
     # the score is returned together with the alignment but not checked
     _score, alignment = edge.getViterbiScoreAndAlignment()
     self.assertEqual(matchedColumns(alignment), 6)
     self.assertEqual(len(alignment), 9)
Esempio n. 18
0
def test(r=1, **kw):
    """Time one non-local pairwise alignment and return its throughput.

    r: number of times each 10 character seed sequence is repeated
    kw: extra keyword arguments passed on to classic_align_pairwise

    Returns int(len(seq1) * len(seq2) / seconds / 1000), or the string '*'
    if the alignment raised an ArithmeticError.
    """
    # time.clock was removed in Python 3.8; use perf_counter when it exists
    # and fall back to clock on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)

    t0 = timer()
    try:
        # return_alignment is False in order to emphasise the quadratic part of the work.
        classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, return_alignment=False, **kw)
    except ArithmeticError:
        return '*'
    else:
        t = timer() - t0
        return int((len(seq1) * len(seq2)) / t / 1000)
Esempio n. 19
0
def process_barcode_single_end_data(read1_data,
                                    output_bc_fastq,
                                    output_fastq1,
                                    bc1_len=6,
                                    rev_comp_bc1=False):
    """ Splits a read into barcode and remainder, writing each as fastq

    read1_data: indexable as (header, read, quality scores)
    output_bc_fastq: object whose write() receives the barcode record
    output_fastq1: object whose write() receives the remaining read record
    bc1_len: number of leading characters taken as the barcode
    rev_comp_bc1: if True, reverse complement the barcode (and reverse its
     quality scores) before writing.
    """
    read = read1_data[1]
    quality = read1_data[2]

    barcode, barcode_quality = read[:bc1_len], quality[:bc1_len]
    if rev_comp_bc1:
        barcode = DNA.rc(barcode)
        barcode_quality = barcode_quality[::-1]

    header = read1_data[0]
    # barcode record first, then the read with the barcode removed
    output_bc_fastq.write(
        format_fastq_record(header, barcode, barcode_quality))
    output_fastq1.write(
        format_fastq_record(header, read[bc1_len:], quality[bc1_len:]))
Esempio n. 20
0
def makeSampleSequence():
    """Return the sample DNA sequence annotated with three features"""
    raw = 'tgccnwsrygagcgtgttaaacaatggccaactctctaccttcctatgttaaacaagtgagatcgcaggcgcgccaaggc'
    sample = DNA.makeSequence(raw)
    # (feature type, feature name, spans)
    features = (
        ('exon', 'exon', [(20, 35)]),
        ('repeat_unit', 'repeat_unit', [(39, 49)]),
        ('repeat_unit', 'rep2', [(49, 60)]),
    )
    for kind, name, spans in features:
        sample.addAnnotation(annotation.Feature, kind, name, spans)
    return sample
Esempio n. 21
0
 def test_inherit_feature(self):
     """should be able to subclass and extend _Feature"""
     class NewFeat(_Feature):
         def __init__(self, *args, **kwargs):
             super(NewFeat, self).__init__(*args, **kwargs)

         def newMethod(self):
             if len(self.map.spans) <= 1:
                 return True
             # asOneSpan should create a new instance of NewFeat, so the
             # extended method must be available on the result
             return self.asOneSpan().newMethod()

     seq = DNA.makeSequence('ACGTACGTACGT')
     two_spans = as_map([(1, 3), (5, 7)], len(seq))
     first = seq.addAnnotation(NewFeat, two_spans, type='gene', Name='abcd')
     self.assertEqual(type(first.asOneSpan()), NewFeat)
     self.assertEqual(type(first.getShadow()), NewFeat)

     one_span = as_map([(3, 5)], len(seq))
     second = seq.addAnnotation(NewFeat, one_span, type='gene', Name='def')
     covering = seq.getRegionCoveringAll([first, second],
                                         feature_class=NewFeat)
     self.assertEqual(type(covering), NewFeat)
     # now use the new method
     first.newMethod()
Esempio n. 22
0
def parse_illumina_line(l,barcode_length,rev_comp_barcode,
                        barcode_in_sequence=False):
    """Parses a single colon-delimited line of Illumina data into a dict

    l: the line to parse
    barcode_length: number of characters to take as the barcode
    rev_comp_barcode: if True, the barcode is passed through DNA.rc
    barcode_in_sequence: if True, the barcode is the start of the sequence
     and is removed from both the sequence and the quality string;
     otherwise it is read from after the '#' in the fifth field
    """
    parts = l.strip().split(':')

    y_field = parts[4]
    y_pieces = y_field.split('#')
    y_coord = int(y_pieces[0])
    bases = parts[5]
    quals = parts[6]

    if not barcode_in_sequence:
        tag = y_pieces[1][:barcode_length]
    else:
        tag = bases[:barcode_length]
        bases = bases[barcode_length:]
        quals = quals[barcode_length:]

    if rev_comp_barcode:
        tag = DNA.rc(tag)

    record = {}
    record['Full description'] = ':'.join(parts[:5])
    record['Machine Name'] = parts[0]
    record['Channel Number'] = int(parts[1])
    record['Tile Number'] = int(parts[2])
    record['X Position'] = int(parts[3])
    record['Y Position'] = y_coord
    record['Barcode'] = tag
    record['Full Y Position Field'] = y_field
    record['Sequence'] = bases
    record['Quality Score'] = quals
    return record
Esempio n. 23
0
def process_barcode_single_end_data(read1_data, output_bc_fastq, output_fastq1, bc1_len=6, rev_comp_bc1=False):
    """ Writes the barcode and the barcode-trimmed read as fastq records

    read1_data: indexable as (header, read, quality scores)
    output_bc_fastq: object whose write() receives the barcode record
    output_fastq1: object whose write() receives the remaining read record
    bc1_len: number of leading characters taken as the barcode
    rev_comp_bc1: if True, reverse complement the barcode (and reverse its
     quality scores) before writing.
    """
    full_read, full_qual = read1_data[1], read1_data[2]

    bc_read = full_read[:bc1_len]
    bc_qual = full_qual[:bc1_len]
    if rev_comp_bc1:
        # keep the quality scores in step with the reversed barcode
        bc_read, bc_qual = DNA.rc(bc_read), bc_qual[::-1]

    label = read1_data[0]
    barcode_record = format_fastq_record(label, bc_read, bc_qual)
    output_bc_fastq.write(barcode_record)

    trimmed_record = format_fastq_record(
        label, full_read[bc1_len:], full_qual[bc1_len:])
    output_fastq1.write(trimmed_record)
def parse_illumina_single_end_read_file(read_file,barcode_length,\
    max_bad_run_length,quality_threshold,min_per_read_length,
    rev_comp,rev_comp_barcode,barcode_in_seq,barcode_max_N=0,seq_max_N=0):
    """Yields filtered reads from an Illumina single-end read file

    Each line of read_file is parsed with parse_illumina_line and trimmed
    with read_qual_score_filter. Reads are skipped when the barcode has more
    than barcode_max_N 'N' characters, or when the filtered sequence is
    shorter than min_per_read_length or has more than seq_max_N 'N'
    characters. If rev_comp is True the sequence is reverse complemented and
    its quality scores reversed.

    Yields (read description, barcode, sequence, quality) tuples.
    """
    for line in read_file:
        record = parse_illumina_line(
            line, barcode_length, rev_comp_barcode, barcode_in_seq)
        description = illumina_read_description_from_read_data(record)
        barcode = record['Barcode']

        # too many N's in the barcode
        if barcode.count('N') > barcode_max_N:
            continue

        bases, scores = read_qual_score_filter(
            record['Sequence'], record['Quality Score'],
            max_bad_run_length, quality_threshold)

        # too short, or too many N's, after quality filtering
        long_enough = len(bases) >= min_per_read_length
        if not long_enough or bases.count('N') > seq_max_N:
            continue

        if rev_comp:
            bases, scores = DNA.rc(bases), scores[::-1]

        yield description, barcode, bases, scores
Esempio n. 25
0
def CigarParser(seqs, cigars, sliced = False, ref_seqname = None, start = None, end = None, moltype=DNA):
    """return an alignment from raw sequences and cigar strings
    if sliced, will return an alignment correspondent to ref sequence start to end
    
    Arguments:
        seqs - raw sequences as {seqname: seq}
        cigars - corresponding cigar text as {seqname: cigar_text}
        cigars and seqs should have the same seqnames
        sliced - if True, only the region selected on the reference
          sequence is returned
        ref_seqname - name of the reference sequence, used when sliced
        start, end - region on the reference sequence, used when sliced
        moltype - molecular type used to build the sequences, default DNA
    """
    data = {}
    if not sliced:
        for seqname in seqs.keys():
            aligned_seq = aligned_from_cigar(cigars[seqname], 
                                            seqs[seqname], moltype=moltype)
            data[seqname] = aligned_seq
    else:
        ref_aln_seq = aligned_from_cigar(cigars[ref_seqname], 
                                        seqs[ref_seqname], moltype=moltype)
        # aln_loc is the (start, end) of the requested region in alignment
        # coordinates; every other sequence is sliced to that region
        m, aln_loc = slice_cigar(cigars[ref_seqname], start, end, by_align = False)
        data[ref_seqname] = ref_aln_seq[aln_loc[0]:aln_loc[1]]
        for seqname in [seqname for seqname in seqs.keys() if seqname != ref_seqname]:
            m, seq_loc = slice_cigar(cigars[seqname], aln_loc[0], aln_loc[1])
            if seq_loc:
                seq = seqs[seqname]
                if isinstance(seq, str):
                    seq = moltype.makeSequence(seq)
                data[seqname] = seq[seq_loc[0]:seq_loc[1]].gappedByMap(m)
            else:
                # nothing of this sequence falls in the region: fill with
                # gaps, built with the requested moltype rather than DNA
                data[seqname] = moltype.makeSequence('-'*(aln_loc[1] - aln_loc[0]))
    aln = LoadSeqs(data = data, aligned = True)
    return aln
Esempio n. 26
0
def parse_illumina_line(l, barcode_length, rev_comp_barcode,
                        barcode_in_sequence=False):
    """Parses a single colon-delimited line of Illumina data into a dict

    l: the line to parse
    barcode_length: number of characters to take as the barcode
    rev_comp_barcode: if True, the barcode is passed through DNA.rc
    barcode_in_sequence: if True, the barcode is the start of the sequence
     and is removed from both the sequence and the quality string;
     otherwise it is read from after the '#' in the fifth field
    """
    columns = l.strip().split(':')

    full_y = columns[4]
    y_subfields = full_y.split('#')
    y_value = int(y_subfields[0])
    read = columns[5]
    quality = columns[6]

    if barcode_in_sequence:
        # the barcode occupies the first barcode_length positions
        code = read[:barcode_length]
        read, quality = read[barcode_length:], quality[barcode_length:]
    else:
        code = y_subfields[1][:barcode_length]

    if rev_comp_barcode:
        code = DNA.rc(code)

    description = ':'.join(columns[:5])
    machine = columns[0]
    channel, tile, x_value = (int(columns[1]), int(columns[2]),
                              int(columns[3]))

    return {
        'Full description': description,
        'Machine Name': machine,
        'Channel Number': channel,
        'Tile Number': tile,
        'X Position': x_value,
        'Y Position': y_value,
        'Barcode': code,
        'Full Y Position Field': full_y,
        'Sequence': read,
        'Quality Score': quality,
    }
def findBestSeq(seqobject):
    """Return the longest stop-free leading translation and its DNA.

    seqobject: object providing .seq and .id
     (NOTE(review): looks like a Biopython SeqRecord -- confirm)

    The frames returned by standard_code.sixframes are each cut at their
    first '*'. Only the first three are compared (presumably the forward
    frames at offsets 0, 1 and 2 -- confirm); ties go to the earliest frame.

    Returns (longest translation, DNA sequence from that frame's offset).
    If all three translations are empty, returns ('', the sequence from
    offset 0) instead of raising UnboundLocalError.
    """
    dna_seq = str(seqobject.seq)
    my_seq = DNA.makeSequence(dna_seq, seqobject.id)

    all_six = standard_code.sixframes(my_seq)
    seqlist = [frame.split('*')[0] for frame in all_six]

    longestseq = ''
    # default to frame 0 so the result is defined even when no frame
    # yields a non-empty translation
    correctdnaseq = my_seq[0:]
    for x, candidate in enumerate(seqlist[:3]):
        if len(longestseq) < len(candidate):
            longestseq = candidate
            correctdnaseq = my_seq[x:]
    return longestseq, correctdnaseq
Esempio n. 28
0
    def test_inherit_feature(self):
        """should be able to subclass and extend _Feature"""
        class NewFeat(_Feature):
            def __init__(self, *args, **kwargs):
                super(NewFeat, self).__init__(*args, **kwargs)

            def newMethod(self):
                multi_span = len(self.map.spans) > 1
                if not multi_span:
                    return True
                # should create new instance of NewFeat
                merged = self.asOneSpan()
                return merged.newMethod()

        seq = DNA.makeSequence('ACGTACGTACGT')
        split_gene = seq.addAnnotation(
            NewFeat, as_map([(1, 3), (5, 7)], len(seq)),
            type='gene', Name='abcd')
        self.assertEqual(type(split_gene.asOneSpan()), NewFeat)
        self.assertEqual(type(split_gene.getShadow()), NewFeat)

        single_gene = seq.addAnnotation(
            NewFeat, as_map([(3, 5)], len(seq)),
            type='gene', Name='def')
        region = seq.getRegionCoveringAll([split_gene, single_gene],
                                          feature_class=NewFeat)
        self.assertEqual(type(region), NewFeat)
        # now use the new method
        split_gene.newMethod()
Esempio n. 29
0
 def _get_flanking_seq_data(self):
     """Cache the (up, down) flanking sequences under 'FlankingSeq'"""
     # maps to flanking_sequence through variation_feature_id
     # if this fails, we grab from genomic sequence
     variation_id = self._table_rows['variation_feature']['variation_id']
     table = self.flanking_sequence_table
     query = sql.select([table], table.c.variation_id == variation_id)
     record = asserted_one(query.execute())
     self._table_rows['flanking_sequence'] = record

     def null_to_none(value):
         # someone has entered the string 'NULL' instead of NULL in the
         # MySQL tables, so that string is treated as a missing value
         return None if value == 'NULL' else value

     seqs = dict(up=null_to_none(record['up_seq']),
                 down=null_to_none(record['down_seq']))
     for name, seq in seqs.items():
         if seq is None:
             # nothing stored: take the flank from the genome instead,
             # swapping the two windows for features on the -1 strand
             is_down = name == 'down'
             if self.Location.Strand == -1:
                 window = (-301, -1) if is_down else (1, 301)
             else:
                 window = (1, 301) if is_down else (-301, -1)
             flank = self.Location.resized(*window)
             seq = self.genome.getRegion(region=flank).Seq
         else:
             seq = DNA.makeSequence(seq)
         seqs[name] = seq

     self._cached['FlankingSeq'] = (seqs['up'][-300:], seqs['down'][:300])
    def _generate_unambiguous_sequences(self):
        """Fill self.conserved_sequences from self._CONSERVED_SEQUENCES.

        Each conserved sequence is passed through self._disambiguate; every
        resulting sequence is stored as a ConservedSequence keyed by its
        string form. A second pass then adds an entry flagged rc=True for
        each sequence stored so far.
        """
        for pos, seq in self._CONSERVED_SEQUENCES.items():
            dnaseq = DNA.makeSequence(seq)
            ret = self._disambiguate(dnaseq)
            if isinstance(ret, list):
                for dnaseq_r in ret:
                    self.conserved_sequences[str(dnaseq_r)] = ConservedSequence(dnaseq_r, pos)
            else:
                self.conserved_sequences[str(ret)] = ConservedSequence(ret, pos)

        # Iterate over a snapshot: entries are inserted inside the loop, and
        # changing a dict's size while iterating over it raises RuntimeError
        # on Python 3.
        for seq, con_seq in list(self.conserved_sequences.items()):
            rc_seq = DNA.makeSequence(seq)
            # NOTE(review): the result of rc() is discarded. If rc() returns
            # a new sequence instead of modifying rc_seq in place, rc_seq is
            # still the forward sequence below -- confirm.
            rc_seq.rc()
            self.conserved_sequences[str(rc_seq)] = ConservedSequence(
                rc_seq, con_seq.pos, rc=True)
Esempio n. 31
0
def rc_fasta_lines(fasta_lines, seq_desc_mapper=append_rc):
    """Yield (seq_id, seq) with each sequence reverse complemented

    fasta_lines: input passed to parse_fasta
    seq_desc_mapper: function applied to each sequence id before it is
     yielded

    Sequences are upper-cased before being passed to DNA.rc.
    """
    for label, bases in parse_fasta(fasta_lines):
        yield seq_desc_mapper(label), DNA.rc(bases.upper())
Esempio n. 32
0
 def test_stop_indexes(self):
     """should return stop codon indexes for a specified frame"""
     code = GeneticCode(self.SGC)
     seq = DNA.makeSequence('ATGCTAACATAAA')
     # expected indices when starting from offsets 0, 1 and 2
     wanted_by_frame = ([9], [4], [])
     for frame in range(len(wanted_by_frame)):
         observed = code.getStopIndices(seq, start=frame)
         self.assertEqual(observed, wanted_by_frame[frame])
Esempio n. 33
0
def makeSampleSequence():
    """Return a 100 base DNA sequence annotated with three features"""
    sample = DNA.makeSequence('aaaccggttt' * 10)
    # (feature type, feature name, spans)
    features = (
        ('exon', 'exon', [(20, 35)]),
        ('repeat_unit', 'repeat_unit', [(39, 49)]),
        ('repeat_unit', 'rep2', [(49, 60)]),
    )
    for kind, name, spans in features:
        sample.addAnnotation(annotation.Feature, kind, name, spans)
    return sample
Esempio n. 34
0
 def test_stop_indexes(self):
     """should return stop codon indexes for a specified frame"""
     genetic_code = GeneticCode(self.SGC)
     dna = DNA.makeSequence("ATGCTAACATAAA")
     # (start offset, expected stop indices)
     cases = [(0, [9]), (1, [4]), (2, [])]
     for offset, wanted in cases:
         self.assertEqual(genetic_code.getStopIndices(dna, start=offset),
                          wanted)
Esempio n. 35
0
def adjust_alignment(template, candidate, new_gaps):
    """adjust template/candidate aln to remove gaps added by pairwise alignment
    
        This step adjusts the alignment to reduce the length back to the 
         template alignment length by introducing local misalignments to
         remove gap characters that are present in the pairwise alignment
         but not in the template alignment.

        template, candidate: the aligned sequences to adjust
        new_gaps: positions to delete from the template; for each one the
         position returned by nearest_gap is deleted from the candidate.
         The list is not modified.

        Returns the adjusted (template, candidate) as DNA sequences.
    
    """
    template_l = list(template)
    candidate_l = list(candidate)
    # Process the positions last to first, as before, but via reversed() so
    # the caller's list is no longer reversed in place as a side effect.
    for pos in reversed(new_gaps):
        del template_l[pos]
        del candidate_l[nearest_gap(candidate_l, pos)]

    return (DNA.makeSequence(''.join(template_l)), \
            DNA.makeSequence(''.join(candidate_l)))
Esempio n. 36
0
def adjust_alignment(template,candidate,new_gaps):
    """adjust template/candidate aln to remove gaps added by pairwise alignment
    
        This step adjusts the alignment to reduce the length back to the 
         template alignment length by introducing local misalignments to
         remove gap characters that are present in the pairwise alignment
         but not in the template alignment.

        template, candidate: the aligned sequences to adjust
        new_gaps: positions to delete from the template; for each one the
         position returned by nearest_gap is deleted from the candidate.
         The list is not modified.

        Returns the adjusted (template, candidate) as DNA sequences.
    
    """
    template_l = list(template)
    candidate_l = list(candidate)
    # Process the positions last to first, as before, but via reversed() so
    # the caller's list is no longer reversed in place as a side effect.
    for pos in reversed(new_gaps):
        del template_l[pos]
        del candidate_l[nearest_gap(candidate_l,pos)]
        
    return (DNA.makeSequence(''.join(template_l)), \
            DNA.makeSequence(''.join(candidate_l)))
Esempio n. 37
0
def makeSampleSequence():
    """Return the sample DNA sequence with an exon and two repeat units"""
    bases = 'tgccnwsrygagcgtgttaaacaatggccaactctctaccttcctatgttaaacaagtgagatcgcaggcgcgccaaggc'
    annotated = DNA.makeSequence(bases)
    add = annotated.addAnnotation
    add(annotation.Feature, 'exon', 'exon', [(20, 35)])
    # the two repeat units are adjacent: the first ends where 'rep2' starts
    add(annotation.Feature, 'repeat_unit', 'repeat_unit', [(39, 49)])
    add(annotation.Feature, 'repeat_unit', 'rep2', [(49, 60)])
    return annotated
Esempio n. 38
0
def makeSampleSequence(with_gaps=False):
    """Return a DNA sequence annotated with a CDS and a 5' UTR feature

    with_gaps: if True, positions 5-9 and the last two positions of the
     raw sequence are replaced with '-' before the sequence is built
    """
    bases = 'AACCCAAAATTTTTTGGGGGGGGGGCCCC'
    if with_gaps:
        bases = ''.join([bases[:5], '-----', bases[10:-2], '--'])
    sample = DNA.makeSequence(bases)
    # (feature type, feature name, span)
    features = (
        ('CDS', 'CDS', (15, 25)),
        ("5'UTR", "5' UTR", (12, 15)),
    )
    for kind, name, span in features:
        sample.addAnnotation(Feature, kind, name, [span])
    return sample
Esempio n. 39
0
def makeSampleSequence(with_gaps=False):
    """Build the sample DNA sequence carrying CDS and 5' UTR annotations

    with_gaps: if True, positions 5-9 and the last two positions of the
     raw sequence are replaced with '-' before the sequence is built
    """
    cds_span, utr_span = (15, 25), (12, 15)
    template = 'AACCCAAAATTTTTTGGGGGGGGGGCCCC'
    if not with_gaps:
        text = template
    else:
        text = template[:5] + '-' * 5 + template[10:-2] + '-' * 2
    result = DNA.makeSequence(text)
    result.addAnnotation(Feature, 'CDS', 'CDS', [cds_span])
    result.addAnnotation(Feature, "5'UTR", "5' UTR", [utr_span])
    return result
Esempio n. 40
0
    def test_maps_on_maps(self):
        """structure() should report nested annotations before/after slicing"""
        base = DNA.makeSequence("ATCGATCGAT" * 5, Name="base")
        outer = annotate(base, 10, 20, "fake")
        # annotate the annotation itself, then add a second one on the seq
        annotate(outer, 3, 5, "fake2")
        annotate(base, 1, 3, "left")

        nested = ("fake2", "[3:5]/10")
        expected_full = (
            "seq", 50,
            [("fake", "[10:20]/50", [nested]), ("left", "[1:3]/50")])
        expected_tail = ("seq", 45, [("fake", "[5:15]/45", [nested])])

        tail = base[5:]
        self.assertEqual(structure(base), expected_full)
        self.assertEqual(structure(tail), expected_tail)
Esempio n. 41
0
def introduce_terminal_gaps(template, aligned_template, aligned_candidate):
    """ Pad the aligned candidate with the template's missing terminal gaps

    template: the original aligned template
    aligned_template: the template as it appears in the pairwise alignment
    aligned_candidate: the candidate from the pairwise alignment; its str()
        and Name are used to build the result

    Returns a DNA sequence named like aligned_candidate, consisting of the
    candidate flanked by the 5' and 3' gap runs that template has but
    aligned_template lacks.
    """

    def leading_gap_count(symbols):
        """Number of '-' characters before the first non-gap in symbols"""
        count = 0
        for symbol in symbols:
            if symbol != '-':
                break
            count += 1
        return count

    # gaps already present in the pairwise aligned template do not need to
    # be added again, so only the difference is introduced at each end
    missing_five_prime = \
        leading_gap_count(template) - leading_gap_count(aligned_template)
    missing_three_prime = \
        leading_gap_count(reversed(template)) - \
        leading_gap_count(reversed(aligned_template))

    padded = ('-' * missing_five_prime +
              str(aligned_candidate) +
              '-' * missing_three_prime)
    return DNA.makeSequence(padded, Name=aligned_candidate.Name)
Esempio n. 42
0
def introduce_terminal_gaps(template,aligned_template,aligned_candidate):
    """ introduce terminal gaps from template into the aligned candidate seq

    The candidate is extended at each end by the number of terminal gaps
    found in template minus the number already present at that end of
    aligned_template. The result is a DNA sequence that keeps
    aligned_candidate.Name.
    """

    def gap_run(symbols):
        """Length of the run of '-' at the start of symbols"""
        seen = 0
        for seen, symbol in enumerate(symbols, 1):
            if symbol != '-':
                # the current symbol is the first non-gap, so exclude it
                return seen - 1
        return seen

    # 5' end: gaps in the original template vs. those already aligned
    five_prime_original = gap_run(template)
    five_prime_present = gap_run(aligned_template)

    # 3' end: same comparison, scanning from the back
    three_prime_original = gap_run(reversed(template))
    three_prime_present = gap_run(reversed(aligned_template))

    pieces = [
        '-' * (five_prime_original - five_prime_present),
        str(aligned_candidate),
        '-' * (three_prime_original - three_prime_present),
    ]
    return DNA.makeSequence(''.join(pieces), Name=aligned_candidate.Name)
Esempio n. 43
0
def get_reverse_primers(id_map):
    """ Return a dictionary with barcodes and rev-complement of rev primers

    id_map: mapping whose values are dicts containing 'BarcodeSequence' and
        'ReversePrimer' entries.
    """
    primers_by_barcode = {}
    for _sample_id, fields in id_map.items():
        # Reverse complement the primer so it is in the proper orientation
        # with the input fasta sequences, then key it by the barcode
        oriented_primer = DNA.rc(fields['ReversePrimer'])
        primers_by_barcode[fields['BarcodeSequence']] = oriented_primer
    return primers_by_barcode
Esempio n. 44
0
def test(r=1, **kw):
    """Time one global pairwise alignment and report its throughput.

    Arguments:
        r - repeat count applied to each of the two 10-base motifs
        **kw - extra keyword arguments forwarded to classic_align_pairwise

    Returns '*' when the aligner raises ArithmeticError; otherwise
    int(len(seq1) * len(seq2) / elapsed / 1000), where elapsed is the timer
    difference measured around the alignment call.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)

    # time.clock was removed in Python 3.8. Keep using it where it exists so
    # older interpreters behave exactly as before, and fall back to
    # time.perf_counter where it does not.
    timer = getattr(time, 'clock', None)
    if timer is None:
        timer = time.perf_counter

    t0 = timer()
    try:
        # return_alignment is False in order to emphasise the quadratic part of the work.
        classic_align_pairwise(seq1,
                               seq2,
                               S,
                               10,
                               2,
                               local=False,
                               return_alignment=False,
                               **kw)
    except ArithmeticError:
        return '*'
    else:
        t = timer() - t0
        return int((len(seq1) * len(seq2)) / t / 1000)
Esempio n. 45
0
 def _get_sequence(self):
     """Return the sequence for self.Location, cached under 'Seq'.

     If get_sequence raises NoItemError for self.Location, the lookup is
     retried with assembly_exception_coordinate(self.Location); if that
     also raises NoItemError, a run of 'N' of length len(self) is used.
     The sequence's Name is set to str(self.Location) before caching.
     """
     if 'Seq' in self._cached:
         return self._cached['Seq']

     try:
         fetched = get_sequence(self.Location)
     except NoItemError:
         try:
             fetched = get_sequence(
                         assembly_exception_coordinate(self.Location))
         except NoItemError:
             fetched = DNA.makeSequence("N"*len(self))
     fetched.Name = str(self.Location)
     self._cached['Seq'] = fetched
     return self._cached['Seq']
Esempio n. 46
0
    def test_maps_on_maps(self):
        """a feature annotated on another feature is reported nested in it"""
        parent = DNA.makeSequence('ATCGATCGAT' * 5, Name='base')
        fake = annotate(parent, 10, 20, 'fake')
        annotate(fake, 3, 5, 'fake2')
        annotate(parent, 1, 3, 'left')

        sliced = parent[5:]
        # (sequence under test, expected structure)
        cases = (
            (parent,
             ('seq', 50, [('fake', '[10:20]/50', [('fake2', '[3:5]/10')]),
                          ('left', '[1:3]/50')])),
            (sliced,
             ('seq', 45, [('fake', '[5:15]/45', [('fake2', '[3:5]/10')])])),
        )
        for subject, expected in cases:
            self.assertEqual(structure(subject), expected)
Esempio n. 47
0
def makeSampleSequence(mid_gaps=False):
    """Return the annotated sample DNA sequence, optionally with a mid gap.

    mid_gaps: when true, positions 5-9 of the raw string are replaced by
        '-----' and the CDS / 5' UTR spans become (10, 20) / (5, 8) instead
        of (15, 25) / (12, 15).
    """
    bases = 'AACCCAAAATTTTTTGGGGGGGGGGCCCC'
    if mid_gaps:
        bases = bases[:5] + '-----' + bases[10:]
        # annotations only make sense when they're on the raw sequence
        cds_span, utr_span = (10, 20), (5, 8)
    else:
        cds_span, utr_span = (15, 25), (12, 15)

    sample = DNA.makeSequence(bases)
    sample.addAnnotation(Feature, 'CDS', 'CDS', [cds_span])
    sample.addAnnotation(Feature, "5'UTR", "5' UTR", [utr_span])
    return sample
Esempio n. 48
0
 def _make_aln(self, orig, model=dna_model, param_vals=None, 
         indel_rate=0.1, indel_length=0.5, **kw):
     """Return a TreeAlign alignment of the sequences in orig.

     orig: dict of {name: raw sequence string}; each value is converted
         with DNA.makeSequence.
     model, param_vals: passed through to TreeAlign.
     indel_rate, indel_length: added to kw and forwarded to TreeAlign.

     A fixed two-tip tree is used when there are exactly two sequences,
     otherwise a fixed four-tip tree.
     NOTE(review): the tree strings hard-code tip names A, B (and C, D), so
     orig is presumably expected to use those keys - confirm with callers.
     """
     kw['indel_rate'] = indel_rate
     kw['indel_length'] = indel_length
     seqs = dict((key, DNA.makeSequence(value)) 
             for (key, value) in orig.items())
     # The tree previously built from tip_names was overwritten immediately
     # by the tree-string version, so only the latter is constructed.
     if len(seqs) == 2:
         treestring = "(A:.1,B:.1)"
     else:
         treestring = "(((A:.1,B:.1):.1,C:.1):.1,D:.1)"
     tree = cogent.LoadTree(treestring=treestring)
     aln, tree = cogent.align.progressive.TreeAlign(model, seqs,
             tree=tree, param_vals=param_vals, show_progress=False, **kw)
     return aln
Esempio n. 49
0
def remove_template_terminal_gaps(candidate,template):
    """Remove template terminal gaps and corresponding bases in candidate 

    candidate, template: aligned sequences of equal length; candidate must
        provide degap().

    Returns (candidate, template) sliced to the span between the template's
    first and last non-gap positions. The returned candidate is renamed to
    '<name><delimiter><start>..<end>', where the delimiter is ':' if the
    name ends with 'RC' and ' ' otherwise. Zero-length input is returned
    unchanged.

    Raises ValueError if the two sequences differ in length.
    NOTE(review): a template made only of gaps presumably also ends in a
    ValueError from index(False) - confirm what gapVector() returns.
    """
    if len(template) != len(candidate):
        # call-form raise: valid on both Python 2 and Python 3
        raise ValueError(
         "Sequences must be aligned, but their "
         "lengths aren't equal. %d != %d" % (len(candidate),len(template)))
         
    if len(template) == 0:
        return candidate, template
    
    # measured before slicing, so it covers the whole candidate
    degapped_candidate_len = len(candidate.degap())
    
    candidate = DNA.makeSequence(candidate)
    template = DNA.makeSequence(template)
    
    template_gap_vector = template.gapVector()
    first_non_gap = template_gap_vector.index(False)
    num_three_prime_gaps = template_gap_vector[::-1].index(False)
    last_non_gap = len(template_gap_vector) - num_three_prime_gaps
    
    # Construct the candidate name, which will include the range of bases
    # from the original sequence
    candidate = candidate[first_non_gap:last_non_gap]
    template = template[first_non_gap:last_non_gap]
    candidate_start_pos = first_non_gap + 1
    candidate_end_pos = degapped_candidate_len - num_three_prime_gaps
    candidate_name = candidate.Name
    if candidate_name.endswith('RC'):
        name_delimiter = ':'
    else:
        name_delimiter = ' '
    candidate_name = '%s%s%d..%d' %\
     (candidate_name,name_delimiter,candidate_start_pos,candidate_end_pos)
    
    return DNA.makeSequence(candidate,Name=candidate_name), template
Esempio n. 50
0
def remove_template_terminal_gaps(candidate, template):
    """Remove template terminal gaps and corresponding bases in candidate 

    candidate, template: aligned sequences of equal length; candidate must
        provide degap().

    Returns a (candidate, template) pair trimmed to the region between the
    first and last non-gap positions of the template. The trimmed candidate
    gets the name '<name><delimiter><start>..<end>', using ':' as delimiter
    when the name ends with 'RC' and ' ' otherwise. Empty input is returned
    as is.

    Raises ValueError when the sequences have different lengths.
    NOTE(review): an all-gap template presumably also raises ValueError via
    index(False) - confirm what gapVector() returns.
    """
    if len(template) != len(candidate):
        # call-form raise: valid on both Python 2 and Python 3
        raise ValueError(
         "Sequences must be aligned, but their "
         "lengths aren't equal. %d != %d" % (len(candidate),len(template)))

    if len(template) == 0:
        return candidate, template

    # taken from the untrimmed candidate, before any slicing below
    degapped_candidate_len = len(candidate.degap())

    candidate = DNA.makeSequence(candidate)
    template = DNA.makeSequence(template)

    template_gap_vector = template.gapVector()
    first_non_gap = template_gap_vector.index(False)
    num_three_prime_gaps = template_gap_vector[::-1].index(False)
    last_non_gap = len(template_gap_vector) - num_three_prime_gaps

    # Construct the candidate name, which will include the range of bases
    # from the original sequence
    candidate = candidate[first_non_gap:last_non_gap]
    template = template[first_non_gap:last_non_gap]
    candidate_start_pos = first_non_gap + 1
    candidate_end_pos = degapped_candidate_len - num_three_prime_gaps
    candidate_name = candidate.Name
    if candidate_name.endswith('RC'):
        name_delimiter = ':'
    else:
        name_delimiter = ' '
    candidate_name = '%s%s%d..%d' %\
     (candidate_name,name_delimiter,candidate_start_pos,candidate_end_pos)

    return DNA.makeSequence(candidate, Name=candidate_name), template
Esempio n. 51
0
def _assemble_seq(frags, start, end, frag_positions):
    """returns a single sequence in which missing sequence is replaced by 'N'

    Arguments:
        frags - the fragment strings, in order
        start, end - coordinates spanned by the assembled sequence
        frag_positions - (frag_start, frag_end) for each fragment, in the
            same coordinates as start and end

    The gap between start and the first fragment, between consecutive
    fragments, and between the last fragment and end is filled with 'N'.
    With no fragments at all the result is 'N' * (end - start).

    Raises AssertionError if the number of fragments and positions differ,
    if a fragment starts before the previous one ended, or if end precedes
    the last fragment's end.
    """
    assert len(frag_positions) == len(frags), "Mismatched number of "\
                                                    "fragments and positions"
    prev_end = start
    assembled = []
    for index, (frag_start, frag_end) in enumerate(frag_positions):
        diff = frag_start - prev_end
        assert diff >= 0, 'fragment position start < previous end: %s, %s' %\
                                                (frag_start, prev_end)
        assembled += ['N' * diff, frags[index]]
        prev_end = frag_end
    # prev_end equals the last frag_end after the loop, and equals start when
    # there were no fragments (where frag_end would be unbound).
    diff = end - prev_end
    assert diff >= 0, 'end[%s] < previous frag_end[%s]' % (end, prev_end)
    assembled += ['N' * diff]
    return DNA.makeSequence(''.join(assembled))
Esempio n. 52
0
    def test_simulateAlignment_root_sequence(self):
        """provide a root sequence for simulating an alignment"""
        def check_root(given_root):
            """simulate with given_root and compare it to the 'root' seq"""
            aln = LoadSeqs(data={'a': 'ggaatt', 'c': 'cctaat'})
            tree = LoadTree(treestring="(a,c);")
            model = substitution_model.Dinucleotide(mprob_model='tuple')
            controller = model.makeParamController(tree)
            controller.setAlignment(aln)
            simulated = controller.simulateAlignment(
                exclude_internal=False, root_sequence=given_root)
            self.assertEqual(str(simulated.NamedSeqs['root']),
                             str(given_root))

        # first as a sequence instance, then as a plain string
        for given_root in (DNA.makeSequence('GTAATT'), 'GTAATC'):
            check_root(given_root)
Esempio n. 53
0
 def test_maps_on_maps(self):
     """structure() should report nested features, before and after slicing"""
     whole = DNA.makeSequence('ATCGATCGAT' * 5, Name='base')
     on_whole = annotate(whole, 10, 20, 'fake')
     annotate(on_whole, 3, 5, 'fake2')
     annotate(whole, 1, 3, 'left')
     trimmed = whole[5:]

     # building blocks shared by the two expected structures
     inner = ('fake2', '[3:5]/10')
     left = ('left', '[1:3]/50')

     self.assertEqual(
         structure(whole),
         ('seq', 50, [('fake', '[10:20]/50', [inner]), left]))
     self.assertEqual(
         structure(trimmed),
         ('seq', 45, [('fake', '[5:15]/45', [inner])]))
Esempio n. 54
0
def CigarParser(seqs,
                cigars,
                sliced=False,
                ref_seqname=None,
                start=None,
                end=None,
                moltype=DNA):
    """return an alignment from raw sequences and cigar strings
    if sliced, will return an alignment correspondent to ref sequence start to end
    
    Arguments:
        seqs - raw sequences as {seqname: seq}
        cigars - corresponding cigar text as {seqname: cigar_text}
        cigars and seqs should have the same seqnames
        sliced - if True, restrict the alignment to the reference region
        ref_seqname - name of the reference sequence (used when sliced)
        start, end - reference coordinates of the slice (used when sliced)
        moltype - optional, defaults to DNA
    """
    data = {}
    if not sliced:
        for seqname in seqs:
            data[seqname] = aligned_from_cigar(cigars[seqname],
                                               seqs[seqname],
                                               moltype=moltype)
    else:
        ref_aln_seq = aligned_from_cigar(cigars[ref_seqname],
                                         seqs[ref_seqname],
                                         moltype=moltype)
        # by_align=False: start/end are given in reference sequence
        # coordinates; aln_loc is the matching alignment span
        m, aln_loc = slice_cigar(cigars[ref_seqname],
                                 start,
                                 end,
                                 by_align=False)
        aln_start, aln_end = aln_loc[0], aln_loc[1]
        data[ref_seqname] = ref_aln_seq[aln_start:aln_end]
        for seqname in seqs:
            if seqname == ref_seqname:
                continue
            m, seq_loc = slice_cigar(cigars[seqname], aln_start, aln_end)
            if seq_loc:
                seq = seqs[seqname]
                if isinstance(seq, str):
                    seq = moltype.makeSequence(seq)
                data[seqname] = seq[seq_loc[0]:seq_loc[1]].gappedByMap(m)
            else:
                # nothing of this sequence falls in the slice: fill with
                # gaps, using the requested moltype rather than always DNA
                data[seqname] = moltype.makeSequence('-' *
                                                     (aln_end - aln_start))
    aln = LoadSeqs(data=data, aligned=True)
    return aln
Esempio n. 55
0
def get_rev_primer_seqs(mapping_fp):
    """ Parses mapping file to get dictionary of SampleID:Rev primer

    mapping_fp:  mapping filepath

    Returns a dict keyed by the first field of each mapping row (the
    SampleID); each value is a list with DNA.rc() applied to every
    comma-separated entry of that row's 'ReversePrimer' field.

    Raises ValueError if process_id_map reports an error starting with
    "Duplicate SampleID" or "Invalid DNA sequence detected", and KeyError
    if a KeyError occurs while building the primers (e.g. there is no
    'ReversePrimer' column).
    """
    hds, mapping_data, run_description, errors, warnings = \
        process_id_map(mapping_fp, has_barcodes=False,
         disable_primer_check=True)

    if errors:
        for curr_err in errors:
            if curr_err.startswith("Duplicate SampleID"):
                raise ValueError('Errors were found with mapping file, '
                 'please run check_id_map.py to identify problems.')

    # create dict of dicts with SampleID:{each header:mapping data}
    id_map = {}
    for curr_data in mapping_data:
        id_map[curr_data[0]] = dict(
         (header, curr_data[col]) for col, header in enumerate(hds))

    reverse_primers = {}
    for curr_id, sample_fields in id_map.items():
        try:
            reverse_primers[curr_id] = \
             [DNA.rc(curr_rev_primer) for curr_rev_primer in
             sample_fields['ReversePrimer'].split(',')]
        except KeyError:
            raise KeyError("Reverse primer not found in mapping file, "
             "please include a 'ReversePrimer' column.")

    # Check for valid reverse primers
    # NOTE(review): the original comment says these are reported as warnings
    # by the mapping file check, but the code inspects `errors` - confirm.
    for curr_err in errors:
        if curr_err.startswith("Invalid DNA sequence detected"):
            raise ValueError("Problems found with reverse primers, please "
             "check mapping file with check_id_map.py")

    return reverse_primers
Esempio n. 56
0
    def test_other_repeat(self):
        """should apply repeat feature data in a manner consistent with strand"""
        coord = dict(CoordName=13, Start=32890200, End=32890500)
        # the same region, requested on the plus and on the minus strand
        ps_repeat = self.human.getRegion(Strand=1, **coord)
        ms_repeat = self.human.getRegion(Strand=-1, **coord)
        exp = DNA.makeSequence('CTTACTGTGAGGATGGGAACATTTTACAGCTGTGCTG'\
          'TCCAAACCGGTGCCACTAGCCACATTAAGCACTCGAAACGTGGCTAGTGCGACTAGAGAAGAGGA'\
          'TTTTCATACGATTTAGTTTCAATCACGCTAACCAGTGACGCGTGGCTAGTGG')

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12
        self.assertEqual(ms_repeat.Seq, ps_repeat.Seq.rc())

        ps_annot_seq = ps_repeat.getAnnotatedSeq(feature_types='repeat')
        ms_annot_seq = ms_repeat.getAnnotatedSeq(feature_types='repeat')
        ps_seq = ps_annot_seq.getAnnotationsMatching('repeat')[0]
        ms_seq = ms_annot_seq.getAnnotationsMatching('repeat')[0]
        # the repeat slice must be the same whichever strand was requested
        self.assertEqual(ms_seq.getSlice(), ps_seq.getSlice())
        self.assertEqual(ps_seq.getSlice(), exp)
Esempio n. 57
0
 def test_getByAnnotation(self):
     """getByAnnotation should yield the slices of the matching features"""
     base = DNA.makeSequence('ATCGATCGAT' * 5, Name='base')
     for label, span in (('test_label', (5,10)), ('test_label2', (15,18))):
         base.addAnnotation(Feature, 'test_type', label, [span])

     def check(found, expected_texts):
         """compare the number of hits, then each hit's string form"""
         self.assertEqual(len(found), len(expected_texts))
         for position, text in enumerate(expected_texts):
             self.assertEqual(str(found[position]), text)

     # every feature of the type
     check(list(base.getByAnnotation('test_type')), ['TCGAT', 'TCG'])

     # restricted to one feature name
     check(list(base.getByAnnotation('test_type', 'test_label')), ['TCGAT'])

     # test ignoring of a partial annotation
     truncated = base[:17]
     check(list(truncated.getByAnnotation('test_type', ignore_partial=True)),
           ['TCGAT'])
Esempio n. 58
0
    def test_other_repeat(self):
        """should apply repeat feature data in a manner consistent with strand"""
        # 13:32316063 -32316363
        region = dict(CoordName=13, Start=32316063, End=32316363)
        plus = self.human.getRegion(Strand=1, **region)
        minus = self.human.getRegion(Strand=-1, **region)
        # note this MER3 repeat is annotated on the -1 strand
        expected = DNA.makeSequence(
            'AGCTTACTGTGAGGATGGGAACATTTTACAGCTGTGCTGTCCAAA'
            'CCGGTGCCACTAGCCACATTAAGCACTCGAAACGTGGCTAGTGCGACTAGAGAAGAGGAT'
            'TTTCATACGATTTAGTTTCAATCACGCTAACCAGTGACGCGTGGCTAGTGG')

        self.assertEqual(minus.Seq, plus.Seq.rc())

        # annotate both strands first, then take the first repeat of each
        plus_annotated, minus_annotated = [
            strand.getAnnotatedSeq(feature_types='repeat')
            for strand in (plus, minus)]
        plus_repeat, minus_repeat = [
            annotated.getAnnotationsMatching('repeat')[0]
            for annotated in (plus_annotated, minus_annotated)]

        self.assertEqual(minus_repeat.getSlice(), plus_repeat.getSlice())
        self.assertEqual(plus_repeat.getSlice(), expected)