def test_cap_enzyme_filter():
    'It checks the flag that the cap enzyme filter stores on each snv'
    use_all_enzymes = True

    # Each scenario holds: the reference string, the alleles of its only snv,
    # the (start, end) of that snv and the flag expected under
    # snv.qualifiers['filters']['cap_enzymes'][use_all_enzymes].
    scenarios = (
        ('ATGATGATG' + 'gaaattc' + 'ATGATGATGTGGGAT',
         {('AA', INVARIANT): {}, ('A', DELETION): {}},
         (10, 11),
         False),
        # No cap
        ('ATGATGATG' + 'ATGATGATGTGGGAT',
         {('A', INVARIANT): {}, ('A', DELETION): {}},
         (11, 11),
         True),
    )

    for ref_str, alleles, (start, end), expected_flag in scenarios:
        snv = SeqFeature(type='snv', location=FeatureLocation(start, end),
                         qualifiers={'alleles': alleles})
        reference = SeqWithQuality(seq=Seq(ref_str), name='ref',
                                   features=[snv])

        # A new filter is created for every scenario and run once on it.
        cap_filter = create_cap_enzyme_filter(use_all_enzymes)
        cap_filter(reference)

        annotated = reference.get_features(kind='snv')
        for feature, expected in zip(annotated, [expected_flag]):
            flags = feature.qualifiers['filters']['cap_enzymes']
            assert flags[use_all_enzymes] == expected
def test_kind_filter():
    'It checks the flag that the kind filter stores on each snv'
    wanted_kind = SNP
    ref_str = 'AATATA'

    # One snv whose alternative allele is a SNP and one whose alternative
    # allele is an INSERTION.
    with_snp = SeqFeature(
        type='snv',
        location=FeatureLocation(1, 1),
        qualifiers={'reference_allele': 'A',
                    'alleles': {('A', INVARIANT): {}, ('T', SNP): {}}})
    with_insertion = SeqFeature(
        type='snv',
        location=FeatureLocation(3, 3),
        qualifiers={'reference_allele': 'A',
                    'alleles': {('A', INVARIANT): {}, ('TC', INSERTION): {}}})

    reference = SeqWithQuality(seq=Seq(ref_str), qual=[30] * len(ref_str),
                               features=[with_snp, with_insertion])

    kind_filter = create_kind_filter(wanted_kind)
    kind_filter(reference)

    # The result is read from qualifiers['filters']['by_kind'][kind].
    expected_flags = [False, True]
    annotated = reference.get_features(kind='snv')
    for feature, expected in zip(annotated, expected_flags):
        assert feature.qualifiers['filters']['by_kind'][wanted_kind] == expected
def test_snv_prot_change_annotator():
    'It tests the snv protein change annotator'
    # The sequence needs an orf feature before the annotator is run, so both
    # the query dna and the orf dna are spelled out here.
    query_dna = ''.join([
        'ATGGCTTCATCCATTCTCTCATCCGCCGNTGTGGCCTTTGNCAACAGGGCTTCCCCTGCTCA',
        'AGCTAGCATGGGGGCACCATTCACTGGCCTAAAATCCGCCGCTGCTTTCCCNGTTTTATGTA',
        'CTGTTTTNACTCGCANGACCAACGACATCACCACTTTGGTTAGCAATGGGGGAAGAGTTCAG',
        'GGCNTGAAGGTGTGCCCACCACTTGGATTGAAGAAGTTCGAGACTCTTTCTTACCTTCCTGA',
        'TATGAGTAACGAGCAATTGGGAAAGGAAGTTGACTACCTTCTCAGGAAGGGATGGATTCCCT',
        'GCATTGAATTCGACATTCACAGTGGATTCGTTTACCGTGAGACCCACAGGTCACCAGGATAC',
        'TTCGATGGACGCTACTGGACCATGTGGAAGCTGCCCATGTTTGGCTGCACCGAT',
    ])
    orf_dna = ''.join([
        'ATGGCTTCATCCATTCTCTCATCCGCCGNTGTGGCCTTTGNCAACAGGGCTTCCCTGCTCAA',
        'GCTAGCATGGGGGCACCATTCACTGGCCTAAAATCCGCCGCTGCTTTCCCNGTNACTCGCAN',
        'GACCAACGACATCACCACTTTGGTTAGCAATGGGGGAAGAGTTCAGGGCNTGAAGGTGTGCC',
        'CACCACTTGGATTGAAGAAGTTCGAGACTCTTTCTTACCTTCCTGATATGAGTAACGAGCAA',
        'TTGGGAAAGGAAGTTGACTACCTTCTCAGGAAGGGATGGATTCCCTGCATTGAATTCGACAT',
        'TCACAGTGGATTCGTTTACCGTGAGACCCACAGGTCACCAGGATACTTCGATGGACGCTAC',
        'TGGACCATGTGGAAGCTGCCCATGTTTGGCTGCACCGAT',
    ])

    orf_feature = SeqFeature(
        location=FeatureLocation(0, len(query_dna)),
        type='orf',
        qualifiers={'strand': 'forward', 'dna': Seq(orf_dna), 'prot': 'prot'})

    # (position, alleles) for every snv, in the order they are attached.
    snv_specs = (
        (24, {('A', SNP): None, ('G', INVARIANT): None}),
        (56, {('A', SNP): None, ('T', INVARIANT): None}),
        (399, {('T', SNP): None, ('A', INVARIANT): None}),
        (250, {('T', INVARIANT): None, ('AG', INSERTION): None}),
        (251, {('G', INVARIANT): None, ('--', DELETION): None}),
    )
    snv_features = [SeqFeature(location=FeatureLocation(position, position),
                               type='snv',
                               qualifiers={'alleles': alleles})
                    for position, alleles in snv_specs]

    sequence = SeqWithQuality(seq=Seq(query_dna), name='query')
    sequence.features = [orf_feature] + snv_features

    annotator = create_prot_change_annotator()
    annotator(sequence)

    # The features are read back from the sequence after the annotation; the
    # unpacking also requires that there are still exactly six of them.
    _orf, first, _second, third, fourth, _fifth = sequence.features

    checks = ((first, 'substitution'),
              (third, 'substitution'),
              (fourth, 'breakage'))
    for annotated_snv, expected_kind in checks:
        assert annotated_snv.qualifiers['protein_change']['kind'] == expected_kind
    # NOTE(review): the original carried a disabled check that
    # first.qualifiers['protein_change']['location'] == 'codon_1'.
def test_close_to_seqvar_filter():
    'It tests that we can detect snvs by their proximity to another snv'
    proximity = 3
    seq_str = 'AATATA'

    # First pass: three snvs that share the same alleles dict. The filter is
    # created with only the proximity.
    alleles = {('A', SNP): None, ('T', INVARIANT): None}
    snvs = [SeqFeature(type='snv', location=FeatureLocation(pos, pos),
                       qualifiers={'alleles': alleles})
            for pos in (1, 4, 6)]
    seq = SeqWithQuality(seq=Seq(seq_str), qual=[30] * len(seq_str),
                         features=snvs)

    filter_ = create_close_to_snv_filter(proximity)
    filter_(seq)

    # The result is looked up under a three item key that starts with the
    # proximity; here the other two items are None.
    key = (proximity, None, None)
    for snv, expected in zip(seq.get_features(kind='snv'),
                             [True, True, True]):
        result = snv.qualifiers['filters']['close_to_snv'][key]
        assert result == expected

    # Second pass: three deletions and one insertion. The filter is created
    # with INDEL as its second argument.
    alleles2 = {('A', DELETION): None, ('AT', INVARIANT): None}
    snvs = [SeqFeature(type='snv', location=FeatureLocation(start, end),
                       qualifiers={'reference_allele': 'AT',
                                   'alleles': alleles2})
            for start, end in ((1, 3), (4, 6), (6, 8))]
    alleles3 = {('AC', INSERTION): None, ('A', INVARIANT): None}
    snvs.append(SeqFeature(type='snv', location=FeatureLocation(9, 10),
                           qualifiers={'reference_allele': 'A',
                                       'alleles': alleles3}))
    seq = SeqWithQuality(seq=Seq(seq_str), qual=[30] * len(seq_str),
                         features=snvs)

    filter_ = create_close_to_snv_filter(proximity, INDEL)
    filter_(seq)

    key = (proximity, INDEL, None)
    for snv, expected in zip(seq.get_features(kind='snv'),
                             [True, True, True, True]):
        result = snv.qualifiers['filters']['close_to_snv'][key]
        assert result == expected
def test_major_allele_freq_filter_snv():
    'It tests the major allele frequency filter'
    ref_str = 'AATATA'
    threshold = 0.59999999999999998
    read_groups = {'g1': {}, 'g2': {}}

    def build_snv(position, alleles):
        'It creates an snv at the position that shares the read_groups dict'
        return SeqFeature(type='snv',
                          location=FeatureLocation(position, position),
                          qualifiers={'alleles': alleles,
                                      'read_groups': read_groups})

    def build_seq(features):
        'It creates the reference sequence that holds the given features'
        return SeqWithQuality(seq=Seq(ref_str), qual=[30] * len(ref_str),
                              features=features)

    four_to_two = {('A', INVARIANT): {'read_groups': {'g1': 4}},
                   ('T', SNP): {'read_groups': {'g2': 2}}}
    three_to_two = {('A', INVARIANT): {'read_groups': {'g1': 3}},
                    ('T', SNP): {'read_groups': {'g2': 2}}}

    # The filter created with only the threshold stores its result under
    # the one item key (threshold,) of qualifiers['filters']['maf'].
    reference = build_seq([build_snv(1, four_to_two),
                           build_snv(3, three_to_two)])
    maf_filter = create_major_allele_freq_filter(threshold)
    maf_filter(reference)
    for feature, expected in zip(reference.get_features(kind='snv'),
                                 [True, False]):
        assert feature.qualifiers['filters']['maf'][(threshold, )] == expected

    # now we do it only for one read group; both snvs use the same alleles.
    reference = build_seq([build_snv(1, three_to_two),
                           build_snv(3, three_to_two)])
    maf_filter = create_major_allele_freq_filter(threshold, groups=['g1'],
                                                 group_kind='read_groups')
    maf_filter(reference)
    # The groups are given as a list but the key holds them as a tuple.
    key = (0.59999999999999998, ('g1',), 'read_groups')
    for feature, expected in zip(reference.get_features(kind='snv'),
                                 [True, True]):
        assert feature.qualifiers['filters']['maf'][key] == expected
def test_close_to_limit_filter():
    'It tests that snvs close to the limit of the sequence are flagged'
    max_distance = 2
    ref_str = 'AATATA'

    # Three snvs without qualifiers, at positions 1, 4 and 6.
    snvs = [SeqFeature(type='snv', location=FeatureLocation(pos, pos),
                       qualifiers={})
            for pos in (1, 4, 6)]
    reference = SeqWithQuality(seq=Seq(ref_str), qual=[30] * len(ref_str),
                               features=snvs)

    limit_filter = create_snv_close_to_limit_filter(max_distance)
    limit_filter(reference)

    expected_flags = [True, False, True]
    for feature, expected in zip(reference.get_features(kind='snv'),
                                 expected_flags):
        flags = feature.qualifiers['filters']['close_to_limit']
        assert flags[max_distance] == expected
def test_seqs_string():
    '''It tests that a SeqWithQuality can be created with its quality given
    to the init or set afterwards, and that it can be indexed item by item.'''
    # Quality given to the init.
    with_init_qual = SeqWithQuality(name='seq1', seq=Seq('aaavvttt'),
                                    qual=[2, 4, 1, 4, 5, 6, 12, 34])
    assert with_init_qual

    # Quality assigned once the object exists.
    with_late_qual = SeqWithQuality(name='seq2', seq=Seq('aaavvttt'))
    with_late_qual.qual = [2, 4, 1, 4, 5, 6, 12, 34]
    assert with_late_qual

    # Every valid index should return something truthy. The indexing is
    # explicit because the test exercises the item access of the object.
    for index in range(len(with_init_qual)):
        assert with_init_qual[index]
def test_ref_in_list_filter():
    'It tests the filter that looks for the sequence name in a list'
    def build_seq(name):
        'It creates a one letter sequence with a single snv'
        snv = SeqFeature(type='snv', location=FeatureLocation(100, 100),
                         qualifiers={})
        return SeqWithQuality(name=name, seq=Seq('A'), features=[snv])

    listed = build_seq('seq1')
    unlisted = build_seq('seq2')

    # Only the name of the first sequence is given to the filter.
    list_filter = create_reference_in_list_filter(['seq1'])

    # The result is read from qualifiers['filters']['ref_not_in_list'][None].
    for reference, expected_flag in ((listed, True), (unlisted, False)):
        list_filter(reference)
        for feature, expected in zip(reference.get_features(kind='snv'),
                                     [expected_flag]):
            flags = feature.qualifiers['filters']['ref_not_in_list']
            assert flags[None] == expected
def test_high_variable_region_filter():
    'It tests the high variable region filter'
    ref_str = 'AATATA'
    snvs = [SeqFeature(type='snv', location=FeatureLocation(pos, pos),
                       qualifiers={})
            for pos in (1, 4, 6)]
    reference = SeqWithQuality(seq=Seq(ref_str), qual=[30] * len(ref_str),
                               features=snvs)

    # Every stage runs a new filter on the same sequence:
    # (max_variability, window, times the filter is run, expected flags).
    # The second stage runs its filter twice on purpose.
    stages = (
        (0.4, None, 1, [True, True, True]),
        (0.6, None, 2, [False, False, False]),
        (0.25, 6, 1, [False, True, False]),
    )
    for max_variability, window, runs, expected_flags in stages:
        # The window is only passed to the factory when the stage has one.
        if window is None:
            region_filter = create_high_variable_region_filter(max_variability)
        else:
            region_filter = create_high_variable_region_filter(max_variability,
                                                               window=window)
        for _ in range(runs):
            region_filter(reference)

        # The result is stored under the (max_variability, window) key.
        threshold = (max_variability, window)
        for feature, expected in zip(reference.get_features(kind='snv'),
                                     expected_flags):
            flags = feature.qualifiers['filters']['high_variable_reg']
            assert flags[threshold] == expected
def test_seq_seq():
    '''It tests a SeqWithQuality built on a BioPython Seq object: creation,
    complement and slicing.'''
    bio_seq = Seq('aaaccttt')

    # Quality given to the init.
    with_init_qual = SeqWithQuality(name='seq1', seq=bio_seq,
                                    qual=[2, 4, 1, 4, 5, 6, 12, 34])
    assert with_init_qual

    # Quality assigned once the object exists; the Seq object is shared.
    with_late_qual = SeqWithQuality(name='seq2', seq=bio_seq)
    with_late_qual.qual = [2, 4, 1, 4, 5, 6, 12, 34]
    assert with_late_qual

    # The complement should hold the complementary letters.
    complemented = with_late_qual.complement()
    assert complemented.seq == 'tttggaaa'

    # A slice should cut both the sequence and the quality.
    head = with_late_qual[0:2]
    assert head.seq == 'aa'
    assert with_late_qual[0:2].qual == [2, 4]
def test_close_to_intron_filter():
    'It tests the flag that the close to intron filter stores on each snv'
    max_distance = 60

    intron = SeqFeature(location=FeatureLocation(478, 478), type='intron')
    snvs = [SeqFeature(type='snv', location=FeatureLocation(pos, pos),
                       qualifiers={})
            for pos in (100, 450, 640, 700)]
    reference = SeqWithQuality(seq=Seq('A' * 1000), features=[intron] + snvs)

    intron_filter = create_close_to_intron_filter(distance=max_distance)
    # The filter is run twice on the same sequence before checking.
    intron_filter(reference)
    intron_filter(reference)

    expected_flags = [False, True, False, False]
    for feature, expected in zip(reference.get_features(kind='snv'),
                                 expected_flags):
        flags = feature.qualifiers['filters']['close_to_intron']
        assert flags[60] == expected
def test_remove_annotations():
    'It tests the removal of annotations from a SeqWithQuality'
    record = SeqWithQuality(Seq('actg'), qual=[30, 30, 30, 30])
    record.description = 'Description'
    record.annotations = {'arabidopsis-orthologs': 'justone',
                          'GOs': 'GO12345'}
    record.features = [SeqFeature(location=FeatureLocation(4, 4), type=kind)
                       for kind in ('snv', 'intron', 'orf', 'microsatellite')]

    # Once both kinds are removed no annotation should be left.
    for kind in ('GOs', 'orthologs'):
        record.remove_annotations(kind)
    assert record.annotations == {}

    # Now the features, one kind at a time, and the description.
    for kind in ('snv', 'intron', 'microsatellite', 'orf', 'description'):
        record.remove_annotations(kind)
    assert record.features == []
    assert record.description == UNKNOWN_DESCRIPTION

    # remove snv filters: the filters of the snv should be emptied while
    # its other qualifiers are kept.
    record = SeqWithQuality(Seq('actg'), qual=[30, 30, 30, 30])
    record.description = 'Description'
    snv_qualifiers = {'alleles': 'alleles',
                      'filters': {'not_intron': True, 'puff': False}}
    record.features = [SeqFeature(location=FeatureLocation(4, 4), type='snv',
                                  qualifiers=snv_qualifiers)]
    record.remove_annotations('snv_filters')

    remaining = record.features[0].qualifiers
    assert remaining['filters'] == {}
    assert record.features[0].qualifiers['alleles'] == 'alleles'
def test_upper():
    'It tests the upper method of SeqWithQuality'
    record = SeqWithQuality(Seq('actg'))
    # NOTE(review): the value returned by upper() is discarded and the check
    # below expects the sequence to still read 'actg'. Confirm that this is
    # the intended contract and not a missing check on the returned value.
    record.upper()
    assert record.seq == 'actg'
def test_is_variable_filter():
    'It tests the is variable filter with several parameter combinations'
    # First sequence: one snv with two alleles found in several read groups.
    alleles = {('A', SNP): {'read_groups': {'rg1': 1, 'rg2': 2, 'rg4': 2}},
               ('T', INVARIANT): {'read_groups': {'rg1': 1, 'rg3': 2}}}
    snv = SeqFeature(type='snv', location=FeatureLocation(11, 11),
                     qualifiers={'alleles': alleles, 'read_groups': {}})
    seq = SeqWithQuality(seq=Seq('ATGATGATGgaaattcATGATGATGTGGGAT'),
                         name='ref', features=[snv])

    # Second sequence: one snv with a single allele.
    alleles2 = {('A', SNP): {'read_groups': {'rg1': 2}}}
    snv2 = SeqFeature(type='snv', location=FeatureLocation(11, 11),
                      qualifiers={'alleles': alleles2, 'read_groups': {}})
    seq2 = SeqWithQuality(seq=Seq('ATGATGATGgaaattcATGATGATGTGGGAT'),
                          name='ref2', features=[snv2])

    # These parameters have the same value in every case.
    kind = 'read_groups'
    in_all_groups = True
    maf = None
    min_num_reads = None

    # (groups, in_union, reference_free, min_reads_per_allele, expected flag
    # for the snv of the first sequence)
    cases = (
        (('rg1',), False, True, None, False),
        (('rg1',), True, True, None, False),
        (('rg1',), True, True, 2, True),
        (('rg2', 'rg4'), True, True, None, True),
        (('fake',), True, True, None, True),
        (('rg2', 'rg3'), False, True, None, True),
        (('rg2', 'rg3'), True, True, None, False),
        (('rg5',), True, True, None, True),
        (('rg2',), True, False, None, False),
        (('rg2',), True, True, None, True),
    )

    # The parameter tuple given to the factory is also the key under which
    # the result is looked up afterwards.
    checks = []
    for groups, in_union, reference_free, min_reads_per_allele, flag in cases:
        params = (kind, groups, in_union, in_all_groups, reference_free, maf,
                  min_num_reads, min_reads_per_allele)
        checks.append((params, create_is_variable_filter(*params), flag))

    # All the filters are run on both sequences before any result is read.
    for _, variable_filter, _ in checks:
        variable_filter(seq)
        variable_filter(seq2)

    for params, _, flag in checks:
        for feature, expected in zip(seq.get_features(kind='snv'), [flag]):
            result = feature.qualifiers['filters']['is_variable'][params]
            assert result == expected

    # For the second sequence every result is expected to be truthy.
    for params, _, _ in checks:
        for feature in seq2.get_features(kind='snv'):
            assert feature.qualifiers['filters']['is_variable'][params]
def test_repr():
    'It tests the repr reader: literals, plain seqs, features and snvs'

    def read_back(text):
        'It returns the sequences that the repr reader finds in the text'
        return list(seqs_in_file(StringIO.StringIO(text), format='repr'))

    def assert_same_record(parsed, original):
        'It compares the parsed sequence and its first feature'
        assert parsed.seq == original.seq
        assert parsed.qual == original.qual
        assert parsed.description == original.description
        assert parsed.annotations == original.annotations
        parsed_feat = parsed.features[0]
        original_feat = original.features[0]
        assert parsed_feat.type == original_feat.type
        assert parsed_feat.qualifiers == original_feat.qualifiers
        # The locations are compared by their string form.
        assert str(parsed_feat.location) == str(original_feat.location)

    # The casting of python literals written as text.
    assert u'a (h)' == _cast_to_class("u'a (h)'")
    assert ('adios',) == _cast_to_class("('adios',)")
    assert ['arab1', 'arab2'] == _cast_to_class("['arab1', 'arab2']")
    assert ('arab1', 'arab2') == _cast_to_class("('arab1', 'arab2')")
    result = _cast_to_class("{1: 2}")
    assert {1: 2} == result
    assert {'al': {'c': 1}, 'T': 2} == \
                               _cast_to_class("{'al': {'c': 1}, 'T': 2}")

    # Two seqs without quality, one repr per line.
    seq1 = SeqWithQuality(seq=Seq('ATCT'))
    seq2 = SeqWithQuality(seq=Seq('AAAA'))
    seqs = read_back(repr(seq1) + '\n' + repr(seq2) + '\n')
    assert repr(seqs[0]) == repr(seq1)
    assert repr(seqs[1]) == repr(seq2)

    #with quality
    seq1 = SeqWithQuality(seq=Seq('ATCT'), qual=[10, 2, 3, 4])
    seqs = read_back(repr(seq1) + '\n')
    assert repr(seqs[0]) == repr(seq1)

    #a seq with features
    introns = [SeqFeature(FeatureLocation(ExactPosition(position),
                                          ExactPosition(position)),
                          type='intron', qualifiers={'db': 'tomato'})
               for position in (478, 572)]
    seq1 = SeqWithQuality(seq=Seq('GAAAAGATGTG', SingleLetterAlphabet()),
                          id='seq', name='seq', description='', dbxrefs=[],
                          features=introns, annotations={}, qual=None)
    assert_same_record(read_back(repr(seq1) + '\n')[0], seq1)

    #some more qualifiers
    # The original created a NamedTemporaryFile here that was never used and
    # was immediately replaced by a StringIO; it has been dropped.
    seq1 = SeqWithQuality(id='seqid', name='seqname',
                          description='seqdescription', seq=Seq('ATGAT'))
    seq1.letter_annotations["phred_quality"] = [40, 40, 38, 30, 30]
    seq1.annotations['source'] = 'ara1'
    seqfeature = SeqFeature(location=FeatureLocation(5, 8), type='orthologs',
                            qualifiers={'arabidposis': ['arab1', 'arab2']})
    seq1.features.append(seqfeature)
    assert_same_record(read_back(repr(seq1) + '\n')[0], seq1)

    #with snps
    # This repr is given as text. The adjacent literals are joined by the
    # parser, so the text is the same as when written in one piece.
    repr_ = (
        "SeqWithQuality(seq=Seq('GGGGATTTG', Alphabet()), "
        "features=[SeqFeature(FeatureLocation(ExactPosition(213),"
        "ExactPosition(213)), type='snv', "
        "qualifiers={'alleles': {"
        "('C', 3): {"
        "'read_groups': ['group1+454', 'group1+454', 'group1+454'], "
        "'qualities': [44.0, 44.0, 44.0], "
        "'libraries': ['group1', 'group1', 'group1'], "
        "'read_names': ['seq1', 'seq4', 'seq7'], "
        "'orientations': [True, True, True], "
        "'samples': ['group1+454', 'group1+454', 'group1+454'], "
        "'quality': 66.0, "
        "'mapping_qualities': [149, 149, 149]}, "
        "('T', 0): {"
        "'read_groups': ['group1+454', 'group1+454', 'group1+454', "
        "'group1+454', 'group1+454', 'group1+454'], "
        "'qualities': [44.0, 44.0, 44.0, 44.0, 44.0, 44.0], "
        "'libraries': ['group1', 'group1', 'group1', "
        "'group1', 'group1', 'group1'], "
        "'read_names': ['seq2', 'seq3', 'seq5', 'seq6', 'seq8', 'seq9'], "
        "'orientations': [True, True, True, True, True, True], "
        "'samples': ['group1+454', 'group1+454', 'group1+454', "
        "'group1+454', 'group1+454', 'group1+454'], "
        "'quality': 66.0, "
        "'mapping_qualities': [28, 28, 28, 28, 28, 28]}}, "
        "'reference_allele': 'C'} )])\n"
    )
    alleles = {
        ('C', 3): {
            'read_groups': ['group1+454', 'group1+454', 'group1+454'],
            'qualities': [44.0, 44.0, 44.0],
            'libraries': ['group1', 'group1', 'group1'],
            'read_names': ['seq1', 'seq4', 'seq7'],
            'orientations': [True, True, True],
            'samples': ['group1+454', 'group1+454', 'group1+454'],
            'quality': 66.0,
            'mapping_qualities': [149, 149, 149]},
        ('T', 0): {
            'read_groups': ['group1+454', 'group1+454', 'group1+454',
                            'group1+454', 'group1+454', 'group1+454'],
            'qualities': [44.0, 44.0, 44.0, 44.0, 44.0, 44.0],
            'libraries': ['group1', 'group1', 'group1',
                          'group1', 'group1', 'group1'],
            'read_names': ['seq2', 'seq3', 'seq5', 'seq6', 'seq8', 'seq9'],
            'orientations': [True, True, True, True, True, True],
            'samples': ['group1+454', 'group1+454', 'group1+454',
                        'group1+454', 'group1+454', 'group1+454'],
            'quality': 66.0,
            'mapping_qualities': [28, 28, 28, 28, 28, 28]},
    }
    seq0 = read_back(repr_)[0]
    alleles0 = seq0.features[0].qualifiers['alleles']
    assert alleles == alleles0