def test_cap_enzyme_filter():
    """Check the flag the cap enzyme filter stores for a deletion snv.

    Two references, each annotated with a single snv, are run through
    ``create_cap_enzyme_filter(True)``.  The value stored under
    ``qualifiers['filters']['cap_enzymes'][True]`` is expected to be False
    for the reference that contains the 'gaaattc' stretch and True for the
    reference that lacks it.
    """
    all_enzymes = True

    # Reference that carries the 'gaaattc' stretch around the snv.
    bases_with_site = 'ATGATGATG' + 'gaaattc' + 'ATGATGATGTGGGAT'
    alleles_with_site = {('AA', INVARIANT): {}, ('A', DELETION): {}}
    snv_with_site = SeqFeature(type='snv',
                               location=FeatureLocation(10, 11),
                               qualifiers={'alleles': alleles_with_site})
    record_with_site = SeqWithQuality(seq=Seq(bases_with_site), name='ref',
                                      features=[snv_with_site])
    apply_filter = create_cap_enzyme_filter(all_enzymes)
    apply_filter(record_with_site)
    annotated = record_with_site.get_features(kind='snv')
    for feature, wanted in zip(annotated, [False]):
        flags = feature.qualifiers['filters']['cap_enzymes']
        assert flags[all_enzymes] == wanted

    # Same flanks without the 'gaaattc' stretch: the "no cap" case.
    bases_without_site = 'ATGATGATG' + 'ATGATGATGTGGGAT'
    alleles_without_site = {('A', INVARIANT): {}, ('A', DELETION): {}}
    snv_without_site = SeqFeature(
        type='snv', location=FeatureLocation(11, 11),
        qualifiers={'alleles': alleles_without_site})
    record_without_site = SeqWithQuality(seq=Seq(bases_without_site),
                                         name='ref',
                                         features=[snv_without_site])
    all_enzymes = True
    apply_filter = create_cap_enzyme_filter(all_enzymes)
    apply_filter(record_without_site)
    annotated = record_without_site.get_features(kind='snv')
    for feature, wanted in zip(annotated, [True]):
        flags = feature.qualifiers['filters']['cap_enzymes']
        assert flags[all_enzymes] == wanted
def test_kind_filter():
    """Check the per-snv flag stored by the kind filter.

    A SNP snv and an INSERTION snv are annotated on the same sequence and
    filtered with ``create_kind_filter(SNP)``.  The values stored under
    ``qualifiers['filters']['by_kind'][SNP]`` are expected to be False for
    the SNP and True for the insertion.
    """
    kind = SNP
    bases = 'AATATA'

    snp_alleles = {('A', INVARIANT): {}, ('T', SNP): {}}
    snp_feature = SeqFeature(type='snv', location=FeatureLocation(1, 1),
                             qualifiers={'reference_allele': 'A',
                                         'alleles': snp_alleles})

    insertion_alleles = {('A', INVARIANT): {}, ('TC', INSERTION): {}}
    insertion_feature = SeqFeature(type='snv',
                                   location=FeatureLocation(3, 3),
                                   qualifiers={'reference_allele': 'A',
                                               'alleles': insertion_alleles})

    record = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                            features=[snp_feature, insertion_feature])

    apply_filter = create_kind_filter(kind)
    apply_filter(record)

    wanted_flags = [False, True]
    for feature, wanted in zip(record.get_features(kind='snv'),
                               wanted_flags):
        assert feature.qualifiers['filters']['by_kind'][kind] == wanted
def test_close_to_seqvar_filter():
    """Check that snvs are flagged by their proximity to another snv.

    Two scenarios are covered:

    * three SNP snvs at positions 1, 4 and 6 filtered with
      ``create_close_to_snv_filter(3)``; results are read from the
      ``(3, None, None)`` key and are all expected to be True.
    * three deletions and one insertion filtered with
      ``create_close_to_snv_filter(3, INDEL)``; results are read from the
      ``(3, INDEL, None)`` key and are all expected to be True.

    The original body contained a bare ``snv.qualifiers['filters']
    ['close_to_snv']`` expression whose value was discarded; it repeated a
    lookup already done by the assertions, so it has been removed.
    """
    seq_str = 'AATATA'
    proximity = 3

    # Scenario 1: plain SNPs, filter created with the proximity only.
    alleles = {('A', SNP): None, ('T', INVARIANT): None}
    snv1 = SeqFeature(type='snv', location=FeatureLocation(1, 1),
                      qualifiers={'alleles': alleles})
    snv2 = SeqFeature(type='snv', location=FeatureLocation(4, 4),
                      qualifiers={'alleles': alleles})
    snv3 = SeqFeature(type='snv', location=FeatureLocation(6, 6),
                      qualifiers={'alleles': alleles})
    seq = SeqWithQuality(seq=Seq(seq_str), qual=[30] * len(seq_str),
                         features=[snv1, snv2, snv3])

    filter_ = create_close_to_snv_filter(proximity)
    filter_(seq)

    result_key = (proximity, None, None)
    for snv, expected in zip(seq.get_features(kind='snv'),
                             [True, True, True]):
        result = snv.qualifiers['filters']['close_to_snv'][result_key]
        assert result == expected

    # Scenario 2: indels only, filter restricted to the INDEL kind.
    alleles2 = {('A', DELETION): None, ('AT', INVARIANT): None}
    snv1 = SeqFeature(type='snv', location=FeatureLocation(1, 3),
                      qualifiers={'reference_allele': 'AT',
                                  'alleles': alleles2})
    snv2 = SeqFeature(type='snv', location=FeatureLocation(4, 6),
                      qualifiers={'reference_allele': 'AT',
                                  'alleles': alleles2})
    snv3 = SeqFeature(type='snv', location=FeatureLocation(6, 8),
                      qualifiers={'reference_allele': 'AT',
                                  'alleles': alleles2})
    alleles3 = {('AC', INSERTION): None, ('A', INVARIANT): None}
    snv4 = SeqFeature(type='snv', location=FeatureLocation(9, 10),
                      qualifiers={'reference_allele': 'A',
                                  'alleles': alleles3})
    seq = SeqWithQuality(seq=Seq(seq_str), qual=[30] * len(seq_str),
                         features=[snv1, snv2, snv3, snv4])

    filter_ = create_close_to_snv_filter(proximity, INDEL)
    filter_(seq)

    result_key = (proximity, INDEL, None)
    for snv, expected in zip(seq.get_features(kind='snv'),
                             [True, True, True, True]):
        result = snv.qualifiers['filters']['close_to_snv'][result_key]
        assert result == expected
def test_major_allele_freq_filter_snv():
    """Check the flags stored by the major allele frequency filter.

    First the filter is built from the frequency alone and the results are
    read from the ``(frequency,)`` key: True is expected for the snv with
    4 vs 2 reads and False for the snv with 3 vs 2 reads.

    Then the filter is restricted to read group 'g1' and the results are
    read from the ``(frequency, ('g1',), 'read_groups')`` key: True is
    expected for both snvs.
    """
    read_groups = {'g1': {}, 'g2': {}}
    bases = 'AATATA'

    # --- all read groups together -------------------------------------
    alleles_4_vs_2 = {('A', INVARIANT): {'read_groups': {'g1': 4}},
                      ('T', SNP): {'read_groups': {'g2': 2}}}
    first = SeqFeature(type='snv', location=FeatureLocation(1, 1),
                       qualifiers={'alleles': alleles_4_vs_2,
                                   'read_groups': read_groups})
    alleles_3_vs_2 = {('A', INVARIANT): {'read_groups': {'g1': 3}},
                      ('T', SNP): {'read_groups': {'g2': 2}}}
    second = SeqFeature(type='snv', location=FeatureLocation(3, 3),
                        qualifiers={'alleles': alleles_3_vs_2,
                                    'read_groups': read_groups})
    record = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                            features=[first, second])

    frequency = 0.59999999999999998
    apply_filter = create_major_allele_freq_filter(frequency)
    apply_filter(record)

    result_key = (frequency, )
    for feature, wanted in zip(record.get_features(kind='snv'),
                               [True, False]):
        assert feature.qualifiers['filters']['maf'][result_key] == wanted

    # --- only read group 'g1' ------------------------------------------
    # Both snvs deliberately share the 3-vs-2 allele dict here, exactly as
    # in the first section's second snv.
    first = SeqFeature(type='snv', location=FeatureLocation(1, 1),
                       qualifiers={'alleles': alleles_3_vs_2,
                                   'read_groups': read_groups})
    second = SeqFeature(type='snv', location=FeatureLocation(3, 3),
                        qualifiers={'alleles': alleles_3_vs_2,
                                    'read_groups': read_groups})
    record = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                            features=[first, second])

    frequency = 0.59999999999999998
    apply_filter = create_major_allele_freq_filter(
        frequency, groups=['g1'], group_kind='read_groups')
    apply_filter(record)

    result_key = (0.59999999999999998, ('g1',), 'read_groups')
    for feature, wanted in zip(record.get_features(kind='snv'),
                               [True, True]):
        assert feature.qualifiers['filters']['maf'][result_key] == wanted
def test_ref_in_list_filter():
    """Check the flag stored by the reference-in-list filter.

    The filter is built from the list ``['seq1']`` and applied to two
    sequences named 'seq1' and 'seq2'.  The value stored under
    ``qualifiers['filters']['ref_not_in_list'][None]`` is expected to be
    True for the snv on 'seq1' and False for the snv on 'seq2'.
    """
    listed_snv = SeqFeature(type='snv', location=FeatureLocation(100, 100),
                            qualifiers={})
    listed_record = SeqWithQuality(name='seq1', seq=Seq('A'),
                                   features=[listed_snv])
    unlisted_snv = SeqFeature(type='snv',
                              location=FeatureLocation(100, 100),
                              qualifiers={})
    unlisted_record = SeqWithQuality(name='seq2', seq=Seq('A'),
                                     features=[unlisted_snv])

    apply_filter = create_reference_in_list_filter(['seq1'])

    apply_filter(listed_record)
    for feature, wanted in zip(listed_record.get_features(kind='snv'),
                               [True]):
        flags = feature.qualifiers['filters']['ref_not_in_list']
        assert flags[None] == wanted

    apply_filter(unlisted_record)
    for feature, wanted in zip(unlisted_record.get_features(kind='snv'),
                               [False]):
        flags = feature.qualifiers['filters']['ref_not_in_list']
        assert flags[None] == wanted
def test_high_variable_region_filter():
    """Check the flags stored by the high variable region filter.

    Three snvs (positions 1, 4 and 6) on a six base sequence are filtered
    three times; results are read from
    ``qualifiers['filters']['high_variable_reg'][(max_variability, window)]``:

    * max_variability 0.4, no window  -> True, True, True
    * max_variability 0.6, no window  -> False, False, False
    * max_variability 0.25, window 6  -> False, True, False
    """
    bases = 'AATATA'
    features = [
        SeqFeature(type='snv', location=FeatureLocation(1, 1),
                   qualifiers={}),
        SeqFeature(type='snv', location=FeatureLocation(4, 4),
                   qualifiers={}),
        SeqFeature(type='snv', location=FeatureLocation(6, 6),
                   qualifiers={}),
    ]
    record = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                            features=features)

    # Low threshold, whole sequence.
    max_variability = 0.4
    apply_filter = create_high_variable_region_filter(max_variability)
    apply_filter(record)
    result_key = (max_variability, None)
    for feature, wanted in zip(record.get_features(kind='snv'),
                               [True, True, True]):
        flags = feature.qualifiers['filters']['high_variable_reg']
        assert flags[result_key] == wanted

    # Higher threshold, whole sequence.
    max_variability = 0.6
    apply_filter = create_high_variable_region_filter(max_variability)
    # NOTE(review): the filter is applied twice on purpose-or-by-accident
    # in the original test; presumably it checks that re-running it is
    # harmless -- confirm.
    apply_filter(record)
    apply_filter(record)
    result_key = (max_variability, None)
    for feature, wanted in zip(record.get_features(kind='snv'),
                               [False, False, False]):
        flags = feature.qualifiers['filters']['high_variable_reg']
        assert flags[result_key] == wanted

    # Explicit window.
    max_variability = 0.25
    window = 6
    result_key = (max_variability, window)
    apply_filter = create_high_variable_region_filter(max_variability,
                                                      window=window)
    apply_filter(record)
    for feature, wanted in zip(record.get_features(kind='snv'),
                               [False, True, False]):
        flags = feature.qualifiers['filters']['high_variable_reg']
        assert flags[result_key] == wanted
def test_close_to_limit_filter():
    """Check that snvs close to the sequence limits are flagged.

    Three snvs (positions 1, 4 and 6) on a six base sequence are filtered
    with ``create_snv_close_to_limit_filter(2)``.  The values stored under
    ``qualifiers['filters']['close_to_limit'][2]`` are expected to be
    True, False and True respectively.
    """
    bases = 'AATATA'
    positions = (1, 4, 6)
    features = [SeqFeature(type='snv',
                           location=FeatureLocation(position, position),
                           qualifiers={})
                for position in positions]
    record = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                            features=features)

    distance = 2
    apply_filter = create_snv_close_to_limit_filter(distance)
    apply_filter(record)

    wanted_flags = [True, False, True]
    for feature, wanted in zip(record.get_features(kind='snv'),
                               wanted_flags):
        flags = feature.qualifiers['filters']['close_to_limit']
        assert flags[distance] == wanted
def test_close_to_intron_filter():
    """Check that snvs close to an intron are flagged.

    An intron at position 478 and four snvs (positions 100, 450, 640 and
    700) are annotated on a 1000 base sequence and filtered with
    ``create_close_to_intron_filter(distance=60)``.  The values stored
    under ``qualifiers['filters']['close_to_intron'][60]`` are expected to
    be False, True, False and False respectively.
    """
    distance = 60
    intron = SeqFeature(location=FeatureLocation(478, 478), type='intron')
    snvs = [SeqFeature(type='snv',
                       location=FeatureLocation(position, position),
                       qualifiers={})
            for position in (100, 450, 640, 700)]
    record = SeqWithQuality(seq=Seq('A' * 1000), features=[intron] + snvs)

    apply_filter = create_close_to_intron_filter(distance=distance)
    # NOTE(review): the original test applies the filter twice; presumably
    # to check that a second run does not alter the result -- confirm.
    apply_filter(record)
    apply_filter(record)

    wanted_flags = [False, True, False, False]
    for feature, wanted in zip(record.get_features(kind='snv'),
                               wanted_flags):
        flags = feature.qualifiers['filters']['close_to_intron']
        assert flags[distance] == wanted
def test_is_variable_filter():
    """Check the flags stored by the is-variable filter.

    Ten filters are built with ``create_is_variable_filter`` from the case
    table below and every one of them is applied to two sequences:

    * 'ref', whose snv has a SNP and an INVARIANT allele; the value stored
      under ``qualifiers['filters']['is_variable'][params]`` must equal
      the expectation listed for each case.
    * 'ref2', whose snv has a single SNP allele; the stored value must be
      truthy for every case.
    """
    two_alleles = {
        ('A', SNP): {'read_groups': {'rg1': 1, 'rg2': 2, 'rg4': 2}},
        ('T', INVARIANT): {'read_groups': {'rg1': 1, 'rg3': 2}},
    }
    two_allele_snv = SeqFeature(type='snv',
                                location=FeatureLocation(11, 11),
                                qualifiers={'alleles': two_alleles,
                                            'read_groups': {}})
    record = SeqWithQuality(seq=Seq('ATGATGATGgaaattcATGATGATGTGGGAT'),
                            name='ref', features=[two_allele_snv])

    one_allele = {('A', SNP): {'read_groups': {'rg1': 2}}}
    one_allele_snv = SeqFeature(type='snv',
                                location=FeatureLocation(11, 11),
                                qualifiers={'alleles': one_allele,
                                            'read_groups': {}})
    record2 = SeqWithQuality(seq=Seq('ATGATGATGgaaattcATGATGATGTGGGAT'),
                             name='ref2', features=[one_allele_snv])

    # Settings shared by every case.
    kind = 'read_groups'
    in_all_groups = True
    maf = None
    min_num_reads = None

    # (groups, in_union, reference_free, min_reads_per_allele, expected)
    cases = [
        (('rg1',), False, True, None, False),
        (('rg1',), True, True, None, False),
        (('rg1',), True, True, 2, True),
        (('rg2', 'rg4'), True, True, None, True),
        (('fake',), True, True, None, True),
        (('rg2', 'rg3'), False, True, None, True),
        (('rg2', 'rg3'), True, True, None, False),
        (('rg5',), True, True, None, True),
        (('rg2',), True, False, None, False),
        (('rg2',), True, True, None, True),
    ]

    parameters = []
    results = []
    for groups, in_union, reference_free, min_reads, outcome in cases:
        parameters.append((kind, groups, in_union, in_all_groups,
                           reference_free, maf, min_num_reads, min_reads))
        results.append(outcome)

    filters = [create_is_variable_filter(*params) for params in parameters]

    # Every filter annotates both sequences before anything is checked.
    for apply_filter in filters:
        apply_filter(record)
        apply_filter(record2)

    for params, outcome in zip(parameters, results):
        for feature, wanted in zip(record.get_features(kind='snv'),
                                   [outcome]):
            flags = feature.qualifiers['filters']['is_variable']
            assert flags[params] == wanted

    for params in parameters:
        for feature in record2.get_features(kind='snv'):
            assert feature.qualifiers['filters']['is_variable'][params]