def test_cap_enzyme_filter():
        'It tests the cap enzyme filter'
        all_enzymes = True

        # Each case holds: reference bases, bases of the invariant allele,
        # limits of the snv and the filter result expected for that snv.
        cases = (
            # the snv lies inside the lowercase stretch of the reference
            ('ATGATGATG' + 'gaaattc' + 'ATGATGATGTGGGAT',
             'AA', (10, 11), False),
            # no cap: same flanks, but without the lowercase stretch
            ('ATGATGATG' + 'ATGATGATGTGGGAT',
             'A', (11, 11), True),
        )

        for bases, invariant_bases, (start, end), outcome in cases:
            snv_alleles = {(invariant_bases, INVARIANT): {},
                           ('A', DELETION): {}}
            feature = SeqFeature(type='snv',
                                 location=FeatureLocation(start, end),
                                 qualifiers={'alleles': snv_alleles})
            reference = SeqWithQuality(seq=Seq(bases), name='ref',
                                       features=[feature])

            cap_filter = create_cap_enzyme_filter(all_enzymes)
            cap_filter(reference)

            # the result is stored per snv under the all_enzymes flag
            checked = zip(reference.get_features(kind='snv'), [outcome])
            for snv, expected in checked:
                stored = snv.qualifiers['filters']['cap_enzymes']
                assert stored[all_enzymes] == expected
    def test_kind_filter():
        'It tests the kind filter'
        # The first snv carries a SNP allele, the second one an insertion.
        snp_alleles = {('A', INVARIANT): {}, ('T', SNP): {}}
        snp_qualifiers = {'reference_allele': 'A', 'alleles': snp_alleles}
        snp_feature = SeqFeature(type='snv', location=FeatureLocation(1, 1),
                                 qualifiers=snp_qualifiers)

        ins_alleles = {('A', INVARIANT): {}, ('TC', INSERTION): {}}
        ins_qualifiers = {'reference_allele': 'A', 'alleles': ins_alleles}
        ins_feature = SeqFeature(type='snv', location=FeatureLocation(3, 3),
                                 qualifiers=ins_qualifiers)

        bases = 'AATATA'
        seq = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                             features=[snp_feature, ins_feature])

        kind = SNP
        create_kind_filter(kind)(seq)

        # The results are stored per snv under the kind used by the filter:
        # False is expected for the SNP and True for the insertion.
        outcomes = [False, True]
        for feature, outcome in zip(seq.get_features(kind='snv'), outcomes):
            assert feature.qualifiers['filters']['by_kind'][kind] == outcome
    def test_close_to_seqvar_filter():
        'It tests that we can detect snvs by their proximity to another snv'
        seq_str = 'AATATA'
        proximity = 3

        # First scenario: three snvs sharing the same alleles, filtered using
        # only the proximity. The results are stored under the key
        # (proximity, None, None) and all of them are expected to be True.
        alleles = {('A', SNP): None, ('T', INVARIANT): None}
        snv1 = SeqFeature(type='snv', location=FeatureLocation(1, 1),
                          qualifiers={'alleles': alleles})
        snv2 = SeqFeature(type='snv', location=FeatureLocation(4, 4),
                          qualifiers={'alleles': alleles})
        snv3 = SeqFeature(type='snv', location=FeatureLocation(6, 6),
                          qualifiers={'alleles': alleles})
        seq = SeqWithQuality(seq=Seq(seq_str), qual=[30] * len(seq_str),
                             features=[snv1, snv2, snv3])

        filter_ = create_close_to_snv_filter(proximity)
        filter_(seq)
        key = (proximity, None, None)
        for snv, expected in zip(seq.get_features(kind='snv'),
                                 [True, True, True]):
            result = snv.qualifiers['filters']['close_to_snv'][key]
            assert result == expected

        # Second scenario: three deletions and one insertion, with the filter
        # created with INDEL as its second argument. The results are now
        # stored under (proximity, INDEL, None).
        alleles2 = {('A', DELETION): None, ('AT', INVARIANT): None}
        snv1 = SeqFeature(type='snv', location=FeatureLocation(1, 3),
                          qualifiers={'reference_allele': 'AT',
                                      'alleles': alleles2})
        snv2 = SeqFeature(type='snv', location=FeatureLocation(4, 6),
                          qualifiers={'reference_allele': 'AT',
                                      'alleles': alleles2})
        snv3 = SeqFeature(type='snv', location=FeatureLocation(6, 8),
                          qualifiers={'reference_allele': 'AT',
                                      'alleles': alleles2})
        alleles3 = {('AC', INSERTION): None, ('A', INVARIANT): None}
        snv4 = SeqFeature(type='snv', location=FeatureLocation(9, 10),
                          qualifiers={'reference_allele': 'A',
                                      'alleles': alleles3})
        seq = SeqWithQuality(seq=Seq(seq_str), qual=[30] * len(seq_str),
                             features=[snv1, snv2, snv3, snv4])

        filter_ = create_close_to_snv_filter(proximity, INDEL)
        filter_(seq)
        key = (proximity, INDEL, None)
        for snv, expected in zip(seq.get_features(kind='snv'),
                                 [True, True, True, True]):
            result = snv.qualifiers['filters']['close_to_snv'][key]
            assert result == expected
    def test_major_allele_freq_filter_snv():
        'It tests the major allele frequency filter'
        bases = 'AATATA'
        # 0.6 is the very same float that was once spelled with 17 digits
        freq_threshold = 0.6
        read_groups = {'g1': {}, 'g2': {}}

        # First snv: 4 reads for the invariant allele and 2 for the SNP.
        alleles_4_2 = {('A', INVARIANT): {'read_groups': {'g1': 4}},
                       ('T', SNP): {'read_groups': {'g2': 2}}}
        first = SeqFeature(type='snv', location=FeatureLocation(1, 1),
                           qualifiers={'alleles': alleles_4_2,
                                       'read_groups': read_groups})
        # Second snv: 3 reads for the invariant allele and 2 for the SNP.
        alleles_3_2 = {('A', INVARIANT): {'read_groups': {'g1': 3}},
                       ('T', SNP): {'read_groups': {'g2': 2}}}
        second = SeqFeature(type='snv', location=FeatureLocation(3, 3),
                            qualifiers={'alleles': alleles_3_2,
                                        'read_groups': read_groups})

        seq = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                             features=[first, second])
        create_major_allele_freq_filter(freq_threshold)(seq)

        # Without groups the result key holds just the frequency.
        key = (freq_threshold, )
        for feature, outcome in zip(seq.get_features(kind='snv'),
                                    [True, False]):
            assert feature.qualifiers['filters']['maf'][key] == outcome

        #now we do it only for one read group
        # Both snvs of this second sequence use the 3/2 alleles.
        g1_snvs = [SeqFeature(type='snv', location=FeatureLocation(pos, pos),
                              qualifiers={'alleles': alleles_3_2,
                                          'read_groups': read_groups})
                   for pos in (1, 3)]
        seq = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                             features=g1_snvs)
        g1_filter = create_major_allele_freq_filter(freq_threshold,
                                                    groups=['g1'],
                                                    group_kind='read_groups')
        g1_filter(seq)

        # With groups the key also holds the groups tuple and the group kind.
        key = (freq_threshold, ('g1',), 'read_groups')
        for feature, outcome in zip(seq.get_features(kind='snv'),
                                    [True, True]):
            assert feature.qualifiers['filters']['maf'][key] == outcome
    def test_ref_in_list_filter():
        'It tests the filter that looks for the reference name in a list'
        # Two one-base references, each one with a single snv. Only the name
        # of the first one is given to the filter.
        listed_snv = SeqFeature(type='snv',
                                location=FeatureLocation(100, 100),
                                qualifiers={})
        listed_seq = SeqWithQuality(name='seq1', seq=Seq('A'),
                                    features=[listed_snv])
        unlisted_snv = SeqFeature(type='snv',
                                  location=FeatureLocation(100, 100),
                                  qualifiers={})
        unlisted_seq = SeqWithQuality(name='seq2', seq=Seq('A'),
                                      features=[unlisted_snv])

        in_list_filter = create_reference_in_list_filter(['seq1'])

        # The result is stored under the None key: True is expected for the
        # listed reference and False for the other one.
        checks = ((listed_seq, [True]), (unlisted_seq, [False]))
        for reference, outcomes in checks:
            in_list_filter(reference)
            for feature, outcome in zip(reference.get_features(kind='snv'),
                                        outcomes):
                stored = feature.qualifiers['filters']['ref_not_in_list']
                assert stored[None] == outcome
    def test_high_variable_region_filter():
        'It tests the high variable region filter'
        bases = 'AATATA'
        snvs = [SeqFeature(type='snv', location=FeatureLocation(pos, pos),
                           qualifiers={})
                for pos in (1, 4, 6)]
        seq = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                             features=snvs)

        # Each stage holds: the maximum variability, the extra keyword
        # arguments for the filter factory, how many times the filter is run
        # on the sequence and the results expected for the three snvs.
        stages = (
            (0.4, {}, 1, [True, True, True]),
            (0.6, {}, 2, [False, False, False]),
            (0.25, {'window': 6}, 1, [False, True, False]),
        )
        for max_variability, options, runs, outcomes in stages:
            region_filter = create_high_variable_region_filter(max_variability,
                                                               **options)
            for _ in range(runs):
                region_filter(seq)

            # the results are stored under (max_variability, window), window
            # being None when it was not given to the factory
            threshold = (max_variability, options.get('window'))
            for feature, outcome in zip(seq.get_features(kind='snv'),
                                        outcomes):
                stored = feature.qualifiers['filters']['high_variable_reg']
                assert stored[threshold] == outcome
    def test_close_to_limit_filter():
        'It tests that we can detect the snvs close to the sequence limits'
        bases = 'AATATA'
        distance = 2

        snvs = [SeqFeature(type='snv', location=FeatureLocation(pos, pos),
                           qualifiers={})
                for pos in (1, 4, 6)]
        seq = SeqWithQuality(seq=Seq(bases), qual=[30] * len(bases),
                             features=snvs)

        create_snv_close_to_limit_filter(distance)(seq)

        # The results are stored under the distance used: True is expected
        # for the snvs at 1 and 6 and False for the one at 4.
        outcomes = [True, False, True]
        for feature, outcome in zip(seq.get_features(kind='snv'), outcomes):
            stored = feature.qualifiers['filters']['close_to_limit']
            assert stored[distance] == outcome
    def test_close_to_intron_filter():
        'It tests the filter for the snvs close to an intron'
        distance = 60

        # One intron at 478 followed by four snvs in the feature list.
        features = [SeqFeature(location=FeatureLocation(478, 478),
                               type='intron')]
        for pos in (100, 450, 640, 700):
            features.append(SeqFeature(type='snv',
                                       location=FeatureLocation(pos, pos),
                                       qualifiers={}))
        seq = SeqWithQuality(seq=Seq('A' * 1000), features=features)

        # the filter is run twice on the same sequence
        intron_filter = create_close_to_intron_filter(distance=distance)
        intron_filter(seq)
        intron_filter(seq)

        # Only the snv at 450 is expected to get a True result.
        outcomes = [False, True, False, False]
        for feature, outcome in zip(seq.get_features(kind='snv'), outcomes):
            stored = feature.qualifiers['filters']['close_to_intron']
            assert stored[distance] == outcome
    def test_is_variable_filter():
        'It tests the is_variable filter'
        bases = 'ATGATGATGgaaattcATGATGATGTGGGAT'

        # First reference: a snv with two alleles whose reads come from
        # several read groups.
        alleles = {('A', SNP): {'read_groups': {'rg1': 1, 'rg2': 2, 'rg4': 2}},
                   ('T', INVARIANT): {'read_groups': {'rg1': 1, 'rg3': 2}}}
        two_allele_snv = SeqFeature(type='snv',
                                    location=FeatureLocation(11, 11),
                                    qualifiers={'alleles': alleles,
                                                'read_groups': {}})
        seq = SeqWithQuality(seq=Seq(bases), name='ref',
                             features=[two_allele_snv])

        # Second reference: a snv with a single allele read twice in rg1.
        alleles2 = {('A', SNP): {'read_groups': {'rg1': 2}}}
        one_allele_snv = SeqFeature(type='snv',
                                    location=FeatureLocation(11, 11),
                                    qualifiers={'alleles': alleles2,
                                                'read_groups': {}})
        seq2 = SeqWithQuality(seq=Seq(bases), name='ref2',
                              features=[one_allele_snv])

        # Parameters shared by every case.
        kind = 'read_groups'
        in_all_groups = True
        maf = None
        min_num_reads = None

        # Each case holds: groups, in_union, reference_free,
        # min_reads_per_allele and the result expected for the snv of the
        # first reference.
        cases = (
            (('rg1',), False, True, None, False),
            (('rg1',), True, True, None, False),
            (('rg1',), True, True, 2, True),
            (('rg2', 'rg4'), True, True, None, True),
            (('fake',), True, True, None, True),
            (('rg2', 'rg3'), False, True, None, True),
            (('rg2', 'rg3'), True, True, None, False),
            (('rg5',), True, True, None, True),
            (('rg2',), True, False, None, False),
            (('rg2',), True, True, None, True),
        )

        parameters = []
        filters = []
        results = []
        for groups, in_union, reference_free, min_reads, outcome in cases:
            params = (kind, groups, in_union, in_all_groups, reference_free,
                      maf, min_num_reads, min_reads)
            parameters.append(params)
            filters.append(create_is_variable_filter(*params))
            results.append(outcome)

        # Every filter is run on both references before checking anything.
        for variable_filter in filters:
            variable_filter(seq)
            variable_filter(seq2)

        # The results are stored per snv under the whole parameter tuple.
        for params, outcome in zip(parameters, results):
            for feature, expected in zip(seq.get_features(kind='snv'),
                                         [outcome]):
                stored = feature.qualifiers['filters']['is_variable']
                assert stored[params] == expected

        # For the second reference every result is expected to be true.
        for params in parameters:
            for feature in seq2.get_features(kind='snv'):
                assert feature.qualifiers['filters']['is_variable'][params]