def test_variant_equal_throws_below_reference():
    """variant_equal must raise AssertionError for a variant placed at the reference 'start'.

    One variant is positioned exactly at the 'start' recorded for the
    reference, the other 10 positions after it.  The AssertionError is
    expected whichever of the two is passed first.
    """
    # next(iter(...)) yields the same first key as ``.keys()[0]`` on Python 2
    # and keeps working on Python 3, where dict views are not subscriptable.
    ref_id = next(iter(chrom_ref['13']))
    start = chrom_ref['13'][ref_id]['start']

    # NOTE(review): presumably a position equal to 'start' already counts as
    # outside the usable window -- confirm against variant_equal.
    at_start = ('13', start, 'A', 'C')
    ten_after_start = ('13', start + 10, 'A', 'C')

    with pytest.raises(AssertionError):
        variant_equal(at_start, ten_after_start, ref_id, seq_provider)
    with pytest.raises(AssertionError):
        variant_equal(ten_after_start, at_start, ref_id, seq_provider)
def test_variant_equal_throws_above_reference():
    """variant_equal must raise AssertionError for a variant placed past the reference.

    One variant is positioned at ``start + reference_length + 2``, the other
    at ``start + 10``.  The AssertionError is expected whichever of the two
    is passed first.
    """
    # next(iter(...)) yields the same first key as ``.keys()[0]`` on Python 2
    # and keeps working on Python 3, where dict views are not subscriptable.
    ref_id = next(iter(chrom_ref['13']))
    start = chrom_ref['13'][ref_id]['start']

    # NOTE(review): presumably start + reference_length marks the end of the
    # usable window, so the first variant lies beyond it -- confirm.
    past_reference = ('13', start + reference_length + 2, 'A', 'C')
    ten_after_start = ('13', start + 10, 'A', 'C')

    with pytest.raises(AssertionError):
        variant_equal(past_reference, ten_after_start, ref_id, seq_provider)
    with pytest.raises(AssertionError):
        variant_equal(ten_after_start, past_reference, ref_id, seq_provider)
def test_variant_equal_commutative(v1, v2, ref_id):
    """variant_equal(x, y) must give the same answer as variant_equal(y, x).

    Both variants are unpacked as 4-tuples whose second item is a position
    and whose third item is a sized reference allele.
    """
    _, first_pos, first_ref, _ = v1
    _, second_pos, second_ref, _ = v2

    # Keep only examples whose reference allele ends within reference_length
    # (assume is presumably Hypothesis' example filter -- confirm in imports).
    assume(first_pos + len(first_ref) <= reference_length)
    assume(second_pos + len(second_ref) <= reference_length)

    first = add_start(v1, ref_id)
    second = add_start(v2, ref_id)

    forward = variant_equal(first, second, ref_id, seq_provider)
    backward = variant_equal(second, first, ref_id, seq_provider)
    assert forward == backward
def test_variant_equal_commutative(v1, v2, ref_id):
    """Swapping the two variants must not change the result of variant_equal.

    Each variant is a 4-tuple; only its position (second item) and reference
    allele (third item) are inspected before the comparison.
    """
    (_, pos_a, ref_a, _), (_, pos_b, ref_b, _) = v1, v2

    # Examples whose reference allele would run past reference_length are
    # rejected (assume is presumably Hypothesis' filter -- confirm).
    assume(pos_a + len(ref_a) <= reference_length)
    assume(pos_b + len(ref_b) <= reference_length)

    a = add_start(v1, ref_id)
    b = add_start(v2, ref_id)
    assert (variant_equal(a, b, ref_id, seq_provider)
            == variant_equal(b, a, ref_id, seq_provider))
def test_variant_equal_not_equiv(v1, v2, ref_id):
    """variant_equal must agree with a comparison of the variants' equiv_set results.

    Each incoming variant is a ``(chrom, pos, reflen, alt)`` tuple.  After
    inject_ref, the two variants are expected to compare equal exactly when
    their equiv_set results share at least one element.
    """
    chrom_a, pos_a, reflen_a, alt_a = v1
    chrom_b, pos_b, reflen_b, alt_b = v2

    assume(pos_a + reflen_a <= reference_length)
    assume(pos_b + reflen_b <= reference_length)
    # NOTE(review): the original spelled this ``not alt == '' and reflen == 0``,
    # which parses as ``(alt != '') and (reflen == 0)``: only variants with a
    # non-empty alt and a zero-length ref are kept.  If the intent was
    # ``not (alt == '' and reflen == 0)``, this filter is stricter than meant.
    assume(alt_a != '' and reflen_a == 0)
    assume(alt_b != '' and reflen_b == 0)

    sequence_a = chrom_ref[chrom_a][ref_id]["sequence"]
    sequence_b = chrom_ref[chrom_b][ref_id]["sequence"]
    v1 = inject_ref(sequence_a, v1)
    v2 = inject_ref(sequence_b, v2)

    equivalents_a = equiv_set(sequence_a, v1)
    equivalents_b = equiv_set(sequence_b, v2)
    shared = equivalents_a.intersection(equivalents_b)

    outcome = variant_equal(add_start(v1, ref_id), add_start(v2, ref_id),
                            ref_id, seq_provider)
    if len(shared) != 0:
        assert outcome
    else:
        # Nothing in common between the two sets: must compare as not equal.
        assert not outcome
def test_variant_equal_equiv(v, ref_id):
    """Every in-bounds result of all_norm_equiv must compare equal to the variant.

    The incoming variant is a ``(chrom, pos, reflen, alt)`` tuple; inject_ref
    is applied to it before the candidates are produced.
    """
    chrom, pos, reflen, _ = v
    sequence = chrom_ref[chrom][ref_id]["sequence"]

    # Reject examples whose reference span would run past the sequence
    # (assume is presumably Hypothesis' filter -- confirm).
    assume(pos + reflen <= len(sequence))

    v = inject_ref(sequence, v)
    for candidate in all_norm_equiv(sequence, v):
        if not is_in_bounds(candidate):
            continue
        assert variant_equal(add_start(v, ref_id),
                             add_start(candidate, ref_id),
                             ref_id, seq_provider)
def test_variant_equal_identity(v, ref_id):
    """variant_equal must report a variant as equal to itself.

    The variant is a 4-tuple whose second item is a position and whose third
    item is a sized reference allele.
    """
    _, position, ref_allele, _ = v

    # Keep only examples whose reference allele ends within reference_length
    # (assume is presumably Hypothesis' filter -- confirm).
    assume(position + len(ref_allele) <= reference_length)

    candidate = add_start(v, ref_id)
    assert variant_equal(candidate, candidate, ref_id, seq_provider)
assert frozenset(example_variants) == frozenset( find_equivalent_variants_whole_seq(variant_dict, whole_seq_provider))
# NOTE(review): the assert above is the tail of a definition whose header lies
# outside this excerpt; it is carried over unchanged.


def test_chunking():
    """generate_chunks must return one margin-padded interval per variant and merge overlaps.

    Covers three cases: no variants, a single variant, and two variants on
    the same chromosome whose padded intervals overlap.
    """
    def chunker(variants, margin):
        # Thin wrapper to keep the assertions below short.
        return seq_utils.ChunkBasedSeqProvider.generate_chunks(variants, margin)

    margin = 2

    assert chunker([], margin) == []

    some_chr = 7

    # Expected interval: (chrom, pos - margin, pos + len(ref) + margin);
    # the reference allele 'AGAGT' is 5 long.
    v1 = VCFVariant(some_chr, 10, 'AGAGT', 'G')
    assert chunker([v1], margin) == [(some_chr, 10 - margin, 10 + 5 + margin)]

    # merging intervals of v1 and v2 together
    v2 = VCFVariant(some_chr, 11, 'GAGT', 'G')
    merged = (
        some_chr,
        min(10 - margin, 11 - margin),
        max(10 + 5 + margin, 11 + 4 + margin),
    )
    assert chunker([v1, v2], margin) == [merged]


if __name__ == "__main__":
    # To reproduce failure conditions, paste them in here and run as
    # python ./test_variant_merging.py
    # print(variant_equal(v1 = ('17', 41100001, 'gcttccca', ''), v2 = ('17', 41100002, 'cttcccag', ''), version = 'hg38'))
    #
    # print(...) with a single argument behaves identically on Python 2 and
    # is also valid on Python 3, where the print statement is a SyntaxError.
    # NOTE(review): the tests in this file pass four arguments
    # (v1, v2, ref_id, seq_provider) to variant_equal; confirm this
    # three-argument call still matches its signature.
    print(variant_equal(('13', 32800003, '', 'A'), ('13', 32800005, '', 'A'), 'hg19'))
# empty case assert [] == find_equivalent_variant({}, seq_provider) # a bunch of variants. If they appear in the same set, they are considered equivalent example_variants = [ frozenset({'chr13:g.32355030:A>AA'}), frozenset({'chr13:g.32339774:GAT>G', 'chr13:g.32339776:TAT>T'}), frozenset({'chr17:g.43090921:G>GCA', 'chr17:g.43090921:GCA>GCACA'}) ] # construct variant dict (flattening example_variants!) variant_dict = {v: [None, None, None, v.split(':')[0].lstrip('chr'), v.split(':')[1].lstrip('g.'), v.split(':')[2].split('>')[0], v.split(':')[2].split('>')[1], ] for eq_variants in example_variants for v in eq_variants } assert frozenset(example_variants) == frozenset(find_equivalent_variant(variant_dict, seq_provider))
# NOTE(review): the line above is the collapsed tail of a definition whose
# header lies outside this excerpt; it is carried over unchanged.


if __name__ == "__main__":
    # To reproduce failure conditions, paste them in here and run as
    # python ./test_variant_merging.py
    # print(variant_equal(v1 = ('17', 41100001, 'gcttccca', ''), v2 = ('17', 41100002, 'cttcccag', ''), version = 'hg38'))
    #
    # print(...) with a single argument behaves identically on Python 2 and
    # is also valid on Python 3, where the print statement is a SyntaxError.
    # NOTE(review): confirm this three-argument call still matches the
    # current signature of variant_equal.
    print(variant_equal(('13', 32800003, '', 'A'), ('13', 32800005, '', 'A'), 'hg19'))