Beispiel #1
0
def test_vcf_variant_hashability():
    """Check that equal VcfVariant objects collapse to one entry in a set.

    Six variants are built, two of which repeat earlier ones, so exactly
    four distinct members must remain after de-duplication.
    """
    allele_specs = (
        (10, 'A', 'TC'),
        (10, 'A', 'TCA'),
        (10, 'C', 'TC'),
        (20, 'A', 'TC'),
        (10, 'A', 'TCA'),  # repeats the second entry
        (20, 'A', 'TC'),  # repeats the fourth entry
    )
    variants = [VcfVariant(*spec) for spec in allele_specs]
    distinct = set(variants)
    assert len(distinct) == 4
Beispiel #2
0
def test_vcf_variant_hashability():
    """Verify that VcfVariant instances can be de-duplicated through a set.

    The input holds six variants; the last two duplicate earlier entries,
    so the resulting set must contain four elements.
    """
    positions_and_alleles = (
        (10, "A", "TC"),
        (10, "A", "TCA"),
        (10, "C", "TC"),
        (20, "A", "TC"),
        (10, "A", "TCA"),  # duplicate of the second entry
        (20, "A", "TC"),  # duplicate of the fourth entry
    )
    all_variants = [VcfVariant(*fields) for fields in positions_and_alleles]
    unique_variants = set(all_variants)
    assert len(unique_variants) == 4
Beispiel #3
0
def test_normalize():
    """Check VcfVariant.normalized() against a table of expected results.

    Each case pairs the constructor arguments of the input variant with
    those of the variant that normalized() is expected to return.
    """
    cases = (
        # Already minimal representations stay unchanged.
        ((100, 'A', 'C'), (100, 'A', 'C')),
        ((100, '', 'A'), (100, '', 'A')),
        ((100, 'A', ''), (100, 'A', '')),
        # Alleles sharing leading and/or trailing bases get trimmed and
        # the position shifts accordingly.
        ((100, 'A', 'AC'), (101, '', 'C')),
        ((100, 'AC', 'A'), (101, 'C', '')),
        ((100, 'ACAGACC', 'ACAGACT'), (106, 'C', 'T')),
        ((100, 'GCTG', 'GCTAAA'), (103, 'G', 'AAA')),
        ((100, 'ATTA', 'ATA'), (101, 'T', '')),
        ((100, 'ATTTC', 'ATTTTTTC'), (101, '', 'TTT')),
        ((100, 'GCTGTT', 'GCTAAATT'), (103, 'G', 'AAA')),
    )
    for raw_fields, expected_fields in cases:
        assert VcfVariant(*raw_fields).normalized() == VcfVariant(
            *expected_fields)
Beispiel #4
0
def test_phasing_to_reads():
    """Check phased_blocks_as_reads() on HP- and PS-phased VCF fixtures.

    Both fixture files are expected to produce the same reads. For each
    file the first table is queried three times: for 'sample1' and
    'sample2' over the table's own variants, and for 'sample2' over a
    hand-built variant list.
    """

    def check_read(read, name, source_id, mapq, entries):
        # Compare the read's metadata, then each entry's position, allele
        # and quality, in that order.
        assert read.name == name
        assert read.source_id == source_id
        assert read.mapqs == (mapq, )
        for index, (position, allele, quality) in enumerate(entries):
            assert read[index].position == position
            assert read[index].allele == allele
            assert read[index].quality == quality

    vcf_paths = (
        'tests/data/phased-via-HP.vcf',
        'tests/data/phased-via-PS.vcf',
    )
    for filename in vcf_paths:
        tables = list(VcfReader(filename, phases=True))
        assert len(tables) == 2
        first_table, _second_table = tables

        # sample1 over all variants of the table: a single phased block.
        sample1_reads = list(
            first_table.phased_blocks_as_reads(
                'sample1', first_table.variants, 17, 18,
                default_quality=90, mapq=101))
        print(sample1_reads)
        assert len(sample1_reads) == 1
        only_read = sample1_reads[0]
        assert len(only_read) == 2
        # Asserted positions are the block's 300/350 coordinates minus one.
        check_read(only_read, 'sample1_block_300', 17, 101,
                   [(300 - 1, 1, 23), (350 - 1, 0, 42)])

        # sample2 over all variants of the table: two phased blocks.
        sample2_reads = list(
            first_table.phased_blocks_as_reads(
                'sample2', first_table.variants, 11, 12,
                default_quality=91, mapq=102))
        print(sample2_reads)
        assert len(sample2_reads) == 2
        lower, upper = sample2_reads
        assert len(lower) == len(upper) == 2
        # Order the two reads by the position of their first entry.
        if lower[0].position > upper[0].position:
            lower, upper = upper, lower
        check_read(lower, 'sample2_block_100', 11, 102,
                   [(100 - 1, 0, 10), (150 - 1, 1, 20)])
        # The last quality equals default_quality (91) -- presumably the
        # fixture carries no quality for that entry; verify against the
        # test data.
        check_read(upper, 'sample2_block_300', 11, 102,
                   [(300 - 1, 0, 30), (350 - 1, 0, 91)])

        # sample2 again, but against a custom variant list; only the block
        # starting at 300 is expected to come back.
        variants = [
            VcfVariant(350 - 1, 'G', 'T'),
            VcfVariant(300 - 1, 'G', 'T'),
            VcfVariant(17, 'A', 'TTC'),
            VcfVariant(1000, 'C', 'G'),
        ]
        restricted_reads = list(
            first_table.phased_blocks_as_reads(
                'sample2', variants, 11, 12,
                default_quality=91, mapq=102))
        print(restricted_reads)
        assert len(restricted_reads) == 1
        restricted_read = restricted_reads[0]
        assert len(restricted_read) == 2
        check_read(restricted_read, 'sample2_block_300', 11, 102,
                   [(300 - 1, 0, 30), (350 - 1, 0, 91)])
Beispiel #5
0
def test_normalize():
    """Run VcfVariant.normalized() over a table of input/expected pairs.

    Every row holds the constructor arguments of the variant to normalize
    and those of the variant the result must compare equal to.
    """
    expectations = [
        # Variants that are already in minimal form are returned as-is.
        ((100, "A", "C"), (100, "A", "C")),
        ((100, "", "A"), (100, "", "A")),
        ((100, "A", ""), (100, "A", "")),
        # Shared leading/trailing bases are removed; position is adjusted.
        ((100, "A", "AC"), (101, "", "C")),
        ((100, "AC", "A"), (101, "C", "")),
        ((100, "ACAGACC", "ACAGACT"), (106, "C", "T")),
        ((100, "GCTG", "GCTAAA"), (103, "G", "AAA")),
        ((100, "ATTA", "ATA"), (101, "T", "")),
        ((100, "ATTTC", "ATTTTTTC"), (101, "", "TTT")),
        ((100, "GCTGTT", "GCTAAATT"), (103, "G", "AAA")),
    ]
    for given, wanted in expectations:
        assert VcfVariant(*given).normalized() == VcfVariant(*wanted)