Example #1
0
def test_TRRecord_GetGenotypes():
    """Check the per-sample string and length genotypes of a TRRecord.

    Three scenarios are exercised: a record with two alternate alleles,
    a record built with an empty alternate allele list, and a record whose
    alternate allele list disagrees with its genotypes (ValueError expected).
    """
    # Test good example
    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]
    rec = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, "CAG", "", None)
    print(rec)  # To test str function
    # Expected genotypes, one entry per sample in iteration order.
    true_gts = [[ref_allele, alt_alleles[0]], [alt_alleles[0], alt_alleles[0]],
                [alt_alleles[0], alt_alleles[0]],
                [alt_alleles[0], alt_alleles[1]],
                [alt_alleles[1], alt_alleles[1]], [ref_allele]]
    true_len_gts = [[3, 4], [4, 4], [4, 4], [4, 6], [6, 6], [3]]
    for ind, sample in enumerate(rec.vcfrecord):
        # Compare complete genotypes: an element-wise check bounded by the
        # length of the returned value would pass vacuously if the genotype
        # came back truncated or empty.
        assert list(rec.GetStringGenotype(sample)) == true_gts[ind]
        assert list(rec.GetLengthGenotype(sample)) == true_len_gts[ind]
    # Test example where alt=[]
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    print(rec)  # To test str function
    for sample in rec.vcfrecord:
        stringt = rec.GetStringGenotype(sample)
        lengt = rec.GetLengthGenotype(sample)
        # With no alternate alleles every called allele must be the reference.
        assert all(item == ref_allele for item in stringt)
        assert all(item == 3 for item in lengt)
    # Test example with discrepancy between alt_alleles and genotypes given
    with pytest.raises(ValueError):
        trh.TRRecord(dummy_record1, ref_allele, [], "CAG", "", None)
Example #2
0
def test_GetMaxAllele():
    """Check TRRecord.GetMaxAllele with and without a sample list.

    Also covers a record built with no alternate alleles, a sample list
    containing no sample of the record, and a record described by the
    original test as having a single uncalled sample; the last two are
    expected to yield NaN.
    """
    # Test good example, no samplelist, uselength=True (default)
    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]
    rec = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, "CAG", "", None)
    print(rec)  # To test str function
    true_al_max = 6.0
    al_max = rec.GetMaxAllele()
    assert al_max == true_al_max

    # Test good example with samplelist
    slist = ['S1', 'S3', 'S6']
    true_al_max_slist = 4.0
    al_max_slist = rec.GetMaxAllele(samplelist=slist)
    assert al_max_slist == true_al_max_slist

    # Test example where alt=[]
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    true_al_max = 3.0
    al_max = rec.GetMaxAllele()
    assert al_max == true_al_max

    # Test example with none of the samples in samplelist present in the VCF
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    al_max_slist = rec.GetMaxAllele(samplelist=['NonExistentSample'])
    # NaN never compares equal to itself, so it has to be checked with isnan.
    assert np.isnan(al_max_slist)

    # Test example that has one uncalled sample only
    rec = trh.TRRecord(dummy_record4, ref_allele, [], "CAG", "", None)
    al_max = rec.GetMaxAllele()
    assert np.isnan(al_max)
Example #3
0
def test_TRRecord_full_alleles():
    """Check handling of the ``full_alleles`` argument of TRRecord.

    Three argument combinations must be rejected with ValueError; a final
    valid record is then used to check the unique string genotype set and
    the mapping from each allele index to its representative index.
    """
    full_ref = "TCAGCAGCAGA"
    full_alts = [
        "ACAGCAGCAGCAGC",
        "ACAGCAGCAGCAGCAGCAGG",
        "ACAGCAGCAGCAGG",
        "ACAGCAGCAGG",
        "TCAGCAGG",
    ]
    # The trimmed alleles are the full alleles minus one base at each end.
    ref_allele = full_ref[1:-1]
    alt_alleles = [full_alt[1:-1] for full_alt in full_alts]
    motif = 'FOO'
    rec_id = 'BAR'

    bad_alts = ["CAGCAGCAQQQQQQQQQQQQQQQ", full_alts[1]]
    # Each entry: (ref allele, alt alleles, full_alleles) that must raise.
    invalid_cases = [
        # full alleles given while both trimmed alleles are None
        (None, None, (full_ref, full_alts)),
        # full ref passed as a list holding an unrelated string
        (ref_allele, alt_alleles, (["CAGCAGCAQQQQQQQQQQQQQQQ"], full_alts)),
        # full alts holding an unrelated string and only two entries
        (ref_allele, alt_alleles, (ref_allele, bad_alts)),
    ]
    for bad_ref, bad_alt_list, bad_full in invalid_cases:
        with pytest.raises(ValueError):
            trh.TRRecord(dummy_record1, bad_ref, bad_alt_list, motif, rec_id,
                         None, full_alleles=bad_full)

    record = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, motif,
                          rec_id, None,
                          full_alleles=(ref_allele, alt_alleles))

    # Alt 3 repeats alt 1 and alt 4 repeats the reference, so indices 3
    # and 4 are expected to collapse onto 1 and 0 respectively.
    expected_unique = {0, 1, 2, 5}
    assert record.UniqueStringGenotypes() == expected_unique
    expected_mapping = {0: 0, 1: 1, 2: 2, 3: 1, 4: 0, 5: 5}
    assert record.UniqueStringGenotypeMapping() == expected_mapping
Example #4
0
def test_GetAlleleCounts():
    """Check TRRecord.GetAlleleCounts keyed by length and by allele string.

    Covers the whole record, a restricted sample list, a record built with
    no alternate alleles, a sample list with no matching sample, and a
    record described by the original test as having one uncalled sample.

    Results are compared with plain dict equality so that a missing or
    unexpected key fails as a readable assertion rather than a KeyError
    raised while looking up the expected value.
    """
    # Test good example, no samplelist, uselength=True (default)
    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]
    rec = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, "CAG", "", None)
    print(rec)  # To test str function
    true_al_counts = {ref_allele: 2, alt_alleles[0]: 6, alt_alleles[1]: 3}
    true_len_al_counts = {3: 2, 4: 6, 6: 3}

    al_counts_uselength = rec.GetAlleleCounts()
    al_counts_nolength = rec.GetAlleleCounts(uselength=False)
    assert al_counts_uselength == true_len_al_counts
    assert al_counts_nolength == true_al_counts

    # Test good example with samplelist
    true_al_counts_slist = {ref_allele: 2, alt_alleles[0]: 3}
    true_len_al_counts_slist = {3: 2, 4: 3}
    slist = ['S1', 'S3', 'S6']
    al_counts_uselength_slist = rec.GetAlleleCounts(samplelist=slist)
    al_counts_nolength_slist = rec.GetAlleleCounts(samplelist=slist,
                                                   uselength=False)
    assert al_counts_uselength_slist == true_len_al_counts_slist
    assert al_counts_nolength_slist == true_al_counts_slist

    # Test example where alt=[]
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    true_len_al_counts = {3: 9}
    al_counts_uselength = rec.GetAlleleCounts()
    assert al_counts_uselength == true_len_al_counts

    # Test example with none of the samples in samplelist present in the VCF
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    true_len_al_counts_slist = {}
    al_counts_uselength_slist = rec.GetAlleleCounts(
        samplelist=['NonExistentSample'])
    assert al_counts_uselength_slist == true_len_al_counts_slist

    # Test example that has one uncalled sample only
    rec = trh.TRRecord(dummy_record4, ref_allele, [], "CAG", "", None)
    true_len_al_counts = {}
    al_counts_uselength = rec.GetAlleleCounts()
    assert al_counts_uselength == true_len_al_counts
Example #5
0
def test_TRRecord_allele_lengths():
    """Check building a TRRecord from allele lengths instead of sequences.

    Supplying both an allele sequence and the matching length argument must
    raise ValueError, as must a reference length combined with explicit
    alternate allele sequences. When only lengths are given, the alleles are
    expected to be the motif repeated that many times, with a fractional
    length contributing a partial motif copy.
    """
    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]
    motif = 'FOO'
    rec_id = 'BAR'

    # alt alleles
    # Alt sequences and alt lengths given together are rejected.
    with pytest.raises(ValueError):
        trh.TRRecord(dummy_record1, ref_allele, alt_alleles, motif, rec_id,
                     "some_field", alt_allele_lengths=[4, 6])

    from_alt_lengths = trh.TRRecord(dummy_record1, ref_allele, None, motif,
                                    rec_id, "some_field",
                                    alt_allele_lengths=[4, 5.5])
    # 5.5 repeats of 'FOO' -> five full copies plus the leading 'F'.
    expected_alts = [motif * 4, motif * 5 + "F"]
    assert from_alt_lengths.alt_alleles == expected_alts

    # ref allele
    # A reference length is rejected with or without a reference sequence
    # when alt sequences are supplied.
    for ref_arg in (ref_allele, None):
        with pytest.raises(ValueError):
            trh.TRRecord(dummy_record1, ref_arg, alt_alleles, motif, rec_id,
                         None, ref_allele_length=5)

    from_all_lengths = trh.TRRecord(dummy_record1, None, None, motif, rec_id,
                                    None, ref_allele_length=5.5,
                                    alt_allele_lengths=[4, 5.5])
    expected_ref = motif * 5 + 'F'
    assert from_all_lengths.ref_allele == expected_ref
Example #6
0
def test_TRRecord_unique_lengths():
    """Check that alleles of equal length collapse to one length genotype."""
    ref_allele = "ACGACGACG"
    # Alt 1 has the same length as the reference (9 bp); alts 2 and 3 are
    # both 12 bp long.
    alt_alleles = ["ACGAAGACG", "ACGACGACGACG", "ACGACGACAACG"]
    record = trh.TRRecord(dummy_record2, ref_allele, alt_alleles, "ACG",
                          "ACG-repeat", None)

    unique_lengths = record.UniqueLengthGenotypes()
    assert unique_lengths == {0, 2}

    length_mapping = record.UniqueLengthGenotypeMapping()
    assert length_mapping == {0: 0, 1: 0, 2: 2, 3: 2}
Example #7
0
def test_TRRecord_print():
    """Check the string form of a TRRecord for each construction mode.

    Covers an explicit ID, a missing ID (CHROM:POS is expected in its
    place), full alleles, alternate alleles given as lengths, and both
    reference and alternate alleles given as lengths.
    """
    ref = "ABC"
    alt = ["DEF", "GHI"]
    motif = "foo"
    rec_id = "bar"

    # Explicit ID and allele sequences.
    with_id = trh.TRRecord(dummy_record1, ref, alt, motif, rec_id)
    expected = "{} {} {} {},{}".format(rec_id, motif, ref, alt[0], alt[1])
    assert str(with_id) == expected

    # No ID: the record's chromosome and position are expected instead.
    without_id = trh.TRRecord(dummy_record1, ref, alt, motif, None)
    expected = "{}:{} {} {} {},{}".format(dummy_record1.CHROM,
                                          dummy_record1.POS, motif, ref,
                                          alt[0], alt[1])
    assert str(without_id) == expected

    # Full alleles supplied: they are expected in the output rather than
    # the trimmed "B" / "E" / "H" alleles.
    with_full = trh.TRRecord(dummy_record1, "B", ["E", "H"], motif, rec_id,
                             full_alleles=(ref, alt))
    expected = "{} {} {} {},{}".format(rec_id, motif, ref, alt[0], alt[1])
    assert str(with_full) == expected

    # Alternate alleles given only as repeat counts.
    alt_by_length = trh.TRRecord(dummy_record1, ref, None, motif, rec_id,
                                 alt_allele_lengths=[3, 5.5])
    expected = "{} {} {} n_reps:3,n_reps:5.5".format(rec_id, motif, ref)
    assert str(alt_by_length) == expected

    # Reference and alternate alleles both given only as repeat counts.
    all_by_length = trh.TRRecord(dummy_record1, None, None, motif, rec_id,
                                 ref_allele_length=7,
                                 alt_allele_lengths=[3, 5.5])
    expected = "{} {} n_reps:7 n_reps:3,n_reps:5.5".format(rec_id, motif)
    assert str(all_by_length) == expected
Example #8
0
def test_GetGenotypeCounts():
    """Check TRRecord.GetGenotypeCounts keyed by length and by allele string.

    Genotypes are tuples of alleles (or allele lengths). Covers the whole
    record, a restricted sample list, a record built with no alternate
    alleles, a sample list with no matching sample, and a record described
    by the original test as having one uncalled sample.

    Results are compared with plain dict equality so that a missing or
    unexpected genotype fails as a readable assertion rather than a
    KeyError raised while looking up the expected value.
    """
    # Test good example, no samplelist, uselength=True (default)
    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]
    rec = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, "CAG", "", None)
    print(rec)  # To test str function
    true_gt_counts = {
        (ref_allele, alt_alleles[0]): 1,
        (alt_alleles[0], alt_alleles[0]): 2,
        (alt_alleles[0], alt_alleles[1]): 1,
        (alt_alleles[1], alt_alleles[1]): 1,
        (ref_allele, ): 1
    }
    true_len_gt_counts = {(3, 4): 1, (4, 4): 2, (4, 6): 1, (6, 6): 1, (3, ): 1}

    gt_counts_uselength = rec.GetGenotypeCounts()
    gt_counts_nolength = rec.GetGenotypeCounts(uselength=False)
    assert gt_counts_uselength == true_len_gt_counts
    assert gt_counts_nolength == true_gt_counts

    # Test good example with samplelist
    true_gt_counts_slist = {
        (ref_allele, alt_alleles[0]): 1,
        (alt_alleles[0], alt_alleles[0]): 1,
        (ref_allele, ): 1
    }
    true_len_gt_counts_slist = {(3, 4): 1, (4, 4): 1, (3, ): 1}
    slist = ['S1', 'S3', 'S6']
    gt_counts_uselength_slist = rec.GetGenotypeCounts(samplelist=slist)
    gt_counts_nolength_slist = rec.GetGenotypeCounts(samplelist=slist,
                                                     uselength=False)
    assert gt_counts_uselength_slist == true_len_gt_counts_slist
    assert gt_counts_nolength_slist == true_gt_counts_slist

    # Test example where alt=[]
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    true_len_gt_counts = {(3, 3, 3): 1, (3, 3): 3}
    gt_counts_uselength = rec.GetGenotypeCounts()
    assert gt_counts_uselength == true_len_gt_counts

    # Test example with none of the samples in samplelist present in the VCF
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    true_len_gt_counts_slist = {}
    gt_counts_uselength_slist = \
        rec.GetGenotypeCounts(samplelist=['NonExistentSample'])
    assert gt_counts_uselength_slist == true_len_gt_counts_slist

    # Test example that has one uncalled sample only
    rec = trh.TRRecord(dummy_record4, ref_allele, [], "CAG", "", None)
    true_len_gt_counts = {}
    gt_counts_uselength = rec.GetGenotypeCounts()
    assert gt_counts_uselength == true_len_gt_counts
Example #9
0
def test_TRRecord_iter():
    """Check that iterating a TRRecord yields the dummy samples in order."""
    record = trh.TRRecord(dummy_record1, "ACG", ["A", "C", "G", "T"], "FOO",
                          "BAR", "some_field")
    samples = iter(record)
    # Only the first two yielded items are checked.
    for expected_sample in (dummy_sample1, dummy_sample2):
        assert next(samples) == expected_sample