def test_TRRecord_GetGenotypes():
    """Check the per-sample string and length genotypes of a TRRecord.

    Exercises a record built with two alternate alleles, a record built
    with no alternate alleles, and the ValueError expected when the
    alternate alleles supplied do not match the record's genotypes.
    """
    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]
    short_alt, long_alt = alt_alleles

    # Test good example
    rec = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, "CAG", "",
                       None)
    print(rec)  # To test str function
    expected_strings = [
        [ref_allele, short_alt],
        [short_alt, short_alt],
        [short_alt, short_alt],
        [short_alt, long_alt],
        [long_alt, long_alt],
        [ref_allele],
    ]
    expected_lengths = [[3, 4], [4, 4], [4, 4], [4, 6], [6, 6], [3]]
    # NOTE(review): each comparison walks the positions of the *returned*
    # genotype, so a genotype shorter than expected would still pass.
    for idx, sample in enumerate(rec.vcfrecord):
        string_gt = rec.GetStringGenotype(sample)
        length_gt = rec.GetLengthGenotype(sample)
        assert all([allele == expected_strings[idx][pos]
                    for pos, allele in enumerate(string_gt)])
        assert all([length == expected_lengths[idx][pos]
                    for pos, length in enumerate(length_gt)])

    # Test example where alt=[]
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    print(rec)  # To test str function
    for sample in rec.vcfrecord:
        string_gt = rec.GetStringGenotype(sample)
        length_gt = rec.GetLengthGenotype(sample)
        assert all([allele == ref_allele for allele in string_gt])
        assert all([length == 3 for length in length_gt])

    # Test example with discrepancy between alt_alleles and genotypes given
    with pytest.raises(ValueError):
        trh.TRRecord(dummy_record1, ref_allele, [], "CAG", "", None)
def test_GetMaxAllele():
    """Check TRRecord.GetMaxAllele with and without a sample list.

    Expected values: 6.0 over all samples of dummy_record1, 4.0 when
    restricted to samples S1/S3/S6, 3.0 for a record built with no
    alternate alleles, and NaN when there is nothing to take a maximum
    over (unknown sample name, or dummy_record4).
    """
    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]

    # Test good example, no samplelist
    rec = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, "CAG", "",
                       None)
    print(rec)  # To test str function
    true_al_max = 6.0
    al_max = rec.GetMaxAllele()
    assert al_max == true_al_max

    # Test good example with samplelist
    slist = ['S1', 'S3', 'S6']
    true_al_max_slist = 4.0
    al_max_slist = rec.GetMaxAllele(samplelist=slist)
    assert al_max_slist == true_al_max_slist

    # Test example where alt=[]
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    true_al_max = 3.0
    al_max = rec.GetMaxAllele()
    assert al_max == true_al_max

    # Test example with none of the samples in samplelist present in the VCF
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    al_max_slist = rec.GetMaxAllele(samplelist=['NonExistentSample'])
    # NaN never compares equal to itself, so test with isnan rather than ==.
    assert np.isnan(al_max_slist)

    # Test example that has one uncalled sample only
    rec = trh.TRRecord(dummy_record4, ref_allele, [], "CAG", "", None)
    al_max = rec.GetMaxAllele()
    assert np.isnan(al_max)
def test_TRRecord_full_alleles():
    """Check TRRecord's handling of the ``full_alleles`` argument.

    Three constructions are expected to raise ValueError; a final valid
    construction is then checked for its unique string genotypes and the
    mapping of each allele index onto its unique representative.
    """
    full_ref = "TCAGCAGCAGA"
    full_alts = [
        "ACAGCAGCAGCAGC",
        "ACAGCAGCAGCAGCAGCAGG",
        "ACAGCAGCAGCAGG",
        "ACAGCAGCAGG",
        "TCAGCAGG",
    ]
    # Strip one flanking character from each end of every full allele.
    ref_allele = full_ref[1:-1]
    alt_alleles = [full_alt[1:-1] for full_alt in full_alts]
    motif = 'FOO'
    ID = 'BAR'

    # full_alleles given while the ref/alt alleles are None
    with pytest.raises(ValueError):
        trh.TRRecord(dummy_record1, None, None, motif, ID, None,
                     full_alleles=(full_ref, full_alts))

    # full ref that does not match the ref allele
    with pytest.raises(ValueError):
        trh.TRRecord(dummy_record1, ref_allele, alt_alleles, motif, ID, None,
                     full_alleles=(["CAGCAGCAQQQQQQQQQQQQQQQ"], full_alts))

    # full alts that do not match the alt alleles
    with pytest.raises(ValueError):
        bad_alts = ["CAGCAGCAQQQQQQQQQQQQQQQ", full_alts[1]]
        trh.TRRecord(dummy_record1, ref_allele, alt_alleles, motif, ID, None,
                     full_alleles=(ref_allele, bad_alts))

    # NOTE(review): the trimmed alleles are passed as full_alleles here,
    # not (full_ref, full_alts) -- confirm this is intended.
    record = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, motif, ID,
                          None, full_alleles=(ref_allele, alt_alleles))
    unique_genotypes = record.UniqueStringGenotypes()
    assert unique_genotypes == {0, 1, 2, 5}
    genotype_mapping = record.UniqueStringGenotypeMapping()
    assert genotype_mapping == {
        0: 0,
        1: 1,
        2: 2,
        3: 1,
        4: 0,
        5: 5
    }
def test_GetAlleleCounts():
    """Check TRRecord.GetAlleleCounts keyed by length and by sequence.

    Covers the default (uselength) and uselength=False forms, restriction
    to a sample list, a record built with no alternate alleles, a sample
    list naming no sample in the record, and dummy_record4.
    """

    def _same_counts(observed, expected):
        # Every observed entry must carry the expected value, and both
        # mappings must hold the same number of entries.
        return (all(count == expected[key]
                    for key, count in observed.items())
                and len(observed) == len(expected))

    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]

    # Test good example, no samplelist, uselength=True (default)
    rec = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, "CAG", "",
                       None)
    print(rec)  # To test str function
    expected_by_seq = {ref_allele: 2, alt_alleles[0]: 6, alt_alleles[1]: 3}
    expected_by_len = {3: 2, 4: 6, 6: 3}
    counts_by_len = rec.GetAlleleCounts()
    counts_by_seq = rec.GetAlleleCounts(uselength=False)
    assert _same_counts(counts_by_len, expected_by_len)
    assert _same_counts(counts_by_seq, expected_by_seq)

    # Test good example with samplelist
    expected_by_seq = {ref_allele: 2, alt_alleles[0]: 3}
    expected_by_len = {3: 2, 4: 3}
    slist = ['S1', 'S3', 'S6']
    counts_by_len = rec.GetAlleleCounts(samplelist=slist)
    counts_by_seq = rec.GetAlleleCounts(samplelist=slist, uselength=False)
    assert _same_counts(counts_by_len, expected_by_len)
    assert _same_counts(counts_by_seq, expected_by_seq)

    # Test example where alt=[]
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    expected_by_len = {3: 9}
    counts_by_len = rec.GetAlleleCounts()
    assert _same_counts(counts_by_len, expected_by_len)

    # Test example with none of the samples in samplelist present in the VCF
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    expected_by_len = {}
    counts_by_len = rec.GetAlleleCounts(samplelist=['NonExistentSample'])
    assert _same_counts(counts_by_len, expected_by_len)

    # Test example that has one uncalled sample only
    rec = trh.TRRecord(dummy_record4, ref_allele, [], "CAG", "", None)
    expected_by_len = {}
    counts_by_len = rec.GetAlleleCounts()
    assert _same_counts(counts_by_len, expected_by_len)
def test_TRRecord_allele_lengths():
    """Check TRRecord construction from allele lengths instead of sequences.

    Supplying a length alongside the matching explicit allele(s) is expected
    to raise ValueError. When only lengths are given, the resulting alleles
    are expected to be the motif repeated that many times, with a fractional
    length contributing a leading piece of the motif.
    """
    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]
    motif = 'FOO'
    record_id = 'BAR'

    # alt alleles
    with pytest.raises(ValueError):
        trh.TRRecord(dummy_record1, ref_allele, alt_alleles, motif,
                     record_id, "some_field", alt_allele_lengths=[4, 6])

    rec = trh.TRRecord(dummy_record1, ref_allele, None, motif, record_id,
                       "some_field", alt_allele_lengths=[4, 5.5])
    expected_alts = [motif * 4, motif * 5 + "F"]
    assert rec.alt_alleles == expected_alts

    # ref allele
    with pytest.raises(ValueError):
        trh.TRRecord(dummy_record1, ref_allele, alt_alleles, motif,
                     record_id, None, ref_allele_length=5)

    with pytest.raises(ValueError):
        trh.TRRecord(dummy_record1, None, alt_alleles, motif, record_id,
                     None, ref_allele_length=5)

    rec = trh.TRRecord(dummy_record1, None, None, motif, record_id, None,
                       ref_allele_length=5.5, alt_allele_lengths=[4, 5.5])
    expected_ref = motif * 5 + 'F'
    assert rec.ref_allele == expected_ref
def test_TRRecord_unique_lengths():
    """Check that alleles of equal length collapse to one length genotype.

    The reference and first alt are both 9 characters long, and the second
    and third alts are both 12, so indices 1 and 3 are expected to map onto
    indices 0 and 2 respectively.
    """
    ref = "ACGACGACG"
    alts = ["ACGAAGACG", "ACGACGACGACG", "ACGACGACAACG"]
    rec = trh.TRRecord(dummy_record2, ref, alts, "ACG", "ACG-repeat", None)

    unique_lengths = rec.UniqueLengthGenotypes()
    assert unique_lengths == {0, 2}

    length_mapping = rec.UniqueLengthGenotypeMapping()
    assert length_mapping == {0: 0, 1: 0, 2: 2, 3: 2}
def test_TRRecord_print():
    """Check str(TRRecord) for each way of specifying the alleles.

    Expected layout is "<ID> <motif> <ref> <alt>,<alt>". When the ID is
    None the first field is "<CHROM>:<POS>" instead, full alleles are shown
    when supplied, and alleles given only by length are shown as
    "n_reps:<length>".
    """
    ref = "ABC"
    alt = ["DEF", "GHI"]
    motif = "foo"
    record_id = "bar"

    # Explicit ID and explicit alleles
    rec = trh.TRRecord(dummy_record1, ref, alt, motif, record_id)
    expected = "{} {} {} {},{}".format(record_id, motif, ref, alt[0], alt[1])
    assert str(rec) == expected

    # No ID: the record's CHROM and POS are used instead
    rec = trh.TRRecord(dummy_record1, ref, alt, motif, None)
    expected = "{}:{} {} {} {},{}".format(dummy_record1.CHROM,
                                          dummy_record1.POS, motif, ref,
                                          alt[0], alt[1])
    assert str(rec) == expected

    # Full alleles are printed in place of the trimmed ones
    rec = trh.TRRecord(dummy_record1, "B", ["E", "H"], motif, record_id,
                       full_alleles=(ref, alt))
    expected = "{} {} {} {},{}".format(record_id, motif, ref, alt[0], alt[1])
    assert str(rec) == expected

    # Alt alleles specified by length only
    rec = trh.TRRecord(dummy_record1, ref, None, motif, record_id,
                       alt_allele_lengths=[3, 5.5])
    expected = "{} {} {} n_reps:3,n_reps:5.5".format(record_id, motif, ref)
    assert str(rec) == expected

    # Both ref and alt alleles specified by length only
    rec = trh.TRRecord(dummy_record1, None, None, motif, record_id,
                       ref_allele_length=7, alt_allele_lengths=[3, 5.5])
    expected = "{} {} n_reps:7 n_reps:3,n_reps:5.5".format(record_id, motif)
    assert str(rec) == expected
def test_GetGenotypeCounts():
    """Check TRRecord.GetGenotypeCounts keyed by length and by sequence.

    Genotypes are keyed by tuples of alleles. Covers the default
    (uselength) and uselength=False forms, restriction to a sample list, a
    record built with no alternate alleles, a sample list naming no sample
    in the record, and dummy_record4.
    """

    def _same_counts(observed, expected):
        # Every observed entry must carry the expected value, and both
        # mappings must hold the same number of entries.
        return (all(count == expected[key]
                    for key, count in observed.items())
                and len(observed) == len(expected))

    ref_allele = "CAGCAGCAG"
    alt_alleles = ["CAGCAGCAGCAG", "CAGCAGCAGCAGCAGCAG"]

    # Test good example, no samplelist, uselength=True (default)
    rec = trh.TRRecord(dummy_record1, ref_allele, alt_alleles, "CAG", "",
                       None)
    print(rec)  # To test str function
    expected_by_seq = {
        (ref_allele, alt_alleles[0]): 1,
        (alt_alleles[0], alt_alleles[0]): 2,
        (alt_alleles[0], alt_alleles[1]): 1,
        (alt_alleles[1], alt_alleles[1]): 1,
        (ref_allele, ): 1
    }
    expected_by_len = {(3, 4): 1, (4, 4): 2, (4, 6): 1, (6, 6): 1, (3, ): 1}
    counts_by_len = rec.GetGenotypeCounts()
    counts_by_seq = rec.GetGenotypeCounts(uselength=False)
    assert _same_counts(counts_by_len, expected_by_len)
    assert _same_counts(counts_by_seq, expected_by_seq)

    # Test good example with samplelist
    expected_by_seq = {
        (ref_allele, alt_alleles[0]): 1,
        (alt_alleles[0], alt_alleles[0]): 1,
        (ref_allele, ): 1
    }
    expected_by_len = {(3, 4): 1, (4, 4): 1, (3, ): 1}
    slist = ['S1', 'S3', 'S6']
    counts_by_len = rec.GetGenotypeCounts(samplelist=slist)
    counts_by_seq = rec.GetGenotypeCounts(samplelist=slist, uselength=False)
    assert _same_counts(counts_by_len, expected_by_len)
    assert _same_counts(counts_by_seq, expected_by_seq)

    # Test example where alt=[]
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    expected_by_len = {(3, 3, 3): 1, (3, 3): 3}
    counts_by_len = rec.GetGenotypeCounts()
    assert _same_counts(counts_by_len, expected_by_len)

    # Test example with none of the samples in samplelist present in the VCF
    rec = trh.TRRecord(dummy_record3, ref_allele, [], "CAG", "", None)
    expected_by_len = {}
    counts_by_len = rec.GetGenotypeCounts(samplelist=['NonExistentSample'])
    assert _same_counts(counts_by_len, expected_by_len)

    # Test example that has one uncalled sample only
    rec = trh.TRRecord(dummy_record4, ref_allele, [], "CAG", "", None)
    expected_by_len = {}
    counts_by_len = rec.GetGenotypeCounts()
    assert _same_counts(counts_by_len, expected_by_len)
def test_TRRecord_iter():
    """Check that iterating a TRRecord yields the record's samples in order.

    The first two items produced are expected to equal dummy_sample1 and
    dummy_sample2.
    """
    alts = ["A", "C", "G", "T"]
    rec = trh.TRRecord(dummy_record1, "ACG", alts, "FOO", "BAR",
                       "some_field")
    samples = iter(rec)

    first = next(samples)
    assert first == dummy_sample1

    second = next(samples)
    assert second == dummy_sample2