def test_path_with_affine():
    """Check the threading path computed for test instance 1 at ploidy 3.

    The path is read column-wise (one string per column, three columns) and
    compared block by block against the expected cluster ids. Each block is
    compared as a set, so the order of the three columns does not matter.
    """
    readset, var_pos, clustering, genotypes = create_testinstance1()
    ploidy = 3

    # Auxiliary structures required by the threading step.
    index, rev_index = get_position_map(readset)
    num_vars = len(rev_index)
    positions = get_cluster_start_end_positions(readset, clustering, index)
    coverage = get_coverage(readset, clustering, index)
    cov_map = get_pos_to_clusters_map(coverage, ploidy)
    consensus = get_local_cluster_consensus(readset, clustering, cov_map, positions)

    path = compute_threading_path(
        readset, clustering, num_vars, coverage, cov_map, consensus, ploidy, genotypes
    )

    # Transpose the path: one string of cluster ids per column.
    cluster_paths = []
    for column in range(3):
        cluster_paths.append("".join(str(row[column]) for row in path))

    first_block = {threaded[:9] for threaded in cluster_paths}
    second_block = {threaded[9:20] for threaded in cluster_paths}
    third_block = {threaded[20:] for threaded in cluster_paths}

    first_truth = {"000000000", "111111111", "044444444"}
    second_truth = {"33333333333", "22222222222", "44444555555"}
    third_truth = {"66", "77", "55"}

    print(cluster_paths)
    assert first_block == first_truth
    assert second_block == second_truth
    assert third_block == third_truth
def find_inconsistencies(readset, clustering, ploidy):
    """Find cluster positions whose allele consensus is ambiguous.

    For every position and every cluster that has both coverage and a
    consensus entry there, a one-sided binomial test checks whether the
    number of reads deviating from the majority allele exceeds what the
    expected error rate (0.05) explains. A p-value below 0.02 marks the
    cluster as inconsistent at that position.

    Args:
        readset: Indexable collection of reads; each read iterates over
            variants exposing ``position`` and ``allele``.
        clustering: Sequence of clusters, each a collection of read indices
            into ``readset``.
        ploidy: Forwarded to ``get_pos_to_clusters_map``.

    Returns:
        A tuple ``(num_inconsistent_positions, separated_pairs)``. The count
        includes a position once per inconsistent cluster. The list holds
        ``(r0, r1)`` read pairs from the same inconsistent cluster, where
        ``r0`` carries allele 0 and ``r1`` carries any other allele at that
        position; these pairs need to be separated.
    """
    exp_error = 0.05
    p_val_threshold = 0.02

    num_inconsistent_positions = 0
    separated_pairs = []

    # Compute consensus and coverage
    index, rev_index = get_position_map(readset)
    num_vars = len(rev_index)
    coverage = get_coverage(readset, clustering, index)
    cov_map = get_pos_to_clusters_map(coverage, ploidy)
    positions = get_cluster_start_end_positions(readset, clustering, index)
    abs_coverage = get_coverage_absolute(readset, clustering, index)
    consensus = get_local_cluster_consensus_withfrac(readset, clustering, cov_map, positions)

    # Search for positions in clusters with ambivalent consensus
    for pos in range(num_vars):
        for c_id in coverage[pos]:
            if c_id not in consensus[pos]:
                continue

            # Binomial hypothesis test: is the deviation from the majority
            # allele significant enough to justify splitting the cluster?
            abs_count = abs_coverage[pos][c_id]
            # NOTE(review): consensus[pos][c_id][1] is presumably the majority
            # allele fraction -- confirm against the consensus helper.
            # Round instead of truncating: the float product can land just
            # below the true integer (20 * (1 - 0.8) == 3.999999999999999),
            # and truncation would then undercount the deviating reads.
            abs_deviations = int(round(abs_count * (1 - consensus[pos][c_id][1])))
            p_val = binom_test(abs_deviations, abs_count, exp_error, alternative="greater")
            if p_val >= p_val_threshold:
                continue

            num_inconsistent_positions += 1

            # Partition the cluster's reads by their allele at this position.
            zero_reads = []
            one_reads = []
            for read in clustering[c_id]:
                for var in readset[read]:
                    if index[var.position] != pos:
                        continue
                    if var.allele == 0:
                        zero_reads.append(read)
                    else:
                        one_reads.append(read)

            # Every cross pair of disagreeing reads must be separated.
            separated_pairs.extend((r0, r1) for r0 in zero_reads for r1 in one_reads)

    return num_inconsistent_positions, separated_pairs
def draw_plots(
    block_readsets,
    clustering,
    threading,
    haplotypes,
    cut_positions,
    genotype_list_multi,
    phasable_variant_table,
    plot_clusters,
    plot_threading,
    output,
):
    """Render the requested diagnostic plots as PDF files.

    All reads from ``block_readsets`` are merged into one ``ReadSet`` first.
    If ``plot_clusters`` is set, the clustering is drawn to
    ``output + ".clusters.pdf"``. If ``plot_threading`` is set, the threading
    is drawn to ``output + ".threading.pdf"``.
    """
    logger.info("Generating plots ...")

    # Merge the per-block readsets into one readset used by both plots.
    combined_readset = ReadSet()
    for read in (r for block_readset in block_readsets for r in block_readset):
        combined_readset.add(read)

    if plot_clusters:
        cluster_pdf = output + ".clusters.pdf"
        draw_clustering(
            combined_readset,
            clustering,
            phasable_variant_table,
            cluster_pdf,
            genome_space=False,
        )

    if plot_threading:
        # Only the forward position map is needed to compute the coverage.
        index, _ = get_position_map(combined_readset)
        coverage = get_coverage(combined_readset, clustering, index)
        threading_pdf = output + ".threading.pdf"
        draw_threading(
            combined_readset,
            clustering,
            coverage,
            threading,
            cut_positions,
            haplotypes,
            phasable_variant_table,
            genotype_list_multi,
            threading_pdf,
        )
def test_auxiliary_datastructures():
    """Check the position map and both coverage structures on test instance 1."""
    # Position map: index maps a variant position to its rank, and rev_index
    # is the inverse list.
    readset, var_pos, _, _ = create_testinstance1()
    index, rev_index = get_position_map(readset)
    for rank, position in enumerate(var_pos):
        assert index[position] == rank
    assert rev_index == var_pos

    clustering = [
        [0, 4, 6],
        [1, 2],
        [7, 10, 13],
        [9, 12, 14],
        [3, 5, 8, 11],
        [15, 16],
        [17],
        [18],
    ]

    # Relative coverage: expected {cluster id: fraction} per position index.
    expected_rel = [
        {0: 0.5, 1: 0.5},
        {0: 0.25, 1: 0.5, 4: 0.25},
        {0: 1 / 3, 1: 1 / 3, 4: 1 / 3},
        {0: 3 / 7, 1: 2 / 7, 4: 2 / 7},
        {0: 3 / 8, 1: 2 / 8, 4: 3 / 8},
        {0: 3 / 9, 1: 2 / 9, 4: 4 / 9},
        {0: 3 / 9, 1: 2 / 9, 4: 4 / 9},
        {0: 2 / 9, 1: 2 / 9, 2: 1 / 9, 4: 4 / 9},
        {0: 2 / 10, 1: 1 / 10, 2: 2 / 10, 3: 1 / 10, 4: 4 / 10},
        {0: 2 / 11, 1: 1 / 11, 2: 2 / 11, 3: 2 / 11, 4: 4 / 11},
        {0: 1 / 11, 2: 3 / 11, 3: 3 / 11, 4: 4 / 11},
        {0: 1 / 10, 2: 3 / 10, 3: 3 / 10, 4: 3 / 10},
        {2: 3 / 8, 3: 3 / 8, 4: 2 / 8},
        {2: 3 / 7, 3: 3 / 7, 4: 1 / 7},
        {2: 3 / 8, 3: 3 / 8, 5: 2 / 8},
        {2: 3 / 8, 3: 3 / 8, 5: 2 / 8},
        {2: 3 / 10, 3: 3 / 10, 5: 2 / 10, 6: 1 / 10, 7: 1 / 10},
        {2: 2 / 9, 3: 3 / 9, 5: 2 / 9, 6: 1 / 9, 7: 1 / 9},
        {2: 1 / 7, 3: 2 / 7, 5: 2 / 7, 6: 1 / 7, 7: 1 / 7},
        {2: 1 / 6, 3: 1 / 6, 5: 2 / 6, 6: 1 / 6, 7: 1 / 6},
        {5: 2 / 4, 6: 1 / 4, 7: 1 / 4},
        {5: 2 / 4, 6: 1 / 4, 7: 1 / 4},
    ]
    cov = get_coverage(readset, clustering, index)
    for pos, expected in enumerate(expected_rel):
        assert cov[pos] == expected

    # Absolute coverage: expected {cluster id: read count} per position index.
    expected_abs = [
        {0: 1, 1: 1},
        {0: 1, 1: 2, 4: 1},
        {0: 2, 1: 2, 4: 2},
        {0: 3, 1: 2, 4: 2},
        {0: 3, 1: 2, 4: 3},
        {0: 3, 1: 2, 4: 4},
        {0: 3, 1: 2, 4: 4},
        {0: 2, 1: 2, 2: 1, 4: 4},
        {0: 2, 1: 1, 2: 2, 3: 1, 4: 4},
        {0: 2, 1: 1, 2: 2, 3: 2, 4: 4},
        {0: 1, 2: 3, 3: 3, 4: 4},
        {0: 1, 2: 3, 3: 3, 4: 3},
        {2: 3, 3: 3, 4: 2},
        {2: 3, 3: 3, 4: 1},
        {2: 3, 3: 3, 5: 2},
        {2: 3, 3: 3, 5: 2},
        {2: 3, 3: 3, 5: 2, 6: 1, 7: 1},
        {2: 2, 3: 3, 5: 2, 6: 1, 7: 1},
        {2: 1, 3: 2, 5: 2, 6: 1, 7: 1},
        {2: 1, 3: 1, 5: 2, 6: 1, 7: 1},
        {5: 2, 6: 1, 7: 1},
        {5: 2, 6: 1, 7: 1},
    ]
    abs_cov = get_coverage_absolute(readset, clustering, index)
    for pos, expected in enumerate(expected_abs):
        assert abs_cov[pos] == expected