def test_calc_dp_means(self):
    """Per-sample depth means must match for default and chunk_size=None.

    Loads the ``ril.hdf5`` fixture read-only, computes the depth mean of
    every sample twice (once with the default arguments, once with
    ``chunk_size=None``) and checks that both runs yield the same values
    for the 153 expected entries.
    """
    fixture_path = join(TEST_DATA_DIR, 'ril.hdf5')
    variations = VariationsH5(fixture_path, mode='r')

    default_means = calc_depth_mean_by_sample(variations)
    no_chunk_means = calc_depth_mean_by_sample(variations, chunk_size=None)

    n_entries = default_means.shape[0]
    assert n_entries == 153
    assert numpy.allclose(default_means, no_chunk_means)
def test_filter_samples_by_pseudohet(self):
    """Exercise PseudoHetDuplicationFilter2 on all samples and on a subset.

    The filter is run twice over the ``ril.hdf5`` fixture: first with the
    depth means of every sample (also requesting the selection report),
    then restricted to the first 50 samples. Each run must account for
    every variation in its stats, produce histogram edges that straddle
    the (0, 1) range checks below, and keep exactly ``n_kept`` variations.
    """
    fixture_path = join(TEST_DATA_DIR, 'ril.hdf5')
    variations = VariationsH5(fixture_path, mode='r')
    all_dp_means = calc_depth_mean_by_sample(variations)
    high_dp_freq_limit = 0.01

    # Run 1: every sample, with the selection report switched on.
    all_samples_filter = PseudoHetDuplicationFilter2(
        sample_dp_means=all_dp_means,
        max_high_dp_freq=high_dp_freq_limit,
        do_histogram=True,
        report_selection=True,
    )
    result = all_samples_filter(variations)
    stats = result[FLT_STATS]
    assert stats['tot'] == variations.num_variations
    assert min(result[EDGES]) < 1
    assert max(result[EDGES]) > 0
    assert result[FLT_VARS].num_variations == stats['n_kept']
    assert result[SELECTED_VARS].shape

    # Run 2: some samples only (the first 50 and their depth means).
    subset_size = 50
    subset_samples = variations.samples[:subset_size]
    subset_dp_means = all_dp_means[:subset_size]
    subset_filter = PseudoHetDuplicationFilter2(
        sample_dp_means=subset_dp_means,
        max_high_dp_freq=high_dp_freq_limit,
        do_histogram=True,
        samples=subset_samples,
    )
    result = subset_filter(variations)
    stats = result[FLT_STATS]
    assert stats['tot'] == variations.num_variations
    assert min(result[EDGES]) < 1
    assert max(result[EDGES]) > 0
    assert result[FLT_VARS].num_variations == stats['n_kept']