Ejemplo n.º 1
0
    def test_calc_missing_memory(self):
        """Check the per-variation missing counts on in-memory genotypes.

        Both cases call ``calc_missing_gt`` with ``rates=False`` on a freshly
        built ``Variations`` object. The expected values treat ``-1`` alleles
        as missing; in the second case a genotype with a single ``-1`` allele
        out of two is expected to count as 0.5.
        """
        # Case 1: four variations, two samples.
        small_gts = np.array([
            [[0, 0], [0, 0]],
            [[0, 0], [-1, -1]],
            [[0, 0], [-1, -1]],
            [[-1, -1], [-1, -1]],
        ])
        small_vars = Variations()
        small_vars.samples = np.array(
            [str(idx) for idx in range(small_gts.shape[1])])
        small_vars[GT_FIELD] = small_gts

        num_missing = calc_missing_gt(small_vars, rates=False)

        # Written as "called" counts; missing = number of samples - called.
        num_called = np.array([2, 1, 1, 0])
        assert np.all(num_missing == 2 - num_called)

        # Case 2: four variations, five samples, one half-missing genotype.
        big_gts = np.array([
            [[0, 0], [0, 0], [0, 0], [0, 0], [0, -1]],
            [[0, 0], [0, 0], [0, 0], [0, 0], [-1, -1]],
            [[0, 0], [0, 0], [0, 0], [-1, -1], [-1, -1]],
            [[0, 0], [-1, -1], [-1, -1], [-1, -1], [-1, -1]],
        ])
        sample_names = [str(idx) for idx in range(big_gts.shape[1])]
        big_vars = Variations()
        big_vars.samples = np.array(sample_names)
        big_vars[GT_FIELD] = big_gts

        num_missing = calc_missing_gt(big_vars, rates=False)

        expected_missing = np.array([0.5, 1, 2, 4])
        assert np.all(num_missing == expected_missing)
Ejemplo n.º 2
0
    def test_calc_missing(self):
        """Exercise calc_missing_gt and calc_missing_gt_per_sample.

        Covers, in order:
        * missing counts (``rates=False``) for the 'pepo'/'upv196' subset,
        * missing rates (``rates=True``) for the same subset,
        * per-sample computation on the dask variations, which is only
          required to compute without raising,
        * per-sample computation on the non-materialized variations, which
          must raise ``NotMaterializedError``.
        """
        variations = create_non_materialized_snp_filtered_variations()
        variations = keep_samples(variations, samples=['pepo',
                                                       'upv196'])[FLT_VARS]
        task = calc_missing_gt(variations, rates=False)
        result = compute({'num_missing_gts': task})
        self.assertTrue(
            np.array_equal(result['num_missing_gts'], [1, 1, 1, 0, 2, 2, 1]))

        variations = create_non_materialized_snp_filtered_variations()
        variations = keep_samples(variations, samples=['pepo',
                                                       'upv196'])[FLT_VARS]
        task = calc_missing_gt(variations, rates=True)
        result = compute({'num_missing_gts': task})
        expected = [0.5, 0.5, 0.5, 0, 1, 1, 0.5]
        for a, b in zip(result['num_missing_gts'], expected):
            self.assertAlmostEqual(a, b, places=2)

        # Smoke check: only verifies that the rates computation runs.
        variations = create_dask_variations()
        task = calc_missing_gt_per_sample(variations, rates=True)
        compute({'num_missing_gts': task})

        # assertRaises replaces the manual try / self.fail / except dance
        # and reports a clearer failure when nothing is raised.
        variations = create_non_materialized_snp_filtered_variations()
        with self.assertRaises(NotMaterializedError):
            calc_missing_gt_per_sample(variations, rates=True)

        # Smoke check: only verifies that the counts computation runs.
        variations = create_dask_variations()
        task = calc_missing_gt_per_sample(variations, rates=False)
        compute({'num_missing_gts': task})
Ejemplo n.º 3
0
def remove_low_call_rate_vars(variations,
                              min_call_rate,
                              rates=True,
                              filter_id='call_rate',
                              calc_histogram=False,
                              n_bins=DEF_NUM_BINS,
                              limits=None):
    """Keep only the variations whose call rate reaches ``min_call_rate``.

    Args:
        variations: object providing ``gt``, ``get_vars`` and whatever
            ``calc_missing_gt`` reads from it.
        min_call_rate: inclusive threshold. It is compared with
            ``1 - missing rate`` when ``rates`` is true, otherwise with
            (item 1 of the ``gt`` shape) minus the missing count.
        rates: forwarded to ``calc_missing_gt``; selects rate or count mode.
        filter_id: value stored under ``FLT_ID`` in the result.
        calc_histogram: when true, a histogram of the called values is added
            to the statistics.
        n_bins: number of bins passed to ``va.histogram``.
        limits: histogram limits. When ``None`` they default to ``(0, 1)``
            in rate mode and to ``(0, len(variations.num_samples))``
            otherwise. An explicit value is passed through unchanged.

    Returns:
        dict with the filtered variations (``FLT_VARS``), the filter id
        (``FLT_ID``) and the statistics (``FLT_STATS``): kept and
        filtered-out counts plus, optionally, the histogram counts, bin
        edges and ``[min_call_rate]`` under ``HIST_RANGE``.
    """
    num_missing_gts = calc_missing_gt(variations, rates=rates)
    if rates:
        num_called = 1 - num_missing_gts
    else:
        num_called = utils_array.get_shape_item(variations.gt,
                                                1) - num_missing_gts

    selected_vars = num_called >= min_call_rate
    variations = variations.get_vars(selected_vars)

    num_selected_vars = va.count_nonzero(selected_vars)
    num_filtered = va.count_nonzero(va.logical_not(selected_vars))

    flt_stats = {N_KEPT: num_selected_vars, N_FILTERED_OUT: num_filtered}

    if calc_histogram:
        # Only fall back to the defaults when the caller gave no limits;
        # previously the argument was overwritten unconditionally.
        if limits is None:
            # NOTE(review): if ``num_samples`` is an integer this ``len()``
            # raises TypeError in count mode -- confirm against Variations.
            limits = (0, 1) if rates else (0, len(variations.num_samples))
        counts, bin_edges = va.histogram(num_called,
                                         n_bins=n_bins,
                                         limits=limits)
        flt_stats[COUNT] = counts
        flt_stats[BIN_EDGES] = bin_edges
        flt_stats[HIST_RANGE] = [min_call_rate]

    return {FLT_VARS: variations, FLT_ID: filter_id, FLT_STATS: flt_stats}
Ejemplo n.º 4
0
 def test_calc_missing(self):
     """Check missing-genotype counts and rates for two selected samples."""

     def missing_for(rates):
         # Fresh dask variations restricted to the two samples, then the
         # computed 'num_missing_gts' entry for the requested mode.
         subset = keep_samples(_create_dask_variations(),
                               samples=['pepo', 'upv196'])[FLT_VARS]
         pending = calc_missing_gt(subset, rates=rates)
         return compute({'num_missing_gts': pending})['num_missing_gts']

     counts = missing_for(False)
     self.assertTrue(np.array_equal(counts, [1, 1, 1, 0, 2, 2, 1]))

     fractions = missing_for(True)
     wanted = [0.5, 0.5, 0.5, 0, 1, 1, 0.5]
     for observed, target in zip(fractions, wanted):
         self.assertAlmostEqual(observed, target, places=2)
Ejemplo n.º 5
0
def _get_gts_non_missing_in_both(vars1, vars2):
    """Return genotypes at positions with no missing calls in either input.

    The missing rate of each input is computed with ``calc_missing_gt``;
    positions where either rate is non-zero are dropped. From the remaining
    genotypes of each input, index 0 along the second axis is returned
    (presumably the first sample -- confirm against the callers).

    Returns:
        A ``(indi1, indi2)`` tuple, one entry per input.
    """
    missing_rate_a = calc_missing_gt(vars1, rates=True)
    missing_rate_b = calc_missing_gt(vars2, rates=True)

    # A non-zero rate is truthy, so this flags rows missing in either input.
    any_missing = va.logical_or(missing_rate_a, missing_rate_b)
    fully_called = va.logical_not(any_missing)

    first_a = vars1[GT_FIELD][fully_called][:, 0]
    first_b = vars2[GT_FIELD][fully_called][:, 0]
    return first_a, first_b
Ejemplo n.º 6
0
    def test_calc_missing_empty_vars(self):
        """Missing rates of empty dask variations compute to shape (0,)."""
        empty_vars = _create_empty_dask_variations()

        missing_rates = compute(calc_missing_gt(empty_vars, rates=True))

        self.assertEqual(missing_rates.shape, (0, ))