def test_interval_restriction_pval_with_lower_bigger_than_upper_limit_raises_error(
            self):
        """Constructing an IntervalRestrictionPval with lower > upper must fail.

        The lower limit (1.2e-2) is larger than the upper limit (1.3e-10),
        and the constructor is expected to raise ValueError.
        """
        mantissas, exponents = get_mantissa_and_exp_arrays_from_pvals(
            pvals_same_exp_diff_mantissa)
        lower_limit, upper_limit = 1.2e-2, 1.3e-10

        with pytest.raises(ValueError):
            IntervalRestrictionPval(
                lower_limit, upper_limit,
                Dataset(mantissas), Dataset(exponents))
    def test_get_interval_pval_restriction(self):
        """A float interval plus [mantissa, exponent] datasets gives an
        IntervalRestrictionPval from get_restriction."""
        mantissas = Dataset([1, 12, 13])
        exponents = Dataset([-1, -2, -3])
        pval_interval = FloatInterval().set_string_tuple("1e-3:1e-2")

        result = get_restriction(pval_interval, [mantissas, exponents])

        assert isinstance(result, IntervalRestrictionPval)
Example #3
0
 def get_dset(self, dset_name, start, size):
     """
     Fetch a window of a named dataset from ``self.group``.

     :param dset_name: name of the dataset to look up in the group
     :param start: offset of the first element to retrieve
     :param size: maximum number of elements to retrieve
     :return: Dataset wrapping the elements from ``start`` up to
     ``start + size`` (clipped to the dataset length); an empty Dataset when
     the name is not found or ``start`` is past the end of the dataset
     """
     stored = self.group.get(dset_name)
     if stored is None:
         return Dataset([])

     total = stored.shape[0]
     if start <= total:
         # never read past the last stored element
         stop = min(total, start + size)
         return Dataset(stored[start:stop])
     return Dataset([])
    def setup_method(self, method):
        """Populate ``self.loader_dictionary`` with one Dataset per name.

        The p-value array is first split into mantissa and exponent arrays;
        every raw array is then wrapped in a Dataset.

        :param method: the test method about to run (unused here)
        """
        mantissas, exponents = get_mantissa_and_exp_arrays_from_pvals(
            pvalsarray)

        raw_arrays = {
            SNP_DSET: snpsarray,
            MANTISSA_DSET: mantissas,
            EXP_DSET: exponents,
            CHR_DSET: chrarray,
            STUDY_DSET: studyarray,
            OR_DSET: orarray,
            BP_DSET: bparray,
            EFFECT_DSET: effectarray,
            OTHER_DSET: otherarray,
            FREQ_DSET: frequencyarray,
        }
        self.loader_dictionary = {
            name: Dataset(values) for name, values in raw_arrays.items()
        }
    def test_various_pvals_2(self):
        """Restricting ``pvals`` with IntervalRestrictionPval(4e-9, 5e-3)
        is expected to leave three entries in each dataset."""
        mantissas, exponents = get_mantissa_and_exp_arrays_from_pvals(pvals)

        pval_window = IntervalRestrictionPval(
            4e-9, 5e-3, Dataset(mantissas), Dataset(exponents))
        datasets = {
            MANTISSA_DSET: Dataset(mantissas),
            EXP_DSET: Dataset(exponents),
        }

        kept = filter_dsets_with_restrictions(datasets, [pval_window])

        for dset_name in (MANTISSA_DSET, EXP_DSET):
            assert len(kept[dset_name]) == 3
    def test_pvals_with_same_mantissa_diff_exp_cross_on_limits(self):
        """Restricting ``pvals_same_mantissa_diff_exp`` with
        IntervalRestrictionPval(1.2e-9, 1.2e-2) is expected to leave five
        entries in each dataset."""
        mantissas, exponents = get_mantissa_and_exp_arrays_from_pvals(
            pvals_same_mantissa_diff_exp)

        pval_window = IntervalRestrictionPval(
            1.2e-9, 1.2e-2, Dataset(mantissas), Dataset(exponents))
        datasets = {
            MANTISSA_DSET: Dataset(mantissas),
            EXP_DSET: Dataset(exponents),
        }

        kept = filter_dsets_with_restrictions(datasets, [pval_window])

        for dset_name in (MANTISSA_DSET, EXP_DSET):
            assert len(kept[dset_name]) == 5
    def test_pvals_with_same_exp_diff_mantissa_right_on_limits(self):
        """Restricting ``pvals_same_exp_diff_mantissa`` with
        IntervalRestrictionPval(1.2e-15, 9e-15) is expected to leave four
        entries in each dataset."""
        mantissa_array, exp_array = get_mantissa_and_exp_arrays_from_pvals(
            pvals_same_exp_diff_mantissa)

        restrictions = [
            IntervalRestrictionPval(1.2e-15, 9e-15, Dataset(mantissa_array),
                                    Dataset(exp_array))
        ]
        datasets = {
            MANTISSA_DSET: Dataset(mantissa_array),
            EXP_DSET: Dataset(exp_array)
        }

        filtered_dsets = filter_dsets_with_restrictions(datasets, restrictions)
        assert len(filtered_dsets[MANTISSA_DSET]) == 4
        assert len(filtered_dsets[EXP_DSET]) == 4
    def test_slice_datasets_where_chromosome(self):
        """Slicing the loader's datasets on chromosome 1 keeps only the rows
        whose chromosome value is 1, across all datasets."""
        self.loader_dictionary[CHR_DSET] = Dataset([1, 1, 2, 2])
        self.loader_dictionary[SNP_DSET] = Dataset(
            ['snp1', 'snp2', 'snp3', 'snp4'])

        load = loader.Loader(None, self.h5file, self.study,
                             self.loader_dictionary)
        datasets = load._slice_datasets_where_chromosome(1)

        assert len(datasets[CHR_DSET]) == 2
        # Compare the whole set: popping a single arbitrary element would let
        # a mixed result such as [1, 2] slip through.
        assert set(datasets[CHR_DSET]) == {1}

        assert len(datasets[SNP_DSET]) == 2
        assert "snp1" in datasets[SNP_DSET]
        assert "snp2" in datasets[SNP_DSET]
        assert "snp3" not in datasets[SNP_DSET]
        assert "snp4" not in datasets[SNP_DSET]
    def test_create_dataset(self):
        """generate_dataset stores data under /Trait1/Study1/<name>, and is
        expected to raise on a repeated name (RuntimeError) and on the name
        "random" (KeyError)."""
        load = prepare_load_object_with_study_and_trait(
            h5file=self.h5file, study="Study1", trait="Trait1", loader=loader)
        study_group = load._create_study_group(load._create_trait_group())

        values = Dataset([1, 2, 3])
        study_group.generate_dataset(CHR_DSET, values)

        expected_path = "/Trait1/Study1/" + CHR_DSET
        stored = self.f.get(expected_path)
        assert stored is not None
        assert stored.name == expected_path
        assert len(stored[:]) == len(values)

        replacement = Dataset([2, 3, 4])
        # generating the same dataset name a second time must be rejected
        with pytest.raises(RuntimeError):
            study_group.generate_dataset(CHR_DSET, replacement)

        # presumably "random" is not a recognised dataset name -- the test
        # only pins that a KeyError is raised for it
        with pytest.raises(KeyError):
            study_group.generate_dataset("random", replacement)
    def test_get_equality_str_restriction(self):
        """A plain string value with a string dataset gives an
        EqualityRestriction from get_restriction."""
        rs_ids = Dataset(['rs1', 'rs2', 'rs3'])

        assert isinstance(
            get_restriction('rs1', rs_ids), EqualityRestriction)
    def test_get_float_restriction_ceiling_none(self):
        """A FloatInterval whose second tuple element is None still gives an
        IntervalRestriction from get_restriction."""
        values = Dataset([1., 2., 3.])
        open_ended = FloatInterval().set_tuple(1., None)

        result = get_restriction(open_ended, values)

        assert isinstance(result, IntervalRestriction)
 def test_empty_array(self):
     """utils.empty_array is truthy for None and an empty list, and falsy
     for a string, a non-empty list and a non-empty Dataset."""
     for empty_value in (None, []):
         assert utils.empty_array(empty_value)

     non_empty_values = ("not an array", [1, 2, 3], Dataset([1, 2, 3]))
     for value in non_empty_values:
         assert not utils.empty_array(value)
    def test_get_equality_int_restriction(self):
        """A single numeric value with a numeric dataset gives an
        EqualityRestriction from get_restriction.

        Note: despite the test name, both the value and the dataset contents
        used here are floats.
        """
        numbers = Dataset([1., 2., 3.])

        result = get_restriction(1., numbers)

        assert isinstance(result, EqualityRestriction)
    def test_filter_dsets_with_restrictions(self):
        """Exercise filter_dsets_with_restrictions with several restriction
        combinations over three parallel datasets.

        Covers: three combined restrictions, an interval plus an equality
        restriction, a single-point interval, and an empty restriction list.
        In every case all three dataset names must be present in the result.
        """
        datasets = {
            SNP_DSET: Dataset(["rs1", "rs1", "rs1", "rs2", "rs3"]),
            PVAL_DSET: Dataset([1., 2.1, 3, 3.1, 4]),
            CHR_DSET: Dataset([1, 1, 1, 1, 2])
        }

        # snp == rs1, 1. <= pval <= 2.1, chromosome == 1 -> first two rows
        restrictions = [
            EqualityRestriction("rs1", datasets[SNP_DSET]),
            IntervalRestriction(1., 2.1, datasets[PVAL_DSET]),
            EqualityRestriction(1, datasets[CHR_DSET])
        ]

        filtered_dsets = filter_dsets_with_restrictions(datasets, restrictions)

        assert len(filtered_dsets) == 3

        assert len(filtered_dsets[SNP_DSET]) == 2
        assert len(set(filtered_dsets[SNP_DSET])) == 1
        assert filtered_dsets[SNP_DSET][0] == "rs1"

        assert len(filtered_dsets[PVAL_DSET]) == 2
        for pval in filtered_dsets[PVAL_DSET]:
            assert pval >= 1.
            assert pval <= 2.1

        assert len(filtered_dsets[CHR_DSET]) == 2
        for chromosome in filtered_dsets[CHR_DSET]:
            assert chromosome == 1

        # 3. <= pval <= 3.1 on chromosome 1 -> rows three and four
        restrictions = [
            IntervalRestriction(3., 3.1, datasets[PVAL_DSET]),
            EqualityRestriction(1, datasets[CHR_DSET])
        ]

        filtered_dsets = filter_dsets_with_restrictions(datasets, restrictions)
        assert len(filtered_dsets) == 3
        assert len(filtered_dsets[SNP_DSET]) == 2

        assert filtered_dsets[SNP_DSET][0] == "rs1"
        assert filtered_dsets[SNP_DSET][1] == "rs2"

        assert len(filtered_dsets[PVAL_DSET]) == 2
        for pval in filtered_dsets[PVAL_DSET]:
            assert pval >= 3.
            assert pval <= 3.1

        assert len(filtered_dsets[CHR_DSET]) == 2
        for chromosome in filtered_dsets[CHR_DSET]:
            assert chromosome == 1

        # single-point interval: lower == upper == 4. -> last row only
        restrictions = [
            IntervalRestriction(4., 4., datasets[PVAL_DSET]),
        ]

        filtered_dsets = filter_dsets_with_restrictions(datasets, restrictions)
        assert len(filtered_dsets) == 3
        assert len(filtered_dsets[SNP_DSET]) == 1

        assert filtered_dsets[SNP_DSET][0] == "rs3"

        assert len(filtered_dsets[PVAL_DSET]) == 1
        assert filtered_dsets[PVAL_DSET][0] == 4.

        assert len(filtered_dsets[CHR_DSET]) == 1
        assert filtered_dsets[CHR_DSET][0] == 2

        # no restrictions at all: nothing may be filtered out
        restrictions = []

        filtered_dsets = filter_dsets_with_restrictions(datasets, restrictions)
        assert len(filtered_dsets) == 3
        # Check the *result*, not the input dictionary, so that this section
        # actually verifies the unrestricted output.
        for dset_name in datasets:
            assert len(filtered_dsets[dset_name]) == 5
Example #15
0
def _create_dset_placeholder(value, size):
    assert value is not None, "Can't create dataset with empty content!"
    return Dataset([value for _ in range(size)])
    def test_get_float_restriction(self):
        """A FloatInterval parsed from "1.:2." with a float dataset gives an
        IntervalRestriction from get_restriction."""
        values = Dataset([1., 2., 3.])
        interval = FloatInterval().set_string_tuple("1.:2.")

        result = get_restriction(interval, values)

        assert isinstance(result, IntervalRestriction)
    def test_filter_dictionary_by_mask(self):
        """utils.filter_dictionary_by_mask applies one mask to every dataset
        in a dictionary.

        The mask is built with ``Dataset.equality_mask`` over [1, 2, 2], so a
        mask for 1 selects the first position and a mask for 2 selects the
        last two. Checked for both numeric and string datasets.
        """
        # numeric datasets, mask selects the first element only
        loader_dictionary = {
            'dset1': Dataset([1, 2, 3]),
            'dset2': Dataset([1, 3, 3])
        }
        pvals = Dataset([1, 2, 2])
        mask = pvals.equality_mask(1)
        loader_dictionary = utils.filter_dictionary_by_mask(
            loader_dictionary, mask)
        for dset in loader_dictionary:
            assert np.array_equal(loader_dictionary[dset], [1])

        # string datasets, same single-element mask
        loader_dictionary = {
            'dset1': Dataset(["a", "b", "c"]),
            'dset2': Dataset(["c", "d", "e"])
        }
        pvals = Dataset([1, 2, 2])
        mask = pvals.equality_mask(1)
        loader_dictionary = utils.filter_dictionary_by_mask(
            loader_dictionary, mask)
        assert np.array_equal(loader_dictionary["dset1"], ["a"])
        assert np.array_equal(loader_dictionary["dset2"], ["c"])

        # string datasets, mask selects the last two elements
        loader_dictionary = {
            'dset1': Dataset(["a", "b", "c"]),
            'dset2': Dataset(["c", "d", "e"])
        }
        pvals = Dataset([1, 2, 2])
        mask = pvals.equality_mask(2)
        loader_dictionary = utils.filter_dictionary_by_mask(
            loader_dictionary, mask)
        assert np.array_equal(loader_dictionary["dset1"], ["b", "c"])
        assert np.array_equal(loader_dictionary["dset2"], ["d", "e"])