コード例 #1
0
    def test_get_dsets_from_plethora_of_blocks(self):
        """Load datasets through the block groups found under group "/2".

        A wide base-pair interval is expected to yield six values per queried
        dataset; a single-position interval is expected to yield none.
        """
        parent_group = gu.Group(self.f.get("/2"))

        # Wide interval: descend one level below the block groups first.
        wide_block = bk.Block(IntInterval().set_tuple(48500000, 49200000))
        wide_block_groups = wide_block.get_block_groups_from_parent(
            parent_group)
        nested_groups = gu.generate_subgroups_from_generator_of_subgroups(
            wide_block_groups)
        loaded = query.load_datasets_from_groups(
            nested_groups, self.start, self.size)
        assert loaded.__class__ is dict

        for name in TO_QUERY_DSETS:
            # 2 values for each of 3 studies that we loaded
            assert len(loaded[name]) == 6

        # Single-position interval: the block groups are loaded directly.
        point_block = bk.Block(IntInterval().set_tuple(48600000, 48600000))
        point_block_groups = point_block.get_block_groups_from_parent(
            parent_group)
        loaded = query.load_datasets_from_groups(
            point_block_groups, self.start, self.size)

        for name in TO_QUERY_DSETS:
            # no SNP bp falls into this group
            assert len(loaded[name]) == 0
コード例 #2
0
    def __init__(self, tsv, h5file, study, dict_of_data=None):
        """Read and format the input datasets, then open the HDF5 file.

        :param tsv: input handed to ``fl.read_datasets_from_input``
        :param h5file: file name handed to ``h5py.File`` (append mode)
        :param study: study identifier; stored on the instance and passed to
            ``fl.format_datasets``
        :param dict_of_data: optional data handed to
            ``fl.read_datasets_from_input`` together with ``tsv``
        """
        self.study = study

        # Parse the raw input first, then format it for this study.
        raw_datasets = fl.read_datasets_from_input(tsv, dict_of_data, const)
        self.datasets = fl.format_datasets(raw_datasets, study, const)

        # Mode 'a': read/write access, creating the file if it is missing.
        self.file = h5py.File(h5file, mode='a')
        self.file_group = gu.Group(self.file)
コード例 #3
0
    def __init__(self, tsv, h5file, study, uuid, dict_of_data=None):
        """Read and format the input datasets, then open the HDF5 file.

        :param tsv: input handed to ``fl.read_datasets_from_input``
        :param h5file: file name handed to ``h5py.File`` (append mode)
        :param study: study identifier; stored on the instance and passed to
            ``fl.format_datasets``
        :param uuid: identifier stored on the instance; must not be ``None``
        :param dict_of_data: optional data handed to
            ``fl.read_datasets_from_input`` together with ``tsv``
        :raises AssertionError: when ``uuid`` is ``None``
        """
        self.study = study
        self.uuid = uuid
        # Checked before any input is parsed or any file is touched.
        assert self.uuid is not None, "You need to specify a uuid"

        raw_datasets = fl.read_datasets_from_input(
            tsv, dict_of_data, const)
        self.datasets = fl.format_datasets(raw_datasets, study, const)

        # Mode 'a': read/write access, creating the file if it is missing.
        self.file = h5py.File(h5file, mode='a')
        self.file_group = gu.Group(self.file)
コード例 #4
0
 def setup_method(self, method):
     """Build the group fixture in ``self.h5file`` before each test.

     Creates subgroup "1" under the file root, then "1/sub1" and "1/sub2".
     ``generate_dataset`` is called on "sub1" with ``STUDY_DSET`` and
     ``["study1"]``. ``method`` is not used in this body.
     """
     # open h5 file in read/write mode
     self.f = h5py.File(self.h5file, mode="a")
     self.file_group = gu.Group(self.f)
     # Each subgroup is created through the root wrapper and then fetched
     # back so the tests hold a handle to it.
     self.file_group.create_subgroup("1")
     self.group_1 = self.file_group.get_subgroup("1")
     self.file_group.create_subgroup("1/sub1")
     self.subgroup1 = self.group_1.get_subgroup("sub1")
     self.subgroup1.generate_dataset(STUDY_DSET, ["study1"])
     # "1/sub2" is created but no dataset is generated for it here.
     self.file_group.create_subgroup("1/sub2")
     # Same value that was passed to generate_dataset for "sub1" above.
     self.subgroup1_studies = ["study1"]
コード例 #5
0
 def _get_dict_of_h5_to_study_groups(self, h5file, hf_study_dict):
     """Collect the names of groups in ``h5file`` that match ``self.study``.

     Starting from the file root, the groups two levels below its direct
     subgroups are visited (named chromosome -> block -> study here). A
     group matches when the last "/"-separated component of its name equals
     ``self.study``.

     :param h5file: file name opened read-only; also the key that is
         appended to in ``hf_study_dict``
     :param hf_study_dict: mapping whose ``hf_study_dict[h5file]`` entry
         supports ``append``; it is modified in place
     :return: the same ``hf_study_dict`` object
     """
     # The context manager closes the file even when the traversal or the
     # dictionary update raises, so the handle can no longer leak.
     with h5py.File(h5file, 'r') as file:
         file_group = gu.Group(file)
         chr_groups = file_group.get_all_subgroups()
         block_groups = gu.generate_subgroups_from_generator_of_subgroups(
             chr_groups)
         study_groups = gu.generate_subgroups_from_generator_of_subgroups(
             block_groups)
         # The groups must be consumed while the file is still open.
         for study_group in study_groups:
             group_name = study_group.get_name()
             if self.study == group_name.split("/")[-1]:
                 hf_study_dict[h5file].append(group_name)
     return hf_study_dict
コード例 #6
0
    def __init__(self, tsv, h5file, study, trait, dict_of_data=None):
        """Read and format the input datasets, then open the HDF5 file.

        The original body started with the no-op self-assignment
        ``h5file = h5file``; it had no effect and has been removed.

        :param tsv: input handed to ``fl.read_datasets_from_input``
        :param h5file: file name handed to ``h5py.File`` (append mode)
        :param study: study identifier; stored on the instance and passed to
            ``fl.format_datasets``
        :param trait: trait identifier stored on the instance; must not be
            ``None``
        :param dict_of_data: optional data handed to
            ``fl.read_datasets_from_input`` together with ``tsv``
        :raises AssertionError: when ``trait`` is ``None``
        """
        self.study = study
        self.trait = trait

        # NOTE: asserts are skipped when Python runs with -O; the exception
        # type is kept as-is so existing callers see the same error.
        assert trait is not None, "You need to specify a trait with the trait loader!"

        datasets_as_lists = fl.read_datasets_from_input(
            tsv, dict_of_data, const)
        self.datasets = fl.format_datasets(datasets_as_lists, study, const)

        # Open the file with read/write permissions and create if it doesn't exist
        self.file = h5py.File(h5file, 'a')
        self.file_group = gu.Group(self.file)
コード例 #7
0
 def __init__(self, h5file):
     """Open ``h5file`` and cache the study metadata of its first group.

     The file is opened twice: read-only through ``h5py`` and through a
     pandas ``HDFStore``. The metadata is looked up under the first key
     returned by ``get_all_subgroups_keys()``.

     :param h5file: file name handed to ``h5py.File`` and ``pd.HDFStore``
     """
     # Open the file with read permissions
     self.file = h5py.File(h5file, 'r')
     self.datasets = {}
     self.file_group = gu.Group(self.file)
     # NOTE(review): HDFStore is opened with its default mode here, not an
     # explicit read-only mode -- confirm this is intended.
     self.pd_hdf = pd.HDFStore(h5file)
     self.key = self.file_group.get_all_subgroups_keys()[0]

     # Fetch the metadata once instead of once per attribute; every call
     # used the same store and key.
     metadata = get_study_metadata(hdf=self.pd_hdf, key=self.key)
     self.study = metadata['study']
     self.tissue = metadata['tissue']
     self.chromosomes = metadata['chromosomes'].tolist()
     self.traits = metadata['traits'].tolist()
コード例 #8
0
 def _get_dict_of_h5_to_study_groups(self, h5file, hf_study_dict):
     """Collect the group names in ``h5file`` that belong to ``self.study``.

     The groups one level below the root's direct subgroups are visited
     (named snp -> study here). A group matches when the last
     "/"-separated component of its name equals ``self.study``. If the
     matching group is the only key of its parent, the parent's name is
     recorded; otherwise the matching group's own name is recorded.

     :param h5file: file name opened read-only; also the key that is
         appended to in ``hf_study_dict``
     :param hf_study_dict: mapping whose ``hf_study_dict[h5file]`` entry
         supports ``append``; it is modified in place
     :return: the same ``hf_study_dict`` object
     """
     # The context manager closes the file even when the traversal or the
     # dictionary update raises, so the handle can no longer leak.
     with h5py.File(h5file, 'r') as file:
         file_group = gu.Group(file)
         snp_groups = file_group.get_all_subgroups()
         study_groups = gu.generate_subgroups_from_generator_of_subgroups(
             snp_groups)
         # The groups must be consumed while the file is still open.
         for study_group in study_groups:
             if self.study != study_group.get_name().split("/")[-1]:
                 continue
             snp_group = study_group.get_parent()
             if len(snp_group.get_all_subgroups_keys()) == 1:
                 hf_study_dict[h5file].append(snp_group.get_name())
             else:
                 hf_study_dict[h5file].append(study_group.get_name())
     return hf_study_dict
コード例 #9
0
    def test_get_dsets_group(self):
        """Query every subgroup of the first block group under "/2".

        Each per-subgroup query must return one entry per name in
        ``TO_STORE_DSETS``. After accumulating all results, the study
        dataset must hold three distinct values and every other dataset
        exactly one.
        """
        chr_group_2 = gu.Group(self.f.get("/2"))

        bp_interval = IntInterval().set_tuple(48500000, 48500000)
        block = bk.Block(bp_interval)
        block_groups = block.get_block_groups_from_parent(chr_group_2)

        # Only the first block group produced for the interval is inspected.
        block_group = next(block_groups)

        block_sub_groups = block_group.get_all_subgroups()
        d = du.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        for block_sub_group in block_sub_groups:
            datasets = query.get_dsets_from_group(block_sub_group, self.start,
                                                  self.size)
            assert len(datasets) == len(TO_STORE_DSETS)
            d = du.extend_dsets_with_subset(d, datasets)

        for dset_name, dset in d.items():
            # Compare by value: an identity check (`is`) only matches when
            # both names happen to be the very same object.
            if dset_name == STUDY_DSET:
                assert len(set(dset)) == 3
            else:
                assert len(set(dset)) == 1
コード例 #10
0
 def __init__(self, h5file):
     """Open ``h5file`` read-only and start with empty search state.

     :param h5file: file name handed to ``h5py.File``
     """
     # Read-only handle; the root of the file is wrapped in a gu.Group.
     self.file = h5py.File(h5file, mode='r')
     self.datasets = dict()
     self.file_group = gu.Group(self.file)
     # No study is assigned in the constructor.
     self.study = None
コード例 #11
0
 def test_initializing_group_with_dataset_raises_error(self):
     """Passing a generated dataset to ``gu.Group`` must raise TypeError."""
     not_a_group = self.file_group.generate_dataset(
         STUDY_DSET, self.subgroup1_studies)
     with pytest.raises(TypeError):
         gu.Group(not_a_group)
コード例 #12
0
def save_snps_and_study_in_file(opened_file, list_of_snps, study):
    """Create one group per SNP and generate its study dataset.

    :param opened_file: open file object exposing ``create_group``
    :param list_of_snps: iterable of names; one group is created per entry
    :param study: value passed, wrapped in a one-element list, to
        ``generate_dataset`` under ``STUDY_DSET`` for every created group
    """
    for snp_name in list_of_snps:
        raw_group = opened_file.create_group(snp_name)
        gu.Group(raw_group).generate_dataset(STUDY_DSET, [study])