def test_get_dsets_from_plethora_of_blocks(self):
    """Load datasets across every block group of chromosome 2, then from an empty interval."""
    chr_group_2 = gu.Group(self.f.get("/2"))
    interval = IntInterval().set_tuple(48500000, 49200000)
    blk = bk.Block(interval)
    groups_in_block = blk.get_block_groups_from_parent(chr_group_2)
    every_subgroup = gu.generate_subgroups_from_generator_of_subgroups(
        groups_in_block)
    datasets = query.load_datasets_from_groups(every_subgroup, self.start,
                                               self.size)
    assert datasets.__class__ is dict
    for name in TO_QUERY_DSETS:
        # 2 values for each of 3 studies that we loaded
        assert len(datasets[name]) == 6

    interval = IntInterval().set_tuple(48600000, 48600000)
    blk = bk.Block(interval)
    groups_in_block = blk.get_block_groups_from_parent(chr_group_2)
    datasets = query.load_datasets_from_groups(groups_in_block, self.start,
                                               self.size)
    for name in TO_QUERY_DSETS:
        # no SNP base-pair position falls into this block
        assert len(datasets[name]) == 0
def __init__(self, tsv, h5file, study, dict_of_data=None):
    """Read and format the study's datasets, then open the HDF5 output file.

    Input comes either from the tsv path or from dict_of_data, as handled
    by fl.read_datasets_from_input.
    """
    self.study = study
    raw_datasets = fl.read_datasets_from_input(tsv, dict_of_data, const)
    self.datasets = fl.format_datasets(raw_datasets, study, const)
    # 'a' mode: read/write access, creating the file if it doesn't exist
    self.file = h5py.File(h5file, 'a')
    self.file_group = gu.Group(self.file)
def __init__(self, tsv, h5file, study, uuid, dict_of_data=None):
    """Read and format the study's datasets, then open the HDF5 output file.

    :param uuid: required identifier for this load; a missing uuid raises.

    Fix: validate uuid with an explicit raise instead of `assert`, which is
    silently stripped when Python runs with -O, and reject it before any
    state is assigned.
    """
    if uuid is None:
        raise ValueError("You need to specify a uuid")
    self.study = study
    self.uuid = uuid
    datasets_as_lists = fl.read_datasets_from_input(tsv, dict_of_data, const)
    self.datasets = fl.format_datasets(datasets_as_lists, study, const)
    # Open the file with read/write permissions and create if it doesn't exist
    self.file = h5py.File(h5file, 'a')
    self.file_group = gu.Group(self.file)
def setup_method(self, method):
    """Build the fixture hierarchy /1, /1/sub1 (holding a study dataset) and /1/sub2."""
    # 'a' mode: read/write, creating the file when missing
    self.f = h5py.File(self.h5file, mode="a")
    self.file_group = gu.Group(self.f)
    self.file_group.create_subgroup("1")
    self.group_1 = self.file_group.get_subgroup("1")
    self.file_group.create_subgroup("1/sub1")
    self.subgroup1 = self.group_1.get_subgroup("sub1")
    self.subgroup1.generate_dataset(STUDY_DSET, ["study1"])
    self.file_group.create_subgroup("1/sub2")
    # the studies stored under sub1, kept for assertions in the tests
    self.subgroup1_studies = ["study1"]
def _get_dict_of_h5_to_study_groups(self, h5file, hf_study_dict):
    """Collect the names of study groups matching self.study from h5file.

    Walks chromosome groups -> block groups -> study groups and appends
    each matching group name to hf_study_dict[h5file].

    :param h5file: path of the HDF5 file to scan (must already be a key
        of hf_study_dict mapping to a list)
    :return: the updated hf_study_dict

    Fix: use a context manager so the file is closed even if traversal
    raises; the original only closed it on the success path.
    """
    with h5py.File(h5file, 'r') as file:
        file_group = gu.Group(file)
        chr_groups = file_group.get_all_subgroups()
        block_groups = gu.generate_subgroups_from_generator_of_subgroups(
            chr_groups)
        study_groups = gu.generate_subgroups_from_generator_of_subgroups(
            block_groups)
        for study_group in study_groups:
            # the study name is the last component of the group path
            if self.study == study_group.get_name().split("/")[-1]:
                hf_study_dict[h5file].append(study_group.get_name())
    return hf_study_dict
def __init__(self, tsv, h5file, study, trait, dict_of_data=None):
    """Read and format the study's datasets, then open the HDF5 output file.

    :param trait: required trait name for this loader; missing trait raises.

    Fixes: removed the no-op statement `h5file = h5file`, and replaced the
    validating `assert` (stripped under python -O) with an explicit raise
    performed before any state is assigned.
    """
    if trait is None:
        raise ValueError(
            "You need to specify a trait with the trait loader!")
    self.study = study
    self.trait = trait
    datasets_as_lists = fl.read_datasets_from_input(
        tsv, dict_of_data, const)
    self.datasets = fl.format_datasets(datasets_as_lists, study, const)
    # Open the file with read/write permissions and create if it doesn't exist
    self.file = h5py.File(h5file, 'a')
    self.file_group = gu.Group(self.file)
def __init__(self, h5file):
    """Open the HDF5 file read-only and cache the study's metadata.

    Exposes study, tissue, chromosomes and traits read from the metadata
    stored under the file's first subgroup key.

    Fix: fetch the metadata record once instead of issuing four identical
    get_study_metadata calls against the same HDFStore/key.
    """
    # Open the file with read permissions
    self.file = h5py.File(h5file, 'r')
    self.datasets = {}
    self.file_group = gu.Group(self.file)
    self.pd_hdf = pd.HDFStore(h5file)
    # metadata is keyed by the first (and presumably only) top-level group
    self.key = self.file_group.get_all_subgroups_keys()[0]
    metadata = get_study_metadata(hdf=self.pd_hdf, key=self.key)
    self.study = metadata['study']
    self.tissue = metadata['tissue']
    self.chromosomes = metadata['chromosomes'].tolist()
    self.traits = metadata['traits'].tolist()
def _get_dict_of_h5_to_study_groups(self, h5file, hf_study_dict):
    """Collect group names for self.study from a SNP-organised h5file.

    Walks snp groups -> study groups; when a SNP group holds only one
    study, the SNP group's name is recorded, otherwise the study group's
    own name is recorded.

    :param h5file: path of the HDF5 file to scan (must already be a key
        of hf_study_dict mapping to a list)
    :return: the updated hf_study_dict

    Fix: use a context manager so the file is closed even if traversal
    raises; the original only closed it on the success path.
    """
    with h5py.File(h5file, 'r') as file:
        file_group = gu.Group(file)
        snp_groups = file_group.get_all_subgroups()
        study_groups = gu.generate_subgroups_from_generator_of_subgroups(
            snp_groups)
        for study_group in study_groups:
            # the study name is the last component of the group path
            if self.study == study_group.get_name().split("/")[-1]:
                snp_group = study_group.get_parent()
                if len(snp_group.get_all_subgroups_keys()) == 1:
                    hf_study_dict[h5file].append(snp_group.get_name())
                else:
                    hf_study_dict[h5file].append(study_group.get_name())
    return hf_study_dict
def test_get_dsets_group(self):
    """Accumulate datasets from every subgroup of a single block group.

    Fix: compare dset_name to STUDY_DSET with `==` instead of `is` —
    identity comparison of strings only worked via CPython interning.
    """
    chr_group_2 = gu.Group(self.f.get("/2"))
    bp_interval = IntInterval().set_tuple(48500000, 48500000)
    block = bk.Block(bp_interval)
    block_groups = block.get_block_groups_from_parent(chr_group_2)
    block_group = next(block_groups)
    block_sub_groups = block_group.get_all_subgroups()
    d = du.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)
    for block_sub_group in block_sub_groups:
        datasets = query.get_dsets_from_group(block_sub_group, self.start,
                                              self.size)
        assert len(datasets) == len(TO_STORE_DSETS)
        d = du.extend_dsets_with_subset(d, datasets)
    for dset_name, dset in d.items():
        if dset_name == STUDY_DSET:
            # three distinct studies were loaded into this block
            assert len(set(dset)) == 3
        else:
            assert len(set(dset)) == 1
def __init__(self, h5file):
    """Open the HDF5 file read-only; datasets are populated later."""
    # Open the file with read permissions
    self.file = h5py.File(h5file, 'r')
    self.datasets = {}
    self.file_group = gu.Group(self.file)
    # no study selected yet
    self.study = None
def test_initializing_group_with_dataset_raises_error(self):
    """Wrapping a raw dataset in gu.Group must raise TypeError."""
    dset = self.file_group.generate_dataset(STUDY_DSET,
                                            self.subgroup1_studies)
    with pytest.raises(TypeError):
        gu.Group(dset)
def save_snps_and_study_in_file(opened_file, list_of_snps, study):
    """Create one group per SNP in opened_file and store the study name in each."""
    for snp_name in list_of_snps:
        snp_group = gu.Group(opened_file.create_group(snp_name))
        snp_group.generate_dataset(STUDY_DSET, [study])