Exemple #1
0
 def has_trait(self, trait):
     """
     Check whether the given trait is present in any hdf5 file of the trait directory.

     :param trait: the trait to look for
     :return: True as soon as one of the files reports that it has the trait
     :raises NotFoundError: if none of the files has the trait
     """
     h5files = fsutils.get_h5files_in_dir(self.search_path, self.trait_dir)
     for h5file in h5files:
         service = trait_service.TraitService(h5file=h5file)
         try:
             found = service.has_trait(trait)
         finally:
             # always close the service's file, even if the lookup raises
             service.close_file()
         if found:
             return True
     raise NotFoundError("Trait " + trait)
Exemple #2
0
    def _location_for_snp_in_chromosome(self, chromosome):
        """
        Looks up all the files under the chromosome directory, in parallel, and tries to find out which one the
        SNP lives in.
        :param chromosome: the chromosome we think the SNP lives in
        :return: the exact file that this SNP is stored in
        :raises NotFoundError: if the chromosome directory does not exist or no file in it reports the SNP
        """
        dir_name = fsutils.join(self.snp_dir, str(chromosome))
        if not fsutils.is_valid_dir_path(path=self.search_path,
                                         dir_name=dir_name):
            # report the chromosome whose directory was actually checked
            logger.debug("Chromosome %s given for variant %s doesn't exist!",
                         str(chromosome), self.snp)
            raise NotFoundError("Chromosome " + str(chromosome))
        h5files = fsutils.get_h5files_in_dir(path=self.search_path,
                                             dir_name=dir_name)

        # one (snp, file) argument tuple per file to be checked
        tasks = [(self.snp, h5file) for h5file in h5files]
        # NOTE(review): the pool size is taken from self.bp_step - confirm this is intended
        pool = Pool(self.bp_step)
        try:
            results = pool.map(is_snp_in_file, tasks)
        finally:
            # shut the workers down even when one of the lookups raises
            pool.close()
            pool.join()

        # the first non-None result is the file that holds the SNP
        for h5file in results:
            if h5file is not None:
                return h5file

        # not found anywhere in chromosome
        raise NotFoundError("Chromosome-variant combination")
Exemple #3
0
    def get_list_of_studies(self):
        """
        Collect the studies listed by every hdf5 file in the trait directory.

        :return: sorted list of all the studies found across the files
        """
        studies = []
        h5files = fsutils.get_h5files_in_dir(self.search_path, self.trait_dir)
        for h5file in h5files:
            service = study_service.StudyService(h5file=h5file)
            try:
                studies.extend(service.list_studies())
            finally:
                # close the file even if listing the studies fails
                service.close_file()

        return sorted(studies)
Exemple #4
0
    def get_list_of_traits(self):
        """
        Collect the traits listed by every hdf5 file in the trait directory.

        :return: sorted list of all the traits found across the files
        """
        traits = []
        h5files = fsutils.get_h5files_in_dir(self.search_path, self.trait_dir)
        for h5file in h5files:
            service = trait_service.TraitService(h5file=h5file)
            try:
                traits.extend(service.list_traits())
            finally:
                # close the file even if listing the traits fails
                service.close_file()

        return sorted(traits)
Exemple #5
0
 def get_trait_of_study(self, study_to_find):
     """
     Find the trait that the given study is stored under.

     Each file lists its "trait:study" pairs; the trait part of the first pair
     whose study part equals study_to_find is returned.

     :param study_to_find: the study to look for
     :return: the trait part of the matching "trait:study" pair
     :raises NotFoundError: if no file lists a pair with this study
     """
     h5files = fsutils.get_h5files_in_dir(self.search_path, self.trait_dir)
     for h5file in h5files:
         service = study_service.StudyService(h5file=h5file)
         try:
             for trait_study in service.list_trait_study_pairs():
                 parts = trait_study.split(":")
                 if study_to_find == parts[1]:
                     return parts[0]
         finally:
             # runs on the early return and on errors alike
             service.close_file()
     # study not found
     raise NotFoundError("Study " + study_to_find)
 def has_chromosome(self, chromosome):
     """
     Check whether a search for the given chromosome returns anything.

     TODO: store the chromosome list as an attribute in the hdf5 file.

     :param chromosome: the chromosome to look for
     :return: True if the last element of the search result is greater than zero
     :raises NotFoundError: if the last element of the search result is not greater than zero
     """
     # The listing itself is not used; the call is kept because it may raise
     # when the study directory is not valid.
     # NOTE(review): confirm callers rely on that check, otherwise drop the call.
     fsutils.get_h5files_in_dir(self.search_path, self.study_dir)
     search = cr.search_all_assocs(chromosome=chromosome,
                                   start=0,
                                   size=0,
                                   properties=self.properties)
     # presumably the last element of the result is the number of hits - TODO confirm
     if search[-1] > 0:
         return True
     raise NotFoundError("Chromosome " + str(chromosome))
Exemple #7
0
 def _find_h5file_study_group(self):
     """
     Traverse all the hdf5 files and find any with the study group of interest
     :return: dict of {h5file: studygroup path}
     :raises NotFoundError: if no file has a study group for this study
     """
     hf_study_dict = {}
     h5files = fsutils.get_h5files_in_dir(self.search_path, self.trait_dir)
     for h5file in h5files:
         service = study_service.StudyService(h5file=h5file)
         try:
             for study_group in service.get_study_groups():
                 group_name = study_group.get_name()
                 # the last path component of the group name is compared to the study
                 if self.study == group_name.split("/")[-1]:
                     hf_study_dict[h5file] = group_name
         finally:
             # close the file even if reading the study groups fails
             service.close_file()
     # test for emptiness directly; any() would test the truthiness of the keys instead
     if not hf_study_dict:
         logger.debug("Study %s not found in any trait!", self.study)
         raise NotFoundError("Study " + self.study)
     return hf_study_dict
 def test_get_h5files_in_dir_raise_error(self):
     """Listing h5 files with '/' as the search path must raise RuntimeError."""
     search_root = '/'
     with pytest.raises(RuntimeError):
         fsu.get_h5files_in_dir(search_root, self.dir_name)
 def test_get_h5files_in_dir_same_order(self):
     """Two consecutive listings of the same directory must compare equal element-wise."""
     listings = [fsu.get_h5files_in_dir('./', self.dir_name) for _ in range(2)]
     first_listing, second_listing = listings
     assert np.array_equal(first_listing, second_listing)