Ejemplo n.º 1
0
    def test_local_distribute(self):
        # Runs single_snp_scale twice against a LocalCache-backed cache:
        #   1. on test SNPs written through DistributedBed.write, asking
        #      _cache_dict for clear_cache=True, and
        #   2. on self.bed directly, asking _cache_dict for clear_cache=False.
        # Both results are checked against the "old" reference by
        # compare_files.
        logging.info("test_local_distribute")

        out_path = self.file_name("local_distribute")

        cache_root = LocalCache("local_cache/local_distribute")
        snps_dir = cache_root.join('test_snps')
        # Presumably removes whatever an earlier run left under 'test_snps'.
        snps_dir.rmtree('')
        distributed_snps = DistributedBed.write(
            snps_dir, self.bed, piece_per_chrom_count=2)

        # One row per pass: (test SNPs, clear_cache flag, extra keywords).
        # Only the first pass passes force_python_only explicitly.
        passes = (
            (distributed_snps, True, {"force_python_only": False}),
            (self.bed, False, {}),
        )
        for snps, wipe, extra in passes:
            frame = single_snp_scale(
                test_snps=snps,
                pheno=self.phen_fn,
                covar=self.cov_fn,
                G0=self.bed,
                cache=self._cache_dict(cache_root, clear_cache=wipe),
                output_file_name=out_path,
                **extra)
            self.compare_files(frame, "old")
Ejemplo n.º 2
0
    def too_slow_test_peertopeer(self):
        # Runs single_snp_scale twice with a PeerToPeer file cache and a
        # LocalMultiProc runner, comparing each result to the "old" reference.
        # NOTE(review): the name does not start with "test", so standard test
        # discovery presumably skips this method - confirm that is intended.
        logging.info("test_peertopeer")

        out_path = self.file_name("peertopeer")

        def id_and_path_function():
            # Function-scope import: only needed when this helper is called.
            from pysnptools.util.filecache import ip_address_pid
            node_id = ip_address_pid()
            # Need to put the 'cache_top' here explicitly.
            return node_id, 'peertopeer/{0}'.format(node_id)

        storage = PeerToPeer(
            common_directory='peertopeer/common',
            id_and_path_function=id_and_path_function,
        )
        snps_cache = storage.join('test_snps')
        snps_cache.rmtree()
        distributed_snps = DistributedBed.write(
            snps_cache, self.bed, piece_per_chrom_count=2)

        # Run on 5 additional Python processes
        runner = LocalMultiProc(taskcount=5)

        # The 'cache' area is removed once up front; the first pass then runs
        # after that removal and the second pass runs without removing it.
        storage.join('cache').rmtree()
        for _pass_index in range(2):
            frame = single_snp_scale(
                test_snps=distributed_snps,
                pheno=self.phen_fn,
                covar=self.cov_fn,
                cache=storage.join('cache'),
                output_file_name=out_path,
                runner=runner,
            )
            self.compare_files(frame, "old")
Ejemplo n.º 3
0
    def test_one_chrom(self):
        # Restricts the test SNPs to those whose first pos column equals 3 and
        # runs single_snp_scale four times: plain and DistributedBed-written
        # SNPs, each with clear_cache=True and then with clear_cache=False.
        # Every result is compared to the "old_one" reference.
        logging.info("test_one_chrom")

        out_path = self.file_name("one_chrom")

        cache_root = LocalCache("local_cache/one_chrom")
        snps_dir = cache_root.join('test_snps')
        snps_dir.rmtree('')

        # Test only on chromosome 3
        on_chrom3 = self.bed.pos[:, 0] == 3
        chrom3_snps = self.bed[:, on_chrom3]
        chrom3_distributed = DistributedBed.write(
            snps_dir, chrom3_snps, piece_per_chrom_count=2)

        inputs = (
            (chrom3_snps, "Run with just chrom3"),
            (chrom3_distributed, "Run with distributed test SNPs"),
        )
        # Outer loop is the cache mode, so both inputs run with
        # clear_cache=True before either runs with clear_cache=False.
        for wipe, suffix in ((True, ""), (False, " (use cache)")):
            for snps, label in inputs:
                logging.info("=========== " + label + suffix + " ===========")
                frame = single_snp_scale(
                    test_snps=snps,
                    pheno=self.phen_fn,
                    covar=self.cov_fn,
                    K0=self.bed,
                    cache=self._cache_dict(cache_root, clear_cache=wipe),
                    output_file_name=out_path,
                )
                self.compare_files(frame, "old_one")
Ejemplo n.º 4
0
    def test_mapreduce1_runner(self):
        # Runs single_snp_scale on self.bed with a LocalMultiProc runner
        # (taskcount=4, just_one_process=True) and compares the result to the
        # "old" reference.
        logging.info("test_mapreduce1_runner")

        out_path = self.file_name("mapreduce1_runner")
        multi_proc = LocalMultiProc(taskcount=4, just_one_process=True)

        # Collect the keyword arguments first so the call itself stays short.
        settings = {
            "test_snps": self.bed,
            "pheno": self.phen_fn,
            "covar": self.cov_fn,
            "output_file_name": out_path,
            "runner": multi_proc,
        }
        frame = single_snp_scale(**settings)
        self.compare_files(frame, "old")
Ejemplo n.º 5
0
    def test_low(self):
        # Passes a LocalCache object directly as the cache argument of
        # single_snp_scale. The storage is removed once, then the same call
        # is made twice; each result is compared to the "old" reference.
        logging.info("test_low")

        out_path = self.file_name("low")

        cache_root = LocalCache("local_cache/low")
        cache_root.rmtree()  # only before the first pass
        for _pass_index in range(2):
            frame = single_snp_scale(
                test_snps=self.bed,
                pheno=self.phen_fn,
                covar=self.cov_fn,
                cache=cache_root,
                output_file_name=out_path,
            )
            self.compare_files(frame, "old")
Ejemplo n.º 6
0
    def test_one_fast(self):
        # Single run of single_snp_scale on DistributedBed-written SNPs whose
        # first pos column equals 3, with no cache argument; the result is
        # compared to the "old_one" reference.
        logging.info("test_one_fast")

        out_path = self.file_name("one_fast")

        cache_root = LocalCache("local_cache")
        snps_dir = cache_root.join('one_fast')
        snps_dir.rmtree()

        # Test only on chromosome 3
        on_chrom3 = self.bed.pos[:, 0] == 3
        chrom3_snps = self.bed[:, on_chrom3]
        chrom3_distributed = DistributedBed.write(
            snps_dir, chrom3_snps, piece_per_chrom_count=2)

        frame = single_snp_scale(
            test_snps=chrom3_distributed,
            pheno=self.phen_fn,
            covar=self.cov_fn,
            G0=self.bed,
            output_file_name=out_path,
        )
        self.compare_files(frame, "old_one")
Ejemplo n.º 7
0
    def test_multipheno(self):
        # For 2, 5 and 1 randomly generated phenotype columns, checks that
        # single_snp_scale run on all columns at once yields the same p-values
        # per SNP as single_snp run on each column separately and
        # concatenated.
        logging.info("test_multipheno")

        # Fixed seed so the generated phenotypes are reproducible.
        random_state = RandomState(29921)
        pheno_reference = Pheno(self.phen_fn).read()
        for pheno_count in [2, 5, 1]:
            val = random_state.normal(loc=pheno_count,
                                      scale=pheno_count,
                                      size=(pheno_reference.iid_count,
                                            pheno_count))
            pheno_col = ['pheno{0}'.format(i) for i in range(pheno_count)]
            pheno_multi = SnpData(iid=pheno_reference.iid,
                                  sid=pheno_col,
                                  val=val)

            # Reference: one single_snp run per phenotype column.
            reference = pd.concat([
                single_snp(test_snps=self.bed,
                           pheno=pheno_multi[:, pheno_index],
                           covar=self.cov_fn)
                for pheno_index in range(pheno_count)
            ])
            frame = single_snp_scale(test_snps=self.bed,
                                     pheno=pheno_multi,
                                     covar=self.cov_fn)

            # The message must only use names defined here; it previously
            # formatted an undefined name, which would have raised NameError
            # instead of reporting the mismatch.
            assert len(frame) == len(reference), (
                "# of pairs differs from reference: {0} vs {1} "
                "(pheno_count={2})".format(len(frame), len(reference),
                                           pheno_count))

            # This ignores which pheno produces which pvalue: the p-values of
            # each SNP are sorted on both sides before being compared.
            for sid in sorted(set(reference.SNP)):
                pvalue_frame = np.array(
                    sorted(frame[frame['SNP'] == sid].PValue))
                pvalue_reference = np.array(
                    sorted(reference[reference['SNP'] == sid].PValue))
                # Guard against broadcasting hiding a count mismatch
                # (e.g. 1 value compared against several).
                assert pvalue_frame.shape == pvalue_reference.shape, (
                    "pair {0} has {1} p-values but reference has {2}".format(
                        sid, len(pvalue_frame), len(pvalue_reference)))
                # .all() must be called: the bare bound method `.all` is
                # always truthy, which made this assertion a no-op.
                assert (
                    abs(pvalue_frame - pvalue_reference) < 1e-5
                ).all(), "pair {0} differs too much from reference".format(sid)