def test_local_distribute(self):
    """Run single_snp_scale with a LocalCache, first on DistributedBed SNPs, then on self.bed.

    The first run clears the cache (and passes force_python_only=False); the
    second run keeps the cache from the first. Each result is handed to
    self.compare_files with the "old" key.
    """
    logging.info("test_local_distribute")
    result_path = self.file_name("local_distribute")
    cache_root = LocalCache("local_cache/local_distribute")

    # Empty the 'test_snps' area before writing the distributed copy of self.bed.
    snps_area = cache_root.join('test_snps')
    snps_area.rmtree('')
    distributed_snps = DistributedBed.write(snps_area,
                                            self.bed,
                                            piece_per_chrom_count=2)

    # (test SNPs, clear the cache first?, extra keyword arguments for this run)
    runs = (
        (distributed_snps, True, {"force_python_only": False}),
        (self.bed, False, {}),
    )
    for snps, clear_cache, extra in runs:
        frame = single_snp_scale(
            test_snps=snps,
            pheno=self.phen_fn,
            covar=self.cov_fn,
            G0=self.bed,
            cache=self._cache_dict(cache_root, clear_cache=clear_cache),
            output_file_name=result_path,
            **extra
        )
        self.compare_files(frame, "old")
def too_slow_test_peertopeer(self):
    """Run single_snp_scale twice over PeerToPeer storage with a LocalMultiProc runner.

    The 'cache' area is removed once before the first run, so the second run
    sees whatever the first run left there. Each result is handed to
    self.compare_files with the "old" key.
    """
    logging.info("test_peertopeer")
    result_path = self.file_name("peertopeer")

    def id_and_path_function():
        from pysnptools.util.filecache import ip_address_pid

        this_id = ip_address_pid()
        # Need to put the 'cache_top' here explicitly.
        local_path = 'peertopeer/{0}'.format(this_id)
        return this_id, local_path

    peer_storage = PeerToPeer(common_directory='peertopeer/common',
                              id_and_path_function=id_and_path_function)

    # Empty the 'test_snps' area before writing the distributed copy of self.bed.
    snps_area = peer_storage.join('test_snps')
    snps_area.rmtree()
    distributed_snps = DistributedBed.write(snps_area,
                                            self.bed,
                                            piece_per_chrom_count=2)

    # Run on 5 additional Python processes
    multiproc = LocalMultiProc(taskcount=5)

    # Only the first pass starts from a removed 'cache' area.
    peer_storage.join('cache').rmtree()
    for _pass in range(2):
        frame = single_snp_scale(test_snps=distributed_snps,
                                 pheno=self.phen_fn,
                                 covar=self.cov_fn,
                                 cache=peer_storage.join('cache'),
                                 output_file_name=result_path,
                                 runner=multiproc)
        self.compare_files(frame, "old")
def test_one_chrom(self):
    """Run single_snp_scale on chromosome-3 test SNPs, in-memory and distributed.

    Both forms of the test SNPs are run with the cache cleared, then both are
    run again with the cache kept. Every result is handed to
    self.compare_files with the "old_one" key.
    """
    logging.info("test_one_chrom")
    result_path = self.file_name("one_chrom")
    cache_root = LocalCache("local_cache/one_chrom")

    # Empty the 'test_snps' area before writing the distributed copy.
    snps_area = cache_root.join('test_snps')
    snps_area.rmtree('')

    # Test only on chromosome 3
    on_chrom3 = self.bed.pos[:, 0] == 3
    chrom3_snps = self.bed[:, on_chrom3]
    chrom3_dist = DistributedBed.write(snps_area,
                                       chrom3_snps,
                                       piece_per_chrom_count=2)

    variants = (
        (chrom3_snps, "Run with just chrom3"),
        (chrom3_dist, "Run with distributed test SNPs"),
    )
    # Outer loop: cleared-cache runs first, then the cache-reusing runs.
    for clear_cache, suffix in ((True, ""), (False, " (use cache)")):
        for snps, label in variants:
            logging.info("=========== " + label + suffix + " ===========")
            frame = single_snp_scale(
                test_snps=snps,
                pheno=self.phen_fn,
                covar=self.cov_fn,
                K0=self.bed,
                cache=self._cache_dict(cache_root, clear_cache=clear_cache),
                output_file_name=result_path,
            )
            self.compare_files(frame, "old_one")
def test_mapreduce1_runner(self):
    """Run single_snp_scale on self.bed with a LocalMultiProc runner.

    The runner is built with taskcount=4 and just_one_process=True; the result
    is handed to self.compare_files with the "old" key.
    """
    logging.info("test_mapreduce1_runner")
    result_path = self.file_name("mapreduce1_runner")
    call_args = dict(
        test_snps=self.bed,
        pheno=self.phen_fn,
        covar=self.cov_fn,
        output_file_name=result_path,
        runner=LocalMultiProc(taskcount=4, just_one_process=True),
    )
    self.compare_files(single_snp_scale(**call_args), "old")
def test_low(self):
    """Run single_snp_scale twice on self.bed with a LocalCache passed directly as cache.

    The cache is removed once before the first run, so the second run sees
    whatever the first run left there. Each result is handed to
    self.compare_files with the "old" key.
    """
    logging.info("test_low")
    result_path = self.file_name("low")
    cache_root = LocalCache("local_cache/low")

    # Only the first pass starts from a removed cache.
    cache_root.rmtree()
    for _pass in range(2):
        frame = single_snp_scale(test_snps=self.bed,
                                 pheno=self.phen_fn,
                                 covar=self.cov_fn,
                                 cache=cache_root,
                                 output_file_name=result_path)
        self.compare_files(frame, "old")
def test_one_fast(self):
    """Run single_snp_scale once on distributed chromosome-3 test SNPs, with no cache argument.

    The result is handed to self.compare_files with the "old_one" key.
    """
    logging.info("test_one_fast")
    result_path = self.file_name("one_fast")

    # Empty the 'one_fast' area before writing the distributed copy.
    snps_area = LocalCache("local_cache").join('one_fast')
    snps_area.rmtree()

    # Test only on chromosome 3
    on_chrom3 = self.bed.pos[:, 0] == 3
    chrom3_dist = DistributedBed.write(snps_area,
                                       self.bed[:, on_chrom3],
                                       piece_per_chrom_count=2)

    frame = single_snp_scale(test_snps=chrom3_dist,
                             pheno=self.phen_fn,
                             covar=self.cov_fn,
                             G0=self.bed,
                             output_file_name=result_path)
    self.compare_files(frame, "old_one")
def test_multipheno(self):
    """Check single_snp_scale on multi-column phenotypes against per-column single_snp runs.

    For 2, 5 and 1 phenotype columns, random phenotype values are drawn from a
    seeded RandomState. The reference is the concatenation of one single_snp
    result per column; the frame under test is a single single_snp_scale call
    on all columns at once. For every SNP the sorted p-values of both frames
    must agree to within 1e-5.

    Raises:
        AssertionError: if the frames differ in row count, if a SNP has a
            different number of p-values in the two frames, or if any
            p-value differs from the reference by 1e-5 or more.
    """
    logging.info("test_multipheno")
    random_state = RandomState(29921)
    pheno_reference = Pheno(self.phen_fn).read()

    for pheno_count in [2, 5, 1]:
        val = random_state.normal(loc=pheno_count,
                                  scale=pheno_count,
                                  size=(pheno_reference.iid_count,
                                        pheno_count))
        pheno_col = ['pheno{0}'.format(i) for i in range(pheno_count)]
        pheno_multi = SnpData(iid=pheno_reference.iid,
                              sid=pheno_col,
                              val=val)

        reference = pd.concat([
            single_snp(test_snps=self.bed,
                       pheno=pheno_multi[:, pheno_index],
                       covar=self.cov_fn)
            for pheno_index in range(pheno_count)
        ])
        frame = single_snp_scale(test_snps=self.bed,
                                 pheno=pheno_multi,
                                 covar=self.cov_fn)

        # The old message formatted an undefined name ('reffile'), which would
        # have turned a length mismatch into a NameError.
        assert len(frame) == len(reference), (
            "# of pairs differs from reference: {0} vs {1} "
            "(pheno_count={2})".format(len(frame), len(reference),
                                       pheno_count))

        # This ignores which pheno produces which pvalue
        for sid in sorted(set(reference.SNP)):
            pvalue_frame = np.array(
                sorted(frame[frame['SNP'] == sid].PValue))
            pvalue_reference = np.array(
                sorted(reference[reference['SNP'] == sid].PValue))

            # Guard against silent broadcasting when the per-SNP counts differ.
            assert pvalue_frame.shape == pvalue_reference.shape, (
                "pair {0} has {1} p-values but the reference has {2}".format(
                    sid, pvalue_frame.size, pvalue_reference.size))

            # '.all' must be called: the bare bound method is always truthy,
            # which made this assertion unable to fail.
            assert (
                abs(pvalue_frame - pvalue_reference) < 1e-5
            ).all(), "pair {0} differs too much from reference".format(sid)