def testThreadJob(self):
    """Multithread the square of an array"""
    numpy.random.seed(0)
    a = numpy.random.randint(0, 100, [1000000])
    b = numpy.empty([1000000], dtype='int64')
    expected = a ** 2

    def targ(ina, outa, start, count):
        outa[start:start + count] = ina[start:start + count] ** 2

    tb.thread_job(len(a), 0, targ, a, b)
    self.assertEqual(list(expected), list(b))
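# For reference, a minimal sketch of the thread_job interface the test above
# relies on. This is an assumption drawn from the call sites (thread_job(
# job_size, thread_count, target, *args) invoking target(*args, start, count)
# over contiguous chunks), not spacepy's actual implementation;
# thread_job_sketch and the default worker count are hypothetical.
import math
import threading

def thread_job_sketch(job_size, thread_count, target, *args):
    """Illustrative stand-in for tb.thread_job: split range(job_size)
    into contiguous chunks and run target(*args, start, count) on each
    chunk in its own thread."""
    if thread_count == 0:
        thread_count = 4  #assumption: 0 means "choose a default"
    chunk = math.ceil(job_size / thread_count)
    threads = []
    for start in range(0, job_size, chunk):
        count = min(chunk, job_size - start)
        t = threading.Thread(target=target, args=args + (start, count))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()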
def aa_ci(self, inter, n_boots=1000, seed=None):
    """Get bootstrap confidence intervals for association number

    Requires input of desired confidence interval, e.g.:

    >>> obj.aa_ci(95)

    Upper and lower confidence limits are added to :attr:`~PPro.ci`.

    After calling, :attr:`~PPro.conf_above` will contain the confidence
    (in percent) that the association number at that lag is *above* the
    asymptotic association number. (The confidence of being below is
    100 - conf_above.)

    For minor variations in conf_above to be meaningful, a *large*
    number of bootstraps is required. (Roughly, 1000 to be meaningful
    to the nearest percent; 10000 to be meaningful to a tenth of a
    percent.) A conf_above of 100 usually indicates an insufficient
    sample size to resolve, *not* perfect certainty.

    Note also that a 95% chance of being above indicates an exclusion
    from the *90%* confidence interval!

    Parameters
    ==========
    inter : float
        percentage confidence interval to calculate
    n_boots : int, optional
        number of bootstrap iterations to run
    seed : int, optional
        seed for the random number generator. If not specified, Python
        code will use numpy's RNG and its current seed; C code will
        seed from the clock.

    Warnings
    ========
    If ``seed`` is specified on numpy 1.5 and earlier, the available
    entropy is reduced to work around a random number generator bug.
    Upgrade to numpy 1.6 to avoid this limitation. Because of this
    workaround, if a seed is specified, results from numpy 1.5 are not
    reproducible with numpy 1.6.
    """
    lags = self.lags
    ci_low = np.empty([len(lags)])
    ci_high = np.empty([len(lags)])
    conf_above = np.empty([len(lags)])
    if seed is not None:
        np.random.seed(seed)
        #TODO: This is a bit ugly and we potentially lose entropy;
        #should be unsigned long, but numpy forces signed int...
        minseed = -sys.maxsize - 1
        maxseed = sys.maxsize
        (maj, minor) = np.__version__.split('.')[0:2]
        if int(maj) < 2 and int(minor) < 6:
            warnings.warn('Upgrade to numpy 1.6 to avoid reduced entropy.')
            maxseed = sys.maxsize
            minseed = 0
        #generate one RNG seed per lag so each lag bootstraps independently
        lag_seeds = np.random.randint(minseed, maxseed, [len(lags)])
        newtype = np.dtype('u' + str(lag_seeds.dtype))
        lag_seeds = np.require(lag_seeds, dtype=newtype)
    if not lib.have_libspacepy:
        #pure-Python fallback: bootstrap each lag with boots_ci
        for i in range(len(lags)):
            if seed is not None:
                np.random.seed(lag_seeds[i])
            ci_low[i], ci_high[i], conf_above[i] = boots_ci(
                self.n_assoc[i, :], n_boots, inter, np.add.reduce,
                seed=None, target=self.asympt_assoc)
    else:
        perc_low = (100. - inter) / 2.  #set confidence interval
        perc_high = inter + perc_low
        dtype = 'int' + str(ctypes.sizeof(ctypes.c_long) * 8)
        assoc_totals = np.empty([len(lags), n_boots],
                                dtype=dtype, order='C')
        if seed is None:
            clock_seed = ctypes.c_int(1)
            lag_seeds = np.empty([len(lags)], dtype=dtype)
        else:
            clock_seed = ctypes.c_int(0)

        def thread_targ(start, size):
            lib.aa_ci(
                self.n_assoc[start:start + size].ctypes.data_as(lib.ulptr),
                assoc_totals[start:start + size].ctypes.data_as(lib.ulptr),
                size, len(self.process1), n_boots,
                lag_seeds[start:start + size].ctypes.data_as(lib.ulptr),
                clock_seed)
        tb.thread_job(len(lags), 0, thread_targ)
        for i in range(len(lags)):
            assoc_totals[i, :].sort()
            #note: matplotlib.mlab.prctile was removed in matplotlib 3.1;
            #the later version of this function uses np.percentile instead
            ci_low[i], ci_high[i] = matplotlib.mlab.prctile(
                assoc_totals[i, :], p=(perc_low, perc_high))
            conf_above[i] = 100.0 - value_percentile(assoc_totals[i, :],
                                                     self.asympt_assoc)
    self.ci = [ci_low, ci_high]
    self.conf_above = conf_above
    return None
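# A numpy-only sketch of the per-lag bootstrap that boots_ci is assumed to
# perform above: resample the association counts with replacement, total
# each resample (hence the np.add.reduce argument), and take percentiles
# of the bootstrap totals. bootstrap_ci_sketch is illustrative only; the
# real boots_ci lives elsewhere in the module.
import numpy as np

def bootstrap_ci_sketch(n_assoc_lag, n_boots, inter, target, seed=None):
    rng = np.random.RandomState(seed)
    n = len(n_assoc_lag)
    totals = np.empty([n_boots])
    for i in range(n_boots):
        idx = rng.randint(0, n, n)  #resample indices with replacement
        totals[i] = np.add.reduce(n_assoc_lag[idx])  #bootstrap total
    perc_low = (100. - inter) / 2.  #same interval split as aa_ci
    perc_high = inter + perc_low
    ci_low, ci_high = np.percentile(totals, (perc_low, perc_high))
    #confidence (percent) that the association number lies above the target
    conf_above = 100.0 * np.mean(totals > target)
    return ci_low, ci_high, conf_above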
def aa_ci(self, inter, n_boots=1000, seed=None):
    """Get bootstrap confidence intervals for association number

    Requires input of desired confidence interval, e.g.:

    >>> obj.aa_ci(95)

    Upper and lower confidence limits are added to :attr:`~PPro.ci`.

    After calling, :attr:`~PPro.conf_above` will contain the confidence
    (in percent) that the association number at that lag is *above* the
    asymptotic association number. (The confidence of being below is
    100 - conf_above.)

    For minor variations in conf_above to be meaningful, a *large*
    number of bootstraps is required. (Roughly, 1000 to be meaningful
    to the nearest percent; 10000 to be meaningful to a tenth of a
    percent.) A conf_above of 100 usually indicates an insufficient
    sample size to resolve, *not* perfect certainty.

    Note also that a 95% chance of being above indicates an exclusion
    from the *90%* confidence interval!

    Parameters
    ==========
    inter : float
        percentage confidence interval to calculate
    n_boots : int, optional
        number of bootstrap iterations to run
    seed : int, optional
        seed for the random number generator. If not specified, Python
        code will use numpy's RNG and its current seed; C code will
        seed from the clock.

    Warnings
    ========
    If ``seed`` is specified, results may not be reproducible between
    systems with different sizes of the C long type. Note that 64-bit
    Windows uses a 32-bit long, so results will be the same between
    64-bit and 32-bit Windows, but not between 64-bit Windows and other
    64-bit operating systems. If ``seed`` is not specified, results
    are not reproducible anyway.
    """
    lags = self.lags
    ci_low = np.empty([len(lags)])
    ci_high = np.empty([len(lags)])
    conf_above = np.empty([len(lags)])
    long_size = ctypes.sizeof(ctypes.c_long) * 8
    if seed is not None:
        np.random.seed(seed)
        minseed = -2 ** (long_size - 1)
        maxseed = 2 ** (long_size - 1) - 1
        #randint used to be system-size signed integer only,
        #so used that and cast to the required unsigned later.
        #Cast does not lose entropy: negative numbers map to high positives.
        #For reproducibility, keep doing that even though dtype
        #kwarg now available.
        lag_seeds = np.random.randint(minseed, maxseed, [len(lags)])
        newtype = np.dtype('u' + str(lag_seeds.dtype))
        lag_seeds = np.require(lag_seeds, dtype=newtype)
    if not lib.have_libspacepy:
        #pure-Python fallback: bootstrap each lag with boots_ci
        for i in range(len(lags)):
            if seed is not None:
                np.random.seed(lag_seeds[i])
            ci_low[i], ci_high[i], conf_above[i] = boots_ci(
                self.n_assoc[i, :], n_boots, inter, np.add.reduce,
                seed=None, target=self.asympt_assoc)
    else:
        perc_low = (100. - inter) / 2.  #set confidence interval
        perc_high = inter + perc_low
        dtype = 'int' + str(long_size)
        assoc_totals = np.empty([len(lags), n_boots],
                                dtype=dtype, order='C')
        if seed is None:
            clock_seed = ctypes.c_int(1)
            lag_seeds = np.empty([len(lags)], dtype=dtype)
        else:
            clock_seed = ctypes.c_int(0)

        def thread_targ(start, size):
            lib.aa_ci(
                self.n_assoc[start:start + size].ctypes.data_as(lib.ulptr),
                assoc_totals[start:start + size].ctypes.data_as(lib.ulptr),
                size, len(self.process1), n_boots,
                lag_seeds[start:start + size].ctypes.data_as(lib.ulptr),
                clock_seed)
        tb.thread_job(len(lags), 0, thread_targ)
        for i in range(len(lags)):
            assoc_totals[i, :].sort()
            ci_low[i], ci_high[i] = np.percentile(
                assoc_totals[i, :], (perc_low, perc_high))
            conf_above[i] = 100.0 - value_percentile(assoc_totals[i, :],
                                                     self.asympt_assoc)
    self.ci = [ci_low, ci_high]
    self.conf_above = conf_above
    return None
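# value_percentile is assumed to return the percentile rank of a value
# within an already-sorted sample, as used in the loop above. A sketch
# with np.searchsorted, splitting ties at the midpoint, might look like
# this; value_percentile_sketch is hypothetical, not the module's code.
import numpy as np

def value_percentile_sketch(sorted_data, value):
    lo = np.searchsorted(sorted_data, value, side='left')
    hi = np.searchsorted(sorted_data, value, side='right')
    #midpoint of any tied span gives a symmetric rank for repeated values
    return 100.0 * (lo + hi) / (2.0 * len(sorted_data))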