class GaussianTheta(PhaseSpace):
    """Longitudinal Gaussian particle phase space distribution.

    Generates uncorrelated normal ``theta`` and ``delta_E`` coordinates
    from a private RandomState.  When an ``is_accepted`` callable is
    given, every particle for which it returns a falsy value is redrawn
    until it is accepted.
    """

    def __init__(self, sigma_theta, sigma_dE, is_accepted=None,
                 generator_seed=None):
        """Store the distribution widths and seed the private generator.

        sigma_theta, sigma_dE: standard deviations of the two coordinates.
        is_accepted: optional callable, called as
            ``is_accepted(theta_i, delta_E_i)`` for a single particle.
        generator_seed: seed passed to RandomState.seed; None lets numpy
            choose the seed.
        """
        self.sigma_theta = sigma_theta
        self.sigma_dE = sigma_dE
        self.is_accepted = is_accepted

        self.random_state = RandomState()
        self.random_state.seed(generator_seed)

    def generate(self, beam):
        """Fill ``beam.theta`` and ``beam.delta_E`` with Gaussian samples."""
        beam.theta = self.sigma_theta * self.random_state.randn(beam.n_macroparticles)
        beam.delta_E = self.sigma_dE * self.random_state.randn(beam.n_macroparticles)
        if self.is_accepted:
            self._redistribute(beam)

    def _redistribute(self, beam):
        """Redraw, one particle at a time, every rejected particle."""
        n = beam.n_macroparticles
        theta = beam.theta.copy()
        delta_E = beam.delta_E.copy()
        # range() instead of the Python-2-only xrange() so that this also
        # runs on Python 3.
        for i in range(n):
            while not self.is_accepted(theta[i], delta_E[i]):
                theta[i] = self.sigma_theta * self.random_state.randn()
                delta_E[i] = self.sigma_dE * self.random_state.randn()

        beam.theta = theta
        beam.delta_E = delta_E
class GaussianX(PhaseSpace):
    """Gaussian generator for the horizontal phase space coordinates.

    Draws uncorrelated normal ``x`` and ``xp`` values, scaled by
    ``sigma_x`` and ``sigma_xp``, from a private RandomState.
    """

    def __init__(self, sigma_x, sigma_xp, generator_seed=None):
        """Keep the two standard deviations and seed the private generator.

        generator_seed is handed to RandomState; None lets numpy choose
        the seed.
        """
        self.sigma_x = sigma_x
        self.sigma_xp = sigma_xp
        self.random_state = RandomState(generator_seed)

    @classmethod
    def from_optics(cls, alpha_x, beta_x, epsn_x, betagamma,
                    generator_seed=None):
        """Build a GaussianX from optics functions.

        beta_x is given in meters and epsn_x in micrometers.  Note that
        alpha_x is accepted but not used by this computation.
        """
        sigma_x = np.sqrt(beta_x * epsn_x * 1e-6 / betagamma)
        return cls(sigma_x, sigma_x / beta_x, generator_seed)

    def generate(self, beam):
        """Assign freshly drawn ``x`` and then ``xp`` arrays to the beam."""
        rng = self.random_state
        n_particles = beam.n_macroparticles
        beam.x = self.sigma_x * rng.randn(n_particles)
        beam.xp = self.sigma_xp * rng.randn(n_particles)
class RandomGenerator(object):
    """Thin wrapper around a numpy RandomState.

    Offers a few helpers (random, randint, sample, uniform) and forwards
    every other attribute lookup to the wrapped RandomState.
    """

    def __init__(self, seed=None):
        self._random = RandomState(seed=seed)

    def seed(self, seed):
        """Re-seed the wrapped generator."""
        self._random.seed(seed)

    def random(self):
        """Return a single float drawn with RandomState.rand()."""
        return self._random.rand()

    def randint(self, a, b=None):
        """Return one integer from [a, b), or from [0, a) when b is None.

        The upper bound is exclusive because the draw is delegated to
        RandomState.randint.
        """
        if b is None:
            a, b = 0, a
        r = self._random.randint(a, high=b, size=1)
        return r[0]

    def sample(self, population, k):
        """Return a list of k elements drawn without replacement."""
        if k == 0:
            return []
        return list(self._random.choice(population, size=k, replace=False))

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails.  If '_random'
        # itself is missing (instance created without __init__, e.g. via
        # __new__ during copying), delegating would re-enter this method
        # forever, so fail with a regular AttributeError instead.
        if attr == '_random':
            raise AttributeError(attr)
        return getattr(self._random, attr)

    def __getstate__(self):
        return {'_random': self._random}

    def __setstate__(self, d):
        self._random = d['_random']

    def uniform(self, low=0.0, high=1.0, size=None):
        """Delegate to RandomState.uniform(low, high, size)."""
        return self._random.uniform(low, high, size)
def wrapper(n: int, trend: Literal["n", "c", "ct", "ctt"], b: int, rng_seed: int = 0) -> ndarray:
    """
    Wraps and blocks the main simulation so that the maximum amount of memory
    can be controlled on multi processor systems when executing in parallel

    Parameters
    ----------
    n : int
        Passed to adf_simulation as its first argument; also used to size
        the blocks.
    trend : {"n", "c", "ct", "ctt"}
        Passed through to adf_simulation.
    b : int
        Total number of results to produce.
    rng_seed : int
        Seed of the RandomState shared by all blocks.

    Returns
    -------
    ndarray
        Array of length b holding the block results in order.
    """
    rng = RandomState()
    rng.seed(rng_seed)
    res = zeros(b)
    # Always process at least one replication per block: for large n the
    # memory-derived size truncates to 0, which would make range() raise.
    block_size = max(1, int(2**20.0 * MAX_MEMORY_SIZE / (8.0 * n)))
    for st in range(0, b, block_size):
        count = min(block_size, b - st)
        res[st:st + count] = adf_simulation(n, trend, count, rng)
    return res
class BulkRandomGenerator(object):
    """Bulk generator of random integers for tournament seeding and
    reproducibility. Bulk generation of random values is more efficient.
    Use this class like a generator: call ``next()`` on it or iterate
    over it (the stream never ends)."""

    def __init__(self, seed=None, batch_size: int = 1000):
        """Seed a private RandomState and draw the first batch.

        seed: passed to RandomState.seed.
        batch_size: number of integers drawn per refill.
        """
        self._random_generator = RandomState()
        self._random_generator.seed(seed)
        self._ints = None
        self._batch_size = batch_size
        self._index = 0
        self._fill_ints()

    def _fill_ints(self):
        # Generate more random values. They are stored as a concrete array
        # (not a generator object) since generators cannot be pickled.
        self._ints = self._random_generator.randint(
            low=0, high=2**32 - 1, size=self._batch_size, dtype="uint64")
        self._index = 0

    def __iter__(self):
        # Completes the iterator protocol so that iter(), for-loops and
        # itertools helpers accept instances.
        return self

    def __next__(self):
        """Return the next buffered integer, refilling the buffer if empty."""
        try:
            x = self._ints[self._index]
        except IndexError:
            self._fill_ints()
            x = self._ints[self._index]
        self._index += 1
        return x
class GaussianY(PhaseSpace):
    """Gaussian generator for the vertical phase space coordinates.

    Draws uncorrelated normal ``y`` and ``yp`` values, scaled by
    ``sigma_y`` and ``sigma_yp``, from a private RandomState.
    """

    def __init__(self, sigma_y, sigma_yp, generator_seed=None):
        """Keep the two standard deviations and seed the private generator.

        generator_seed is handed to RandomState; None lets numpy choose
        the seed.
        """
        self.sigma_y = sigma_y
        self.sigma_yp = sigma_yp
        self.random_state = RandomState(generator_seed)

    @classmethod
    def from_optics(cls, alpha_y, beta_y, epsn_y, betagamma,
                    generator_seed=None):
        """Build a GaussianY from optics functions.

        beta_y is given in meters and epsn_y in micrometers.  Note that
        alpha_y is accepted but not used by this computation.
        """
        sigma_y = np.sqrt(beta_y * epsn_y * 1e-6 / betagamma)
        return cls(sigma_y, sigma_y / beta_y, generator_seed)

    def generate(self, beam):
        """Assign freshly drawn ``y`` and then ``yp`` arrays to the beam."""
        rng = self.random_state
        n_particles = beam.n_macroparticles
        beam.y = self.sigma_y * rng.randn(n_particles)
        beam.yp = self.sigma_yp * rng.randn(n_particles)
def rws_test():
    """Time roulette-wheel selection done two ways and print the results.

    A hand-written scan over the cumulative probabilities is compared
    with RandomState.choice on the same normalised weights; elapsed
    times and both index selections are printed.
    """
    size = 10000
    selection = 1000
    random_state = RandomState()
    probs = random_state.uniform(size=size)
    probs /= sum(probs)
    # Seed only after the weights are drawn, so the weights differ per run
    # while the selection draws start from a fixed state.
    random_state.seed(5)

    def standard_method():
        t.tic()
        cum_probs = np.cumsum(probs)
        picked = []
        for _ in range(selection):
            r = random_state.random()
            # First slot whose cumulative probability reaches the draw.
            hit = next((i for i in range(size) if r <= cum_probs[i]), None)
            if hit is not None:
                picked.append(hit)
        return picked

    def numpy_method():
        return random_state.choice(size, size=selection, replace=True, p=probs)

    t = TicToc()
    t.tic()
    result_standard_method = standard_method()
    elp_std = t.tocvalue(restart=True)
    result_numpy_method = numpy_method()
    elp_np = t.tocvalue()

    print('standard: {}'.format(elp_std))
    print('numpy: {}'.format(elp_np))
    print(result_numpy_method)
    print(result_standard_method)
def seed(self, seed=None):
    """
    Re-seed this generator and remember the seed that was used.

    ``seed`` is forwarded unchanged to ``RandomState.seed`` (an integer,
    a sequence of integers, or None to let numpy pick one) and is then
    stored on the instance as ``initial_seed``.
    """
    RandomState.seed(self, seed=seed)
    self.initial_seed = seed
class GaussianZ(PhaseSpace):
    """Longitudinal Gaussian particle phase space distribution."""

    def __init__(self, sigma_z, sigma_dp, is_accepted=None,
                 generator_seed=None):
        """Initiates the longitudinal beam coordinates to a given
        Gaussian shape. If the argument is_accepted is set (e.g. to the
        is_in_separatrix method of a RFSystems object, or similar),
        macroparticles will be re-initialised until is_accepted reports
        all of them as accepted.

        is_accepted is called as ``is_accepted(z, dp)`` with the full
        coordinate arrays and must return a boolean array (it is
        inverted with ``~``).
        """
        self.sigma_z = sigma_z
        self.sigma_dp = sigma_dp
        self.is_accepted = is_accepted

        self.random_state = RandomState()
        self.random_state.seed(generator_seed)

    @classmethod
    def from_optics(cls, beta_z, epsn_z, p0, is_accepted=None,
                    generator_seed=None):
        """Initialise GaussianZ from the given optics functions.
        For the argument is_accepted see __init__.
        """
        sigma_z = np.sqrt(beta_z*epsn_z/(4*np.pi) * e/p0)
        sigma_dp = sigma_z / beta_z
        return cls(sigma_z, sigma_dp, is_accepted, generator_seed)

    def generate(self, beam):
        """Fill ``beam.z`` and ``beam.dp`` with Gaussian samples."""
        beam.z = self.sigma_z * self.random_state.randn(beam.n_macroparticles)
        beam.dp = self.sigma_dp * self.random_state.randn(beam.n_macroparticles)
        if self.is_accepted:
            self._redistribute(beam)

    def _redistribute(self, beam):
        """Redraw all rejected particles at once until none is rejected."""
        z = beam.z.copy()
        dp = beam.dp.copy()

        mask_out = ~self.is_accepted(z, dp)
        while mask_out.any():
            n_gen = np.sum(mask_out)
            z[mask_out] = self.sigma_z * self.random_state.randn(n_gen)
            dp[mask_out] = self.sigma_dp * self.random_state.randn(n_gen)
            mask_out = ~self.is_accepted(z, dp)
            # Call form of print: valid on Python 3 and prints the same
            # text on Python 2.
            print('Reiterate on non-accepted particles')

        beam.z = z
        beam.dp = dp
def test_segmentation():
    """Interactively preview augmented VOC segmentation samples.

    Shows image, target and their blend side by side in an OpenCV
    window for up to 1000 frames.  ESC or 'q' quits; 'c' (or several
    frames on the same sample) switches to another random sample.
    """
    aug_rng = RandomState()
    pick_rng = RandomState()
    if args.seed > 0:
        aug_rng.seed(args.seed)
        pick_rng.seed(args.seed)

    # Disabled options kept for reference: RandomResize(1, 1.5) inside
    # RandomCompose, ElasticTransform(100) after Scale, and
    # [SubtractMean(mean=VOC.MEAN), None] as the last step.
    steps = [
        [ColorJitter(prob=0.75), None],
        Merge(),
        Expand((0.8, 1.5)),
        RandomCompose([
            RandomRotate(10),
            RandomShift(0.1),
        ]),
        Scale(300),
        RandomCrop(300),
        HorizontalFlip(),
        Split([0, 3], [3, 6]),
    ]
    transform = Compose(steps, aug_rng, border='constant',
                        fillval=VOC.MEAN, anchor_index=3)

    voc_dataset = VOCSegmentation(root=args.root,
                                  image_set=[('2007', 'trainval')],
                                  transform=transform, instance=False)
    viz = Viz()

    results = []
    count = 0
    sample_idx = pick_rng.choice(len(voc_dataset))
    for _ in range(1000):
        img, target = voc_dataset[sample_idx]
        blended = viz.blend_segmentation(img, target)

        panel = np.hstack([img, target, blended])
        results.append(panel)
        # Reverse the channel axis before handing the frame to OpenCV.
        cv2.imshow('result', panel[..., ::-1])

        key = cv2.waitKey(500)
        if key in (27, ord('q')):  # ESC / 'q'
            break
        if key == ord('c') or count >= 3:
            count = 0
            sample_idx = pick_rng.choice(len(voc_dataset))
        count += 1
def test_random_choice():
    """Check nestle.random_choice() against RandomState.choice().

    nestle.random_choice() exists to mimic np.random.choice() for
    numpy < v1.7.0; where both are available they must pick the same
    index when started from the same generator state.
    """
    rstate = RandomState(0)
    weights = rstate.rand(10)
    weights /= weights.sum()

    def draw_after_seeding(seed, draw):
        # Reset the shared generator so both draws start identically.
        rstate.seed(seed)
        return draw()

    for seed in range(10):
        i = draw_after_seeding(
            seed, lambda: rstate.choice(10, p=weights))
        j = draw_after_seeding(
            seed, lambda: nestle.random_choice(10, p=weights, rstate=rstate))
        assert i == j
def pseudorandom(sequence, seed=None):
    '''
    Endlessly yield elements of the sequence picked uniformly at random
    (with replacement).  The same seed reproduces the same stream.
    '''
    # A private generator is used so that other code drawing random data
    # (e.g. noise) cannot disturb this stream.
    from numpy.random import RandomState
    rng = RandomState(seed)
    size = len(sequence)
    while True:
        yield sequence[rng.randint(size)]
def test_bboxes():
    """Interactively preview augmented VOC detection samples.

    Draws the boxes and labels on each augmented image and shows it in
    an OpenCV window for up to 100 frames.  ESC or 'q' quits; 'c' (or
    several frames on the same sample) switches to another random
    sample.  Frames without any label are skipped.
    """
    aug_rng = RandomState()
    pick_rng = RandomState()
    if args.seed > 0:
        aug_rng.seed(args.seed)
        pick_rng.seed(args.seed)

    # Disabled option kept for reference: [SubtractMean(mean=VOC.MEAN)]
    # as the last step.
    steps = [
        [ColorJitter(prob=0.5)],  # or write [ColorJitter(), None]
        BoxesToCoords(),
        HorizontalFlip(),
        Expand((1, 4), prob=0.5),
        ObjectRandomCrop(),
        Resize(300),
        CoordsToBoxes(),
    ]
    transform = Compose(steps, aug_rng, mode=None, fillval=VOC.MEAN,
                        outside_points='clamp')

    viz = Viz()
    voc_dataset = VOCDetection(root=args.root,
                               image_set=[('2007', 'trainval')],
                               keep_difficult=True, transform=transform)

    results = []
    count = 0
    sample_idx = pick_rng.choice(len(voc_dataset))
    for _ in range(100):
        img, boxes, labels = voc_dataset[sample_idx]
        if len(labels) == 0:
            continue

        img = viz.draw_bbox(img, boxes, labels, True)
        results.append(img)
        # Reverse the channel axis before handing the frame to OpenCV.
        cv2.imshow('0', img[:, :, ::-1])

        key = cv2.waitKey(500)
        if key in (27, ord('q')):  # ESC / 'q'
            break
        if key == ord('c') or count >= 5:
            count = 0
            sample_idx = pick_rng.choice(len(voc_dataset))
        count += 1
def wrapper(nobs: int, b: int, trend: str = "c", max_memory: int = 1024) -> np.ndarray:
    """
    Run the KPSS simulation in memory-bounded blocks and report progress.

    The b replications are split into blocks sized so that one block of
    nobs-by-block float64 values stays within max_memory MiB (at least
    one replication per block).  All blocks share one RandomState seeded
    with 0, and a progress line is printed after each block.
    """
    rng = RandomState()
    rng.seed(0)
    block = max((max_memory * 2**20) // 8 // nobs, 1)
    results = np.zeros(b)
    started = dt.datetime.now()
    time_fmt = "{0:d}:{1:0>2d}:{2:0>2d}"
    msg = "trend {0}, {1} reps remaining, " + "elapsed {2}, remaining {3}"

    def as_hms(seconds):
        # Whole seconds rendered as h:mm:ss.
        minutes, secs = divmod(int(seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return time_fmt.format(hours, minutes, secs)

    for completed in range(0, b, block):
        b_eff = min(b - completed, block)
        results[completed:completed + b_eff] = simulate_kpss(
            nobs, b_eff, trend=trend, rng=rng)
        # May go negative on the last block; clamped wherever it is shown.
        remaining = b - completed - block
        elapsed = (dt.datetime.now() - started).total_seconds()
        expected_remaining = max(0, remaining) * (elapsed / (b - remaining))
        print(
            msg.format(trend, max(0, remaining), as_hms(elapsed),
                       as_hms(expected_remaining)))

    return results
def pseudorandom(sequence, c=np.inf, key=None, seed=None):
    '''
    Yields randomly selected elements from the sequence (with
    replacement), stopping after c elements.

    Parameters
    ----------
    {common_docstring}
    seed : int
        Seed for random number generator.
    '''
    # A private generator is used so that other code drawing random data
    # (e.g. noise) cannot disturb this stream.
    rng = RandomState(seed)
    size = len(sequence)
    emitted = 0
    while emitted < c:
        yield sequence[rng.randint(size)]
        emitted += 1
def shuffled_set(sequence, c=np.inf, key=None, seed=None):
    '''
    Yields the elements of the sequence in random order without
    repetition. Once every element has been yielded, a new random order
    of the whole sequence is started; this is repeated for c cycles.
    An empty sequence yields nothing.

    Parameters
    ----------
    {common_docstring}
    seed : int
        Seed for random number generator.
    '''
    cycle = 0
    state = RandomState()
    state.seed(seed)
    while cycle < c:
        indices = list(range(len(sequence)))
        if not indices:
            # Nothing to yield: without this exit the default c=inf would
            # spin forever without ever producing a value.
            return
        state.shuffle(indices)
        for i in indices:
            yield sequence[i]
        cycle += 1
def wrapper(nobs, b, trend='c', max_memory=250):
    """
    Run the KPSS simulation block by block to bound memory use.

    Each block holds as many replications as fit in max_memory MiB of
    float64 values for nobs observations (never fewer than one).  All
    blocks draw from one RandomState seeded with 0.  Returns the b
    simulated statistics in order.
    """
    rng = RandomState()
    rng.seed(0)
    per_block = max((max_memory * 2 ** 20) // 8 // nobs, 1)
    results = np.zeros(b)
    done = 0
    while done < b:
        chunk = min(b - done, per_block)
        results[done:done + chunk] = simulate_kpss(nobs, chunk, trend=trend,
                                                   rng=rng)
        done += per_block
    return results
def wrapper(nobs, b, trend='c', max_memory=250):
    """
    Simulate b KPSS statistics in blocks small enough to fit in memory.

    The block length is the number of replications whose nobs float64
    draws fit in max_memory MiB, with a minimum of one.  A single
    RandomState seeded with 0 feeds every block.
    """
    rng = RandomState()
    rng.seed(0)
    reps_per_block = max(max_memory * 2**20 // 8 // nobs, 1)
    results = np.zeros(b)
    remaining = b
    while remaining > 0:
        start = b - remaining
        size = min(remaining, reps_per_block)
        block = simulate_kpss(nobs, size, trend=trend, rng=rng)
        results[start:start + size] = block
        remaining -= reps_per_block
    return results
def simulate_kpss(nobs, B, trend='c', rng=None):
    """
    Simulate B replications of the KPSS test statistic for nobs
    observations.

    Standard normal draws are regressed on a constant (plus a trend
    added by add_trend when trend == 'ct'); the statistic is built from
    the cumulated residuals.  Without an rng, a RandomState seeded with
    0 is used.  Returns an array with one statistic per replication.
    """
    if rng is None:
        rng = RandomState()
        rng.seed(0)

    shocks = rng.standard_normal((nobs, B))
    regressors = np.ones((nobs, 1))
    if trend == 'ct':
        regressors = add_trend(regressors, trend='t')

    # Least-squares detrending through the pseudo-inverse.
    coefs = np.linalg.pinv(regressors).dot(shocks)
    resid = shocks - regressors.dot(coefs)

    partial_sums = np.cumsum(resid, axis=0)
    lam = np.mean(resid ** 2.0, axis=0)
    return 1 / (nobs ** 2.0) * np.sum(partial_sums ** 2.0, axis=0) / lam
def wrapper(n, trend, b, seed=0):
    """
    Wraps and blocks the main simulation so that the maximum amount of memory
    can be controlled on multi processor systems when executing in parallel

    n and trend are passed through to adf_simulation, b is the total
    number of results to produce and seed seeds the RandomState shared
    by all blocks.  Returns an array of length b with the block results
    in order.
    """
    rng = RandomState()
    rng.seed(seed)
    res = zeros(b)
    # Always process at least one replication per block: for large n the
    # memory-derived size truncates to 0, which would make range() raise.
    block_size = max(1, int(2 ** 20.0 * MAX_MEMORY_SIZE / (8.0 * n)))
    for st in range(0, b, block_size):
        count = min(block_size, b - st)
        res[st:st + count] = adf_simulation(n, trend, count, rng)
    return res
class Streams(object):
    """Seeded random stream used to draw start delays."""

    # Probabilities for the delay values 2..9, one table per household type.
    _PROB_NO_KIDS_NO_OLDS = (0.2, 0.3, 0.3, 0.15, 0.05, 0.0, 0.0, 0.0)
    _PROB_KIDS_ONLY = (0.0, 0.1, 0.15, 0.30, 0.3, 0.15, 0.0, 0.0)
    _PROB_OLDS_ONLY = (0.0, 0.0, 0.0, 0.1, 0.3, 0.3, 0.15, 0.15)
    _PROB_OTHERWISE = (0.0, 0.0, 0.0, 0.0, 0.2, 0.3, 0.3, 0.2)

    def __init__(self, startscape_seed):
        self.startscape_rand = RandomState(startscape_seed)

    # An earlier, disabled variant of generate_startscape_rand made every
    # delay zero (single value 0 with probability 1 for every household
    # type).  The active version below draws the delay from a
    # distribution instead.
    def generate_startscape_rand(self, members):
        """Draw one delay in 2..9 for a household.

        ``members`` must provide 'kids' and 'olds' counts; they select
        which probability table is used for the draw.
        """
        if members['kids'] == 0 and members['olds'] == 0:
            startscape_prob = self._PROB_NO_KIDS_NO_OLDS
        elif members['kids'] > 0 and members['olds'] == 0:
            startscape_prob = self._PROB_KIDS_ONLY
        elif members['kids'] == 0 and members['olds'] > 0:
            startscape_prob = self._PROB_OLDS_ONLY
        else:
            startscape_prob = self._PROB_OTHERWISE
        delay_values = np.arange(2, 10)
        return self.startscape_rand.choice(delay_values, p=startscape_prob)
def wrapper(nobs, b, trend='c', max_memory=1024):
    """
    Run the KPSS simulation in memory-bounded blocks, printing progress.

    Blocks hold as many replications as fit in max_memory MiB of float64
    values for nobs observations (at least one).  One RandomState seeded
    with 0 feeds all blocks.  After each block a line with the elapsed
    and the estimated remaining time is printed.
    """
    rng = RandomState()
    rng.seed(0)
    per_block = max((max_memory * 2 ** 20) // 8 // nobs, 1)
    remaining = b
    results = np.zeros(b)
    started = dt.datetime.now()
    time_fmt = '{0:d}:{1:0>2d}:{2:0>2d}'
    msg = 'trend {0}, {1} reps remaining, ' + \
          'elapsed {2}, remaining {3}'

    def clock(seconds):
        # Whole seconds rendered as h:mm:ss.
        mins, secs = divmod(int(seconds), 60)
        hours, mins = divmod(mins, 60)
        return time_fmt.format(hours, mins, secs)

    while remaining > 0:
        start = b - remaining
        size = min(remaining, per_block)
        results[start:start + size] = \
            simulate_kpss(nobs, size, trend=trend, rng=rng)
        # May go negative on the last block; clamped wherever it is shown.
        remaining -= per_block
        elapsed_s = (dt.datetime.now() - started).total_seconds()
        left_s = max(0, remaining) * (elapsed_s / (b - remaining))
        print(msg.format(trend, max(0, remaining), clock(elapsed_s),
                         clock(left_s)))

    return results
class IIDBootstrap(object):
    """
    Bootstrap using uniform resampling

    Parameters
    ----------
    args
        Positional arguments to bootstrap
    kwargs
        Keyword arguments to bootstrap

    Attributes
    ----------
    index : array
        The current index of the bootstrap
    data : tuple
        Two-element tuple with the pos_data in the first position and kw_data
        in the second (pos_data, kw_data)
    pos_data : tuple
        Tuple containing the positional arguments (in the order entered)
    kw_data : dict
        Dictionary containing the keyword arguments
    random_state : RandomState
        RandomState instance used by bootstrap

    Notes
    -----
    Supports numpy arrays and pandas Series and DataFrames.  Data returned has
    the same type as the input data.

    Data entered using keyword arguments is directly accessible as an
    attribute.

    Examples
    --------
    Data can be accessed in a number of ways.  Positional data is retained in
    the same order as it was entered when the bootstrap was initialized.
    Keyword data is available both as an attribute or using a dictionary syntax
    on kw_data.

    >>> from arch.bootstrap import IIDBootstrap
    >>> from numpy.random import standard_normal
    >>> y = standard_normal((500, 1))
    >>> x = standard_normal((500,2))
    >>> z = standard_normal(500)
    >>> bs = IIDBootstrap(x, y=y, z=z)
    >>> for data in bs.bootstrap(100):
    ...     bs_x = data[0][0]
    ...     bs_y = data[1]['y']
    ...     bs_z = bs.z
    """

    def __init__(self, *args, **kwargs):
        # Without any data the number of items is undefined; fail with a
        # clear message instead of an AttributeError further down.
        if not args and not kwargs:
            raise ValueError('At least one positional or keyword input '
                             'is required')
        self.random_state = RandomState()
        self._initial_state = self.random_state.get_state()
        self._args = args
        self._kwargs = kwargs
        if args:
            self._num_items = len(args[0])
        else:
            key = list(kwargs.keys())[0]
            self._num_items = len(kwargs[key])

        all_args = list(args)
        all_args.extend(kwargs.values())

        for arg in all_args:
            if len(arg) != self._num_items:
                raise ValueError("All inputs must have the same number of "
                                 "elements in axis 0")
        self._index = np.arange(self._num_items)

        self._parameters = []
        self._seed = None
        self.pos_data = args
        self.kw_data = kwargs
        self.data = (args, kwargs)

        self._base = None
        self._results = None
        self._studentized_results = None
        self._last_func = None
        self._name = 'IID Bootstrap'
        # Expose keyword data as attributes, refusing names that would
        # overwrite an existing (non-None) attribute.
        for key, value in kwargs.items():
            attr = getattr(self, key, None)
            if attr is None:
                self.__setattr__(key, value)
            else:
                raise ValueError(key + ' is a reserved name')

    def __str__(self):
        text = self._name
        text += '(no. pos. inputs: ' + str(len(self.pos_data))
        text += ', no. keyword inputs: ' + str(len(self.kw_data)) + ')'
        return text

    def __repr__(self):
        return self.__str__()[:-1] + ', ID: ' + hex(id(self)) + ')'

    def _repr_html(self):
        html = '<strong>' + self._name + '</strong>('
        html += '<strong>no. pos. inputs</strong>: ' + str(len(self.pos_data))
        html += ', <strong>no. keyword inputs</strong>: ' + \
                str(len(self.kw_data))
        html += ', <strong>ID</strong>: ' + hex(id(self)) + ')'
        return html

    @property
    def index(self):
        """
        Returns the current index of the bootstrap
        """
        return self._index

    def get_state(self):
        """
        Gets the state of the bootstrap's random number generator

        Returns
        -------
        state : RandomState state vector
            Array containing the state
        """
        return self.random_state.get_state()

    def set_state(self, state):
        """
        Sets the state of the bootstrap's random number generator

        Parameters
        ----------
        state : RandomState state vector
            Array containing the state
        """

        return self.random_state.set_state(state)

    def seed(self, value):
        """
        Seeds the bootstrap's random number generator

        Parameters
        ----------
        value : int
            Integer to use as the seed
        """
        self._seed = value
        self.random_state.seed(value)
        return None

    def reset(self, use_seed=True):
        """
        Resets the bootstrap to either its initial state or the last seed.

        Parameters
        ----------
        use_seed : bool, optional
            Flag indicating whether to use the last seed if provided.  If
            False or if no seed has been set, the bootstrap will be reset
            to the initial state.  Default is True
        """
        self._index = np.arange(self._num_items)
        self._resample()
        self.random_state.set_state(self._initial_state)
        if use_seed and self._seed is not None:
            self.seed(self._seed)
        return None

    def bootstrap(self, reps):
        """
        Iterator for use when bootstrapping

        Parameters
        ----------
        reps : int
            Number of bootstrap replications

        Example
        -------
        The key steps are problem dependent and so this example shows the use
        as an iterator that does not produce any output

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> bs = IIDBootstrap(np.arange(100), x=np.random.randn(100))
        >>> for posdata, kwdata in bs.bootstrap(1000):
        ...     # Do something with the positional data and/or keyword data
        ...     pass

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap

        Notes
        -----
        The iterator returns a tuple containing the data entered in positional
        arguments as a tuple and the data entered using keywords as a
        dictionary
        """
        for _ in range(reps):
            indices = np.asarray(self.update_indices())
            self._index = indices
            yield self._resample()

    def conf_int(self, func, reps=1000, method='basic', size=0.95, tail='two',
                 extra_kwargs=None, reuse=False, sampling='nonparametric',
                 std_err_func=None, studentize_reps=1000):
        """
        Parameters
        ----------
        func : callable
            Function the computes parameter values.  See Notes for requirements
        reps : int, optional
            Number of bootstrap replications
        method : string, optional
            One of 'basic', 'percentile', 'studentized', 'norm' (identical to
            'var', 'cov'), 'bc' (identical to 'debiased', 'bias-corrected'), or
            'bca'
        size : float, optional
            Coverage of confidence interval
        tail : string, optional
            One of 'two', 'upper' or 'lower'.
        reuse : bool, optional
            Flag indicating whether to reuse previously computed bootstrap
            results.  This allows alternative methods to be compared without
            rerunning the bootstrap simulation.  Reuse is ignored if reps is
            not the same across multiple runs, func changes across calls, or
            method is 'studentized'.
        sampling : string, optional
            Type of sampling to use: 'nonparametric', 'semi-parametric' (or
            'semi', 'semiparametric') or 'parametric'.  The default is
            'nonparametric'.  See notes about the changes to func required
            when using 'semi' or 'parametric'.
        extra_kwargs : dict, optional
            Extra keyword arguments to use when calling func and std_err_func,
            when appropriate
        std_err_func : callable, optional
            Function to use when standardizing estimated parameters when using
            the studentized bootstrap.  Providing an analytical function
            eliminates the need for a nested bootstrap
        studentize_reps : int, optional
            Number of bootstraps to use in the inner component when using the
            studentized bootstrap.  Ignored when ``std_err_func`` is provided

        Returns
        -------
        intervals : 2-d array
            Computed confidence interval.  Row 0 contains the lower bounds, and
            row 1 contains the upper bounds.  Each column corresponds to a
            parameter. When tail is 'lower', all upper bounds are inf.
            Similarly, 'upper' sets all lower bounds to -inf.

        Examples
        --------
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(0)
        >>> y = np.random.randn(1000, 2)
        >>> from arch.bootstrap import IIDBootstrap
        >>> bs = IIDBootstrap(y)
        >>> ci = bs.conf_int(func, 1000)

        Notes
        -----
        When there are no extra keyword arguments, the function is called

        .. code:: python

            func(*args, **kwargs)

        where args and kwargs are the bootstrap version of the data provided
        when setting up the bootstrap.  When extra keyword arguments are used,
        these are appended to kwargs before calling func.  With parametric
        sampling the keywords ``state`` and ``params`` are added, with
        semi-parametric sampling only ``params``.

        The standard error function, if provided, must return a vector of
        parameter standard errors and is called

        .. code:: python

            std_err_func(params, *args, **kwargs)

        where ``params`` is the vector of estimated parameters using the same
        bootstrap data as in args and kwargs.

        The bootstraps are:

        * 'basic' - Basic confidence using the estimated parameter and
          difference between the estimated parameter and the bootstrap
          parameters
        * 'percentile' - Direct use of bootstrap percentiles
        * 'norm' - Makes use of normal approximation and bootstrap covariance
          estimator
        * 'studentized' - Uses either a standard error function or a nested
          bootstrap to estimate percentiles and the bootstrap covariance for
          scale
        * 'bc' - Bias corrected using estimate bootstrap bias correction
        * 'bca' - Bias corrected and accelerated, adding acceleration parameter
          to 'bc' method

        Raises
        ------
        ValueError
            If size is not strictly between 0 and 1, or tail or method is
            not recognised.
        RuntimeError
            If the jackknife variance estimate is too small for 'bca'.
        """
        studentized = 'studentized'
        if not 0.0 < size < 1.0:
            raise ValueError('size must be strictly between 0 and 1')
        tail = tail.lower()
        if tail not in ('two', 'lower', 'upper'):
            raise ValueError('tail must be one of two-sided, lower or upper')
        studentize_reps = studentize_reps if method == studentized else 0

        _reuse = False
        if reuse:
            # check conditions for reuse
            _reuse = (self._results is not None and
                      len(self._results) == reps and
                      method != studentized and
                      self._last_func is func)

        if not _reuse:
            if reuse:
                import warnings

                warn = 'The conditions to reuse the previous bootstrap has ' \
                       'not been satisfied. A new bootstrap will be used.'
                warnings.warn(warn, RuntimeWarning)
            self._construct_bootstrap_estimates(
                func, reps, extra_kwargs, std_err_func=std_err_func,
                studentize_reps=studentize_reps,  # noqa
                sampling=sampling)

        base, results = self._base, self._results
        studentized_results = self._studentized_results

        std_err = []
        if method in ('norm', 'var', 'cov', studentized):
            errors = results - results.mean(axis=0)
            std_err = np.sqrt(np.diag(errors.T.dot(errors) / reps))

        if tail == 'two':
            alpha = (1.0 - size) / 2
        else:
            alpha = (1.0 - size)

        percentiles = [alpha, 1.0 - alpha]
        norm_quantiles = stats.norm.ppf(percentiles)

        if method in ('norm', 'var', 'cov'):
            lower = base + norm_quantiles[0] * std_err
            upper = base + norm_quantiles[1] * std_err

        elif method in ('percentile', 'basic', studentized,
                        'debiased', 'bc', 'bias-corrected', 'bca'):
            values = results
            if method == studentized:
                # studentized uses studentized parameter estimates
                values = studentized_results

            if method in ('debiased', 'bc', 'bias-corrected', 'bca'):
                # bias corrected uses modified percentiles, but is
                # otherwise identical to the percentile method
                p = (results < base).mean(axis=0)
                b = stats.norm.ppf(p)
                b = b[:, None]
                if method == 'bca':
                    nobs = self._num_items
                    jk_params = _loo_jackknife(func, nobs, self._args,
                                               self._kwargs)
                    u = (nobs - 1) * (jk_params - base)
                    numer = np.sum(u**3, 0)
                    denom = 6 * (np.sum(u**2, 0)**(3.0 / 2.0))
                    small = denom < (np.abs(numer) * np.finfo(np.float64).eps)
                    if small.any():
                        message = 'Jackknife variance estimate {jk_var} is ' \
                                  'too small to use BCa'
                        raise RuntimeError(message.format(jk_var=denom))
                    a = numer / denom
                    a = a[:, None]
                else:
                    a = 0.0

                percentiles = stats.norm.cdf(b + (b + norm_quantiles) /
                                             (1.0 - a * (b + norm_quantiles)))
                percentiles = list(100 * percentiles)
            else:
                percentiles = [100 * p for p in percentiles]  # Rescale

            if method not in ('bc', 'debiased', 'bias-corrected', 'bca'):
                ci = np.asarray(np.percentile(values, percentiles, axis=0))
                lower = ci[0, :]
                upper = ci[1, :]
            else:
                # Bias-corrected percentiles differ per parameter, so each
                # column is evaluated separately.
                k = values.shape[1]
                lower = np.zeros(k)
                upper = np.zeros(k)
                for i in range(k):
                    lower[i], upper[i] = np.percentile(values[:, i],
                                                       list(percentiles[i]))

            # Basic and studentized use the lower empirical quantile to
            # compute upper and vice versa.  Bias corrected and percentile use
            # upper to estimate the upper, and lower to estimate the lower
            if method == 'basic':
                lower_copy = lower + 0.0
                lower = 2.0 * base - upper
                upper = 2.0 * base - lower_copy
            elif method == studentized:
                lower_copy = lower + 0.0
                lower = base - upper * std_err
                upper = base - lower_copy * std_err

        else:
            raise ValueError('Unknown method')

        if tail == 'lower':
            upper = np.zeros_like(base)
            upper.fill(np.inf)
        elif tail == 'upper':
            lower = np.zeros_like(base)
            lower.fill(-1 * np.inf)

        return np.vstack((lower, upper))

    def clone(self, *args, **kwargs):
        """
        Clones the bootstrap using different data.

        Parameters
        ----------
        args
            Positional arguments to bootstrap
        kwargs
            Keyword arguments to bootstrap

        Returns
        -------
        bs
            Bootstrap instance
        """
        pos_arguments = copy.deepcopy(self._parameters)
        pos_arguments.extend(args)
        bs = self.__class__(*pos_arguments, **kwargs)
        if self._seed is not None:
            bs.seed(self._seed)
        return bs

    def apply(self, func, reps=1000, extra_kwargs=None):
        """
        Applies a function to bootstrap replicated data

        Parameters
        ----------
        func : callable
            Function the computes parameter values.  See Notes for requirements
        reps : int, optional
            Number of bootstrap replications
        extra_kwargs : dict, optional
            Extra keyword arguments to use when calling func.  Must not
            conflict with keyword arguments used to initialize bootstrap

        Returns
        -------
        results : array
            reps by nparam array of computed function values where each row
            corresponds to a bootstrap iteration

        Notes
        -----
        When there are no extra keyword arguments, the function is called

        .. code:: python

            func(*args, **kwargs)

        where args and kwargs are the bootstrap version of the data provided
        when setting up the bootstrap.  When extra keyword arguments are used,
        these are appended to kwargs before calling func

        Examples
        --------
        >>> import numpy as np
        >>> x = np.random.randn(1000,2)
        >>> from arch.bootstrap import IIDBootstrap
        >>> bs = IIDBootstrap(x)
        >>> def func(y):
        ...     return y.mean(0)
        >>> results = bs.apply(func, 100)
        """
        kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs)
        base = func(*self._args, **kwargs)
        try:
            num_params = base.shape[0]
        except (AttributeError, IndexError):
            # No shape (plain Python value) or a 0-d value: one parameter.
            num_params = 1
        results = np.zeros((reps, num_params))
        count = 0
        for pos_data, kw_data in self.bootstrap(reps):
            kwargs = _add_extra_kwargs(kw_data, extra_kwargs)
            results[count] = func(*pos_data, **kwargs)
            count += 1
        return results

    def _construct_bootstrap_estimates(self, func, reps, extra_kwargs=None,
                                       std_err_func=None, studentize_reps=0,
                                       sampling='nonparametric'):
        # Private, more complicated version of apply.  Stores the estimate
        # on the original data in _base, the bootstrap estimates in
        # _results and their studentized versions in _studentized_results.
        self._last_func = func
        semi = parametric = False
        if sampling == 'parametric':
            parametric = True
        elif sampling in ('semiparametric', 'semi-parametric', 'semi'):
            # Accept every spelling documented in conf_int.
            semi = True

        if extra_kwargs is not None:
            if any(k in self._kwargs for k in extra_kwargs):
                raise ValueError('extra_kwargs contains keys used for variable'
                                 ' names in the bootstrap')
        kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs)
        base = func(*self._args, **kwargs)

        num_params = 1 if np.isscalar(base) else base.shape[0]
        results = np.zeros((reps, num_params))
        studentized_results = np.zeros((reps, num_params))

        count = 0
        for pos_data, kw_data in self.bootstrap(reps):
            kwargs = _add_extra_kwargs(kw_data, extra_kwargs)
            if parametric:
                kwargs['state'] = self.random_state
                kwargs['params'] = base
            elif semi:
                kwargs['params'] = base
            results[count] = func(*pos_data, **kwargs)
            if std_err_func is not None:
                std_err = std_err_func(results[count], *pos_data, **kwargs)
                studentized_results[count] = (results[count] - base) / std_err
            elif studentize_reps > 0:
                # Need new bootstrap of same type
                nested_bs = self.clone(*pos_data, **kw_data)
                # Set the seed to ensure reproducibility
                seed = self.random_state.randint(2**31 - 1)
                nested_bs.seed(seed)
                cov = nested_bs.cov(func, studentize_reps,
                                    extra_kwargs=extra_kwargs)
                std_err = np.sqrt(np.diag(cov))
                studentized_results[count] = (results[count] - base) / std_err
            count += 1

        self._base = np.asarray(base)
        self._results = np.asarray(results)
        self._studentized_results = np.asarray(studentized_results)

    def cov(self, func, reps=1000, recenter=True, extra_kwargs=None):
        """
        Compute parameter covariance using bootstrap

        Parameters
        ----------
        func : callable
            Callable function that returns the statistic of interest as a
            1-d array
        reps : int, optional
            Number of bootstrap replications
        recenter : bool, optional
            Whether to center the bootstrap variance estimator on the average
            of the bootstrap samples (True) or to center on the original sample
            estimate (False).  Default is True.
        extra_kwargs: dict, optional
            Dictionary of extra keyword arguments to pass to func

        Returns
        -------
        cov: array
            Bootstrap covariance estimator

        Notes
        -----
        func is called as

        .. code:: python

            func(*args, **kwargs)

        where `*args` and `**kwargs` are the (resampled) data used in the
        bootstrap, with any extra_kwargs added to kwargs.

        Example
        -------
        Bootstrap covariance of the mean

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(axis=0)
        >>> y = np.random.randn(1000, 3)
        >>> bs = IIDBootstrap(y)
        >>> cov = bs.cov(func, 1000)

        Bootstrap covariance using a function that takes additional input

        >>> def func(x, stat='mean'):
        ...     if stat=='mean':
        ...         return x.mean(axis=0)
        ...     elif stat=='var':
        ...         return x.var(axis=0)
        >>> cov = bs.cov(func, 1000, extra_kwargs={'stat':'var'})

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap
        """
        self._construct_bootstrap_estimates(func, reps, extra_kwargs)
        base, results = self._base, self._results

        if recenter:
            errors = results - np.mean(results, 0)
        else:
            errors = results - base

        return errors.T.dot(errors) / reps

    def var(self, func, reps=1000, recenter=True, extra_kwargs=None):
        """
        Compute parameter variance using bootstrap

        Parameters
        ----------
        func : callable
            Callable function that returns the statistic of interest as a
            1-d array
        reps : int, optional
            Number of bootstrap replications
        recenter : bool, optional
            Whether to center the bootstrap variance estimator on the average
            of the bootstrap samples (True) or to center on the original sample
            estimate (False).  Default is True.
        extra_kwargs: dict, optional
            Dictionary of extra keyword arguments to pass to func

        Returns
        -------
        var : 1-d array
            Bootstrap variance estimator

        Notes
        -----
        func is called as

        .. code:: python

            func(*args, **kwargs)

        where `*args` and `**kwargs` are the (resampled) data used in the
        bootstrap, with any extra_kwargs added to kwargs.

        Example
        -------
        Bootstrap variance of the mean

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(axis=0)
        >>> y = np.random.randn(1000, 3)
        >>> bs = IIDBootstrap(y)
        >>> variances = bs.var(func, 1000)

        Bootstrap variance using a function that takes additional input

        >>> def func(x, stat='mean'):
        ...     if stat=='mean':
        ...         return x.mean(axis=0)
        ...     elif stat=='var':
        ...         return x.var(axis=0)
        >>> variances = bs.var(func, 1000, extra_kwargs={'stat': 'var'})

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap
        """
        self._construct_bootstrap_estimates(func, reps, extra_kwargs)
        base, results = self._base, self._results

        if recenter:
            errors = results - np.mean(results, 0)
        else:
            errors = results - base

        return (errors**2).sum(0) / reps

    def update_indices(self):
        """
        Update indices for the next iteration of the bootstrap.  This must
        be overridden when creating new bootstraps.
        """
        return self.random_state.randint(self._num_items,
                                         size=self._num_items)

    def _resample(self):
        """
        Resample all data using the values in _index
        """
        indices = self._index
        pos_data = []
        for values in self._args:
            if isinstance(values, (pd.Series, pd.DataFrame)):
                # pandas objects are selected by position, not by label
                pos_data.append(values.iloc[indices])
            else:
                pos_data.append(values[indices])
        named_data = {}
        for key, values in self._kwargs.items():
            if isinstance(values, (pd.Series, pd.DataFrame)):
                named_data[key] = values.iloc[indices]
            else:
                named_data[key] = values[indices]
            # keep the attribute view of keyword data in sync
            setattr(self, key, named_data[key])

        self.pos_data = pos_data
        self.kw_data = named_data
        self.data = (pos_data, named_data)
        return self.data
class RestrictedBoltzmannMachine:
    """Restricted Boltzmann machine with logistic units.

    The object holds the current visible/hidden layer activations, the model
    parameters (``visibleBias``, ``hiddenBias``, ``weights``) and the most
    recent parameter steps (``visibleStep``, ``hiddenStep``, ``weightStep``).
    The step arrays are only ever modified in place so that outside references
    to them (e.g. for histograms) remain valid.
    """

    #
    #initialization methods
    #
    def __init__(self, visibleLayer, hiddenLayer, temperature=1., sigma=0.01,
                 visibleProportionOn=None, parameterFile=None, rng=None,
                 rngState=None, rngSeed=1337):
        """Set up layers, random generator and parameters.

        Parameters
        ----------
        visibleLayer, hiddenLayer : array
            Initial layer activations; the size of the last axis gives the
            number of visible / hidden units.
        temperature : float
            Temperature; ``beta = 1 / temperature`` scales the conditional
            energies.
        sigma : float
            Standard deviation of the random initial weights.
        visibleProportionOn : array, optional
            Per-unit proportions used to initialise the visible biases.
        parameterFile : iterable of str, optional
            When given, parameters are read from it instead of initialised.
        rng : optional
            Random generator to use as-is. When omitted, a ``RandomState``
            seeded with ``rngSeed`` is created and, if ``rngState`` is given,
            set to that state.
        """
        self.visibleLayer = visibleLayer
        self.hiddenLayer = hiddenLayer
        self.temperature = temperature
        self.beta = 1. / self.temperature
        if rng is None:
            self.rng = RandomState(seed=rngSeed)
            if rngState is not None:
                self.rng.set_state(rngState)
        else:
            self.rng = rng
        if parameterFile is None:
            self.initializeVisibleBias(visibleProportionOn=visibleProportionOn)
            self.initializeHiddenBias()
            self.initializeWeights(sigma)
        else:
            self.loadParameterFile(parameterFile)
        self.visibleStep = np.zeros_like(self.visibleBias)
        self.hiddenStep = np.zeros_like(self.hiddenBias)
        self.weightStep = np.zeros_like(self.weights)

    def initializeVisibleBias(self, visibleProportionOn=None):
        """Initialise the visible biases.

        Without ``visibleProportionOn`` the biases are zero. Otherwise they are
        the log-odds of the given proportions, after proportions equal to 0 or
        1 have been moved half-way towards the nearest other value so the
        log-odds stay finite. The caller's array is left untouched.
        """
        if visibleProportionOn is None:
            self.visibleBias = np.zeros(self.visibleLayer.shape[-1])
            return
        # Work on a float copy: the clipping below must not leak to the caller.
        proportion = np.array(visibleProportionOn, dtype=float)
        #find minimum non-zero value
        nonZeroMin = proportion[proportion > 0.].min()
        proportion[np.isclose(proportion, 0.)] = nonZeroMin + (0. - nonZeroMin) / 2.
        nonOneMax = proportion[proportion < 1.].max()
        print(f'nonZeroMin, nonOneMax: {nonZeroMin}, {nonOneMax}')
        proportion[np.isclose(proportion, 1.)] = nonOneMax + (1. - nonOneMax) / 2.
        self.visibleBias = np.log(proportion / (1. - proportion))

    def initializeHiddenBias(self):
        """Set all hidden biases to zero."""
        self.hiddenBias = np.zeros(self.hiddenLayer.shape[-1])

    def initializeWeights(self, sigma=0.01):
        """Draw the (visible x hidden) weight matrix from N(0, sigma^2)."""
        shape = (self.visibleLayer.shape[-1], self.hiddenLayer.shape[-1])
        self.weights = self.rng.normal(scale=sigma, size=shape)

    def loadParameterFile(self, parameterFile):
        """Read parameters from an iterable of lines, one float per line.

        The expected order is: visible biases, hidden biases, then the weight
        matrix in row-major order (the order ``dumpParameterFile`` writes).
        """
        lv = self.visibleLayer.shape[-1]
        lh = self.hiddenLayer.shape[-1]
        fileContents = [float(line.strip()) for line in parameterFile]
        self.visibleBias = np.array(fileContents[:lv])
        self.hiddenBias = np.array(fileContents[lv:lv + lh])
        self.weights = np.array(
            fileContents[lv + lh:lv + lh + lv * lh]).reshape((lv, lh))

    def dumpParameterFile(self, parameterFile):
        """Write all parameters to ``parameterFile``, one value per line."""
        for theta in self.visibleBias:
            print(f'{theta}', file=parameterFile)
        for theta in self.hiddenBias:
            print(f'{theta}', file=parameterFile)
        for theta in self.weights.flatten():
            print(f'{theta}', file=parameterFile)

    #
    #prediction methods
    #
    def hiddenConditionalProbabilities(self):
        """Return the hidden activation probabilities given ``visibleLayer``."""
        conditionalEnergies = self.hiddenBias + self.visibleLayer @ self.weights
        return logistic(self.beta * conditionalEnergies)

    def visibleConditionalProbabilities(self):
        """Return the visible activation probabilities given ``hiddenLayer``."""
        conditionalEnergies = self.visibleBias + self.hiddenLayer @ self.weights.T
        return logistic(self.beta * conditionalEnergies)

    def rollBernoulliProbabilities(self, probabilities):
        """Sample 0./1. values, each being 1 with the given probability."""
        rolls = self.rng.uniform(size=probabilities.shape)
        # np.float_ was removed in NumPy 2.0; float64 is the same dtype.
        return (rolls < probabilities).astype(np.float64)

    def gibbsSample(self, hiddenUnitsStochastic=False):
        """Run one visible -> hidden -> visible step.

        Updates ``hiddenLayer`` (sampled when ``hiddenUnitsStochastic`` is
        true, probabilities otherwise) and ``visibleLayer`` (probabilities).

        Returns
        -------
        tuple
            ``(visibleLayer, hiddenLayerProbabilities)``
        """
        #compute hidden activation probabilities given visible
        hiddenLayerProbabilities = self.hiddenConditionalProbabilities()
        if hiddenUnitsStochastic:
            self.hiddenLayer = self.rollBernoulliProbabilities(
                hiddenLayerProbabilities)
        else:
            self.hiddenLayer = hiddenLayerProbabilities
        #compute visible activation probabilities given hidden
        self.visibleLayer = self.visibleConditionalProbabilities()
        return self.visibleLayer, hiddenLayerProbabilities

    #
    #training methods
    #
    def computePCDGradient(self, miniBatch, miniFantasyBatch, nCDSteps=1,
                           l1Coefficient=None, l2Coefficient=None):
        """Return the gradients (data mean minus model mean) and new fantasy.

        Optional L1/L2 penalties are subtracted from the weight gradient.

        Returns
        -------
        tuple
            ``(visibleGradient, hiddenGradient, weightGradient, newFantasy)``
        """
        visibleDataMean, hiddenDataMean, weightDataMean = \
            self.computePCDGradientPositiveHalf(miniBatch)
        visibleModelMean, hiddenModelMean, weightModelMean, newFantasy = \
            self.computePCDGradientNegativeHalf(miniFantasyBatch,
                                                nCDSteps=nCDSteps)
        #compute gradients & return
        visibleGradient = visibleDataMean - visibleModelMean
        hiddenGradient = hiddenDataMean - hiddenModelMean
        weightGradient = weightDataMean - weightModelMean
        if l1Coefficient is not None:
            weightGradient -= l1Coefficient * np.sign(self.weights)
        if l2Coefficient is not None:
            weightGradient -= l2Coefficient * self.weights
        return visibleGradient, hiddenGradient, weightGradient, newFantasy

    def computePCDGradientPositiveHalf(self, miniBatch):
        """Return the parameter means with the visible layer set to the data."""
        self.visibleLayer = miniBatch
        hiddenLayerProbabilities = self.hiddenConditionalProbabilities()
        return self.computeParameterMeans(miniBatch, hiddenLayerProbabilities)

    def computePCDGradientNegativeHalf(self, miniFantasyBatch, nCDSteps=1):
        """Return the parameter means after ``nCDSteps`` Gibbs steps.

        The chain starts from ``miniFantasyBatch``. The final samples and
        their means are also stored on the instance (``visibleModel``,
        ``hiddenModel``, ``*ModelMean``) for ``computeAdversaryGradient``.

        Raises
        ------
        ValueError
            If ``nCDSteps`` is smaller than 1.
        """
        if nCDSteps < 1:
            raise ValueError('nCDSteps must be at least 1')
        self.visibleLayer = miniFantasyBatch
        for _ in range(nCDSteps):
            visibleOut, hiddenOut = self.gibbsSample()
        visibleModelMean, hiddenModelMean, weightModelMean = \
            self.computeParameterMeans(visibleOut, hiddenOut)
        #store for possible use by adversary
        self.visibleModel = visibleOut
        self.hiddenModel = hiddenOut
        self.visibleModelMean = visibleModelMean
        self.hiddenModelMean = hiddenModelMean
        self.weightModelMean = weightModelMean
        return visibleModelMean, hiddenModelMean, weightModelMean, visibleOut

    def computeParameterMeans(self, visible, hidden):
        """Return the axis-0 means of visible, hidden and their outer product."""
        visibleMean = visible.mean(axis=0)
        hiddenMean = hidden.mean(axis=0)
        weightMean = (visible[..., :, None] * hidden[..., None, :]).mean(axis=0)
        return visibleMean, hiddenMean, weightMean

    def updateParameters(self):
        """Add the current steps to the parameters, in place."""
        self.visibleBias += self.visibleStep
        self.hiddenBias += self.hiddenStep
        self.weights += self.weightStep

    def updateParametersSGD(self, miniBatch, miniFantasyBatch, learningRate,
                            nCDSteps=1, l1Coefficient=None, l2Coefficient=None,
                            verbose=False):
        """Take one plain gradient step and return the new fantasy batch."""
        visibleGradient, hiddenGradient, weightGradient, newFantasy = \
            self.computePCDGradient(miniBatch, miniFantasyBatch,
                                    nCDSteps=nCDSteps,
                                    l1Coefficient=l1Coefficient,
                                    l2Coefficient=l2Coefficient)
        # Overwrite in place: the *Step arrays must keep their identity
        # because the histograms of the steps hold references to them.
        self.visibleStep[...] = learningRate * visibleGradient
        self.hiddenStep[...] = learningRate * hiddenGradient
        self.weightStep[...] = learningRate * weightGradient
        self.updateParameters()
        if verbose is True:
            print('{:.3f}\t{:.3f}\t{:.3f}'.format(self.visibleStep.mean(),
                                                  self.hiddenStep.mean(),
                                                  self.weightStep.mean()))
        return newFantasy

    def updateParametersAdam(self, miniBatch, miniFantasyBatch, adams,
                             nCDSteps=1, l1Coefficient=None, l2Coefficient=None,
                             verbose=False):
        """Take one step computed by the ``adams`` optimisers.

        ``adams`` maps ``'visible'``, ``'hidden'`` and ``'weights'`` to objects
        providing ``computeAdamStep(gradient)``. Returns the new fantasy batch.
        """
        visibleGradient, hiddenGradient, weightGradient, newFantasy = \
            self.computePCDGradient(miniBatch, miniFantasyBatch,
                                    nCDSteps=nCDSteps,
                                    l1Coefficient=l1Coefficient,
                                    l2Coefficient=l2Coefficient)
        # Overwrite in place so references to the *Step arrays stay valid.
        self.visibleStep[...] = adams['visible'].computeAdamStep(visibleGradient)
        self.hiddenStep[...] = adams['hidden'].computeAdamStep(hiddenGradient)
        self.weightStep[...] = adams['weights'].computeAdamStep(weightGradient)
        self.updateParameters()
        if verbose is True:
            print('{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}'.format(
                visibleGradient.mean(), hiddenGradient.mean(),
                weightGradient.mean(), self.visibleStep.mean(),
                self.hiddenStep.mean(), self.weightStep.mean()))
        return newFantasy

    def updateParametersAdamAdversarial(self, miniBatch, miniFantasyBatch,
                                        adams, gamma, adversary, nCDSteps=1,
                                        l1Coefficient=None, l2Coefficient=None,
                                        verbose=False):
        """Take one Adam step on the sum of the PCD and adversary gradients.

        Returns the new fantasy batch.
        """
        # NOTE(review): ``gamma`` is accepted but never used; presumably it
        # was meant to weight the adversary gradient -- confirm with the author.
        visibleGradient, hiddenGradient, weightGradient, newFantasy = \
            self.computePCDGradient(miniBatch, miniFantasyBatch,
                                    nCDSteps=nCDSteps,
                                    l1Coefficient=l1Coefficient,
                                    l2Coefficient=l2Coefficient)
        visibleGradientAd, hiddenGradientAd, weightGradientAd = \
            self.computeAdversaryGradient(adversary)
        # Overwrite in place so references to the *Step arrays stay valid.
        self.visibleStep[...] = adams['visible'].computeAdamStep(
            visibleGradient + visibleGradientAd)
        self.hiddenStep[...] = adams['hidden'].computeAdamStep(
            hiddenGradient + hiddenGradientAd)
        self.weightStep[...] = adams['weights'].computeAdamStep(
            weightGradient + weightGradientAd)
        self.updateParameters()
        if verbose is True:
            print('{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}'.format(
                visibleGradient.mean(), hiddenGradient.mean(),
                weightGradient.mean(), self.visibleStep.mean(),
                self.hiddenStep.mean(), self.weightStep.mean()))
        return newFantasy

    def computeAdversaryGradient(self, adversary, miniFantasyBatch=None):
        """Return the mean product of adversary prediction and model deviations.

        Uses the samples and means stored by the last call to
        ``computePCDGradientNegativeHalf``.

        Parameters
        ----------
        adversary : object
            Must provide ``predict(batch)`` returning one value per sample.
        miniFantasyBatch : array, optional
            Batch handed to ``adversary.predict``. Defaults to the stored
            ``visibleModel`` so that predictions line up sample-by-sample with
            the deviations they are multiplied with.

        Returns
        -------
        tuple
            ``(visibleGradient, hiddenGradient, weightGradient)``
        """
        if miniFantasyBatch is None:
            miniFantasyBatch = self.visibleModel
        adversaryPredictions = adversary.predict(miniFantasyBatch)
        adversaryPredictionVariation = \
            adversaryPredictions - adversaryPredictions.mean()
        visibleModelVariation = self.visibleModel - self.visibleModelMean
        hiddenModelVariation = self.hiddenModel - self.hiddenModelMean
        weightModelVariation = (
            self.visibleModel[..., :, None] * self.hiddenModel[..., None, :]
            - self.weightModelMean)
        visibleGradient = (adversaryPredictionVariation[:, None] *
                           visibleModelVariation).mean(axis=0)
        hiddenGradient = (adversaryPredictionVariation[:, None] *
                          hiddenModelVariation).mean(axis=0)
        weightGradient = (adversaryPredictionVariation[:, None, None] *
                          weightModelVariation).mean(axis=0)
        return visibleGradient, hiddenGradient, weightGradient

    #
    #analysis methods
    #
    def computeReconstructionError(self, miniBatch, nCDSteps=1):
        """Return the mean squared error between data and its reconstruction.

        Raises
        ------
        ValueError
            If ``nCDSteps`` is smaller than 1.
        """
        if nCDSteps < 1:
            raise ValueError('nCDSteps must be at least 1')
        self.visibleLayer = miniBatch
        for _ in range(nCDSteps):
            visibleOut, hiddenOut = self.gibbsSample()
        sampleError = miniBatch - visibleOut
        return (sampleError * sampleError).mean()

    def computeFreeEnergy(self, miniBatch=None):
        """Return the free energy of each row of the visible layer.

        When ``miniBatch`` is given it first replaces ``visibleLayer``.
        """
        # NOTE(review): unlike the conditional probabilities, this does not
        # scale by ``beta`` -- confirm whether that is intended.
        if miniBatch is not None:
            self.visibleLayer = miniBatch
        internalFE = -self.visibleLayer @ self.visibleBias
        externalConditionalE = self.hiddenBias + self.visibleLayer @ self.weights
        # logaddexp(0, x) == log(1 + exp(x)) without overflowing for large x.
        externalFE = -np.logaddexp(0., externalConditionalE).sum(axis=1)
        return internalFE + externalFE

    def computeMeanFreeEnergy(self, miniBatch=None):
        """Return the mean of ``computeFreeEnergy(miniBatch)``."""
        return self.computeFreeEnergy(miniBatch).mean()

    #
    #miscellaneous methods
    #
    def copy(self):
        """Return an independent copy, including the random generator state."""
        rngState = self.rng.get_state()
        copyRBM = RestrictedBoltzmannMachine(np.copy(self.visibleLayer),
                                             np.copy(self.hiddenLayer),
                                             temperature=self.temperature,
                                             rngState=rngState)
        # The constructor draws initial weights and thereby advances the
        # generator; restore the state so both objects continue identically.
        copyRBM.rng.set_state(rngState)
        copyRBM.visibleBias = np.copy(self.visibleBias)
        copyRBM.hiddenBias = np.copy(self.hiddenBias)
        copyRBM.weights = np.copy(self.weights)
        copyRBM.visibleStep = np.copy(self.visibleStep)
        copyRBM.hiddenStep = np.copy(self.hiddenStep)
        copyRBM.weightStep = np.copy(self.weightStep)
        return copyRBM

    def storeHiddenActivationsOnMiniBatch(self, miniBatch, hiddenUnits=None):
        """Compute, store and return the hidden activations for ``miniBatch``.

        ``hiddenUnits`` optionally selects entries along the last axis of the
        returned copy.
        """
        self.visibleLayer = miniBatch
        # The probabilities must be assigned: computing them alone does not
        # update ``hiddenLayer``.
        self.hiddenLayer = self.hiddenConditionalProbabilities()
        if hiddenUnits is None:
            return np.copy(self.hiddenLayer)
        return np.copy(self.hiddenLayer[..., hiddenUnits])

    def setRngSeed(self, rngSeed):
        """Re-seed the random generator."""
        self.rng.seed(rngSeed)

    def __len__(self):
        # NOTE(review): this returns a (visible, hidden) tuple, so the builtin
        # ``len(rbm)`` raises TypeError; only direct ``rbm.__len__()`` calls
        # work. Left unchanged because such callers may rely on the tuple.
        return self.visibleLayer.shape[-1], self.hiddenLayer.shape[-1]
def __init__(self, properties, originalPatternDataSet, patternDataSet, patternDataSetProperties, seedAddition):
    """Generate an input data set by flipping components of the patterns.

    For every pattern of the target data set, ``inputsPerPattern`` distinct
    inputs are created; each one flips a number of randomly chosen
    components, the number being taken from a (clipped, rounded) normal
    distribution described by ``properties['minDistance']``.

    ``properties`` must provide ``seed``, ``inputsPerPattern``, ``target``
    (``'originalPatterns'`` or ``'transformedPatterns'``) and
    ``minDistance`` (with ``mean`` and ``stdev``). The random generator is
    seeded with ``seed + seedAddition``.

    Raises ``Exception`` when the configuration is invalid.
    """
    self.properties = properties

    # Validating the configuration.
    seed = properties.get('seed')
    inputsPerPattern = self.properties.get('inputsPerPattern')
    target = properties.get('target')
    minDistance = properties.get('minDistance')
    Utils.assertInt('Random seed', seed)
    Utils.assertInt('Inputs per pattern', inputsPerPattern, 1)
    if target not in ('originalPatterns', 'transformedPatterns'):
        raise Exception('Target must be "originalPatterns" or "transformedPatterns"')
    if minDistance is None:
        raise Exception('Minimum distance not defined.')
    mean, meanIsProportion = Utils.assertProportionOrFloat('Mean minimum distance', minDistance.get('mean'), 0, 0)
    stdev, stdevIsProportion = Utils.assertProportionOrFloat('Standard deviation of minimum distance', minDistance.get('stdev'), 0, 0)
    if (meanIsProportion or stdevIsProportion) and 'distance' not in patternDataSetProperties:
        raise Exception('Trying to create an input data set proportional to the distance of the pattern data set, but pattern data set has not defined distance.')
    if meanIsProportion:
        mean *= patternDataSetProperties['distance']['mean']
    if stdevIsProportion:
        stdev *= patternDataSetProperties['distance']['stdev']

    # Initializing the random generator.
    randomGenerator = RandomState()
    randomGenerator.seed(seed + seedAddition)

    # Generating the inputs.
    patternSize = patternDataSetProperties.get('patternSize')
    self.originalInputs = []
    self.targetDataSet = originalPatternDataSet if target == 'originalPatterns' else patternDataSet
    totalInputs = len(self.targetDataSet) * inputsPerPattern
    if stdev == 0:
        allFlips = [min(patternSize, int(round(mean)))] * totalInputs
    else:
        # A real list (not a lazy map) because it is indexed below.
        allFlips = [max(0, min(patternSize, int(round(x))))
                    for x in randomGenerator.normal(mean, stdev, totalInputs)]
    for i in range(len(self.targetDataSet)):
        insertedInputs = set()
        j = 0
        # NOTE(review): a candidate equal to an already inserted input is
        # retried with the same flip count, so this loop cannot finish when
        # that count admits fewer distinct inputs than requested (e.g. 0
        # flips with inputsPerPattern > 1).
        while j < inputsPerPattern:
            inputVector = list(self.targetDataSet[i])
            # One flip count per (pattern, input) pair.
            flips = allFlips[i * inputsPerPattern + j]
            componentsToFlip = list(range(patternSize))
            for k in range(flips):
                # Draw among the components not flipped yet.
                componentIndex = randomGenerator.randint(0, patternSize - k)
                component = componentsToFlip.pop(componentIndex)
                inputVector[component] = int(not inputVector[component])
            inputVector = tuple(inputVector)
            if inputVector not in insertedInputs:
                self.originalInputs.append(inputVector)
                insertedInputs.add(inputVector)
                j += 1

    # Applying transformations.
    if target == 'originalPatterns':
        self.inputs = Utils.transformDataSet(randomGenerator, self.originalInputs, patternDataSetProperties)
    else:
        self.inputs = self.originalInputs
def __init__(self, properties, seedAddition):
    """Validate the configuration and generate the pattern data set.

    ``properties`` must provide ``seed``, ``dataSetSize`` and
    ``patternSize``; ``extraBits``, ``distance`` and ``scale`` are optional
    and validated when present. The random generator is seeded with
    ``seed + seedAddition``. The generated patterns are stored in
    ``originalPatterns`` and their transformed version in ``patterns``.

    Raises ``Exception`` when the configuration is invalid.
    """
    self.properties = properties

    # Validating the configuration.
    seed = properties.get('seed')
    dataSetSize = self.properties.get('dataSetSize')
    patternSize = properties.get('patternSize')
    extraBits = properties.get('extraBits')
    distance = properties.get('distance')
    scale = properties.get('scale')
    Utils.assertInt('Random seed', seed)
    Utils.assertInt('Pattern data set size', dataSetSize, 1)
    Utils.assertInt('Pattern size', patternSize, 1)
    if extraBits is not None:
        Utils.assertInt('Number of extra bits', extraBits.get('number'), 1)
        if extraBits.get('values') not in (0, 1, 'random', 'randomFixed'):
            raise Exception(
                'Extra bits values must be 0, 1, "random" or "randomFixed"'
            )
    if distance is not None:
        Utils.assertFloat('Mean distance', distance.get('mean'), 0)
        Utils.assertFloat('Standard deviation of distance',
                          distance.get('stdev'), 0)
    if scale is not None:
        scaleType = scale.get('type')
        if scaleType == '1D':
            Utils.assertInt('Scale factor for 1D', scale.get('factor'), 1)
        elif scaleType == '2D':
            Utils.assertInt('Scale pattern width', scale.get('patternWidth'), 1)
            Utils.assertInt('Scale pattern height', scale.get('patternHeight'), 1)
            Utils.assertInt('Scale width factor', scale.get('widthFactor'), 1)
            Utils.assertInt('Scale height factor', scale.get('heightFactor'), 1)
            if scale.get('patternWidth') * scale.get('patternHeight') != patternSize:
                raise Exception(
                    'Scale pattern width and pattern height do not fit with the given pattern size'
                )
        else:
            # str() keeps the message intact for a missing (None) type
            # instead of failing with a TypeError on concatenation.
            raise Exception('Unknown scale type ' + str(scaleType))

    # Initializing the random generator.
    randomGenerator = RandomState()
    randomGenerator.seed(seed + seedAddition)

    # Generating the patterns.
    self.originalPatterns = Utils.generateDataSet(
        randomGenerator, dataSetSize, patternSize,
        self.computeError if 'distance' in self.properties else None,
        MainPatternGenerator.MAX_TRIES)

    # Applying transformations.
    self.patterns = Utils.transformDataSet(randomGenerator,
                                           self.originalPatterns,
                                           self.properties)
class PRNG(object):
    """A Pseudorandom Number Generator that yields samples from the set of
    source blocks using the RSD degree distribution described above.
    """

    def __init__(self, K, delta, c, np=None, enc_num=1, enc_key=None):
        """Provide RSD parameters on construction.

        K is the number of segments.
        delta and c are parameters that determine the distribution.
        np selects the numpy random number generator, which is faster.
        enc_num selects the LCG-based sampler when truthy (the default).
        enc_key is handed to ``LCG``; it defaults to ``[2**32, 1103, 12345]``.
        """
        self.K = float(K)
        self.K_int = int(K)
        self.delta = delta
        self.c = c
        S = self.calc_S()
        cdf, Z = gen_rsd_cdf(K, S, delta)
        self.cdf = cdf
        self.Z = Z
        self.np_rand = RandomState(1)
        self.np = np
        self.state = 1
        self.enc_num = enc_num
        # Fresh list per instance: a list default would be shared by all
        # instances created without an explicit key.
        self.enc_key = [2**32, 1103, 12345] if enc_key is None else enc_key

    def calc_S(self):
        """A helper function to calculate S, the expected number of
        degree=1 nodes. The value is also stored in ``self.S``.
        """
        S = self.c * log(self.K / self.delta) * sqrt(self.K)
        self.S = S
        return S

    def get_S(self):
        """Return the value stored by the last call to ``calc_S``."""
        return self.S

    def set_seed(self, seed):
        """Reset the state of the PRNG to the given seed."""
        self.state = seed

    def get_state(self):
        """Return the current state (seed) of the PRNG."""
        return self.state

    def get_src_blocks_wrap(self, seed=None):
        """Dispatch to one of the source block samplers.

        ``enc_num`` selects the LCG-based sampler; otherwise the ``np`` flag
        selects the numpy-based one, and the native python ``random`` module
        is used as fallback. The numpy sampler is faster but incompatible
        with the python one used by previous versions.
        """
        if self.enc_num:
            return self.get_src_blocks_enc(seed)
        if self.np:
            return self.get_src_blocks_np(seed)
        return self.get_src_blocks(seed)

    def get_src_blocks_enc(self, seed=None):
        """Return ``(blockseed, d, nums)`` with ``nums`` produced by ``LCG``.

        The degree ``d`` is sampled with the python ``random`` module seeded
        with the block seed.
        """
        blockseed = self._use_seed(seed)
        random.seed(blockseed)
        d = self._sample_d()
        nums = LCG(blockseed, 0, self.K_int, d, self.enc_key)
        return blockseed, d, nums

    def get_src_blocks(self, seed=None):
        """Return the indices of a set of `d` distinct source blocks sampled
        uniformly from indices i = 0, ..., K-1, where `d` is sampled from
        the RSD described above.
        """
        blockseed = self._use_seed(seed)
        random.seed(blockseed)
        d = self._sample_d()
        nums = random.sample(range(self.K_int), d)
        return blockseed, d, nums

    def get_src_blocks_np(self, seed=None):
        """Return `d` source block indices drawn uniformly from
        i = 0, ..., K-1, where `d` is sampled from the RSD described above.
        Uses numpy for speed.

        Unlike ``get_src_blocks``, indices are drawn independently, so the
        same index can occur more than once.
        """
        blockseed = self._use_seed(seed)
        self.np_rand.seed(blockseed)
        d = self._sample_d_np()
        nums = self.np_rand.randint(0, self.K_int, d)
        return blockseed, d, nums

    def _use_seed(self, seed):
        """Store ``seed`` as state when one is given; return the state."""
        # ``is not None`` so that an explicit seed of 0 is honoured.
        if seed is not None:
            self.state = seed
        return self.state

    def _degree_from_cdf(self, p):
        """Return the 1-based position of the first CDF entry above ``p``."""
        for ix, v in enumerate(self.cdf):
            if v > p:
                return ix + 1
        # No entry exceeds p: fall back to the largest degree.
        return len(self.cdf)

    def _sample_d_np(self):
        """Samples degree given the precomputed distributions above.
        Uses numpy for speed.
        """
        return self._degree_from_cdf(self.np_rand.rand())

    def _sample_d_inter(self):
        """Samples degree given the precomputed distributions above using
        interpolation.
        """
        # NOTE(review): ``self.inter`` is not created by ``__init__`` (the
        # interpolator setup is disabled there), so this method fails with
        # AttributeError unless the attribute is assigned elsewhere.
        p = random.random()
        # int() is faster than math.ceil albeit can return the wrong value.
        return int(self.inter(p)) + 1

    # Samples from the CDF of mu
    def _sample_d(self):
        """Samples degree given the precomputed distributions above."""
        return self._degree_from_cdf(random.random())
class RestrictedBoltzmannModel(Model):
    """Restricted Boltzmann machine model trained with one-step contrastive
    divergence on mini-batches.

    Training stops once the norm of the weight increments has not improved
    for ``STOP_DELTA_WEIGHTS_EPOCHS`` epochs; the best state seen is kept.
    """

    STOP_DELTA_WEIGHTS_EPOCHS = 1000
    DELTA_WEIGHTS_NORM_N = 1000

    def __init__(self, properties, seedAddition):
        """Read and validate the configuration.

        ``properties`` must provide ``seed``, ``hiddenNeurons``,
        ``learningRate``, ``weightDecay``, ``momentum`` and ``batchSize``.
        The effective seed is ``seed + seedAddition``.
        """
        # Validating the configuration.
        self.seed = properties.get('seed')
        self.hiddenNeurons = properties.get('hiddenNeurons')
        self.learningRate = properties.get('learningRate')
        self.weightDecay = properties.get('weightDecay')
        self.momentum = properties.get('momentum')
        self.batchSize = properties.get('batchSize')
        Utils.assertInt('Random seed', self.seed)
        Utils.assertInt('Hidden neurons', self.hiddenNeurons, 0)
        Utils.assertFloat('Learning rate', self.learningRate, 0)
        Utils.assertFloat('Weight decay', self.weightDecay, 0)
        Utils.assertBoolean('Momentum', self.momentum)
        Utils.assertInt('Batch size', self.batchSize, 0)
        self.batchSize = int(self.batchSize)

        # Preparing the random generator.
        self.randomGenerator = RandomState()
        self.seed += seedAddition

    # Public methods. A model must implement these methods in order to use it in Main.py

    def train(self, patternDataSet, patternDataSetProperties):
        """Train on ``patternDataSet`` and return ``{'trainingEpochs': n}``.

        The generator is re-seeded first, so training is reproducible.
        ``patternDataSetProperties`` is not used by this model.
        """
        # Initializing the random generator.
        self.randomGenerator.seed(self.seed)
        visibleNeurons = len(patternDataSet[0])

        # Initializing weights according to Hinton, G. (2010). A practical guide to training restricted Boltzmann machines. Momentum, 9(1), 926.
        self.weights = self.randomGenerator.normal(0, 0.01, (visibleNeurons, self.hiddenNeurons))
        self.visibleOffset = numpy.zeros((1, visibleNeurons))
        self.hiddenOffset = numpy.zeros((1, self.hiddenNeurons))
        for i in range(visibleNeurons):
            # Proportion of patterns in which component i is on.
            p = sum(pattern[i] for pattern in patternDataSet) / float(len(patternDataSet))
            if p == 0:
                self.visibleOffset[0, i] = -2.0
            elif p == 1:
                self.visibleOffset[0, i] = 2.0
            else:
                self.visibleOffset[0, i] = numpy.log(p / (1 - p))

        # Training.
        deltaWeights = numpy.zeros((visibleNeurons, self.hiddenNeurons))
        deltaVisibleOffset = numpy.zeros((1, visibleNeurons))
        deltaHiddenOffset = numpy.zeros((1, self.hiddenNeurons))
        epochs = 0
        lesserDeltaWeightsNormSummation = None
        deltaWeightsSummation = 0
        lastDeltaWeightsNorm = 0
        deltaWeightsStopCounter = 0
        bestWeights = None
        bestVisibleOffset = None
        bestHiddenOffset = None
        while True:
            for i in range(0, len(patternDataSet), self.batchSize):
                visibleBatch0 = numpy.asarray(patternDataSet[i:i + self.batchSize])
                # The last batch may hold fewer than batchSize patterns.
                rate = self.learningRate / visibleBatch0.shape[0]

                # Positive phase: sample hiddenBatch0 from batch
                hiddenBatch0Probability = self.activation(visibleBatch0, self.weights, self.hiddenOffset)

                # Sample hiddenBatch0: we binarize the results of the activation function
                hiddenBatch0 = (hiddenBatch0Probability > self.randomGenerator.rand(*hiddenBatch0Probability.shape))

                # Negative phase: calculate visibleBatch1 from our hiddenBatch0 samples
                # Hinton, 2010 recommends not to binarize when updating visible units
                # No need to sample the last hidden states because they're not used
                visibleBatch1 = self.activation(hiddenBatch0, self.weights.T, self.visibleOffset)
                hiddenBatch1Probability = self.activation(visibleBatch1, self.weights, self.hiddenOffset)

                # Momentum as in Hinton's practical guide: start at 0.5 and
                # raise it to 0.9 after the first epochs. Without momentum
                # the previous increment must not be carried over at all.
                if self.momentum:
                    momentum = 0.5 if epochs < 5 else 0.9
                else:
                    momentum = 0.0

                # Update increments of weights and offsets
                deltaWeights = deltaWeights * momentum + rate * (numpy.dot(visibleBatch0.T, hiddenBatch0Probability) - numpy.dot(visibleBatch1.T, hiddenBatch1Probability)) - self.weightDecay * self.weights
                deltaVisibleOffset = deltaVisibleOffset * momentum + rate * (numpy.sum(visibleBatch0, axis=0) - numpy.sum(visibleBatch1, axis=0))
                deltaHiddenOffset = deltaHiddenOffset * momentum + rate * (numpy.sum(hiddenBatch0Probability, axis=0) - numpy.sum(hiddenBatch1Probability, axis=0))

                # Update weights and offsets
                self.weights += deltaWeights
                self.visibleOffset += deltaVisibleOffset
                self.hiddenOffset += deltaHiddenOffset

            # Stop if looks like it is oscillating.
            deltaWeightsNorm = numpy.linalg.norm(deltaWeights)
            deltaWeightsSummation += deltaWeightsNorm
            if epochs > RestrictedBoltzmannModel.DELTA_WEIGHTS_NORM_N:
                deltaWeightsSummation -= lastDeltaWeightsNorm
                if lesserDeltaWeightsNormSummation is None or deltaWeightsSummation < lesserDeltaWeightsNormSummation:
                    lesserDeltaWeightsNormSummation = deltaWeightsSummation
                    deltaWeightsStopCounter = 0
                    bestWeights, bestVisibleOffset, bestHiddenOffset = self.copyState()
                else:
                    deltaWeightsStopCounter += 1
                    if deltaWeightsStopCounter >= RestrictedBoltzmannModel.STOP_DELTA_WEIGHTS_EPOCHS:
                        break
            lastDeltaWeightsNorm = deltaWeightsNorm
            epochs += 1

        self.weights = bestWeights
        self.visibleOffset = bestVisibleOffset
        self.hiddenOffset = bestHiddenOffset
        return {'trainingEpochs': epochs + 1}

    def recall(self, visibleValues):
        """Return ``(pattern, 1)`` where ``pattern`` is the reconstruction of
        ``visibleValues`` thresholded at 0.5, as a tuple of ints.

        The generator is re-seeded first, so repeated calls give the same
        result for the same input.
        """
        # Initializing the random generator.
        self.randomGenerator.seed(self.seed)

        # Computing the output.
        hiddenProbability = self.activation(visibleValues, self.weights, self.hiddenOffset)
        hiddenValues = (hiddenProbability > self.randomGenerator.rand(1, self.hiddenNeurons))
        result = self.activation(hiddenValues, self.weights.T, self.visibleOffset)
        return tuple(int(x) for x in result[0] > 0.5), 1

    # Private methods.

    def copyState(self):
        """Return copies of the weights, visible offset and hidden offset."""
        return numpy.copy(self.weights), numpy.copy(self.visibleOffset), numpy.copy(self.hiddenOffset)

    def activation(self, batch, weights, offset):
        """Return the logistic activation of ``batch . weights + offset``."""
        return expit(numpy.dot(batch, weights) + offset)
from networkx import nx
from numpy.random import RandomState

from pymoreg.structure.graph_generation import random_dag

# Consistency check for ``random_dag``: for a range of seeds, the edges and
# the parents reported by the generated graph must match those of the
# networkx digraph built from the same matrix.
seeds = list(range(101, 200))
rng = RandomState()
variables = list(range(15))

for i, s in enumerate(seeds):
    print('Test {0}/{1}'.format(i + 1, len(seeds)))
    rng.seed(s)

    g = random_dag(variables, rng=rng)
    nx_g = nx.from_scipy_sparse_matrix(g, create_using=nx.DiGraph())

    # Compare once per graph; the edge collection never changes, so looping
    # on it would not terminate.
    if set(g.edges()) != set(nx_g.edges()):
        raise ValueError('Error in graph created with seed {2}\n Expected edges: {0}\n got: {1}'
                         .format(nx_g.edges(), g.edges(), s))

    for v in variables:
        real_parents = set(nx_g.predecessors(v))
        parents = set(g.parents(v))
        if parents != real_parents:
            raise ValueError('Error in graph created with seed {2}\n Expected parents for node {3}: {0}\n got: {1}'
                             .format(real_parents, parents, s, v))
class SchmitzWMSystem(HistBasedWMSystem):
    """Schmitz et al.'s histogram watermarking method, adapted to audio data.

    A watermark is embedded by forming the histogram of the samples'
    amplitudes and considering the relation of pseudo-randomly selected pairs
    of bins (a_i, b_i). The embedding and detection key is the seed for the
    PRNG. The i-th bit is embedded by swapping the i-th bin pair whenever the
    relation between the two bins does not already encode the wanted bit.
    """

    # Class constants specifying the minimal and maximal step size
    MIN_STEP = -9
    MAX_STEP = 9

    # Specifies the keys for the parameter dictionary
    PARAM_KEYS = ['step']

    def __init__(self, step=(-9, 9)):
        """Construct and initialize a SchmitzWMSystem object.

        :param step: a ``(min_step, max_step)`` tuple or a scalar; a scalar
            ``s`` is interpreted as ``(-abs(s), abs(s))``
        """
        self._min_step = None
        self._max_step = None
        self._prng = None
        self.set_params(step=step)
        self._is_init = True

    @property
    def min_step(self):
        """Smallest (negative) offset between the two bins of a pair."""
        return self._min_step

    @min_step.setter
    def min_step(self, value):
        if value < SchmitzWMSystem.MIN_STEP or value >= 0:
            raise ValueError('Step exceeds the suggested range')
        self._min_step = value

    @property
    def max_step(self):
        """Largest (positive) offset between the two bins of a pair."""
        return self._max_step

    @max_step.setter
    def max_step(self, value):
        if value <= 0 or value > SchmitzWMSystem.MAX_STEP:
            raise ValueError('Step exceeds the suggested range')
        self._max_step = value

    def set_params(self, **kwargs):
        """Set the parameters of the watermarking system.

        Uses ``**kwargs`` so that individual parameters can be set without
        resetting the others to their defaults (kind of redundant here, as
        there is only one parameter). A fresh, unseeded PRNG is created on
        every call.

        :param step: a tuple or scalar, which specifies the step size
        :raises ValueError: if a step lies outside the suggested range
        """
        if 'step' in kwargs:
            step = kwargs['step']
            if not isinstance(step, tuple):
                self.min_step = abs(step) * -1
                self.max_step = abs(step)
            else:
                self.min_step, self.max_step = step

        # Init PRNG
        self._prng = RandomState()

    def get_params(self):
        """Return all parameters as a dictionary.

        :return: a dict mapping ``'step'`` to ``(min_step, max_step)``
        """
        return dict(
            zip(SchmitzWMSystem.PARAM_KEYS, [(self.min_step, self.max_step)]))

    def embed_watermark(self, samples, w, **kwargs):
        """Embed the specified mark in a copy of the samples.

        :param samples: the signal to be marked; 1-d for mono, otherwise
            ``(length, num_channels)``
        :param w: the watermark
        :param kwargs: must contain ``key``, the embedding key (PRNG seed)
        :return: ``(marked_samples, bin_pairs)``; for multi-channel input with
            more than one channel ``bin_pairs`` is stacked along a new first
            axis, one entry per channel
        :raises TypeError: if ``key`` is missing
        """
        assert self._is_init, 'WM system NOT initialized'

        if 'key' in kwargs:
            # Retrieve seed from kwargs
            seed = kwargs['key']
        else:
            raise TypeError('Required parameter \'key\'(seed) is missing')

        print('=============================================')
        print('Embedding ', w, ' via Schmitz\' method')
        print('---------------------------------------------')

        # Make a deep copy of the samples to mark
        samples_to_mark = np.empty_like(samples)
        samples_to_mark[:] = samples

        # Mono signal
        if samples.ndim <= 1:
            print('in channel #0')
            print('---------------------------------------------')
            return self._embed_watermark_single_channel(
                samples_to_mark, w, seed)

        # Multi-channel signal
        length, num_channels = samples.shape

        # Check if watermark and key match the channel layout
        wmk = self.check_wmk_alignment(num_channels, w)
        seed = self.check_key_alignment(num_channels, seed)

        per_channel_pairs = []
        for i in range(0, num_channels):
            print('in channel #', i)
            print('---------------------------------------------')
            samples_to_mark[:, i], bp = self._embed_watermark_single_channel(
                samples_to_mark[:, i], wmk[i], seed[i])  # returns copy
            per_channel_pairs.append(bp)

        # Stack once at the end. Re-stacking the already stacked array with
        # the next channel's pairs fails from the third channel onwards.
        if not per_channel_pairs:
            bin_pairs = np.array([])
        elif len(per_channel_pairs) == 1:
            bin_pairs = per_channel_pairs[0]
        else:
            bin_pairs = np.stack(per_channel_pairs, axis=0)

        return samples_to_mark, bin_pairs

    def _embed_watermark_single_channel(self, samples_to_mark, w, seed):
        """Embed the watermark in a mono signal.

        :param samples_to_mark: the samples to mark
        :param w: the watermark (iterable of bits)
        :param seed: the key - more precisely: the seed for the PRNG
        :return: ``(marked_samples, bin_pairs)``: marked copy of the samples
            and the bin pairs (detection key)
        """
        hist, bins = self.generate_standard_histogram(samples_to_mark)

        # Construct a sequence of pseudo-randomly selected bin pairs
        # (_generate_bin_pairs creates and seeds the PRNG itself).
        bin_pairs = self._generate_bin_pairs(hist, bins, len(w), seed)

        bins_to_swap = []
        for i, bit in enumerate(w):
            id1 = bin_pairs[i][0]
            id2 = bin_pairs[i][1]

            # bit 1 is encoded as hist[id1] < hist[id2], bit 0 as
            # hist[id1] > hist[id2]; swap whenever the relation is wrong.
            if bit == 1 and not hist[id1] < hist[id2]:
                bins_to_swap.append((id1, id2))
            elif bit == 0 and not hist[id1] > hist[id2]:
                bins_to_swap.append((id1, id2))

        # Swap the bins
        marked_samples = self.swap_bins_at_once(samples_to_mark, bins,
                                                bins_to_swap)

        return marked_samples, bin_pairs

    @staticmethod
    def swap_bins_at_once(samples, bins, bins_to_swap):
        """Swap the bin pairs specified by ``bins_to_swap``.

        If a pair (id1, id2) has to be swapped, every sample x which falls in
        bin id1 is shifted so that it falls into bin id2, and vice versa. Each
        sample is moved at most once (by the first pair that matches it).

        :param samples: the samples to be modified (not changed in place)
        :param bins: the bin edges
        :param bins_to_swap: a list of index pairs to be swapped
        :return: samples_to_mark: a modified copy of the samples
        """
        bin_width = abs(bins[1] - bins[0])

        # make a deep copy of the samples to mark
        samples_to_mark = np.empty_like(samples)
        samples_to_mark[:] = samples

        for i, x in enumerate(samples):
            for id1, id2 in bins_to_swap:
                if bins[id1] <= x < bins[id1 + 1]:
                    samples_to_mark[i] += ((id2 - id1) * bin_width)
                    break
                elif bins[id2] <= x < bins[id2 + 1]:
                    samples_to_mark[i] -= ((id2 - id1) * bin_width)
                    break

        return samples_to_mark

    def extract_watermark(self, samples, **kwargs):
        """Extract the watermark from the given samples by means of the key.

        :param samples: the marked samples
        :param kwargs: ``key`` (the PRNG seed) and ``length`` (number of bits
            to extract) are required
        :return: w: the extracted mark (for a multi-channel signal the marks
            of the channels are stacked row-wise)
        :raises TypeError: if ``key`` or ``length`` is missing
        """
        assert self._is_init, 'WM system NOT initialized'

        if 'key' in kwargs:
            key = kwargs['key']
        else:
            raise TypeError('Required parameter \'key\' is missing')

        if 'length' in kwargs:
            wmk_len = kwargs['length']
        else:
            raise TypeError('Required parameter \'length\' is missing')

        print('=============================================')
        print("Detecting watermark")
        print('---------------------------------------------')

        # Multi-channel signal
        if samples.ndim > 1:
            length, num_channels = samples.shape

            # Check, that for each channel exist a seed (or use the same for
            # all)
            key = self.check_key_alignment(num_channels, key)

            # np.int was removed from NumPy; the builtin int is equivalent.
            w = np.empty((num_channels, 1), dtype=int)
            for i in range(0, num_channels):
                print('in channel #', i)
                print('---------------------------------------------')

                # Extract watermark
                w_i = self.extract_watermark_single_channel(
                    samples[:, i], key[i], wmk_len)

                # Store extracted watermark in 2d output array
                if i == 0:
                    w = np.array(w_i)
                else:
                    w = np.vstack((w, w_i))
        # Mono signal
        else:
            w = self.extract_watermark_single_channel(samples, key, wmk_len)

        return w

    def extract_watermark_single_channel(self, samples, key, length):
        """Extract the watermark from a marked mono signal.

        :param samples: the marked single channel signal
        :param key: the extraction key (PRNG seed)
        :param length: the number of bits to extract
        :return: w: the extracted mark; pairs whose two bins hold the same
            number of samples contribute no bit
        """
        hist, bins = self.generate_standard_histogram(samples)

        # Construct the same sequence of pseudo-randomly selected bin pairs
        # as on the embedder side
        bin_pairs = self._generate_bin_pairs(hist, bins, length, key)

        w = []
        for id1, id2 in bin_pairs:
            if hist[id1] < hist[id2]:
                w.append(1)
            elif hist[id1] > hist[id2]:
                w.append(0)

        return np.array(w)

    def _gen_a_i(self, hist, bins, used_bins):
        """Generate the initial bin a for the i-th bit to embed.

        The drawn index is advanced linearly past used or empty bins; when the
        end of the histogram is reached a new index is drawn.

        :param hist: the histogram of the signal (necessary to check for bin
            emptiness)
        :param bins: a list which specifies the edges of the histogram bins
        :param used_bins: a list of already used bins, that cannot be
            considered anymore; the chosen bin is appended to it
        :return: a_i: index of the selected bin
        """
        a_i = self._prng.randint(0, len(bins) - 1)

        while a_i in used_bins or hist[a_i] == 0:
            if a_i < len(bins) - 2:
                a_i += 1
            else:
                return self._gen_a_i(hist, bins, used_bins)

        used_bins.append(a_i)

        return a_i

    def _gen_pair(self, hist, bins, used_bins):
        """Generate a single bin pair.

        :param hist: the histogram of the signal (necessary to check for bin
            equality or emptiness)
        :param bins: a list which specifies the edges of the histogram bins
        :param used_bins: a list of already used bins, that cannot be
            considered anymore; both bins of the pair are appended to it
        :return: (a_i, b_i): indices of two distinct, unused, non-empty bins
            with different counts
        """
        a_i = self._gen_a_i(hist, bins, used_bins)

        step = self._prng.randint(max(self.min_step, 0 - a_i),
                                  min(len(bins) - 1 - a_i, self.max_step) + 1)
        b_i = a_i + step

        # The draw above can yield b_i == len(bins) - 1, which is one past the
        # last histogram bin. Such a candidate is rejected here rather than by
        # narrowing the draw range, so that the PRNG sequence (and with it the
        # pairs derived from an existing key) stays unchanged.
        if (b_i < len(hist) and b_i not in used_bins
                and (hist[a_i] != hist[b_i]) and hist[b_i] > 0):
            used_bins.append(b_i)
            return a_i, b_i
        else:
            used_bins.remove(a_i)
            return self._gen_pair(hist, bins, used_bins)

    def _generate_bin_pairs(self, hist, bins, length, seed):
        """Construct the bin pairs whose relation encodes one watermark bit.

        This is done by using a freshly created PRNG seeded with ``seed``.

        :param hist: the histogram of the signal (necessary to check for bin
            equality or emptiness)
        :param bins: a list which specifies the edges of the histogram bins
        :param length: the length of the watermark to be embedded
        :param seed: the seed for the PRNG
        :return: bin_pairs: an integer array of shape ``(length, 2)`` which
            contains the drawn bin pairs
        """
        self._prng = RandomState()
        self._prng.seed(seed)

        used_bins = []
        pairs = [self._gen_pair(hist, bins, used_bins)
                 for _ in range(0, length)]

        # reshape keeps the (0, 2) shape for an empty watermark
        return np.array(pairs, dtype=int).reshape(-1, 2)
class StackedSAE(object):
    """Stacked sparse auto-encoder driven by a single config dictionary.

    The stack is a list of layer dictionaries kept in
    ``self.StackLayerInfo['LAYER']``; all tunable parameters live in
    ``self.SYSTEM_PARAMS``, which mirrors the sections of the config file.
    """

    def __init__(self, INPUT=None, LABELS=None, verbose=False):
        """
        IN ARGS: INPUT   : np.ndarray | None : raw data, stored in self.DATA
                 LABELS  : labels | None     : stored in self.TRNG_LABS
                 verbose : bool              : forwarded to the display helpers
        """
        self.OUTPUT_LAYER = None
        self.StackLayerInfo = {'LAYER': [], 'OUTPUT': self.OUTPUT_LAYER}
        self.SYSTEM_PARAMS = {'sections': ['STACK', 'LAYER', 'SMAX_TUNE',
                                           'MET_TUNE', 'LOG_TUNE',
                                           'CPP_LIBRARY', 'FILEIO', 'SYSTEM']}
        self.NUM_LAYS = {'WIN': 0, 'WOUT': 0, 'BIN': 0, 'BOUT': 0}
        self.DELETED_IDX = []
        self.LAYER_TRAINING = None
        self.TRNG_DATA = None
        self.DATA = None
        self.IN_SHAPE = None
        self.TIMER = time.clock()
        self.TRNG_LABS = LABELS
        self.MET = None
        self.COST = None
        self.TUNING_REGIMENT = {}
        self.RANDO = RandomState()
        self.BATCHER = None
        self.CP = ConfigParser.SafeConfigParser(allow_no_value=True)
        self.CHECKS = {'config_loaded': False,
                       'labels_loaded': False,
                       'data_loaded': False,
                       'lee_wants_rand_off': False,
                       'verbose': verbose}
        if INPUT is not None:
            self.DATA = INPUT
            self.IN_SHAPE = INPUT.shape

    @staticmethod
    def _CoerceConfigValue(val):
        """Convert a raw config string to bool, int or float where possible.

        'True'/'False' become booleans; otherwise int is tried, then float;
        anything else (including None for value-less options) is returned
        unchanged.
        """
        if val == 'True':
            return True
        if val == 'False':
            return False
        for CAST in (int, float):
            try:
                return CAST(val)
            except (TypeError, ValueError):
                pass
        return val

    def LoadStackConfig(self, PATH=None):
        '''
        DESCRPT: Reads the config file into self.SYSTEM_PARAMS, seeds the RNG
                 and writes the (possibly updated) config back. This is the
                 only place where the config file is read.
        IN ARGS: PATH : string : config file location, default 'config.ini'
        RAISES:  IOError if the file cannot be read or parsed
        '''
        if PATH is None:
            PATH = 'config.ini'

        try:
            FILES_READ = self.CP.read(PATH)
        except Exception:
            print("Config Name")
            raise IOError('Config file wasn\'t able to be read')
        # read() silently skips missing files and returns the list of files it
        # actually parsed, so an empty result means nothing was loaded.
        if not FILES_READ:
            raise IOError('Config file wasn\'t able to be read')

        GENERIC_DICT = {}
        for SECT in self.CP.sections():
            GENERIC_DICT[SECT] = {}
            for OPTS in self.CP.options(SECT):
                GENERIC_DICT[SECT][OPTS] = self._CoerceConfigValue(
                    self.CP.get(SECT, OPTS))

        for key in list(GENERIC_DICT):
            if key not in self.SYSTEM_PARAMS['sections']:
                self.SYSTEM_PARAMS['sections'].append(key)

        for key in self.SYSTEM_PARAMS['sections']:
            self.SYSTEM_PARAMS[key] = GENERIC_DICT[key]

        if self.TRNG_DATA is None:
            try:
                self.NewInput()
                self.CHECKS['data_loaded'] = True
            except IOError:
                print('WARNING: NO DATA LOADED')

        self.SYSTEM_PARAMS['LAYER']['disp'] = self.CHECKS['verbose']
        self.CHECKS['config_loaded'] = True
        self.CHECKS['lee_wants_rand_off'] = self.SYSTEM_PARAMS['STACK']['lee_wants_rand_off']

        if self.SYSTEM_PARAMS['STACK']['lee_wants_rand_off']:
            # Deterministic mode: seed from the config and shout about it.
            CONFIG_SEED = self.SYSTEM_PARAMS['STACK']['rand_seed_32bit']
            self.RANDO.seed(seed=CONFIG_SEED)
            CD = 5
            BANNER = 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!'
            RULE = '------------------------------------------------------'
            for LINE in (BANNER, BANNER, BANNER, RULE,
                         BANNER + ' ', BANNER + ' ', BANNER):
                print(LINE)
            for i in range(0, CD):
                print('RESUMING IN  {0} seconds!.'.format(CD + 1 - i))
            print('The RNG STATE HAS BEEN SET!')
            print('SEED SET TO CONFIG PARAM...')
            print('>>>>> rand_see_32bit : value  {0} <<<<<<<<<'.format(CONFIG_SEED))
            print('PLEASE SET DETERMINISM PARAMETER TO FALSE BEFORE NON TESTING USE')
            for LINE in (RULE, BANNER, BANNER + ' ', BANNER,
                         BANNER + ' ', BANNER + ' ', BANNER):
                print(LINE)
        else:
            self.RANDO.seed(seed=np.int32(time.time()))

        self.UpdateStackConfig()

    def LoadLabels(self, LABEL_TYPE='default', is_training=False, **kwargs):
        '''
        DESCRPT: Builds labels in one of several ways.
        IN ARGS: LABEL_TYPE : 'default' | 'generic' : generic labels, needs
                                kwargs num_classes and num_labs
                              'from_list'  : kwargs['labels'] is a list
                              'from_array' : kwargs['labels'] is an array
                              'from_file'  : kwargs['lab_file_path'] is a
                                             pickle path or open file object
        RAISES:  RuntimeError if no data is loaded, Warning if the label file
                 cannot be loaded
        NOTES:   NOTE(review): the labels built here (NU_LABS) are never
                 stored on the instance; only the 'labels_loaded' flag is
                 refreshed from self.TRNG_LABS. Confirm the intent before
                 wiring NU_LABS into self.TRNG_LABS.
        '''
        if not self.CHECKS['data_loaded']:
            raise RuntimeError('Data aint loaded')

        if LABEL_TYPE == 'default' or LABEL_TYPE == 'generic':
            NUM_CLASSES = kwargs['num_classes']
            NUM_LABS = kwargs['num_labs']
            NU_LABS = np.ones((NUM_CLASSES,)).astype(np.int32)
            START = 0
            for i in range(1, NUM_CLASSES):
                STOP = START + i * NUM_CLASSES
                NU_LABS[START:STOP] = i
                START = STOP
        elif LABEL_TYPE == 'from_list':
            NU_LABS = np.asarray(kwargs['labels']).astype(np.int32)
        elif LABEL_TYPE == 'from_array':
            NU_LABS = kwargs['labels'].astype(np.int32)
        elif LABEL_TYPE == 'from_file':
            # SECURITY: unpickling executes arbitrary code; only use label
            # files from trusted sources.
            try:
                LAB_PATH = kwargs['lab_file_path']
                if hasattr(LAB_PATH, 'read'):
                    NU_LABS = pickle.load(LAB_PATH)
                else:
                    with open(LAB_PATH, 'rb') as LAB_FILE:
                        NU_LABS = pickle.load(LAB_FILE)
            except Exception:
                raise Warning('PATH NAME NOT VALID')

        if self.TRNG_LABS is not None:
            self.CHECKS['labels_loaded'] = True

    def ProduceFeatureVectors(self, DATA):
        '''
        DESCRPT: Encodes DATA through the stack and returns the stored output
                 layer scaled by STACK/output_scalar as C-ordered float32.
        '''
        SCALE = self.SYSTEM_PARAMS['STACK']['output_scalar']
        self.__EncodeInput(DATA, store_output=True)
        return np.asarray(self.OUTPUT_LAYER * SCALE, dtype=np.float32, order='c')

    def PreprocData(self, DATA, LABS):
        '''
        DESCRPT: Helper that cleans data before training. Rows reported by
                 RemoveColinearity are dropped (their indices are kept in
                 self.DELETED_IDX). Purely binary data is returned as float32,
                 any other data is normalised by its row sums.
        IN ARGS: DATA : np.ndarray
                 LABS : accepted for interface compatibility; the filtered
                        labels are not returned
        RETURNS: the cleaned data
        '''
        COLIN_ATTEMPTS = self.SYSTEM_PARAMS['STACK']['stoch_remove_attempts']
        self.DELETED_IDX = RemoveColinearity(DATA, COLIN_ATTEMPTS)

        if len(self.DELETED_IDX):
            DATA = np.delete(DATA, self.DELETED_IDX, axis=0)

        # True only if every entry is exactly 0 or 1
        if np.all((DATA == 0) | (DATA == 1)):
            return DATA.astype(np.float32)
        else:
            return DATA / np.atleast_2d(np.sum(DATA, 1)).T

    def StoreLayer(self, LAY):
        '''
        DESCRPT: Appends the next layer to self.StackLayerInfo['LAYER']
        '''
        self.StackLayerInfo['LAYER'].append(LAY)

    def __EncodeInput(self, IN=None, store_output=False):
        '''
        DESCRPT: Feeds IN forward through the stored layers. Logistic layers
                 transform the running value; reaching a SoftMax layer stores
                 the running value as the stack output.
        IN_ARG:  IN           : np.ndarray : defaults to self.TRNG_DATA
                 store_output : bool       : currently not consulted
        '''
        if IN is None:
            IN = self.TRNG_DATA

        for LAY in self.StackLayerInfo['LAYER']:
            if LAY['TYPE'] == 'Logistic':
                AIN = IN.dot(LAY['WIN'])
                AIN += LAY['BIN']
                IN = Sigm(AIN)
                print('LOG')
            elif LAY['TYPE'] == 'SoftMax':
                self.StackLayerInfo['OUTPUT'] = IN
                self.OUTPUT_LAYER = self.StackLayerInfo['OUTPUT']
                print('SMAX')

    def __DecodeInput(self, IN):
        '''
        DESCRPT: Reconstructs IN by walking the layers in reverse, each layer
                 decoding the result of the previous one. Layers without
                 decoder weights ('WOUT') are skipped.
        IN_ARG:  IN : np.ndarray
        RETURNS: the reconstruction
        '''
        HIDD = IN
        for LAY in reversed(self.StackLayerInfo['LAYER']):
            if 'WOUT' not in LAY:
                continue
            # chain on HIDD; decoding IN every time would discard all but the
            # last processed layer
            HIDD = Sigm(HIDD.dot(LAY['WOUT']) + LAY['BOUT'])
        return HIDD

    def PreTrainLayers(self, INPUT=None):
        '''
        DESCRPT: Supervises the layer-wise construction of the stack.
        POSTCON: Up to STACK/max_layer Logistic layers are appended (plus a
                 SoftMax layer if STACK/tuning_algo is 'softmax_classifier')
        IN ARGS: INPUT : np.ndarray | None : defaults to self.TRNG_DATA
        RETURNS: None
        '''
        # the config must be available before any parameter is read
        if not self.CHECKS['config_loaded']:
            self.LoadStackConfig()

        NUM_HIDD = self.SYSTEM_PARAMS['STACK']['num_hidden']
        MIN_HIDD = self.SYSTEM_PARAMS['STACK']['min_hidden']
        MAX_LAYER = self.SYSTEM_PARAMS['STACK']['max_layer']
        DEC_HIDD_BY = self.SYSTEM_PARAMS['STACK']['decrement_num_hidden']

        DisplayItems('Pretraining Layers', self.SYSTEM_PARAMS['STACK'],
                     verbose=self.CHECKS['verbose'], NumHidden=NUM_HIDD,
                     Maximum_layers=MAX_LAYER, MinimumHidden=MIN_HIDD,
                     DecRange=DEC_HIDD_BY)

        # if input was passed the training data is updated
        if INPUT is None:
            INPUT = self.TRNG_DATA
        else:
            self.TRNG_DATA = INPUT
        self.IN_SHAPE = INPUT.shape

        # timer for record keeping
        startTtime = time.clock()
        self.LAYER_TRAINING = Layer(self.SYSTEM_PARAMS['LAYER'])
        IN_SHAPE = INPUT.shape

        for L in range(0, MAX_LAYER):
            OUT_SHAPE = (IN_SHAPE[0], NUM_HIDD)
            [WIN, WOUT, BIN, BOUT, SHAPES] = self.LAYER_TRAINING.CreateLogLayer(
                IN_SHAPE, NUM_HIDD, self.RANDO)
            print("{0} | {1} | {2} << {3}".format('COST', 'AVGACT', 'KLD', 'ERR'))
            THETA = self.LAYER_TRAINING.TrainSparseAE(WIN, WOUT, BIN, BOUT, INPUT)
            [TWIN, TWOUT, TBIN, TBOUT] = self.LAYER_TRAINING.FluffLayer(THETA.x, SHAPES)
            # the encoded input feeds the next layer
            INPUT = Encode(INPUT, TWIN, TBIN)
            self.StackLayerInfo['LAYER'].append({'TYPE': "Logistic",
                                                 'WIN': TWIN,
                                                 'WOUT': TWOUT,
                                                 'BIN': TBIN,
                                                 'BOUT': TBOUT,
                                                 'SHAPES': SHAPES,
                                                 'IN_SHAPE': IN_SHAPE,
                                                 'H_SHAPE': OUT_SHAPE})
            DisplayItems('Pretrain', self.SYSTEM_PARAMS['LAYER'],
                         verbose=self.CHECKS['verbose'],
                         Elapsed_epoch_Time=time.clock() - startTtime,
                         Elapsed_Global_Time=time.clock() - self.TIMER)
            IN_SHAPE = OUT_SHAPE
            if NUM_HIDD == MIN_HIDD:
                break
            # shrink the hidden layer, but never below the minimum
            NUM_HIDD = max(NUM_HIDD - DEC_HIDD_BY, MIN_HIDD)

        if self.SYSTEM_PARAMS['STACK']['tuning_algo'] == 'softmax_classifier':
            IN_SHAPE = INPUT.shape
            NUM_CLASSES = self.SYSTEM_PARAMS['STACK']['num_classes']
            W_DECAY = self.SYSTEM_PARAMS['SMAX_TUNE']['smax_weight_decay']
            NOISE_SIG = self.SYSTEM_PARAMS['LAYER']['epoch_noise_sigma']
            DisplayItems('Pretrain Softmax', self.SYSTEM_PARAMS['LAYER'],
                         verbose=self.CHECKS['verbose'],
                         Elapsed_epoch_Time=time.clock() - startTtime,
                         Elapsed_Global_Time=time.clock() - self.TIMER)
            if self.SYSTEM_PARAMS['STACK']['use_generic_labels'] or not self.CHECKS['labels_loaded']:
                self.LoadLabels('generic', num_classes=NUM_CLASSES,
                                num_labs=INPUT.shape[0])
            LABS = self.TRNG_LABS
            SMAXW = self.LAYER_TRAINING.CreateSoftMaxLayer(IN_SHAPE, NUM_CLASSES, self.RANDO)
            M, P = SMAXW.shape
            THETA = self.LAYER_TRAINING.TrainSoftMaxLayer(SMAXW, W_DECAY, NOISE_SIG,
                                                          P, INPUT, LABS)
            WIN = THETA.x.reshape(M, P)
            self.StackLayerInfo['LAYER'].append({'TYPE': 'SoftMax',
                                                 'WIN': WIN,
                                                 'WSHAPE': (M, P)})

    def TuneStack(self, DATA=None):
        '''
        DESCRPT: Fine-tunes the entire stack as if it were a single layer.
        PRECOND: Layers must be pretrained
        IN ARGS: DATA : np.ndarray | None : only used to refresh
                 self.IN_SHAPE; the tuning itself runs on self.TRNG_DATA
        RETURNS: None
        '''
        # timer for record keeping
        TSstart = time.clock()

        if not self.CHECKS['config_loaded']:
            self.LoadStackConfig()

        INPUT = self.TRNG_DATA if DATA is None else DATA
        if INPUT is not None:
            self.IN_SHAPE = INPUT.shape

        if self.SYSTEM_PARAMS['STACK']['tuning_algo'] == 'softmax_classifier':
            self.SMaxTuning()

        DisplayItems('Finished Tuning', self.SYSTEM_PARAMS['STACK'],
                     verbose=self.CHECKS['verbose'],
                     Elapsed_Training_Time=time.clock() - TSstart,
                     Elapsed_Global_Time=time.clock() - self.TIMER)

    def SMaxTuning(self):
        '''
        DESCRPT: Flattens all layer weights and biases into one vector,
                 minimises the stack cost with L-BFGS-B (batched or online,
                 depending on SMAX_TUNE/use_batcher) and writes the tuned
                 parameters back into self.StackLayerInfo['LAYER'].
        '''
        INPUT = self.TRNG_DATA
        NOISE_SIG = self.SYSTEM_PARAMS['SMAX_TUNE']['smax_noise_sigma']
        W_DECAY = self.SYSTEM_PARAMS['SMAX_TUNE']['smax_weight_decay']
        MAX_TUNE_EPS = self.SYSTEM_PARAMS['SMAX_TUNE']['smax_tune_epochs']
        NUM_CLASSES = self.SYSTEM_PARAMS['STACK']['num_classes']

        SYSPARM = []
        SHAPES = []
        for LAY in self.GenLayers():
            W = LAY['WIN']
            SHAPES.append(W.shape)
            SYSPARM.append(W.flatten())
            if 'BIN' in LAY:
                B = LAY['BIN']
                SHAPES.append(B.shape)
                SYSPARM.append(B.flatten())
        SYSPARM = np.concatenate(SYSPARM)

        if self.SYSTEM_PARAMS['SMAX_TUNE']['use_batcher']:
            if self.BATCHER is None:
                self.BATCHER = Batcher()
            NUM_BATCHES = self.SYSTEM_PARAMS['BATCHER']['num_batches']
            BATCHES, BAT_LABS = self.BATCHER.GetBatches(INPUT, self.RANDO)
            DisplayItems('Finetune BATCH SMax', self.SYSTEM_PARAMS['SMAX_TUNE'],
                         num_classes=NUM_CLASSES,
                         verbose=self.CHECKS['verbose'],
                         Elapsed_Global_Time=time.clock() - self.TIMER)
            T = lambda x: TrainStackedSAEBatchAlgo(x, NUM_CLASSES, SHAPES, W_DECAY,
                                                   NOISE_SIG, NUM_BATCHES,
                                                   BATCHES, BAT_LABS)
        else:
            if self.SYSTEM_PARAMS['STACK']['use_generic_labels'] or not self.CHECKS['labels_loaded']:
                self.LoadLabels('generic', num_classes=NUM_CLASSES,
                                num_labs=INPUT.shape[0])
            LABS = self.TRNG_LABS
            DisplayItems('Finetune Online SMax', self.SYSTEM_PARAMS['SMAX_TUNE'],
                         num_classes=NUM_CLASSES,
                         Elapsed_Global_Time=time.clock() - self.TIMER,
                         verbose=self.CHECKS['verbose'])
            print("{0} | {1} | {2} << {3}".format('COST', 'AVGACT', 'KLD', 'ERR'))
            T = lambda x: TrainStackedSAEAlgo(x, NUM_CLASSES, SHAPES, W_DECAY,
                                              NOISE_SIG, INPUT, LABS)

        options_ = {'maxiter': MAX_TUNE_EPS,
                    'gtol': 1e-9,
                    'disp': self.CHECKS['verbose']}
        RES = mini(T, SYSPARM, method='L-BFGS-B', jac=True, options=options_)
        FLATWnB = RES.x

        NEW_SLI = []
        for wb, lay in zip(self.GenRebuilt(FLATWnB, SHAPES),
                           self.StackLayerInfo['LAYER']):
            LAY_DICT = {}
            LAY_DICT.update(lay)
            WIN, BIN = wb
            # GenRebuilt marks "no bias" with an empty list; len() works for
            # both that marker and a real bias array, unlike `BIN != []`.
            if len(BIN):
                LAY_DICT.update({'WIN': WIN, 'BIN': BIN})
            else:
                LAY_DICT.update({'WIN': WIN})
            NEW_SLI.append(LAY_DICT)
        self.StackLayerInfo['LAYER'] = NEW_SLI

    def MetricTuning(self):
        '''Placeholder, not implemented.'''
        pass

    def CostTuning(self):
        '''Placeholder, not implemented.'''
        pass

    def UpdateStack(self, DATA, sli_binary_path=None):
        '''Placeholder, not implemented.'''
        pass

    def GenLayers(self):
        '''Yields the stored layers in stack order.'''
        for LAY in self.StackLayerInfo['LAYER']:
            yield LAY

    def GenRebuilt(self, FLATWB, SHAPES):
        '''
        DESCRPT: Yields one (weights, bias) tuple per rebuilt weight matrix.
                 Weight matrices beyond the number of rebuilt biases are
                 yielded with an empty list in place of the bias.
        '''
        W, B = self.RebuildWandB(FLATWB, SHAPES)
        for i in range(0, len(W)):
            if i < len(B):
                yield W[i], B[i]
            else:
                # yield the single matrix, not the whole remaining list
                yield W[i], []

    def RebuildWandB(self, FLAT_WandB, SHAPES):
        '''
        DESCRPT: Cuts the flat parameter vector into arrays of the given
                 shapes. Shapes whose first dimension is 1 are treated as
                 biases, all others as weights.
        RETURNS: (W_LAY, B_LAY) : lists of weight and bias arrays
        '''
        START = 0
        STOP = 0
        W_LAY = []
        B_LAY = []
        for shp in SHAPES:
            A, B = shp
            print('{0} {1}'.format(A, B))
            STOP += A * B
            CHUNK = FLAT_WandB[START:STOP].reshape(A, B)
            if A == 1:
                B_LAY.append(CHUNK)
            else:
                W_LAY.append(CHUNK)
            START = STOP
        return W_LAY, B_LAY

    def NewInput(self, DATA=None, LABS=None, PATH=None):
        '''
        DESCRPT: Replaces the training data (and optionally the labels).
        IN ARGS: DATA : np.ndarray | None : new data to store
                 LABS : labels | None     : new labels to store
                 PATH : string | None     : text file to load when DATA is None
        RAISES:  IOError if neither DATA nor PATH is given
        NOTES:   STACK/num_hidden is capped at (number of columns - 1) of the
                 new data.
        '''
        NUM_HIDD = self.SYSTEM_PARAMS['STACK']['num_hidden']
        if DATA is None:
            if PATH is None:
                raise IOError('invalid path of <type None>')
            self.TRNG_DATA = LoadText(PATH)
        else:
            self.TRNG_DATA = DATA

        if NUM_HIDD >= np.size(self.TRNG_DATA, 1):
            self.SYSTEM_PARAMS['STACK']['num_hidden'] = np.size(self.TRNG_DATA, 1) - 1

        if LABS is not None:
            self.TRNG_LABS = LABS

        self.CHECKS['data_loaded'] = True

    def UpdateStackConfig(self, SECTION=None, OPTION=None):
        '''
        DESCRPT: Backs up ./config.ini (time stamped), copies values from
                 self.SYSTEM_PARAMS into the config parser and rewrites
                 ./config.ini.
        IN ARGS: SECTION : string | iterable of strings | None : section(s) to
                           update, default all known sections
                 OPTION  : string | None : single option to update, default
                           every option of the selected section(s)
        '''
        import shutil
        import os

        # Path of the last backed up config file; also stored in FILEIO
        LAST_CONFIG_UPDATE_PATH = os.getcwd()
        LAST_CONFIG_UPDATE_PATH += self.SYSTEM_PARAMS['FILEIO']['config_backup_directory']
        LAST_CONFIG_UPDATE_PATH += SaveFileTimeStamp()
        LAST_CONFIG_UPDATE_PATH += '_config_bu.ini'
        self.SYSTEM_PARAMS['FILEIO']['last_config_backup'] = LAST_CONFIG_UPDATE_PATH
        # copy to the very path that was just recorded
        shutil.copy2(os.getcwd() + '/config.ini', LAST_CONFIG_UPDATE_PATH)

        # The config parser instantiated with the stack is updated
        STRING_TYPES = (type(''), type(u''))
        if SECTION is None:
            sect = self.SYSTEM_PARAMS['sections']
        elif isinstance(SECTION, STRING_TYPES):
            # list('STACK') would split the name into characters
            sect = [SECTION]
        else:
            sect = list(SECTION)

        for s in sect:
            if OPTION is not None:
                if OPTION in self.SYSTEM_PARAMS[s]:
                    self.CP.set(s, OPTION, str(self.SYSTEM_PARAMS[s][OPTION]))
            else:
                for opt in list(self.SYSTEM_PARAMS[s]):
                    self.CP.set(s, opt, str(self.SYSTEM_PARAMS[s][opt]))

        # new config file is now in the cwd
        with open('config.ini', 'w') as write_config:
            self.CP.write(write_config)

    def CrossValidate(self, INPUT, CVLABEL=None):
        '''
        DESCRPT: Splits INPUT into STACK/num_folds contiguous folds and, for
                 every fold j, trains on folds j and j+1, builds feature
                 vectors for folds j+2/j+3 and j+4 (indices wrap around) and
                 passes them to BulkClassify. The stack is cleared after every
                 round.
        IN ARGS: INPUT   : np.ndarray
                 CVLABEL : labels | None : defaults to self.TRNG_LABS
        '''
        # the config must be available before any parameter is read
        if not self.CHECKS['config_loaded']:
            self.LoadStackConfig()

        NUM_FOLDS = self.SYSTEM_PARAMS['STACK']['num_folds']
        NUM_CLASSES = self.SYSTEM_PARAMS['STACK']['num_classes']

        self.NewInput(INPUT)
        if CVLABEL is None:
            if self.TRNG_LABS is None:
                self.LoadLabels('generic', num_classes=NUM_CLASSES,
                                num_labs=np.size(INPUT, 0))
        else:
            self.TRNG_LABS = CVLABEL

        ROWS = np.size(INPUT, 0)
        PART_SIZE = int(ROWS / NUM_FOLDS)
        FOLDS = {}
        LABS = {}
        print(INPUT.shape)
        for i in range(NUM_FOLDS):
            FOLDS[str(i)] = INPUT[PART_SIZE * i:PART_SIZE * (i + 1)]
            LABS[str(i)] = self.TRNG_LABS[PART_SIZE * i:PART_SIZE * (i + 1)]

        for j in range(NUM_FOLDS):
            t1 = str(j)
            t2 = str(np.mod(j + 1, NUM_FOLDS))
            c1 = str(np.mod(j + 2, NUM_FOLDS))
            c2 = str(np.mod(j + 3, NUM_FOLDS))
            e1 = str(np.mod(j + 4, NUM_FOLDS))

            VECT2TRAIN = np.concatenate((np.array(FOLDS[t1]), np.array(FOLDS[t2])), 0)
            TRAINLABS = np.concatenate((np.array(LABS[t1]), np.array(LABS[t2])), 0)
            VECT2CLASS = np.concatenate((np.array(FOLDS[c1]), np.array(FOLDS[c2])), 0)
            CLASSLABS = np.concatenate((np.array(LABS[c1]), np.array(LABS[c2])), 0)
            VECT2EVAL = np.array(FOLDS[e1])
            EVALLABS = np.array(LABS[e1])

            self.NewInput(DATA=VECT2TRAIN, LABS=TRAINLABS)
            self.PreTrainLayers()
            self.TuneStack()
            TFV = self.ProduceFeatureVectors(VECT2CLASS)
            EFV = self.ProduceFeatureVectors(VECT2EVAL)
            # pass the current round; `i` is stale from the fold-building loop
            SCORES = BulkClassify(j, VECT2CLASS, VECT2EVAL, TFV, EFV,
                                  EVALLABS, CLASSLABS, NUM_CLASSES)
            self.ClearStackLayerInfo()

    def ClearStackLayerInfo(self):
        '''Drops all stored layers and the stored output.'''
        self.StackLayerInfo = {'LAYER': [], 'OUTPUT': None}
        self.OUTPUT_LAYER = None
        DisplayItems('Stack Clearing Complete', self.StackLayerInfo,
                     verbose=self.CHECKS['verbose'])

    def SaveStackLayerInfo(self, save_path=None):
        '''
        DESCRPT: Saves self.StackLayerInfo via SaveBinary and records the path
                 in FILEIO/last_sli_save (the config file is rewritten).
        IN ARGS: save_path : string | None : default is cwd + FILEIO/binaries_dir
                 + time stamp + '_sli.pkl'
        '''
        import os
        if save_path is None:
            PATH = os.getcwd()
            PATH += self.SYSTEM_PARAMS['FILEIO']['binaries_dir']
            PATH += SaveFileTimeStamp() + '_sli.pkl'
        else:
            PATH = save_path
        print(PATH)
        SaveBinary(self.StackLayerInfo, PATH, EXTENSION='pkl')
        self.SYSTEM_PARAMS['FILEIO']['last_sli_save'] = PATH
        self.UpdateStackConfig()

    def UpdateStored(self):
        '''
        DESCRPT: Redundant wrapper around the private encode step, scheduled
                 for removal.
        NOTES:   STOP USING THIS.
        '''
        import warnings
        # the former bare `Warning, '...'` expression never warned anybody
        warnings.warn('UpdateStored will be deprecated in future versions',
                      DeprecationWarning)
        self.__EncodeInput(store_output=True)

    def LoadStackLayerInfo(self, load_path=None):
        '''
        DESCRPT: Loads a previously saved StackLayerInfo via LoadBinary.
        POSTCON: self.StackLayerInfo holds the loaded structure
        IN ARGS: load_path : string | None : default FILEIO/last_sli_save
        RAISES:  IOError if no path is given and none is recorded
        '''
        if not self.CHECKS['config_loaded']:
            self.LoadStackConfig()
        if load_path is None:
            if 'last_sli_save' in self.SYSTEM_PARAMS['FILEIO']:
                load_path = self.SYSTEM_PARAMS['FILEIO']['last_sli_save']
            else:
                raise IOError('No file path given or available')
        self.StackLayerInfo = LoadBinary(load_path)

    def __TidyUp(self, just_stack=False, just_layers=False):
        '''
        DESCRPT: With just_layers, asks every stored layer that offers
                 CleanLayer() to clean itself and runs the garbage collector.
        '''
        if just_layers:
            for LAY in self.StackLayerInfo['LAYER']:
                # layers stored by PreTrainLayers are plain dicts
                if hasattr(LAY, 'CleanLayer'):
                    LAY.CleanLayer()
            gc.collect()
class Generator():
    """Pseudo-random language generator.

    Builds syllables, words, sentences and paragraphs from the module-level
    letter tables, using a seeded ``RandomState`` so that output is
    reproducible for a given seed.
    """

    # seed the generator was created with (also used by reseed/convertWord)
    seed = None
    # the numpy RandomState all draws are taken from
    random = None

    def __init__(self, seed=1):
        """Create a generator whose random stream is seeded with ``seed``."""
        super(Generator, self).__init__()
        self.random = RandomState(seed)
        self.seed = seed

    def reseed(self):
        """Restart the random stream from the original seed."""
        self.random = RandomState(self.seed)

    def randSyllable(self):
        """Return one random syllable.

        All six dice are rolled first and the letters are drawn afterwards,
        so the number and order of random draws is fixed by the dice results.
        """
        # Chance that a regular consonant will start the syllable
        c1_dice = (self.random.random_sample() < 0.91)
        # Chance that a special conjunction consonant is used
        s1_dice = (self.random.random_sample() < 0.05)
        # Chance that a regular vowel will be used
        v1_dice = (self.random.random_sample() < 0.85)
        # Chance that it has an ending consonant
        c2_add_dice = (self.random.random_sample() < 0.28)
        # Chance that a regular consonant will end the syllable
        c2_dice = (self.random.random_sample() < 0.91)
        # Chance that the ending has an addon consonant
        s2_dice = (self.random.random_sample() < 0.03)

        c1 = self.random.choice(REGULAR_CONSONANTS) if c1_dice else self.random.choice(COMPOSITE_CONSONANTS)
        s1 = self.random.choice(SPECIAL_CONSONANTS) if s1_dice else ''
        v1 = self.random.choice(REGULAR_VOWELS) if v1_dice else self.random.choice(COMPOSITE_VOWELS)
        if c2_add_dice:
            c2 = self.random.choice(REGULAR_CONSONANTS) if c2_dice else self.random.choice(ENDING_CONSONANTS)
        else:
            c2 = ''
        s2 = self.random.choice(ADDON_ENDING_CONSONANTS) if s2_dice else ''

        return c1 + s1 + v1 + c2 + s2

    def randWord(self, s=2):
        """Return a random word made of ``s`` syllables (``s`` is an int)."""
        return ''.join(self.randSyllable() for _ in range(0, s))

    def randSentence(self, meter=(2, 2, 1, 2, 3, 2, 1, 2, 2)):
        """Return a sentence with one word per entry of ``meter``.

        ``meter`` is an iterable of syllable counts; the words are joined by
        single spaces.
        """
        return ' '.join(self.randWord(syllables) for syllables in meter)

    def randParagraph(self):
        """Return a paragraph of random sentences joined by '. '.

        The number of sentences, the words per sentence and the syllables per
        word are all drawn with ``randint`` (upper bound exclusive).
        """
        sentence_count = self.random.randint(4, 5)
        rand_wordcount = [self.random.randint(3, 6)
                          for _ in range(0, sentence_count)]
        paragraph = []
        for words in rand_wordcount:
            rand_meter = [self.random.randint(1, 4) for _ in range(0, words)]
            paragraph.append(self.randSentence(rand_meter))
        return '. '.join(paragraph)

    def randDictionary(self, word_list=('apple', 'banana', 'cake', 'dog',
                                        'elephant', 'fruit', 'guava', 'human',
                                        'island', 'joke', 'king', 'love',
                                        'mother', 'nature', 'ocean', 'pie',
                                        'queen', 'random', 'start', 'tree',
                                        'up', 'vine', 'wisdom', 'yellow',
                                        'zoo')):
        """Assign a random word to every entry of ``word_list`` and print it.

        Returns a tuple ``(english_to_random, random_to_english)`` of
        ``OrderedDict`` objects, each sorted by key. If two words happen to
        get the same random word, the reverse mapping keeps only the last.
        """
        rand_dict_e2r = {word: self.randWord() for word in word_list}
        rand_dict_r2e = {v: k for k, v in rand_dict_e2r.items()}

        ordered_e2r = OrderedDict()
        print("English to Random Language")
        for key in sorted(rand_dict_e2r):
            print(key + ' : ' + rand_dict_e2r[key])
            ordered_e2r[key] = rand_dict_e2r[key]

        ordered_r2e = OrderedDict()
        print("\n\nRandom Language to English")
        for key in sorted(rand_dict_r2e):
            print(key + ' : ' + rand_dict_r2e[key])
            ordered_r2e[key] = rand_dict_r2e[key]

        return (ordered_e2r, ordered_r2e)

    def convertWord(self, word):
        """Map ``word`` (case-insensitively) to a fixed random word.

        The word's MD5 digest is combined with the generator seed to seed the
        random stream temporarily, so the same word always maps to the same
        result for a given generator seed. The main random stream is restored
        afterwards.
        """
        word = word.lower()
        saved_state = self.random.get_state()

        # Word mapping method : md5
        # To make it more natural, this mapping should be updated
        # to reflect natural language patterns
        md5 = hashlib.md5(bytes(word, encoding='utf-8'))
        wordseed = (self.seed + int.from_bytes(md5.digest(), 'little')) % (2**31)

        self.random.seed(wordseed)
        # the syllable count is drawn from the word-specific stream, too
        randword = self.randWord(math.ceil(abs(self.random.normal(2, 1))))
        self.random.set_state(saved_state)
        return randword

    def convertSentence(self, sentence):
        """Convert every whitespace-separated word of ``sentence``."""
        return ' '.join(self.convertWord(word) for word in sentence.split())
class Configurator:
    """Loads a layer configuration and searches over it by perturbation.

    The configuration lives in ``self.CFG`` (one sub-dict per config-file
    section plus a ``'sections'`` list). ``Configure`` repeatedly nudges
    ("buzzes") a random subset of parameters, retrains a ``Layer`` and keeps
    or discards the change, until patience runs out or the time budget
    ``STOP_AT_HOUR`` is exceeded.

    The class relies on names defined outside this block: ``DEF_CFG_PATH``,
    ``RS``, ``CFPR``, ``Layer``, ``GenInts``, ``GenFloats``, ``GenParams``,
    ``GenSects``, ``GenSectsAndParams`` and ``MergeNu2Old``.

    NOTE(review): ``BHAVE_STACK``, ``CURR_BEHAVIOR``, ``PREV_RESULT``,
    ``CURR_RESULT``, ``CURR_PHASE``, ``ALLOWED_PHASE`` and
    ``IntakeNuResults`` are used by some methods but are not set up anywhere
    in this class; presumably a subclass or caller provides them - confirm.
    """

    def __init__(self, NU_TRNG_DATA, NU_TRNG_LABS, verbose = False, NU_CFG_PATH = None):
        """Store the training data/labels and initialise search state.

        NU_CFG_PATH overrides the default config file location.
        """
        # DATA and LAB MEMBERS
        # Should be your training data and corresponding labels
        self.TRNG_DATA = NU_TRNG_DATA
        self.TRNG_LABS = NU_TRNG_LABS

        # Just in case you want to use a special Config file
        self.CFG_PATH = DEF_CFG_PATH if NU_CFG_PATH is None else NU_CFG_PATH

        # Different Containers for Data
        self.CFG = {'sections': ['STACK', 'LAYER', 'SMAX_TUNE', 'MET_TUNE',
                                 'LOG_TUNE', 'CPP_LIBRARY', 'FILEIO',
                                 'SYSTEM']}
        self.CFG_STACK = []
        # Counter maintained by PushConfig / PopConfig.
        self.NUM_CFG_STACKED = 0

        # RandomState Object
        self.RANDO = RS()

        # Config parser
        self.CP = CFPR(allow_no_value=True)

        self.TLAY = None
        self.CHECKS = {'cfg_loaded': False,
                       'lay_obj_initd': False,
                       'verbose': verbose}
        self.SWARM_SIZE = 5
        self.ALL_CHECKS = self.CHECKS.keys()
        self.FLOAT_STEP = .00001
        self.INT_STEP = 1
        self.GIVE_UP_SCALE = np.linspace(-100, 100, 5000)
        self.PATIENT_LEVEL = 2500
        self.SCORE_STACK = [999999.]
        self.START_TIME = time.time()
        self.STOP_AT_HOUR = 1.0

    def CFGControl(self):
        """Yield (and print) the loop conditions: still patient, not timed out."""
        for TC in [self.isPatient(), not self.isStopTime()]:
            print(TC)
            yield TC

    def _PatienceIndex(self):
        """Return PATIENT_LEVEL clamped to a valid index of GIVE_UP_SCALE."""
        return int(np.clip(self.PATIENT_LEVEL, 0, len(self.GIVE_UP_SCALE) - 1))

    def GetPatiencesLevel(self):
        """Return the GIVE_UP_SCALE value at the current patience level."""
        return self.GIVE_UP_SCALE[self._PatienceIndex()]

    def isPatient(self):
        """Return True while the patience value is above -50."""
        # Clamp to the last valid index; len(scale) itself is out of range and
        # a negative level would otherwise wrap to the top of the scale.
        return self.GIVE_UP_SCALE[self._PatienceIndex()] > -50.

    def TimeElapsed(self):
        """Return the hours elapsed since construction."""
        # Compared against STOP_AT_HOUR, so the unit must be hours.
        return (time.time() - self.START_TIME) / 3600.

    def isStopTime(self):
        """Return True once more than STOP_AT_HOUR hours have elapsed."""
        return self.STOP_AT_HOUR < self.TimeElapsed()

    def isStackEmpty(self, STACK):
        """Return True when STACK has no elements."""
        return len(STACK) == 0

    def PatienceUp(self):
        """Increase the patience level by one."""
        self.PATIENT_LEVEL += 1

    def PatienceDown(self):
        """Decrease the patience level by one."""
        self.PATIENT_LEVEL -= 1

    def BuzzEm(self, VAL, BUZZ):
        """Perturb VAL as an int or float parameter; keep VAL if the result
        is not strictly positive.

        BUZZ is the [section, parameter] pair; it is treated as an integer
        parameter when it is found in ``GenInts()``.
        """
        if BUZZ in GenInts():
            NU_INT = self.BuzzInt(VAL)
            return NU_INT if NU_INT > 0 else VAL
        else:
            NU_FLOAT = self.BuzzFloat(VAL)
            return NU_FLOAT if NU_FLOAT > 0 else VAL

    def BuzzInt(self, VAL):
        """Return VAL lowered by INT_STEP (as a numpy scalar).

        NOTE(review): the original built the sequence
        [VAL-step, VAL, VAL+step] and always took its first element; that
        value is returned directly here. A random pick may have been
        intended - confirm.
        """
        return np.asarray(VAL - self.INT_STEP)[()]

    def BuzzFloat(self, VAL):
        """Return VAL lowered by FLOAT_STEP (as a numpy scalar).

        Computed directly instead of materialising a ~100k element range
        just to read its first entry.
        """
        return np.asarray(VAL - self.FLOAT_STEP)[()]

    def LoadAndConfigure(self, CFG_PATH=None):
        """Optionally switch config path, then load the file and run the search."""
        if CFG_PATH is not None:
            self.CFG_PATH = CFG_PATH
        self.LoadConfig()
        self.Configure()

    def Configure(self, CFG_PATH = None):
        """Run the perturb / train / accept-or-reject loop.

        CFG_PATH is accepted for interface compatibility and is not used.

        NOTE(review): an accepted configuration is pushed as a reference to
        the live ``self.CFG`` dict and no score is ever pushed onto
        ``SCORE_STACK``, so every result is compared against the initial
        sentinel - confirm whether that is intended.
        """
        self.CFG_STACK.insert(0, self.CFG)
        LAY = Layer(self.CFG['LAYER'])
        while all([C for C in self.CFGControl()]):
            LAY.ClearLayerParams()
            INT_IDX = [[i, j] for i, j in GenInts()]
            FLOAT_IDX = [[i, j] for i, j in GenFloats()]
            ALL_BUZZERS = self.RANDO.permutation(INT_IDX + FLOAT_IDX)
            # SWARM_SIZE must be an integer to be usable as a slice bound.
            SPLIT_AT = int(np.minimum(self.SWARM_SIZE, len(ALL_BUZZERS)))
            BUZZERS = ALL_BUZZERS[:SPLIT_AT]
            # Parameters outside the swarm are left untouched this round.
            for [SECT, PARAM] in BUZZERS:
                OLD_VAL = self.CFG[SECT][PARAM]
                self.CFG[SECT][PARAM] = self.BuzzEm(OLD_VAL, [SECT, PARAM])
            print(self.CFG.keys())
            LAY.SetNewParams(self.CFG)
            RESULT = self.LogTrain(LAY)
            if RESULT['fun'][0][-1] <= self.SCORE_STACK[0]:
                self.CFG_STACK.insert(0, self.CFG)
                self.PatienceUp()
            else:
                if self.isStackEmpty(self.CFG_STACK):
                    # Nothing left to fall back on: widen the search.
                    self.SWARM_SIZE += int(np.ceil(self.SWARM_SIZE / 2.))
                    self.FLOAT_STEP = .00002
                    self.INT_STEP = 2
                    self.PatienceDown()
                else:
                    self.CFG_STACK.pop(0)
                    self.PatienceDown()

    def GetCFG(self):
        """Return the live configuration dict."""
        return self.CFG

    def DeepCopy2CFG(self, NU_CFG):
        """Replace self.CFG with a two-level copy of NU_CFG."""
        self.CFG = {SECT: {PARAM: NU_CFG[SECT][PARAM]
                           for PARAM in GenParams(NU_CFG)}
                    for SECT in GenSects(NU_CFG)}

    def DeepCopyCFG2NU(self):
        """Return a two-level copy of self.CFG."""
        return {SECT: {PARAM: self.CFG[SECT][PARAM]
                       for PARAM in GenParams(self.CFG)}
                for SECT in GenSects(self.CFG)}

    def PushBehave(self):
        """Push a shallow copy of CURR_BEHAVIOR onto BHAVE_STACK."""
        self.BHAVE_STACK.insert(0, {})
        self.BHAVE_STACK[0].update(self.CURR_BEHAVIOR)

    def PushConfig(self, LATEST_CFG):
        """Push a two-level copy of LATEST_CFG onto CFG_STACK."""
        self.CFG_STACK.insert(0, {})
        for SECT, PARM in GenSectsAndParams(LATEST_CFG):
            # Create the per-section dict on first use.
            self.CFG_STACK[0].setdefault(SECT, {})[PARM] = LATEST_CFG[SECT][PARM]
        self.NUM_CFG_STACKED += 1

    def PopConfig(self):
        """Pop and return a copy of the newest stacked configuration.

        Also pops the matching entry of SCORE_STACK.

        Raises
        ------
        IndexError
            If no configuration has been pushed with PushConfig.
        """
        if self.NUM_CFG_STACKED > 0:
            NU_CFG = {SECT: {PARM: self.CFG_STACK[0][SECT][PARM]
                             for PARM in GenParams(self.CFG_STACK[0][SECT])}
                      for SECT in GenSects(self.CFG_STACK[0])}
            self.CFG_STACK.pop(0)
            self.SCORE_STACK.pop(0)
            self.NUM_CFG_STACKED -= 1
            return NU_CFG
        else:
            raise IndexError("None more configurations to pop off stack.")

    @staticmethod
    def _CoerceValue(val):
        """Convert a raw config string to bool, int or float when possible."""
        if val == 'True':
            return True
        if val == 'False':
            return False
        for CAST in (int, float):
            try:
                return CAST(val)
            except (TypeError, ValueError):
                # Not numeric (or None for a value-less option); try next.
                continue
        return val

    def LoadConfig(self):
        '''
        DESCRPT: Read the config file at self.CFG_PATH into self.CFG. This is
                 the only place the config file is read.
        NOTES:   Values 'True'/'False' become booleans, numeric strings become
                 int or float, anything else is kept as read. The random
                 generator is seeded from STACK/rand_seed_32bit when
                 STACK/lee_wants_rand_off is set, otherwise from the clock.
        RAISES:  IOError when the file cannot be read or parsed.
        '''
        try:
            LOADED = self.CP.read(self.CFG_PATH)
        except Exception:
            print("Config Name")
            raise IOError('Config file wasn\'t able to be read')
        if not LOADED:
            # ConfigParser.read skips unreadable files without raising.
            raise IOError('Config file wasn\'t able to be read')

        GENERIC_DICT = {}
        for SECT in self.CP.sections():
            GENERIC_DICT[SECT] = {}
            for OPTS in self.CP.options(SECT):
                GENERIC_DICT[SECT][OPTS] = self._CoerceValue(
                    self.CP.get(SECT, OPTS))

        for key in GenSects(GENERIC_DICT):
            if key not in self.CFG['sections']:
                self.CFG['sections'].append(key)

        for key in self.CFG['sections']:
            self.CFG[key] = GENERIC_DICT[key]

        self.CFG['LAYER']['disp'] = self.CHECKS['verbose']
        self.CHECKS['config_loaded'] = True
        self.CHECKS['lee_wants_rand_off'] = self.CFG['STACK']['lee_wants_rand_off']

        if self.CFG['STACK']['lee_wants_rand_off']:
            CONFIG_SEED = self.CFG['STACK']['rand_seed_32bit']
            self.RANDO.seed(seed=CONFIG_SEED)
        else:
            self.RANDO.seed(seed=np.int32(time.time()))

        self.CHECKS['cfg_loaded'] = True

    def GetChecks(self, CHECK_NAME):
        """Return the value of the named check flag.

        Raises ``Warning`` when CHECK_NAME is not a known check.
        """
        if CHECK_NAME in self.CHECKS:
            return self.CHECKS[CHECK_NAME]
        else:
            raise Warning("Thats not a valid Check")

    def UpdateConfig(self, SECTION=None, OPTION=None):
        '''
        DESCRPT: Write the current parameters back to config.ini in the
                 working directory.

                 PREVIOUSLY: Class parameters were loaded within each class
                 and were difficult to interact with from outside.
                 LATER: Parameter dictionaries per class were passed back and
                 forth between StackSAE and the instantiating class.
                 NOW: ONE dictionary contains all the parameters.

        IN ARGS: SECTION ; a section name or an iterable of section names;
                           all known sections when None
                 OPTION  ; a single option name to update in each selected
                           section; every option when None
        NOTE:    The existing config.ini is copied to the backup path stored
                 in FILEIO/last_config_backup before it is overwritten.
        '''
        import shutil

        # This keeps track of the last config file backed up and is a
        # parameter in FILEIO
        BACKUP_PATH = os.getcwd() + '_config_bu.ini'
        self.CFG['FILEIO']['last_config_backup'] = BACKUP_PATH
        # Copy to the recorded backup path; copying into the working
        # directory itself would target the source file and always fail.
        shutil.copy2(os.getcwd() + '/config.ini', BACKUP_PATH)

        # The config parser instantiated with SSAE is updated
        if SECTION is None:
            sect = self.CFG['sections']
        elif isinstance(SECTION, str):
            # A bare name must not be split into characters.
            sect = [SECTION]
        else:
            sect = list(SECTION)

        for s in sect:
            opts = list(self.CFG[s]) if OPTION is None else [OPTION]
            for opt in opts:
                self.CP.set(s, opt, str(self.CFG[s][opt]))

        # new config file is now in the cwd
        with open('config.ini', 'w') as write_config:
            self.CP.write(write_config)

    def ProcessResult(self, RES):
        """Copy the entries of RES whose keys appear in PREV_RESULT into
        CURR_RESULT, then clear the layer parameters of TLAY."""
        self.IntakeNuResults()
        for ATT in RES.keys():
            if ATT in self.PREV_RESULT.keys():
                self.CURR_RESULT[ATT] = RES.get(ATT)
        self.TLAY.ClearLayerParams()

    def StopTrain(self):
        """Switch to the 'stop' phase and record it as a result."""
        self.ChangePhase('stop')
        self.ProcessResult({'phase_name': self.CURR_PHASE})

    def LogTrain(self, LAY):
        """Create and train a sparse auto-encoder layer on the training data.

        Returns a dict with the optimiser's ``success``, ``message``, ``fun``,
        ``nfev`` and ``nit`` attributes.
        """
        IN_SHAPE = self.TRNG_DATA.shape
        NUM_HIDD = self.CFG['STACK']['num_hidden']
        [WIN, WOUT, BIN, BOUT, SHAPES] = LAY.CreateLogLayer(IN_SHAPE, NUM_HIDD, self.RANDO)
        THETA = LAY.TrainSparseAE(WIN, WOUT, BIN, BOUT, self.TRNG_DATA)
        return {'success': THETA.success,
                'message': THETA.message,
                'fun': THETA.fun,
                'nfev': THETA.nfev,
                'nit': THETA.nit}

    def FlattenData(self, DATA):
        """Store DATA flattened to one dimension as the training data."""
        self.TRNG_DATA = DATA.reshape(-1)

    def SetTrainingData(self, DATA):
        """Set the training data, flattening it when it has several axes."""
        if DATA.ndim > 1:
            self.FlattenData(DATA)
        else:
            self.TRNG_DATA = DATA

    def SetTrainingLabs(self, LABS):
        """Set the training labels."""
        self.TRNG_LABS = LABS

    def ChangePhase(self, NU_PHASE):
        """Set CURR_PHASE; raise ValueError if NU_PHASE is not allowed."""
        if NU_PHASE in self.ALLOWED_PHASE:
            self.CURR_PHASE = NU_PHASE
        else:
            raise ValueError('Non allowable phase passed')

    def BuildNewLay_CurrParam(self, PHASE = None):
        """Build TEST_LAY from the current LAYER parameters, optionally
        switching phase first."""
        if PHASE is not None and self.CURR_PHASE != PHASE:
            self.ChangePhase(PHASE)
        self.TEST_LAY = Layer(self.CFG['LAYER'])

    def BuildNewLay_NuParam(self, PHASE = None, **NU_LAY_PARAM):
        """Build TEST_LAY from the configuration merged with NU_LAY_PARAM,
        optionally switching phase first."""
        if PHASE is not None and self.CURR_PHASE != PHASE:
            self.ChangePhase(PHASE)
        self.TEST_LAY = Layer(MergeNu2Old(self.CFG, NU_LAY_PARAM))

    def CFGStackIsEmpty(self):
        """Return True when no configuration is stacked."""
        return self.isStackEmpty(self.CFG_STACK)
class RandomGenerator(object):
    """Thin wrapper around a numpy ``RandomState``.

    Keeping one generator per consumer makes player behaviour, matches and
    tournaments reproducible from a seed.
    """

    def __init__(self, seed: Optional[int] = None):
        self.original_seed = seed
        # _random is the underlying source of every random value
        self._random = RandomState()
        self.seed(seed)

    def seed(self, seed_: Optional[int] = None):
        """Seed the underlying generator."""
        self._random.seed(seed_)

    def random(self, *args, **kwargs):
        """Forward to ``RandomState.rand``."""
        return self._random.rand(*args, **kwargs)

    def randint(self, *args, **kwargs):
        """Forward to ``RandomState.randint``."""
        return self._random.randint(*args, **kwargs)

    def random_seed_int(self) -> int:
        """Draw an integer suitable for seeding another generator."""
        return self.randint(low=0, high=2**32 - 1, dtype="uint64")

    def choice(self, *args, **kwargs):
        """Forward to ``RandomState.choice``."""
        return self._random.choice(*args, **kwargs)

    def uniform(self, *args, **kwargs):
        """Forward to ``RandomState.uniform``."""
        return self._random.uniform(*args, **kwargs)

    def random_choice(self, p: float = 0.5) -> Action:
        """
        Pick C with probability `p`, otherwise D.

        When p is exactly 0 or 1 the answer is fixed and no random number
        is consumed.

        Parameters
        ----------
        p : float
            The probability of picking C

        Returns
        -------
        axelrod.Action
        """
        if p == 0:
            return D
        if p == 1:
            return C
        return C if self.random() < p else D

    def random_flip(self, action: Action, threshold: float) -> Action:
        """
        Flip `action` with probability `threshold`, otherwise return it as is.

        When threshold is exactly 0 or 1 no random number is consumed.

        Parameters
        ----------
        action:
            The action to flip or not
        threshold : float
            The probability of flipping action

        Returns
        -------
        axelrod.Action
        """
        should_flip = self.random_choice(threshold) == C
        return action.flip() if should_flip else action

    def randrange(self, a: int, b: int) -> int:
        """Returns a random integer uniformly between a and b: [a, b)."""
        offset = int((b - a) * self.random())
        return a + offset

    def random_vector(self, size):
        """Create a random vector of values in [0, 1] that sums to 1."""
        draws = self.random(size)
        total = np.sum(draws)
        return np.array(draws) / total
class IIDBootstrap(object):
    """
    Bootstrap using uniform resampling

    Parameters
    ----------
    args
        Positional arguments to bootstrap
    kwargs
        Keyword arguments to bootstrap

    Attributes
    ----------
    index : array
        The current index of the bootstrap
    data : tuple
        Two-element tuple with the pos_data in the first position and kw_data
        in the second (pos_data, kw_data)
    pos_data : tuple
        Tuple containing the positional arguments (in the order entered)
    kw_data : dict
        Dictionary containing the keyword arguments
    random_state : RandomState
        RandomState instance used by bootstrap

    Notes
    -----
    Supports numpy arrays and pandas Series and DataFrames.  Data returned has
    the same type as the input data.

    Data entered using keyword arguments is directly accessible as an
    attribute.

    Examples
    --------
    Data can be accessed in a number of ways.  Positional data is retained in
    the same order as it was entered when the bootstrap was initialized.
    Keyword data is available both as an attribute or using a dictionary syntax
    on kw_data.

    >>> from arch.bootstrap import IIDBootstrap
    >>> from numpy.random import standard_normal
    >>> y = standard_normal((500, 1))
    >>> x = standard_normal((500,2))
    >>> z = standard_normal(500)
    >>> bs = IIDBootstrap(x, y=y, z=z)
    >>> for data in bs.bootstrap(100):
    ...     bs_x = data[0][0]
    ...     bs_y = data[1]['y']
    ...     bs_z = bs.z
    """

    def __init__(self, *args, **kwargs):
        self.random_state = RandomState()
        self._initial_state = self.random_state.get_state()
        self._args = args
        self._kwargs = kwargs
        # The number of observations is taken from the first input.
        if args:
            self._num_items = len(args[0])
        elif kwargs:
            self._num_items = len(next(iter(kwargs.values())))
        else:
            raise ValueError('At least one positional or keyword input is '
                             'required')

        all_args = list(args)
        all_args.extend(kwargs.values())
        for arg in all_args:
            if len(arg) != self._num_items:
                raise ValueError("All inputs must have the same number of "
                                 "elements in axis 0")
        self._index = np.arange(self._num_items)

        self._parameters = []
        self._seed = None
        self.pos_data = args
        self.kw_data = kwargs
        self.data = (args, kwargs)

        self._base = None
        self._results = None
        self._studentized_results = None
        self._last_func = None
        self._name = 'IID Bootstrap'
        # Expose keyword data as attributes unless the name is already taken
        # by an attribute holding a non-None value.
        for key, value in kwargs.items():
            attr = getattr(self, key, None)
            if attr is None:
                self.__setattr__(key, value)
            else:
                raise ValueError(key + ' is a reserved name')

    def __str__(self):
        out = self._name
        out += '(no. pos. inputs: ' + str(len(self.pos_data))
        out += ', no. keyword inputs: ' + str(len(self.kw_data)) + ')'
        return out

    def __repr__(self):
        return self.__str__()[:-1] + ', ID: ' + hex(id(self)) + ')'

    def _repr_html(self):
        html = '<strong>' + self._name + '</strong>('
        html += '<strong>no. pos. inputs</strong>: ' + str(len(self.pos_data))
        html += ', <strong>no. keyword inputs</strong>: ' + \
                str(len(self.kw_data))
        html += ', <strong>ID</strong>: ' + hex(id(self)) + ')'
        return html

    @property
    def index(self):
        """
        Returns the current index of the bootstrap
        """
        return self._index

    def get_state(self):
        """
        Gets the state of the bootstrap's random number generator

        Returns
        -------
        state : RandomState state vector
            Array containing the state
        """
        return self.random_state.get_state()

    def set_state(self, state):
        """
        Sets the state of the bootstrap's random number generator

        Parameters
        ----------
        state : RandomState state vector
            Array containing the state
        """
        return self.random_state.set_state(state)

    def seed(self, value):
        """
        Seeds the bootstrap's random number generator

        Parameters
        ----------
        value : int
            Integer to use as the seed
        """
        self._seed = value
        self.random_state.seed(value)
        return None

    def reset(self, use_seed=True):
        """
        Resets the bootstrap to either its initial state or the last seed.

        Parameters
        ----------
        use_seed : bool, optional
            Flag indicating whether to use the last seed if provided.  If
            False or if no seed has been set, the bootstrap will be reset
            to the initial state.  Default is True
        """
        self._index = np.arange(self._num_items)
        self._resample()
        self.random_state.set_state(self._initial_state)
        if use_seed and self._seed is not None:
            self.seed(self._seed)
        return None

    def bootstrap(self, reps):
        """
        Iterator for use when bootstrapping

        Parameters
        ----------
        reps : int
            Number of bootstrap replications

        Example
        -------
        The key steps are problem dependent and so this example shows the use
        as an iterator that does not produce any output

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> bs = IIDBootstrap(np.arange(100), x=np.random.randn(100))
        >>> for posdata, kwdata in bs.bootstrap(1000):
        ...     # Do something with the positional data and/or keyword data
        ...     pass

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap

        Notes
        -----
        The iterator returns a tuple containing the data entered in positional
        arguments as a tuple and the data entered using keywords as a
        dictionary
        """
        for _ in range(reps):
            indices = np.asarray(self.update_indices())
            self._index = indices
            yield self._resample()

    def conf_int(self, func, reps=1000, method='basic', size=0.95, tail='two',
                 extra_kwargs=None, reuse=False, sampling='nonparametric',
                 std_err_func=None, studentize_reps=1000):
        """
        Compute bootstrap confidence intervals

        Parameters
        ----------
        func : callable
            Function the computes parameter values.  See Notes for requirements
        reps : int, optional
            Number of bootstrap replications
        method : string, optional
            One of 'basic', 'percentile', 'studentized', 'norm' (identical to
            'var', 'cov'), 'bc' (identical to 'debiased', 'bias-corrected'), or
            'bca'.  Matching is case-insensitive.
        size : float, optional
            Coverage of confidence interval
        tail : string, optional
            One of 'two', 'upper' or 'lower'.
        reuse : bool, optional
            Flag indicating whether to reuse previously computed bootstrap
            results.  This allows alternative methods to be compared without
            rerunning the bootstrap simulation.  Reuse is ignored if reps is
            not the same across multiple runs, func changes across calls, or
            method is 'studentized'.
        sampling : string, optional
            Type of sampling to use: 'nonparametric', 'semi-parametric' (or
            'semi', 'semiparametric') or 'parametric'.  The default is
            'nonparametric'.  See notes about the changes to func required
            when using 'semi' or 'parametric'.
        extra_kwargs : dict, optional
            Extra keyword arguments to use when calling func and std_err_func,
            when appropriate
        std_err_func : callable, optional
            Function to use when standardizing estimated parameters when using
            the studentized bootstrap.  Providing an analytical function
            eliminates the need for a nested bootstrap
        studentize_reps : int, optional
            Number of bootstraps to use in the inner component when using the
            studentized bootstrap.  Ignored when ``std_err_func`` is provided

        Returns
        -------
        intervals : 2-d array
            Computed confidence interval.  Row 0 contains the lower bounds, and
            row 1 contains the upper bounds.  Each column corresponds to a
            parameter. When tail is 'lower', all upper bounds are inf.
            Similarly, 'upper' sets all lower bounds to -inf.

        Raises
        ------
        ValueError
            If size is not strictly between 0 and 1, or tail or method is not
            one of the supported values.  These are checked before any
            bootstrap replication is run.
        RuntimeError
            If method is 'bca' and the jackknife variance estimate is too
            small.

        Examples
        --------
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(0)
        >>> y = np.random.randn(1000, 2)
        >>> from arch.bootstrap import IIDBootstrap
        >>> bs = IIDBootstrap(y)
        >>> ci = bs.conf_int(func, 1000)

        Notes
        -----
        When there are no extra keyword arguments, the function is called

        .. code:: python

            func(*args, **kwargs)

        where args and kwargs are the bootstrap version of the data provided
        when setting up the bootstrap.  When extra keyword arguments are used,
        these are appended to kwargs before calling func.

        With 'parametric' sampling func additionally receives the keyword
        arguments ``state`` (the bootstrap's RandomState) and ``params`` (the
        estimate computed on the original data) in each replication; with
        semi-parametric sampling it receives ``params`` only.

        The standard error function, if provided, must return a vector of
        parameter standard errors and is called

        .. code:: python

            std_err_func(params, *args, **kwargs)

        where ``params`` is the vector of estimated parameters using the same
        bootstrap data as in args and kwargs.

        The bootstraps are:

        * 'basic' - Basic confidence using the estimated parameter and
          difference between the estimated parameter and the bootstrap
          parameters
        * 'percentile' - Direct use of bootstrap percentiles
        * 'norm' - Makes use of normal approximation and bootstrap covariance
          estimator
        * 'studentized' - Uses either a standard error function or a nested
          bootstrap to estimate percentiles and the bootstrap covariance for
          scale
        * 'bc' - Bias corrected using estimate bootstrap bias correction
        * 'bca' - Bias corrected and accelerated, adding acceleration parameter
          to 'bc' method
        """
        studentized = 'studentized'
        normal_methods = ('norm', 'var', 'cov')
        bias_corrected = ('debiased', 'bc', 'bias-corrected', 'bca')
        percentile_methods = ('percentile', 'basic', studentized) + \
            bias_corrected

        if not 0.0 < size < 1.0:
            raise ValueError('size must be strictly between 0 and 1')
        tail = tail.lower()
        if tail not in ('two', 'lower', 'upper'):
            raise ValueError('tail must be one of two-sided, lower or upper')
        method = method.lower()
        # Reject bad input before spending time on the bootstrap itself.
        if method not in normal_methods + percentile_methods:
            raise ValueError('Unknown method')
        studentize_reps = studentize_reps if method == studentized else 0

        _reuse = False
        if reuse:
            # check conditions for reuse
            _reuse = (self._results is not None and
                      len(self._results) == reps and
                      method != studentized and
                      self._last_func is func)

        if not _reuse:
            if reuse:
                import warnings

                warn = 'The conditions to reuse the previous bootstrap has ' \
                       'not been satisfied. A new bootstrap will be constructed'
                warnings.warn(warn, RuntimeWarning)
            self._construct_bootstrap_estimates(
                func, reps, extra_kwargs, std_err_func=std_err_func,
                studentize_reps=studentize_reps, sampling=sampling)

        base, results = self._base, self._results
        studentized_results = self._studentized_results

        std_err = []
        if method in normal_methods + (studentized,):
            errors = results - results.mean(axis=0)
            std_err = np.sqrt(np.diag(errors.T.dot(errors) / reps))

        if tail == 'two':
            alpha = (1.0 - size) / 2
        else:
            alpha = (1.0 - size)

        percentiles = [alpha, 1.0 - alpha]
        norm_quantiles = stats.norm.ppf(percentiles)

        if method in normal_methods:
            lower = base + norm_quantiles[0] * std_err
            upper = base + norm_quantiles[1] * std_err

        else:
            values = results
            if method == studentized:
                # studentized uses studentized parameter estimates
                values = studentized_results

            if method in bias_corrected:
                # bias corrected uses modified percentiles, but is
                # otherwise identical to the percentile method
                p = (results < base).mean(axis=0)
                b = stats.norm.ppf(p)
                b = b[:, None]
                if method == 'bca':
                    nobs = self._num_items
                    jk_params = _loo_jackknife(func, nobs, self._args,
                                               self._kwargs)
                    u = (nobs - 1) * (jk_params - base)
                    numer = np.sum(u ** 3, 0)
                    denom = 6 * (np.sum(u ** 2, 0) ** (3.0 / 2.0))
                    small = denom < (np.abs(numer) * np.finfo(np.float64).eps)
                    if small.any():
                        message = 'Jackknife variance estimate {jk_var} is ' \
                                  'too small to use BCa'
                        raise RuntimeError(message.format(jk_var=denom))
                    a = numer / denom
                    a = a[:, None]
                else:
                    a = 0.0

                percentiles = stats.norm.cdf(
                    b + (b + norm_quantiles) /
                    (1.0 - a * (b + norm_quantiles)))
                percentiles = list(100 * percentiles)
            else:
                percentiles = [100 * p for p in percentiles]  # Rescale

            if method not in bias_corrected:
                ci = np.asarray(np.percentile(values, percentiles, axis=0))
                lower = ci[0, :]
                upper = ci[1, :]
            else:
                # Each parameter has its own pair of adjusted percentiles.
                k = values.shape[1]
                lower = np.zeros(k)
                upper = np.zeros(k)
                for i in range(k):
                    lower[i], upper[i] = np.percentile(values[:, i],
                                                       list(percentiles[i]))

            # Basic and studentized use the lower empirical quantile to
            # compute upper and vice versa.  Bias corrected and percentile use
            # upper to estimate the upper, and lower to estimate the lower
            if method == 'basic':
                lower_copy = lower + 0.0
                lower = 2.0 * base - upper
                upper = 2.0 * base - lower_copy
            elif method == studentized:
                lower_copy = lower + 0.0
                lower = base - upper * std_err
                upper = base - lower_copy * std_err

        # One-sided intervals are unbounded on the other side.  Built as
        # float so an integer-valued base cannot break the fill.
        if tail == 'lower':
            upper = np.full_like(base, np.inf, dtype=np.float64)
        elif tail == 'upper':
            lower = np.full_like(base, -np.inf, dtype=np.float64)

        return np.vstack((lower, upper))

    def clone(self, *args, **kwargs):
        """
        Clones the bootstrap using different data.

        Parameters
        ----------
        args
            Positional arguments to bootstrap
        kwargs
            Keyword arguments to bootstrap

        Returns
        -------
        bs
            Bootstrap instance.  It is seeded with this bootstrap's last seed
            when one has been set.
        """
        pos_arguments = copy.deepcopy(self._parameters)
        pos_arguments.extend(args)
        bs = self.__class__(*pos_arguments, **kwargs)
        if self._seed is not None:
            bs.seed(self._seed)
        return bs

    def apply(self, func, reps=1000, extra_kwargs=None):
        """
        Applies a function to bootstrap replicated data

        Parameters
        ----------
        func : callable
            Function the computes parameter values.  See Notes for requirements
        reps : int, optional
            Number of bootstrap replications
        extra_kwargs : dict, optional
            Extra keyword arguments to use when calling func.  Must not
            conflict with keyword arguments used to initialize bootstrap

        Returns
        -------
        results : array
            reps by nparam array of computed function values where each row
            corresponds to a bootstrap iteration

        Notes
        -----
        When there are no extra keyword arguments, the function is called

        .. code:: python

            func(*args, **kwargs)

        where args and kwargs are the bootstrap version of the data provided
        when setting up the bootstrap.  When extra keyword arguments are used,
        these are appended to kwargs before calling func

        Examples
        --------
        >>> import numpy as np
        >>> x = np.random.randn(1000,2)
        >>> from arch.bootstrap import IIDBootstrap
        >>> bs = IIDBootstrap(x)
        >>> def func(y):
        ...     return y.mean(0)
        >>> results = bs.apply(func, 100)
        """
        kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs)
        base = func(*self._args, **kwargs)
        try:
            num_params = base.shape[0]
        except (AttributeError, IndexError):
            # Plain Python scalars have no shape; 0-d values have an empty one.
            num_params = 1
        results = np.zeros((reps, num_params))
        count = 0
        for pos_data, kw_data in self.bootstrap(reps):
            kwargs = _add_extra_kwargs(kw_data, extra_kwargs)
            results[count] = func(*pos_data, **kwargs)
            count += 1
        return results

    def _construct_bootstrap_estimates(self, func, reps, extra_kwargs=None,
                                       std_err_func=None, studentize_reps=0,
                                       sampling='nonparametric'):
        # Private, more complicated version of apply.  Stores the estimate on
        # the original data in _base, the replicated estimates in _results and
        # (when requested) the studentized estimates in _studentized_results.
        self._last_func = func
        semi = parametric = False
        if sampling == 'parametric':
            parametric = True
        elif sampling in ('semi', 'semi-parametric', 'semiparametric'):
            # Accept every spelling offered in the conf_int documentation.
            semi = True

        if extra_kwargs is not None:
            if any(k in self._kwargs for k in extra_kwargs):
                raise ValueError('extra_kwargs contains keys used for variable'
                                 ' names in the bootstrap')
        kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs)
        base = func(*self._args, **kwargs)

        num_params = 1 if np.isscalar(base) else base.shape[0]
        results = np.zeros((reps, num_params))
        studentized_results = np.zeros((reps, num_params))

        count = 0
        for pos_data, kw_data in self.bootstrap(reps):
            kwargs = _add_extra_kwargs(kw_data, extra_kwargs)
            if parametric:
                kwargs['state'] = self.random_state
                kwargs['params'] = base
            elif semi:
                kwargs['params'] = base
            results[count] = func(*pos_data, **kwargs)
            if std_err_func is not None:
                std_err = std_err_func(results[count], *pos_data, **kwargs)
                studentized_results[count] = (results[count] - base) / std_err
            elif studentize_reps > 0:
                # Need new bootstrap of same type
                nested_bs = self.clone(*pos_data, **kw_data)
                # Set the seed to ensure reproducibility
                seed = self.random_state.randint(2 ** 31 - 1)
                nested_bs.seed(seed)
                cov = nested_bs.cov(func, studentize_reps,
                                    extra_kwargs=extra_kwargs)
                std_err = np.sqrt(np.diag(cov))
                studentized_results[count] = (results[count] - base) / std_err
            count += 1

        self._base = np.asarray(base)
        self._results = np.asarray(results)
        self._studentized_results = np.asarray(studentized_results)

    def cov(self, func, reps=1000, recenter=True, extra_kwargs=None):
        """
        Compute parameter covariance using bootstrap

        Parameters
        ----------
        func : callable
            Callable function that returns the statistic of interest as a
            1-d array
        reps : int, optional
            Number of bootstrap replications
        recenter : bool, optional
            Whether to center the bootstrap variance estimator on the average
            of the bootstrap samples (True) or to center on the original sample
            estimate (False).  Default is True.
        extra_kwargs: dict, optional
            Dictionary of extra keyword arguments to pass to func

        Returns
        -------
        cov: array
            Bootstrap covariance estimator

        Notes
        -----
        func is called as

        .. code:: python

            func(*args, **kwargs)

        where `*args` and `**kwargs` are the bootstrap versions of the data
        used to set up the bootstrap, with any ``extra_kwargs`` added to
        kwargs.

        Example
        -------
        Bootstrap covariance of the mean

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(axis=0)
        >>> y = np.random.randn(1000, 3)
        >>> bs = IIDBootstrap(y)
        >>> cov = bs.cov(func, 1000)

        Bootstrap covariance using a function that takes additional input

        >>> def func(x, stat='mean'):
        ...     if stat=='mean':
        ...         return x.mean(axis=0)
        ...     elif stat=='var':
        ...         return x.var(axis=0)
        >>> cov = bs.cov(func, 1000, extra_kwargs={'stat':'var'})

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap
        """
        self._construct_bootstrap_estimates(func, reps, extra_kwargs)
        base, results = self._base, self._results

        if recenter:
            errors = results - np.mean(results, 0)
        else:
            errors = results - base

        return errors.T.dot(errors) / reps

    def var(self, func, reps=1000, recenter=True, extra_kwargs=None):
        """
        Compute parameter variance using bootstrap

        Parameters
        ----------
        func : callable
            Callable function that returns the statistic of interest as a
            1-d array
        reps : int, optional
            Number of bootstrap replications
        recenter : bool, optional
            Whether to center the bootstrap variance estimator on the average
            of the bootstrap samples (True) or to center on the original sample
            estimate (False).  Default is True.
        extra_kwargs: dict, optional
            Dictionary of extra keyword arguments to pass to func

        Returns
        -------
        var : 1-d array
            Bootstrap variance estimator

        Notes
        -----
        func is called as

        .. code:: python

            func(*args, **kwargs)

        where `*args` and `**kwargs` are the bootstrap versions of the data
        used to set up the bootstrap, with any ``extra_kwargs`` added to
        kwargs.

        Example
        -------
        Bootstrap variance of the mean

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(axis=0)
        >>> y = np.random.randn(1000, 3)
        >>> bs = IIDBootstrap(y)
        >>> variances = bs.var(func, 1000)

        Bootstrap variance using a function that takes additional input

        >>> def func(x, stat='mean'):
        ...     if stat=='mean':
        ...         return x.mean(axis=0)
        ...     elif stat=='var':
        ...         return x.var(axis=0)
        >>> variances = bs.var(func, 1000, extra_kwargs={'stat': 'var'})

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap
        """
        self._construct_bootstrap_estimates(func, reps, extra_kwargs)
        base, results = self._base, self._results

        if recenter:
            errors = results - np.mean(results, 0)
        else:
            errors = results - base

        return (errors ** 2).sum(0) / reps

    def update_indices(self):
        """
        Update indices for the next iteration of the bootstrap.  This must
        be overridden when creating new bootstraps.
        """
        return self.random_state.randint(self._num_items,
                                         size=self._num_items)

    def _resample(self):
        """
        Resample all data using the values in _index
        """
        indices = self._index
        pos_data = []
        for values in self._args:
            if isinstance(values, (pd.Series, pd.DataFrame)):
                # pandas objects are selected by position, not by label
                pos_data.append(values.iloc[indices])
            else:
                pos_data.append(values[indices])
        named_data = {}
        for key, values in self._kwargs.items():
            if isinstance(values, (pd.Series, pd.DataFrame)):
                named_data[key] = values.iloc[indices]
            else:
                named_data[key] = values[indices]
            # Keep the attribute view of keyword data in step with the draw.
            setattr(self, key, named_data[key])

        self.pos_data = pos_data
        self.kw_data = named_data
        self.data = (pos_data, named_data)
        return self.data
class Smearer(Analyzer):
    '''Applies a numerical smearing to objects

    Example:

    from heppy.analyzers.triggerrates.Smearer import Smearer
    jetToElectronTransformer = cfg.Analyzer(
      Smearer ,
      input_collection = 'jets',
      output_collection = 'l1tEGamma',
      convolution_file = "convFile.root",
      convolution_histogram_prefix = "l1tObjectPtDistributionBinnedInGenJet",
      bins = [0, 10, 20, 30, 40, 50, 60],
      object_x_range = (0, 200),
      probability_file = "probFile.root",
      probability_histogram = "efficiencyPlot"
    )

    * input_collection : input collection containing the jets

    * output_collection : output collection which the converted jet will be
      stored in, i.e. the type of object the jet will be converted to

    * convolution_file : file containing the jet-to-object convolution curves

    * convolution_histogram_prefix : prefix in the convolution file, it will
      be followed by _10_20, if 10 is the low bin and 20 is the high bin

    * bins : bins of the convolution file (bin edges; assumed to be sorted in
      ascending order since they are searched with bisect)

    * object_x_range : range in which the momentum of the generated object
      will be located, helps in generating new object faster if the TH1F is
      very big (this option is not read by the code in this class)

    * probability_file : file containing the binned fraction of object to
      smear (a sort of transformation probability). If omitted assumed to be 1.

    * probability_histogram : name of the histogram containing the
      probabilities

    NOTE: A property 'match' is added to the input object to connect it to
    the smeared version (None when the object was not smeared). Smeared
    inputs additionally get 'matches' and 'dr', and the smeared copy gets
    'match' and 'matches' pointing back to the input.

    '''

    def beginLoop(self, setup):
        '''Create the random generator and load the configured histograms.

        One convolution histogram is fetched per pair of adjacent bin edges,
        named <convolution_histogram_prefix>_<low>_<high>. The probability
        histogram is loaded only when probability_file is configured and
        non-empty; otherwise it is set to None.
        '''
        super(Smearer, self).beginLoop(setup)
        self.rng = RandomState()
        self.rng.seed()

        self.convolutionFile = TFile(self.cfg_ana.convolution_file)
        bins = self.cfg_ana.bins
        prefix = self.cfg_ana.convolution_histogram_prefix
        # Pair every bin edge with its successor: (b0, b1), (b1, b2), ...
        self.convolutionHistograms = [
            self.convolutionFile.Get(prefix + "_" + str(low) + "_" + str(high))
            for low, high in zip(bins[:-1], bins[1:])
        ]

        self.probabilityFileName = getattr(self.cfg_ana, "probability_file", "")
        if self.probabilityFileName != "":
            self.probabilityFile = TFile(self.probabilityFileName)
            self.probabilityHistogram = self.probabilityFile.Get(
                self.cfg_ana.probability_histogram)
        else:
            self.probabilityFile = None
            self.probabilityHistogram = None
    # End beginLoop

    def process(self, event):
        '''Smear the objects of the input collection and store the result.

        For each input object the pt bin is looked up in cfg_ana.bins.
        Objects whose pt lies below the first edge or at/above the last edge
        are not smeared and get match = None. Otherwise the object is kept
        with the probability read from the probability histogram (always kept
        when no such histogram is configured); a kept object is deep-copied
        and the copy's pt is shifted by a value drawn from the convolution
        histogram of that bin. Eta, phi and energy of the copy are set to the
        values of the input object.

        The list of smeared copies is set on the event under
        cfg_ana.output_collection.
        '''
        jets = getattr(event, self.cfg_ana.input_collection)
        bins = self.cfg_ana.bins
        nBins = len(bins) - 1
        output_collection = []

        for jet in jets:
            jetPt = jet.pt()
            jetEta = jet.eta()
            jetPhi = jet.phi()
            jetE = jet.e()

            factorIndex = bisect_right(bins, jetPt) - 1
            # Skip both overflow (pt >= last edge) and underflow (pt < first
            # edge). Without the underflow check an index of -1 would wrap
            # around and select the histogram of the last bin.
            if factorIndex < 0 or factorIndex >= nBins:
                jet.match = None
                continue

            rndNumber = self.rng.uniform(0, 1)
            if self.probabilityHistogram is None:
                isMisidentified = True
            else:
                # +1 converts the 0-based bin index to a histogram bin number.
                # NOTE(review): assumes the probability histogram uses the
                # same binning as cfg_ana.bins -- confirm against the inputs.
                isMisidentified = (
                    rndNumber
                    < self.probabilityHistogram.GetBinContent(factorIndex + 1)
                )

            if not isMisidentified:
                jet.match = None
                continue

            # Creating a new object with the same properties
            trgObject = deepcopy(jet)

            # Getting the quantity to add in order to smear
            # I will use the root method which uses the cumulative probability function
            # Reference https://root.cern.ch/doc/master/TH1_8cxx_source.html#l04710
            convolutionHistogram = self.convolutionHistograms[factorIndex]
            rndX = convolutionHistogram.GetRandom()
            trgObject._tlv.SetPtEtaPhiE(jetPt + rndX, jetEta, jetPhi, jetE)

            # Cross-link the original object and its smeared copy.
            jet.match = trgObject
            jet.matches = [trgObject]
            jet.dr = 0
            trgObject.matches = [jet]
            trgObject.match = jet
            output_collection.append(trgObject)

        setattr(event, self.cfg_ana.output_collection, output_collection)
    # End process