def _clean_data(self):
    """
    Sanitize the assigned data

    :param self:
    :return: None
    """
    data = self.data

    # If data is list-like, convert to 1D np.array
    if isinstance(data, LISTLIKE):
        data = np.array(data).ravel()
    elif isinstance(data, set):
        data = np.array(list(data)).ravel()
    else:
        raise ControlledError(
            "Error: could not cast data into an np.array")

    # Check that entries are numbers
    check(all([isinstance(n, numbers.Real) for n in data]),
          'not all entries in data are real numbers')

    # Cast as 1D np.array of floats
    data = data.astype(float)

    # Keep only finite numbers
    data = data[np.isfinite(data)]

    try:
        if not (len(data) > 0):
            raise ControlledError(
                'Input check failed, data must have length > 0: '
                'data = %s' % data)
    except ControlledError as e:
        print(e)
        sys.exit(1)

    try:
        data_spread = max(data) - min(data)
        if not np.isfinite(data_spread):
            raise ControlledError(
                'Input check failed. Data[max]-Data[min] is not finite: '
                'Data spread = %s' % data_spread)
    except ControlledError as e:
        print(e)
        sys.exit(1)

    try:
        if not (data_spread > 0):
            raise ControlledError(
                'Input check failed. Data[max]-Data[min] must be > 0: '
                'data_spread = %s' % data_spread)
    except ControlledError as e:
        print(e)
        sys.exit(1)

    # Set cleaned data
    self.data = data
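# Note on the validation pattern used throughout this module: `check` and
# `ControlledError` are defined elsewhere in the suftware package. As a
# rough sketch (an assumption about their behavior, not the actual
# implementation), `check` raises a ControlledError whenever its condition
# is False:
#
#     def check(condition, message):
#         """Raise a ControlledError with `message` if `condition` fails."""
#         if not condition:
#             raise ControlledError(message)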
def __init__(self, dataset='old_faithful_eruption_times'):
    # Check that dataset is valid
    check(dataset in self.list(),
          'Distribution "%s" not recognized.' % dataset)

    # Set dataset file name
    file_name = '%s/%s.txt' % (data_dir, dataset)

    # Load data
    self._load_dataset(file_name)
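# Usage sketch (illustrative only; assumes this constructor belongs to a
# class exposed as `sw.ExampleDataset` and that `_load_dataset` sets a
# `data` attribute -- both assumptions based on the code above):
#
#     import suftware as sw
#     dataset = sw.ExampleDataset('old_faithful_eruption_times')
#     print(dataset.data[:5])   # first few loaded data points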
def demo(example='real_data'):
    """
    Performs a demonstration of suftware.

    Parameters
    ----------
    example: (str)
        A string specifying which demo to run. Must be 'real_data',
        'simulated_data', or 'custom_data'.

    Returns
    -------
    None.
    """
    import os
    example_dir = os.path.dirname(__file__)

    example_dict = {
        'custom_data': 'docs/example_custom.py',
        'simulated_data': 'docs/example_wide.py',
        'real_data': 'docs/example_alcohol.py'
    }

    check(example in example_dict,
          'example = %s is not valid. Must be one of %s' %
          (example, example_dict.keys()))

    file_name = '%s/%s' % (example_dir, example_dict[example])

    with open(file_name, 'r') as f:
        content = f.read()
        line = '-------------------------------------------------------------'
        print('Running %s:\n%s\n%s\n%s' %
              (file_name, line, content, line))

    # Execute the already-read file contents (avoids reopening the file)
    exec(content)
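# Usage sketch (assuming the package is importable as `suftware`):
#
#     import suftware as sw
#     sw.demo('real_data')        # prints and runs docs/example_alcohol.py
#     sw.demo('simulated_data')   # prints and runs docs/example_wide.py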
def _inputs_check(self):
    """
    Check all inputs NOT having to do with the choice of grid

    :param self:
    :return: None
    """
    if self.grid_spacing is not None:

        # grid_spacing is a number
        check(isinstance(self.grid_spacing, numbers.Real),
              'type(grid_spacing) = %s; must be a number' %
              type(self.grid_spacing))

        # grid_spacing is positive
        check(self.grid_spacing > 0,
              'grid_spacing = %f; must be > 0.' % self.grid_spacing)

    if self.grid is not None:

        # grid is a list or np.array
        types = (list, np.ndarray, np.matrix)
        check(isinstance(self.grid, types),
              'type(grid) = %s; must be a list or np.ndarray' %
              type(self.grid))

        # Cast grid as 1D np.array of floats
        try:
            self.grid = np.array(self.grid).ravel().astype(float)
        except Exception:  # SHOULD BE MORE SPECIFIC
            raise ControlledError(
                'Cannot cast grid as 1D np.array of floats.')

        # grid has appropriate number of points
        check(2 * self.alpha <= len(self.grid) <= MAX_NUM_GRID_POINTS,
              'len(grid) = %d; must have %d <= len(grid) <= %d.' %
              (len(self.grid), 2 * self.alpha, MAX_NUM_GRID_POINTS))

        # grid is ordered
        diffs = np.diff(self.grid)
        check(all(diffs > 0), 'grid is not monotonically increasing.')

        # grid is evenly spaced
        check(all(np.isclose(diffs, diffs.mean())),
              'grid is not evenly spaced; grid spacing = %f +- %f' %
              (diffs.mean(), diffs.std()))

    # alpha is int
    check(isinstance(self.alpha, int),
          'type(alpha) = %s; must be int.' % type(self.alpha))

    # alpha in range
    check(1 <= self.alpha <= 4,
          'alpha = %d; must have 1 <= alpha <= 4' % self.alpha)

    if self.num_grid_points is not None:

        # num_grid_points is an integer
        check(isinstance(self.num_grid_points, int),
              'type(num_grid_points) = %s; must be int.' %
              type(self.num_grid_points))

        # num_grid_points is in the right range
        check(2 * self.alpha <= self.num_grid_points <= MAX_NUM_GRID_POINTS,
              'num_grid_points = %d; must have %d <= num_grid_points <= %d.' %
              (self.num_grid_points, 2 * self.alpha, MAX_NUM_GRID_POINTS))

    # bounding_box
    if self.bounding_box is not None:

        # bounding_box is right type
        box_types = (list, tuple, np.ndarray)
        check(isinstance(self.bounding_box, box_types),
              'type(bounding_box) = %s; must be one of %s' %
              (type(self.bounding_box), box_types))

        # bounding_box has right length
        check(len(self.bounding_box) == 2,
              'len(bounding_box) = %d; must be %d' %
              (len(self.bounding_box), 2))

        # bounding_box entries must be numbers
        check(isinstance(self.bounding_box[0], numbers.Real) and
              isinstance(self.bounding_box[1], numbers.Real),
              'bounding_box = %s; entries must be numbers' %
              repr(self.bounding_box))

        # bounding_box entries must be sorted
        check(self.bounding_box[0] < self.bounding_box[1],
              'bounding_box = %s; entries must be sorted' %
              repr(self.bounding_box))

        # Reset bounding_box as tuple
        self.bounding_box = (float(self.bounding_box[0]),
                             float(self.bounding_box[1]))

    # periodic is bool
    check(isinstance(self.periodic, bool),
          'type(periodic) = %s; must be bool' % type(self.periodic))

    # evaluation_method_for_Z is valid
    Z_evals = ['Lap', 'Lap+Imp', 'Lap+Fey']
    check(self.Z_evaluation_method in Z_evals,
          'Z_eval = %s; must be in %s' %
          (self.Z_evaluation_method, Z_evals))

    # num_samples_for_Z is an integer
    check(isinstance(self.num_samples_for_Z, numbers.Integral),
          'type(self.num_samples_for_Z) = %s; must be integer.' %
          type(self.num_samples_for_Z))
    self.num_samples_for_Z = int(self.num_samples_for_Z)

    # num_samples_for_Z is in range
    check(0 <= self.num_samples_for_Z <= MAX_NUM_SAMPLES_FOR_Z,
          'self.num_samples_for_Z = %d; must satisfy '
          '0 <= num_samples_for_Z <= %d.' %
          (self.num_samples_for_Z, MAX_NUM_SAMPLES_FOR_Z))

    # max_t_step is a number
    check(isinstance(self.max_t_step, numbers.Real),
          'type(max_t_step) = %s; must be a number' %
          type(self.max_t_step))

    # max_t_step is positive
    check(self.max_t_step > 0,
          'max_t_step = %f; must be > 0.' % self.max_t_step)

    # print_t is bool
    check(isinstance(self.print_t, bool),
          'type(print_t) = %s; must be bool.' % type(self.print_t))

    # tolerance is a number
    check(isinstance(self.tolerance, numbers.Real),
          'type(tolerance) = %s; must be number' % type(self.tolerance))

    # tolerance is positive
    check(self.tolerance > 0,
          'tolerance = %f; must be > 0' % self.tolerance)

    # resolution is a number
    check(isinstance(self.resolution, numbers.Real),
          'type(resolution) = %s; must be number' % type(self.resolution))

    # resolution is positive
    check(self.resolution > 0,
          'resolution = %f; must be > 0' % self.resolution)

    if self.seed is not None:

        # seed is int
        check(isinstance(self.seed, int),
              'type(seed) = %s; must be int' % type(self.seed))

        # seed is in range
        check(0 <= self.seed <= 2**32 - 1,
              'seed = %d; must have 0 <= seed <= 2**32 - 1' % self.seed)

    # sample_only_at_l_star is bool
    check(isinstance(self.sample_only_at_l_star, bool),
          'type(sample_only_at_l_star) = %s; must be bool.' %
          type(self.sample_only_at_l_star))

    # num_posterior_samples is int
    check(isinstance(self.num_posterior_samples, numbers.Integral),
          'type(num_posterior_samples) = %s; must be integer' %
          type(self.num_posterior_samples))
    self.num_posterior_samples = int(self.num_posterior_samples)

    # num_posterior_samples is nonnegative
    check(0 <= self.num_posterior_samples <= MAX_NUM_POSTERIOR_SAMPLES,
          'num_posterior_samples = %d; need '
          '0 <= num_posterior_samples <= %d.' %
          (self.num_posterior_samples, MAX_NUM_POSTERIOR_SAMPLES))

    # max_log_evidence_ratio_drop is a number
    check(isinstance(self.max_log_evidence_ratio_drop, numbers.Real),
          'type(max_log_evidence_ratio_drop) = %s; must be number' %
          type(self.max_log_evidence_ratio_drop))

    # max_log_evidence_ratio_drop is positive
    check(self.max_log_evidence_ratio_drop > 0,
          'max_log_evidence_ratio_drop = %f; must be > 0' %
          self.max_log_evidence_ratio_drop)
def _run(self):
    """
    Estimates the probability density from data using the DEFT algorithm.
    Also samples posterior densities.
    """
    # Extract information from Deft1D object
    data = self.data
    G = self.num_grid_points
    h = self.grid_spacing
    alpha = self.alpha
    periodic = self.periodic
    Z_eval = self.Z_evaluation_method
    num_Z_samples = self.num_samples_for_Z
    DT_MAX = self.max_t_step
    print_t = self.print_t
    tolerance = self.tolerance
    resolution = self.resolution
    deft_seed = self.seed
    num_pt_samples = self.num_posterior_samples
    fix_t_at_t_star = self.sample_only_at_l_star
    max_log_evidence_ratio_drop = self.max_log_evidence_ratio_drop

    # Start clock (time.clock() was removed in Python 3.8)
    start_time = time.time()

    # If deft_seed is specified, set it
    if deft_seed is not None:
        np.random.seed(deft_seed)
    else:
        np.random.seed(None)

    # Create Laplacian
    laplacian_start_time = time.time()
    if periodic:
        op_type = '1d_periodic'
    else:
        op_type = '1d_bilateral'
    Delta = laplacian.Laplacian(op_type, alpha, G)
    laplacian_compute_time = time.time() - laplacian_start_time
    if print_t:
        print('Laplacian computed de novo in %f sec.' %
              laplacian_compute_time)

    # Get histogram counts based on bin edges
    counts, _ = np.histogram(data, self.bin_edges)
    N = sum(counts)

    # Make sure a sufficient number of bins are nonzero
    num_nonempty_bins = sum(counts > 0)
    check(num_nonempty_bins > self.alpha,
          'Histogram has %d nonempty bins; must be > %d.' %
          (num_nonempty_bins, self.alpha))

    # Compute initial t (np.log used; scipy's sp.log alias is deprecated)
    t_start = min(0.0, np.log(N) - 2.0 * alpha * np.log(alpha / h))
    if print_t:
        print('t_start = %0.2f' % t_start)

    # Do DEFT density estimation
    core_results = deft_core.run(counts, Delta, Z_eval, num_Z_samples,
                                 t_start, DT_MAX, print_t, tolerance,
                                 resolution, num_pt_samples,
                                 fix_t_at_t_star,
                                 max_log_evidence_ratio_drop)

    # Fill in results
    results = core_results  # Get all results from deft_core

    # Normalize densities properly
    results.h = h
    results.L = G * h
    results.R /= h
    results.M /= h
    results.Q_star /= h
    results.l_star = h * (np.exp(-results.t_star) * N) ** (1 / (2. * alpha))
    for p in results.map_curve.points:
        p.Q /= h
    if num_pt_samples > 0:
        results.Q_samples /= h
    results.Delta = Delta

    # Store results
    self.results = results
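# Worked example of the initial-t formula above (illustrative numbers):
# with N = 100 data points, alpha = 3, and grid spacing h = 0.1,
#
#     t_start = min(0.0, ln(100) - 2*3*ln(3/0.1))
#             = min(0.0, 4.605 - 6*3.401)
#             = -15.80
#
# so the t-integration typically starts well below t = 0.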
def get_stats(self, use_weights=True, show_samples=False):
    """
    Computes summary statistics for the estimated density.

    Parameters
    ----------
    use_weights: (bool)
        If True, mean and RMSD are computed using importance weights.

    show_samples: (bool)
        If True, summary stats are computed for each posterior sample.
        If False, summary stats are returned for the "star" estimate,
        the histogram, and the maxent estimate, along with the mean and
        RMSD values of these stats across posterior samples.

    Returns
    -------
    df: (pd.DataFrame)
        A pandas data frame listing summary statistics for the estimated
        probability densities. These summary statistics include "entropy"
        (in bits), "mean", "variance", "skewness", and "kurtosis". If
        ``show_samples = False``, results will be shown for the best
        estimate, as well as mean and RMSD values across all samples.
        If ``show_samples = True``, results will be shown for each
        sample. A column showing sample weights will also be included.
    """

    # Check inputs
    check(isinstance(use_weights, bool),
          'use_weights = %s; must be True or False.' % use_weights)
    check(isinstance(show_samples, bool),
          'show_samples = %s; must be True or False.' % show_samples)

    # Define a function for each summary statistic
    def entropy(Q):
        h = self.grid_spacing
        eps = 1E-10
        assert all(Q >= 0)
        # Differential entropy in bits: -sum(h * Q * log2(Q)).
        # Note the minus sign; without it the *negative* entropy
        # would be returned.
        return -np.sum(h * Q * np.log2(Q + eps))

    def mean(Q):
        x = self.grid
        h = self.grid_spacing
        return np.sum(h * Q * x)

    def variance(Q):
        mu = mean(Q)
        x = self.grid
        h = self.grid_spacing
        return np.sum(h * Q * (x - mu) ** 2)

    def skewness(Q):
        mu = mean(Q)
        x = self.grid
        h = self.grid_spacing
        return (np.sum(h * Q * (x - mu) ** 3) /
                np.sum(h * Q * (x - mu) ** 2) ** (3 / 2))

    def kurtosis(Q):
        mu = mean(Q)
        x = self.grid
        h = self.grid_spacing
        return (np.sum(h * Q * (x - mu) ** 4) /
                np.sum(h * Q * (x - mu) ** 2) ** 2)

    # Index functions by their names and set these as columns
    col2func_dict = {'entropy': entropy,
                     'mean': mean,
                     'variance': variance,
                     'skewness': skewness,
                     'kurtosis': kurtosis}
    cols = list(col2func_dict.keys())
    if show_samples:
        cols += ['weight']

    # Create list of row names
    if show_samples:
        rows = ['sample %d' % n
                for n in range(self.num_posterior_samples)]
    else:
        rows = ['star', 'histogram', 'maxent',
                'posterior mean', 'posterior RMSD']

    # Initialize data frame
    df = pd.DataFrame(columns=cols, index=rows)

    # Set sample weights
    if use_weights:
        ws = self.sample_weights
    else:
        ws = np.ones(self.num_posterior_samples)

    # Fill in data frame column by column
    for col_num, col in enumerate(cols):

        # If listing weights, do so
        if col == 'weight':
            df.loc[:, col] = ws

        # If computing a summary statistic
        else:

            # Get summary statistic function
            func = col2func_dict[col]

            # Compute func value for each sample
            ys = np.zeros(self.num_posterior_samples)
            for n in range(self.num_posterior_samples):
                ys[n] = func(self.sample_values[:, n])

            # If recording individual results for all samples, do so
            if show_samples:
                df.loc[:, col] = ys

            # Otherwise, record summary entries
            else:

                # Fill in func value for star density
                df.loc['star', col] = func(self.values)

                # Fill in func value for histogram
                df.loc['histogram', col] = func(self.histogram)

                # Fill in func value for maxent density
                df.loc['maxent', col] = func(self.maxent)

                # Record mean and RMSD values across samples
                mu = np.sum(ys * ws) / np.sum(ws)
                df.loc['posterior mean', col] = mu
                df.loc['posterior RMSD', col] = np.sqrt(
                    np.sum(ws * (ys - mu) ** 2) / np.sum(ws))

    # Return data frame to user
    return df
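# Usage sketch (assuming `density` is a fitted Deft1D-style object with
# posterior samples; names are illustrative):
#
#     stats_df = density.get_stats()
#     print(stats_df.loc['star', 'entropy'])        # stats of star estimate
#     samples_df = density.get_stats(show_samples=True)
#     print(samples_df[['mean', 'weight']].head())  # per-sample stats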
def evaluate_samples(self, x, resample=True):
    """
    Evaluate sampled densities at specified locations.

    Parameters
    ----------
    x: (number or list-like collection of numbers)
        The locations in the data domain at which to evaluate sampled
        densities.

    resample: (bool)
        Whether to use importance resampling, i.e., should the values
        returned be from the original samples (obtained using a Laplace
        approximated posterior) or should they be resampled to account
        for the deviation between the true Bayesian posterior and its
        Laplace approximation.

    Returns
    -------
    A 1D np.array (if x is a number) or a 2D np.array (if x is
    list-like), representing the values of the posterior sampled
    densities at the specified locations. The first index corresponds
    to values in x, the second to sampled densities.
    """

    # Clean input
    x_arr, is_number = clean_numerical_input(x)

    # Check resample type
    check(isinstance(resample, bool),
          'type(resample) = %s. Must be bool.' % type(resample))

    # Make sure that posterior samples were taken
    check(self.num_posterior_samples > 0,
          'Cannot evaluate samples because no posterior samples '
          'have been computed.')

    assert len(self.sample_density_funcs) == self.num_posterior_samples

    # Evaluate all sampled densities at x
    values = np.array([d.evaluate(x_arr)
                       for d in self.sample_density_funcs]).T

    # If requested, resample columns of values array based on
    # sample weights
    if resample:
        probs = self.sample_weights / self.sample_weights.sum()
        old_cols = np.array(range(self.num_posterior_samples))
        new_cols = np.random.choice(old_cols,
                                    size=self.num_posterior_samples,
                                    replace=True,
                                    p=probs)
        values = values[:, new_cols]

    # If a number was passed as input, return a 1D np.array
    if is_number:
        values = values.ravel()

    return values
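# Usage sketch (assuming `density` is a fitted object with posterior
# samples; illustrative only):
#
#     x = np.linspace(-3, 3, 50)
#     Q_samples = density.evaluate_samples(x, resample=True)
#     # Q_samples.shape == (50, density.num_posterior_samples)
#     Q_mean = Q_samples.mean(axis=1)   # posterior mean density at x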
def _set_grid(self):
    """
    Sets the grid based on user input
    """
    data = self.data
    grid = self.grid
    grid_spacing = self.grid_spacing
    num_grid_points = self.num_grid_points
    bounding_box = self.bounding_box
    alpha = self.alpha

    # If grid is specified
    if grid is not None:

        # Check and set number of grid points
        num_grid_points = len(grid)
        assert num_grid_points >= 2 * alpha

        # Check and set grid spacing
        diffs = np.diff(grid)
        grid_spacing = diffs.mean()
        assert grid_spacing > 0
        assert all(np.isclose(diffs, grid_spacing))

        # Check and set grid bounds
        grid_padding = grid_spacing / 2
        lower_bound = grid[0] - grid_padding
        upper_bound = grid[-1] + grid_padding
        bounding_box = np.array([lower_bound, upper_bound])
        box_size = upper_bound - lower_bound

    # If grid is not specified
    if grid is None:

        ### First, set bounding box ###

        # If bounding box is specified, use that.
        if bounding_box is not None:
            assert bounding_box[0] < bounding_box[1]
            lower_bound = bounding_box[0]
            upper_bound = bounding_box[1]
            box_size = upper_bound - lower_bound

        # Otherwise, set bounding box based on data
        else:
            assert isinstance(data, np.ndarray)
            assert all(np.isfinite(data))
            assert min(data) < max(data)

            # Choose bounding box to encapsulate all data, with extra room
            data_max = max(data)
            data_min = min(data)
            data_span = data_max - data_min
            lower_bound = data_min - .2 * data_span
            upper_bound = data_max + .2 * data_span

            # Autoadjust lower bound
            if data_min >= 0 and lower_bound < 0:
                lower_bound = 0

            # Autoadjust upper bound
            if data_max <= 0 and upper_bound > 0:
                upper_bound = 0
            if data_max <= 1 and upper_bound > 1:
                upper_bound = 1
            if data_max <= 100 and upper_bound > 100:
                upper_bound = 100

            # Extend bounding box outward a little for numerical safety
            lower_bound -= SMALL_NUM * data_span
            upper_bound += SMALL_NUM * data_span
            box_size = upper_bound - lower_bound

            # Set bounding box
            bounding_box = np.array([lower_bound, upper_bound])

        ### Next, define grid based on bounding box ###

        # If grid_spacing is specified
        if grid_spacing is not None:
            assert isinstance(grid_spacing, float)
            assert np.isfinite(grid_spacing)
            assert grid_spacing > 0

            # Set number of grid points
            num_grid_points = np.floor(box_size / grid_spacing).astype(int)

            # Check num_grid_points isn't too small
            check(2 * self.alpha <= num_grid_points,
                  'Using grid_spacing = %f ' % grid_spacing +
                  'produces num_grid_points = %d, ' % num_grid_points +
                  'which is too small. Reduce grid_spacing or do not set.')

            # Check num_grid_points isn't too large
            check(num_grid_points <= MAX_NUM_GRID_POINTS,
                  'Using grid_spacing = %f ' % grid_spacing +
                  'produces num_grid_points = %d, ' % num_grid_points +
                  'which is too big. Increase grid_spacing or do not set.')

            # Define grid padding
            # Note: grid_spacing/2 <= grid_padding < grid_spacing
            grid_padding = (box_size -
                            (num_grid_points - 1) * grid_spacing) / 2
            assert grid_spacing / 2 <= grid_padding < grid_spacing

            # Define grid to be centered in bounding box
            grid_start = lower_bound + grid_padding
            grid_stop = upper_bound - grid_padding
            grid = np.linspace(grid_start,
                               grid_stop * (1 + SMALL_NUM),  # For safety
                               num_grid_points)

        # Otherwise, if num_grid_points is specified
        elif num_grid_points is not None:
            assert isinstance(num_grid_points, int)
            assert 2 * alpha <= num_grid_points <= MAX_NUM_GRID_POINTS

            # Set grid spacing
            grid_spacing = box_size / num_grid_points

            # Define grid padding
            grid_padding = grid_spacing / 2

            # Define grid to be centered in bounding box
            grid_start = lower_bound + grid_padding
            grid_stop = upper_bound - grid_padding
            grid = np.linspace(grid_start,
                               grid_stop * (1 + SMALL_NUM),  # For safety
                               num_grid_points)

        # Otherwise, set grid_spacing and num_grid_points based on data
        else:
            assert isinstance(data, np.ndarray)
            assert all(np.isfinite(data))
            assert min(data) < max(data)

            # Compute default grid spacing
            default_grid_spacing = box_size / DEFAULT_NUM_GRID_POINTS

            # Set minimum number of grid points
            min_num_grid_points = 2 * alpha

            # Set minimum grid spacing
            # Note: np.sort is used so that self.data is not reordered
            # in place.
            diffs = np.diff(np.sort(data))
            min_grid_spacing = min(diffs[diffs > 0])
            min_grid_spacing = min(min_grid_spacing,
                                   box_size / min_num_grid_points)

            # Set grid_spacing
            grid_spacing = max(min_grid_spacing, default_grid_spacing)

            # Set number of grid points
            num_grid_points = np.floor(box_size / grid_spacing).astype(int)

            # Set grid padding
            grid_padding = grid_spacing / 2

            # Define grid to be centered in bounding box
            grid_start = lower_bound + grid_padding
            grid_stop = upper_bound - grid_padding
            grid = np.linspace(grid_start,
                               grid_stop * (1 + SMALL_NUM),  # For safety
                               num_grid_points)

    # Set final grid
    self.grid = grid
    self.grid_spacing = grid_spacing
    self.grid_padding = grid_padding
    self.num_grid_points = num_grid_points
    self.bounding_box = bounding_box
    self.lower_bound = lower_bound
    self.upper_bound = upper_bound
    self.box_size = box_size

    # Make sure that the final number of grid points is ok
    check(2 * self.alpha <= self.num_grid_points <= MAX_NUM_GRID_POINTS,
          'After setting grid, we find that num_grid_points = %d; '
          'must have %d <= len(grid) <= %d. ' %
          (self.num_grid_points, 2 * self.alpha, MAX_NUM_GRID_POINTS) +
          'Something is wrong with input values of grid, grid_spacing, '
          'num_grid_points, or bounding_box.')

    # Set bin edges
    self.bin_edges = np.concatenate(([lower_bound],
                                     grid[:-1] + grid_spacing / 2,
                                     [upper_bound]))
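# Worked example of the grid geometry above (illustrative numbers): with a
# bounding box of [0, 1] (box_size = 1) and grid_spacing = 0.3,
#
#     num_grid_points = floor(1 / 0.3) = 3
#     grid_padding    = (1 - (3 - 1)*0.3) / 2 = 0.2
#     grid            = [0.2, 0.5, 0.8]
#
# which satisfies grid_spacing/2 = 0.15 <= grid_padding = 0.2 < 0.3,
# and gives bin_edges = [0, 0.35, 0.65, 1].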
def __init__(self,
             distribution='gaussian',
             num_data_points=100,
             seed=None):

    # Check that distribution is valid
    check(distribution in self.list(),
          'distribution = %s is not valid' % distribution)

    # Check num_data_points is integral
    check(isinstance(num_data_points, numbers.Integral),
          'num_data_points = %s is not an integer.' % num_data_points)

    # Cast num_data_points as an integer
    num_data_points = int(num_data_points)

    # Check value
    check(0 < num_data_points <= MAX_DATASET_SIZE,
          'num_data_points = %d; must have 0 < num_data_points <= %d.' %
          (num_data_points, MAX_DATASET_SIZE))

    # Set seed and catch errors
    try:
        np.random.seed(seed)
    except TypeError:
        raise ControlledError('type(seed) = %s; invalid type.' %
                              type(seed))
    except ValueError:
        raise ControlledError('seed = %s; invalid value.' % seed)

    # Set default value for periodic
    periodic = False

    # If gaussian distribution
    if distribution == 'gaussian':
        description = 'Gaussian distribution'
        mus = [0.]
        sigmas = [1.]
        weights = [1.]
        bounding_box = [-5, 5]
        data, pdf_py, pdf_js = gaussian_mixture(num_data_points,
                                                weights, mus, sigmas,
                                                bounding_box)

    # If mixture of two gaussian distributions, narrow separation
    elif distribution == 'narrow':
        description = 'Gaussian mixture, narrow separation'
        mus = [-1.25, 1.25]
        sigmas = [1., 1.]
        weights = [1., 1.]
        bounding_box = [-6, 6]
        data, pdf_py, pdf_js = gaussian_mixture(num_data_points,
                                                weights, mus, sigmas,
                                                bounding_box)

    # If mixture of two gaussian distributions, wide separation
    elif distribution == 'wide':
        description = 'Gaussian mixture, wide separation'
        mus = [-2.0, 2.0]
        sigmas = [1.0, 1.0]
        weights = [1.0, 0.5]
        bounding_box = [-6.0, 6.0]
        data, pdf_py, pdf_js = gaussian_mixture(num_data_points,
                                                weights, mus, sigmas,
                                                bounding_box)

    elif distribution == 'foothills':
        description = 'Foothills (Gaussian mixture)'
        mus = [0., 5., 8., 10, 11]
        sigmas = [2., 1., 0.5, 0.25, 0.125]
        weights = [1., 1., 1., 1., 1.]
        bounding_box = [-5, 12]
        data, pdf_py, pdf_js = gaussian_mixture(num_data_points,
                                                weights, mus, sigmas,
                                                bounding_box)

    # Note: the key 'accordian' [sic] is kept as-is for API compatibility
    elif distribution == 'accordian':
        description = 'Accordion (Gaussian mixture)'
        mus = [0., 5., 8., 10, 11, 11.5]
        sigmas = [2., 1., 0.5, 0.25, 0.125, 0.0625]
        weights = [16., 8., 4., 2., 1., 0.5]
        bounding_box = [-5, 13]
        data, pdf_py, pdf_js = gaussian_mixture(num_data_points,
                                                weights, mus, sigmas,
                                                bounding_box)

    elif distribution == 'goalposts':
        description = 'Goalposts (Gaussian mixture)'
        mus = [-20, 20]
        sigmas = [1., 1.]
        weights = [1., 1.]
        bounding_box = [-25, 25]
        data, pdf_py, pdf_js = gaussian_mixture(num_data_points,
                                                weights, mus, sigmas,
                                                bounding_box)

    elif distribution == 'towers':
        description = 'Towers (Gaussian mixture)'
        mus = [-20, -15, -10, -5, 0, 5, 10, 15, 20]
        sigmas = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
        weights = [1., 1., 1., 1., 1., 1., 1., 1., 1.]
        bounding_box = [-25, 25]
        data, pdf_py, pdf_js = gaussian_mixture(num_data_points,
                                                weights, mus, sigmas,
                                                bounding_box)

    # If uniform distribution
    elif distribution == 'uniform':
        data = stats.uniform.rvs(size=num_data_points)
        bounding_box = [0, 1]
        description = 'Uniform distribution'
        pdf_js = "1.0"
        pdf_py = "1.0"

    # Convex beta distribution
    elif distribution == 'beta_convex':
        data = stats.beta.rvs(a=0.5, b=0.5, size=num_data_points)
        bounding_box = [0, 1]
        description = 'Convex beta distribution'
        pdf_js = "Math.pow(x,-0.5)*Math.pow(1-x,-0.5)*math.gamma(1)/(math.gamma(0.5)*math.gamma(0.5))"
        pdf_py = "np.power(x,-0.5)*np.power(1-x,-0.5)*math.gamma(1)/(math.gamma(0.5)*math.gamma(0.5))"

    # Concave beta distribution
    elif distribution == 'beta_concave':
        data = stats.beta.rvs(a=2, b=2, size=num_data_points)
        bounding_box = [0, 1]
        description = 'Concave beta distribution'
        pdf_js = "Math.pow(x,1)*Math.pow(1-x,1)*math.gamma(4)/(math.gamma(2)*math.gamma(2))"
        pdf_py = "np.power(x,1)*np.power(1-x,1)*math.gamma(4)/(math.gamma(2)*math.gamma(2))"

    # Exponential distribution
    elif distribution == 'exponential':
        data = stats.expon.rvs(size=num_data_points)
        bounding_box = [0, 5]
        description = 'Exponential distribution'
        pdf_js = "Math.exp(-x)"
        pdf_py = "np.exp(-x)"

    # Gamma distribution
    elif distribution == 'gamma':
        data = stats.gamma.rvs(a=3, size=num_data_points)
        bounding_box = [0, 10]
        description = 'Gamma distribution'
        pdf_js = "Math.pow(x,2)*Math.exp(-x)/math.gamma(3)"
        pdf_py = "np.power(x,2)*np.exp(-x)/math.gamma(3)"

    # Triangular distribution
    elif distribution == 'triangular':
        data = stats.triang.rvs(c=0.5, size=num_data_points)
        bounding_box = [0, 1]
        description = 'Triangular distribution'
        pdf_js = "2-4*Math.abs(x - 0.5)"
        pdf_py = "2-4*np.abs(x - 0.5)"

    # Laplace distribution
    elif distribution == 'laplace':
        data = stats.laplace.rvs(size=num_data_points)
        bounding_box = [-5, 5]
        description = "Laplace distribution"
        pdf_js = "0.5*Math.exp(- Math.abs(x))"
        pdf_py = "0.5*np.exp(- np.abs(x))"

    # von Mises distribution
    elif distribution == 'vonmises':
        data = stats.vonmises.rvs(1, size=num_data_points)
        bounding_box = [-3.14159, 3.14159]
        periodic = True
        description = 'von Mises distribution'
        pdf_js = "Math.exp(Math.cos(x))/7.95493"
        pdf_py = "np.exp(np.cos(x))/7.95493"

    else:
        raise ControlledError('Distribution type "%s" not recognized.' %
                              distribution)

    # Set attributes
    attributes = {
        'data': data,
        'bounding_box': bounding_box,
        'distribution': distribution,
        'pdf_js': pdf_js,
        'pdf_py': pdf_py,
        'periodic': periodic
    }
    for key, value in attributes.items():
        setattr(self, key, value)
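# Usage sketch (illustrative only; assumes this constructor belongs to a
# class exposed as `sw.SimulatedDataset` and that the package exposes a
# `DensityEstimator` class that accepts a bounding_box argument -- both
# assumptions, not verified here):
#
#     import suftware as sw
#     sim = sw.SimulatedDataset(distribution='wide',
#                               num_data_points=500,
#                               seed=0)
#     density = sw.DensityEstimator(sim.data,
#                                   bounding_box=sim.bounding_box)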