def build_experiment(self):
    """Build the experiment table from a central composite design.

    A coded design for ``len(self.f)`` factors is generated with
    ``doe.ccdesign`` (``center=(1, 1)``, ``face='cci'``). Every coded column
    is rescaled with the matching factor's ``delta`` and ``centre``, passed
    through ``myround`` with the factor's ``precision``, and the resulting
    frame is stored in ``self.data``.
    """
    factors = self.f
    coded = doe.ccdesign(len(factors), (1, 1), face='cci')
    table = pd.DataFrame(coded, columns=[factor.name for factor in factors])

    for factor in factors:
        # Coded units -> natural units, then round to the factor's precision.
        natural = table[factor.name] * factor.delta + factor.centre
        table[factor.name] = myround(natural, factor.precision)

    self.data = table
def plan_matrix(n, m):
    """Build the planning matrices for a 3-factor central composite design.

    Reads the module-level ``y_min``, ``y_max`` and ``x_range`` (one
    ``(low, high)`` pair per factor).

    Args:
        n: Number of response rows; values above 14 add ``n - 14`` centre
            points to the design, otherwise one centre point is used.
        m: Number of repeated measurements per row.

    Returns:
        Tuple ``(x, y, x_norm)``: natural-value plan (``int64``), random
        response matrix of shape ``(n, m)`` and the coded plan. Both plans
        have 11 columns: intercept, three factors, three pairwise
        interactions, the triple interaction and three squares.
    """
    print(f'\nПлан матриці при n = {n}, m = {m}')

    # Responses are random integers drawn from [y_min, y_max].
    y = np.zeros(shape=(n, m))
    for i in range(n):
        for j in range(m):
            y[i][j] = random.randint(y_min, y_max)

    no = n - 14 if n > 14 else 1
    l = 1.215

    # Coded plan: intercept column first, seven placeholder columns last.
    x_norm = ccdesign(3, center=(0, no))
    x_norm = np.insert(x_norm, 0, 1, axis=1)
    for i in range(4, 11):
        x_norm = np.insert(x_norm, i, 0, axis=1)

    # Replace the axial distance chosen by ccdesign with the fixed arm l.
    x_norm[x_norm < -1] = -l
    x_norm[x_norm > 1] = l

    def add_sq_nums(x):
        """Fill columns 4-10 with interaction and squared terms, in place."""
        x[:, 4] = x[:, 1] * x[:, 2]
        x[:, 5] = x[:, 1] * x[:, 3]
        x[:, 6] = x[:, 2] * x[:, 3]
        x[:, 7] = x[:, 1] * x[:, 3] * x[:, 2]
        x[:, 8] = x[:, 1] ** 2
        x[:, 9] = x[:, 2] ** 2
        x[:, 10] = x[:, 3] ** 2
        return x

    x_norm = add_sq_nums(x_norm)

    # NOTE(review): int64 truncates fractional natural values; the dtype is
    # kept so the returned matrix has the same type as before.
    x = np.ones(shape=x_norm.shape, dtype=np.int64)

    # Factorial rows: coded -1 / +1 map to the low / high bound.
    for i in range(8):
        for j in range(1, 4):
            x[i][j] = x_range[j - 1][0] if x_norm[i][j] == -1 else x_range[j - 1][1]

    # Star and centre rows are derived from the coded rows, so every factor
    # (including the third) is centred correctly, the sign of each star point
    # matches the coded matrix, and extra centre rows (n > 14) are filled too.
    centres = [(x_range[j][0] + x_range[j][1]) / 2 for j in range(3)]
    dx = [x_range[j][1] - centres[j] for j in range(3)]
    for i in range(8, len(x)):
        for j in range(1, 4):
            x[i][j] = centres[j - 1] + x_norm[i][j] * dx[j - 1]

    x = add_sq_nums(x)

    print('\nX:\n', x)
    print('\nX нормалізоване:\n')
    for row in x_norm:
        print([round(value, 2) for value in row])
    print('\nY:\n', y)
    return x, y, x_norm
def generate_samples(num_samples_inside, num_samples_boundary, x_k, delta_k):
    """
    Generates samples from both `inside the region` and `on the boundary`.

    To sample from the boundary, the *Central Composite* design is used.
    To sample from the region interior, *Latin Hypercube Sampling* is used.

    Args:
        num_samples_inside (int): Number of desired samples inside the region.
        num_samples_boundary (int): Number of desired samples on the region
            boundary (drawn with replacement from the design points).
        x_k (vector): Center of the region.
        delta_k (float): Radius of the region.

    Returns:
        total_samples: 2-D array with the boundary samples, then the interior
            samples, then six all-zero rows.
    """
    # 1. Dimension of x_k fixes the dimension of every generated vector.
    x_k_dim = x_k.shape[0]

    # 2. Sample from boundary: Central Composite
    if x_k_dim == 1:
        directions = np.array([[-1.0], [1.0]])
    else:
        # ccdesign expects `center` to be a pair (factorial-block centres,
        # star-block centres). The previous list of length x_k_dim was only
        # accepted when x_k_dim == 2.
        directions = np.array(
            [normalize(v) for v in ccdesign(x_k_dim, center=(0, 0))])
    chosen = random.choices(delta_k * directions + x_k,
                            k=num_samples_boundary)
    # reshape keeps the array 2-D even when no boundary sample is requested.
    samples_boundary = np.asarray(chosen).reshape(-1, x_k_dim)

    # 3. Sample from region: Latin Hypercube Sampling, rescaled from [0, 1]
    #    to [-delta_k, delta_k] and shifted to x_k.
    unit_samples = lhs(x_k_dim, samples=num_samples_inside)
    samples_inside = (-delta_k + unit_samples * delta_k * 2) + x_k

    # 4. Append six zero vectors after the generated samples.
    total_samples = np.concatenate(
        (samples_boundary, samples_inside, np.zeros((6, x_k_dim))))
    return total_samples


# if __name__ == "__main__":
#     x_k = np.array([0.0], dtype=np.float32)
#     delta_k = 2.0
#     print(generate_samples(5, 3, x_k, delta_k))
def gen_matrix(self):
    """Generate random responses and the extended 3-factor design matrices.

    Sets ``self.seq``, ``self.y`` (random, integer or float depending on
    ``self.y_only_int``), ``self.y_mean``, ``self.y_std``, ``self.extended``
    (coded plan) and ``self.extended_real`` (natural plan). Both plans end up
    with 10 columns: three factors, their interactions and their squares.
    """
    self.seq = [[1], [2], [3], [1, 2], [1, 3], [2, 3], [1, 2, 3], [1, 1],
                [2, 2], [3, 3]]

    # Responses: N rows of m measurements inside [y_min, y_max].
    shape = (self.N, self.m)
    if self.y_only_int == True:
        self.y = np.random.randint(self.y_min, self.y_max + 1, shape)
    else:
        span = self.y_max - self.y_min
        self.y = np.random.sample(shape) * span + self.y_min
    self.y_mean = [row.mean() for row in self.y]
    self.y_std = [np.std(row) for row in self.y]

    half_ranges = [np.abs(bounds[1] - bounds[0]) / 2 for bounds in self.x]
    centres = [(bounds[1] + bounds[0]) / 2 for bounds in self.x]

    coded = ccdesign(3, center=(0, 1))
    natural = np.zeros((15, 3))
    self.extended = coded
    self.extended_real = natural

    for col in range(coded.shape[1]):
        column = coded[:, col]  # view: writes below update `coded` in place
        at_high = column == 1
        at_low = column == -1
        at_centre = column == 0
        star_high = (column > 0) & ~at_high
        star_low = ~(at_high | at_low | at_centre | star_high)

        natural[at_high, col] = self.x[col][1]
        natural[at_low, col] = self.x[col][0]
        natural[at_centre, col] = centres[col]
        natural[star_high, col] = self.l * half_ranges[col] + centres[col]
        natural[star_low, col] = -self.l * half_ranges[col] + centres[col]
        # Star points get the configured arm length instead of ccdesign's.
        column[star_high] = self.l
        column[star_low] = -self.l

    # Append interaction and squared columns, built from the first three.
    n_rows = coded.shape[0]
    for combo in ([1, 2], [1, 3], [2, 3], [1, 2, 3], [1, 1], [2, 2], [3, 3]):
        coded_term = np.array([1] * n_rows)
        natural_term = np.array([1] * n_rows)
        for factor_no in combo:
            coded_term = coded_term * coded[:, factor_no - 1]
            natural_term = natural_term * natural[:, factor_no - 1]
        coded = np.column_stack((coded, coded_term))
        natural = np.column_stack((natural, natural_term))

    self.extended = coded
    self.extended_real = natural
def gen_matrix(self):
    """Generate the extended 3-factor design and responses from ``y_func``.

    Sets ``self.seq``, ``self.extended`` (coded plan), ``self.extended_real``
    (natural plan), ``self.y`` (``self.y_func`` evaluated at the natural
    factor values of row ``i`` plus a random offset ``rand() * 10 + 5``),
    ``self.y_mean`` and ``self.y_std``. Both plans end up with 10 columns:
    three factors, their interactions and their squares.
    """
    self.seq = [[1], [2], [3], [1, 2], [1, 3], [2, 3], [1, 2, 3], [1, 1],
                [2, 2], [3, 3]]

    half_ranges = [np.abs(bounds[1] - bounds[0]) / 2 for bounds in self.x]
    centres = [(bounds[1] + bounds[0]) / 2 for bounds in self.x]

    coded = ccdesign(3, center=(0, 0))
    natural = np.zeros((14, 3))
    self.extended = coded
    self.extended_real = natural

    for col in range(coded.shape[1]):
        column = coded[:, col]  # view: writes below update `coded` in place
        at_high = column == 1
        at_low = column == -1
        at_centre = column == 0
        star_high = (column > 0) & ~at_high
        star_low = ~(at_high | at_low | at_centre | star_high)

        natural[at_high, col] = self.x[col][1]
        natural[at_low, col] = self.x[col][0]
        natural[at_centre, col] = centres[col]
        natural[star_high, col] = self.l * half_ranges[col] + centres[col]
        natural[star_low, col] = -self.l * half_ranges[col] + centres[col]
        # Star points get the configured arm length instead of ccdesign's.
        column[star_high] = self.l
        column[star_low] = -self.l

    # Responses: m noisy evaluations of y_func for each of the first N rows.
    responses = []
    for row_idx in range(self.N):
        measurements = [
            self.y_func(*self.extended_real[row_idx][:3])
            + np.random.rand() * 10 + 5
            for _ in range(self.m)
        ]
        responses.append(measurements)
    self.y = np.array(responses)
    self.y_mean = [row.mean() for row in self.y]
    self.y_std = [np.std(row) for row in self.y]

    # Append interaction and squared columns, built from the first three.
    n_rows = coded.shape[0]
    for combo in ([1, 2], [1, 3], [2, 3], [1, 2, 3], [1, 1], [2, 2], [3, 3]):
        coded_term = np.array([1] * n_rows)
        natural_term = np.array([1] * n_rows)
        for factor_no in combo:
            coded_term = coded_term * coded[:, factor_no - 1]
            natural_term = natural_term * natural[:, factor_no - 1]
        coded = np.column_stack((coded, coded_term))
        natural = np.column_stack((natural, natural_term))

    self.extended = coded
    self.extended_real = natural
def sampling(self, nx, xlimits, method='LHS', random_seed=10000):
    '''
    Create samples bounded by xlimits using the specified method.

    xlimits defines lb and ub, in np.array([[LB1, UB1], [LB2, UB2], ...])
    format. ``nx`` (number of samples) is only used by 'LHS'; the other
    designs determine their own number of points.

    method = 'LHS': Latin hypercube sampling,
             'CCD': centralized composite design,
             'PBD': Plackett-Burman design,
             'PB-CCD': Plackett-Burman centralized composite design
    (matched case-insensitively; 'PBCCD' is accepted as well).

    Returns the design as a 2-D array scaled from [-1, 1] to xlimits.
    Raises ValueError for an unknown method, or for 'CCD' with more than
    8 variables.
    '''
    n_var = xlimits.shape[0]
    kind = method.lower()

    # Sampling (every branch produces coded values)
    if kind == 'lhs':
        x = DOE.lhs(n_var,
                    samples=nx,
                    criterion='correlation',
                    random_state=random_seed) * 2.0 - 1.0
    elif kind == 'ccd':
        if n_var > 8:
            raise ValueError(
                'number of variables is TOO LARGE for centralized composite design (CCD).'
            )
        if n_var > 7:
            warnings.warn(
                'number of variables is TOO LARGE for centralized composite design (CCD).'
            )
        x = DOE.ccdesign(n_var,
                         center=(0, 1),
                         alpha='rotatable',
                         face='inscribed')
    elif kind == 'pbd':
        x = DOE.pbdesign(n_var)
    elif kind in ('pb-ccd', 'pbccd'):
        scale = np.sqrt(n_var)
        x = DOE.pbdesign(n_var) / scale
        x = np.append(x, -x / 2.0, axis=0)
        # One +1 / -1 axial point per variable.
        for idx in range(n_var):
            axial = np.zeros((1, n_var))
            axial[0, idx] = 1.0
            x = np.append(x, axial, axis=0)
            axial[0, idx] = -1.0
            x = np.append(x, axial, axis=0)
        # Centre point.
        x = np.append(x, np.zeros((1, n_var)), axis=0)
    else:
        # Previously an unknown method fell through and failed later with an
        # UnboundLocalError on `x`.
        raise ValueError('unknown sampling method: {!r}'.format(method))

    # In-place scaling below must not truncate if a design comes back as ints.
    x = np.asarray(x, dtype=float)

    # Scale
    for idx in range(n_var):
        lower = xlimits[idx, 0]
        upper = xlimits[idx, 1]
        x[:, idx] = (x[:, idx] + 1.0) / 2.0 * (upper - lower) + lower

    # Return
    return x
def ccd_lhs(self,
            ccd_factors,
            lhs_factors,
            add_samples=5,
            seed=None,
            center_fill=False):
    """
    Get experiments corresponding to a hybrid central composite and latin
    hypercube design.

    The ``ccd_factors`` columns come from a central composite design that is
    min-max standardized through ``Data``. The ``lhs_factors`` columns are
    either a constant 0.5 (``center_fill=True``) or a latin hypercube with as
    many rows as the composite design. ``add_samples`` extra latin hypercube
    rows over all factors are appended, and the result, reordered by
    ``self.names``, is stored in ``self.design``.
    """
    n_ccd = len(ccd_factors)
    n_lhs = len(lhs_factors)

    # Central composite part
    coded = ccdesign(n_ccd, (0, 1), 'o', 'cci')
    cc = Data(pd.DataFrame(coded, columns=ccd_factors))
    cc.standardize(target=None, scaler='minmax')
    n_runs = len(cc.data)

    # Latin hypercube part: constant mid-level or a real LHS
    if center_fill:
        lhs_block = np.full((n_runs, n_lhs), 0.5)
    else:
        lhs_block = lhs(n_lhs,
                        samples=n_runs,
                        criterion='center',
                        random_state=seed)
    lh = pd.DataFrame(lhs_block, columns=lhs_factors)

    # Extra space-filling rows spanning every factor
    extra_block = lhs(self.N,
                      samples=add_samples,
                      criterion='center',
                      random_state=seed)
    extra = pd.DataFrame(extra_block, columns=ccd_factors + lhs_factors)

    # Side by side first, then stack the extra rows underneath
    combined = pd.concat([cc.data, lh], axis=1)
    combined = pd.concat([combined, extra], axis=0).reset_index(drop=True)

    # Reorder
    self.design = combined.copy()[self.names]
for i in range(len(x)): x[i][4] = x[i][1] * x[i][2] x[i][5] = x[i][1] * x[i][3] x[i][6] = x[i][2] * x[i][3] x[i][7] = x[i][1] * x[i][3] * x[i][2] x[i][8] = x[i][1]**2 x[i][9] = x[i][2]**2 x[i][10] = x[i][3]**2 return x if n > 14: no = n - 14 else: no = 1 xn = ccdesign(3, center=(0, no)) xn = np.insert(xn, 0, 1, axis=1) for i in range(4, 11): xn = np.insert(xn, i, 0, axis=1) l = 1.215 for i in range(len(xn)): for j in range(len(xn[i])): if xn[i][j] < -1 or xn[i][j] > 1: if xn[i][j] < 0: xn[i][j] = -l else: xn[i][j] = l
def plan_matrix5(n, m):
    """Build the planning matrices for a 3-factor central composite design.

    This variant also prints how long the individual checks take. Reads the
    module-level ``y_min``, ``y_max`` and ``x_range`` (one ``(low, high)``
    pair per factor).

    Args:
        n: Number of response rows; values above 14 add ``n - 14`` centre
            points to the design, otherwise one centre point is used.
        m: Number of repeated measurements per row.

    Returns:
        Tuple ``(x, y, x_norm)``: natural-value plan (``int64``), random
        response matrix of shape ``(n, m)`` and the coded plan.
    """
    print(f'\nГереруємо матрицю планування для n = {n}, m = {m}')

    # Response matrix, filled with random integers from [y_min, y_max].
    y = np.zeros(shape=(n, m))
    for i in range(n):
        for j in range(m):
            y[i][j] = random.randint(y_min, y_max)

    # Elapsed time is reported as now - start (it used to be printed with the
    # opposite sign).
    start = time.time()
    no = n - 14 if n > 14 else 1
    print("Время проверки 1 = ", time.time() - start)

    # Central composite design with an intercept column and seven
    # placeholder columns for the derived terms.
    x_norm = ccdesign(3, center=(0, no))
    x_norm = np.insert(x_norm, 0, 1, axis=1)
    for i in range(4, 11):
        x_norm = np.insert(x_norm, i, 0, axis=1)

    l = 1.215

    # Coded plan: replace the axial distance chosen by ccdesign with l.
    # NOTE(review): the timing print is assumed to belong to the inner loop,
    # next to the `start` it reads - confirm against the original layout.
    for i in range(len(x_norm)):
        for j in range(len(x_norm[i])):
            start = time.time()
            if x_norm[i][j] < -1 or x_norm[i][j] > 1:
                x_norm[i][j] = -l if x_norm[i][j] < 0 else l
            print("Время проверки 2 = ", time.time() - start)

    def add_sq_nums(x):
        """Fill columns 4-10 with interaction and squared terms, in place."""
        x[:, 4] = x[:, 1] * x[:, 2]
        x[:, 5] = x[:, 1] * x[:, 3]
        x[:, 6] = x[:, 2] * x[:, 3]
        x[:, 7] = x[:, 1] * x[:, 3] * x[:, 2]
        x[:, 8] = x[:, 1] ** 2
        x[:, 9] = x[:, 2] ** 2
        x[:, 10] = x[:, 3] ** 2
        return x

    x_norm = add_sq_nums(x_norm)

    # NOTE(review): int64 truncates fractional natural values; the dtype is
    # kept so the returned matrix has the same type as before.
    x = np.ones(shape=x_norm.shape, dtype=np.int64)

    # Natural plan, factorial rows: coded -1 / +1 map to low / high bound.
    # NOTE(review): same assumption about the timing print as above.
    for i in range(8):
        for j in range(1, 4):
            start = time.time()
            if x_norm[i][j] == -1:
                x[i][j] = x_range[j - 1][0]
            else:
                x[i][j] = x_range[j - 1][1]
            print("Время проверки = ", time.time() - start)

    # Star and centre rows are derived from the coded rows, so every factor
    # (including the third) is centred correctly, the sign of each star point
    # matches the coded matrix, and extra centre rows (n > 14) are filled too.
    centres = [(x_range[j][0] + x_range[j][1]) / 2 for j in range(3)]
    dx = [x_range[j][1] - centres[j] for j in range(3)]
    for i in range(8, len(x)):
        for j in range(1, 4):
            x[i][j] = centres[j - 1] + x_norm[i][j] * dx[j - 1]

    # Derived terms for the natural plan.
    x = add_sq_nums(x)

    print('\nX:\n', x)
    print('\nX нормоване:\n')
    for row in x_norm:
        print([round(value, 2) for value in row])
    print('\nY:\n', y)
    return x, y, x_norm
def plan_matrix5(n, m):
    """Build the planning matrices for a 3-factor central composite design.

    Reads the module-level ``y_min``, ``y_max`` and ``x_range`` (one
    ``(low, high)`` pair per factor).

    Args:
        n: Number of response rows; values above 14 add ``n - 14`` centre
            points to the design, otherwise one centre point is used.
        m: Number of repeated measurements per row.

    Returns:
        Tuple ``(x, y, x_norm)``: natural-value plan (``int64``), random
        response matrix of shape ``(n, m)`` and the coded plan. Both plans
        have 11 columns: intercept, three factors, three pairwise
        interactions, the triple interaction and three squares.
    """
    print(f'\nГереруємо матрицю планування для n = {n}, m = {m}')

    # Response matrix, filled with random integers from [y_min, y_max].
    y = np.zeros(shape=(n, m))
    for i in range(n):
        for j in range(m):
            y[i][j] = random.randint(y_min, y_max)

    no = n - 14 if n > 14 else 1
    l = 1.215

    x_norm = ccdesign(3, center=(0, no))
    x_norm = np.insert(x_norm, 0, 1, axis=1)
    for i in range(4, 11):
        x_norm = np.insert(x_norm, i, 0, axis=1)

    # Coded plan: replace the axial distance chosen by ccdesign with l.
    x_norm[x_norm < -1] = -l
    x_norm[x_norm > 1] = l

    def add_sq_nums(x):
        """Fill columns 4-10 with interaction and squared terms, in place."""
        x[:, 4] = x[:, 1] * x[:, 2]
        x[:, 5] = x[:, 1] * x[:, 3]
        x[:, 6] = x[:, 2] * x[:, 3]
        x[:, 7] = x[:, 1] * x[:, 3] * x[:, 2]
        x[:, 8] = x[:, 1] ** 2
        x[:, 9] = x[:, 2] ** 2
        x[:, 10] = x[:, 3] ** 2
        return x

    x_norm = add_sq_nums(x_norm)

    # NOTE(review): int64 truncates fractional natural values; the dtype is
    # kept so the returned matrix has the same type as before.
    x = np.ones(shape=x_norm.shape, dtype=np.int64)

    # Natural plan, factorial rows: coded -1 / +1 map to low / high bound.
    for i in range(8):
        for j in range(1, 4):
            if x_norm[i][j] == -1:
                x[i][j] = x_range[j - 1][0]
            else:
                x[i][j] = x_range[j - 1][1]

    # Star and centre rows are derived from the coded rows, so every factor
    # (including the third) is centred correctly, the sign of each star point
    # matches the coded matrix, and extra centre rows (n > 14) are filled too.
    centres = [(x_range[j][0] + x_range[j][1]) / 2 for j in range(3)]
    dx = [x_range[j][1] - centres[j] for j in range(3)]
    for i in range(8, len(x)):
        for j in range(1, 4):
            x[i][j] = centres[j - 1] + x_norm[i][j] * dx[j - 1]

    # Derived terms for the natural plan.
    x = add_sq_nums(x)

    print('\nX:\n', x)
    print('\nX нормоване:\n')
    for row in x_norm:
        print([round(value, 2) for value in row])
    print('\nY:\n', y)
    return x, y, x_norm
class ExperimentDesigner:
    """Plans screening/optimization designs and evaluates their responses."""

    # Maps a lower-cased design name to a callable that takes the number of
    # numeric factors and returns the coded design matrix.
    _matrix_designers = {
        'fullfactorial2levels': pyDOE2.ff2n,
        'fullfactorial3levels': lambda n: pyDOE2.fullfact([3] * n),
        'placketburman': pyDOE2.pbdesign,
        'boxbehnken': lambda n: pyDOE2.bbdesign(n, 1),
        'ccc': lambda n: pyDOE2.ccdesign(n, (0, 3), face='ccc'),
        'ccf': lambda n: pyDOE2.ccdesign(n, (0, 3), face='ccf'),
        'cci': lambda n: pyDOE2.ccdesign(n, (0, 3), face='cci'),
    }

    def __init__(self,
                 factors,
                 design_type,
                 responses,
                 skip_screening=True,
                 at_edges='distort',
                 relative_step=.25,
                 gsd_reduction='auto',
                 model_selection='brute',
                 n_folds='loo',
                 manual_formula=None,
                 shrinkage=1.0,
                 q2_limit=0.5,
                 gsd_span_ratio=0.5):
        """
        Validate the settings and build the factor objects.

        :param dict factors: Factor name -> factor specification.
        :param str design_type: Key of ``_matrix_designers`` (any case).
        :param dict responses: Response name -> response specification.
        :raises ValueError: If a setting is outside its allowed values.
        :raises DesignerError: If a categorical factor is given while
            ``skip_screening`` is True.
        :raises UnsupportedDesign: If ``design_type`` is unknown.
        """
        # Explicit checks instead of asserts, so validation also runs when
        # Python is started with -O. Exception type and messages are the same
        # as before.
        if at_edges not in ('distort', 'shrink'):
            raise ValueError('unknown action at_edges: {0}'.format(at_edges))
        if not (relative_step is None or 0 < relative_step < 1):
            raise ValueError(
                'relative_step must be float between 0 and 1 not {}'.format(
                    relative_step))
        if model_selection not in ('brute', 'greedy', 'manual'):
            raise ValueError(
                'model_selection must be "brute", "greedy", "manual".')
        if not (n_folds == 'loo' or
                (isinstance(n_folds, int) and n_folds > 0)):
            raise ValueError('n_folds must be "loo" or positive integer')
        if not 0.9 <= shrinkage <= 1:
            raise ValueError(
                'shrinkage must be float between 0.9 and 1.0, not {}'.format(
                    shrinkage))
        if not 0 <= q2_limit <= 1:
            raise ValueError(
                'q2_limit must be float between 0 and 1, not {}'.format(
                    q2_limit))
        if model_selection == 'manual' and not isinstance(manual_formula, str):
            raise ValueError(
                'If model_selection is "manual" formula must be provided.')

        self.factors = OrderedDict()
        factor_types = list()
        for factor_name, f_spec in factors.items():
            factor = factor_from_spec(f_spec)
            if isinstance(factor, CategoricalFactor) and skip_screening:
                raise DesignerError(
                    'Can\'t perform optimization with categorical '
                    'variables without prior screening.')
            self.factors[factor_name] = factor
            logging.debug('Sets factor {}: {}'.format(factor_name, factor))
            factor_types.append(f_spec.get('type', 'continuous'))

        self.skip_screening = skip_screening
        self.step_length = relative_step
        self.design_type = design_type
        self.responses = responses
        self.response_values = None
        self.gsd_reduction = gsd_reduction
        self.model_selection = model_selection
        self.n_folds = n_folds
        self.shrinkage = shrinkage
        self.q2_limit = q2_limit
        self._formula = manual_formula
        self._edge_action = at_edges
        self._allowed_phases = ['optimization', 'screening']
        self._phase = 'optimization' if self.skip_screening else 'screening'
        self._n_screening_evaluations = 0
        self._factor_types = factor_types
        self._gsd_span_ratio = gsd_span_ratio
        self._stored_transform = lambda x: x
        # reevaluate_screening() tests `_screening_response is None`; without
        # these defaults it failed with AttributeError before any screening.
        self._screening_response = None
        self._screening_criterion = None
        self._best_experiment = {
            # Explicit dtype: an empty Series without one triggers a pandas
            # warning about the default dtype.
            'optimal_x': pd.Series([], dtype=object),
            'optimal_y': None,
            'weighted_y': None
        }

        if self.design_type.lower() not in self._matrix_designers:
            raise UnsupportedDesign(self.design_type)

        if len(self.responses) > 1:
            self._desirabilites = {
                name: make_desirability_function(factor)
                for name, factor in self.responses.items()
            }
        else:
            self._desirabilites = None

    def new_design(self):
        """
        Create the design for the current phase.

        :return: Experimental design-sheet.
        :rtype: pandas.DataFrame
        """
        if self._phase == 'screening':
            return self._new_screening_design(reduction=self.gsd_reduction)
        else:
            return self._new_optimization_design()

    def write_factor_csv(self, out_file):
        """
        Save the current factor settings as CSV.

        One row per factor with the columns ``fixed_value``, ``current_low``
        and ``current_high``; values that don't apply to a factor are None.

        :param out_file: Destination passed to ``DataFrame.to_csv``.
        :raises NotImplementedError: For a factor that is neither numeric
            nor categorical.
        """
        factors = list()
        idx = pd.Index(['fixed_value', 'current_low', 'current_high'])
        for name, factor in self.factors.items():
            current_min = None
            current_high = None
            fixed_value = None
            if isinstance(factor, NumericFactor):
                current_min = factor.current_low
                current_high = factor.current_high
            elif isinstance(factor, CategoricalFactor):
                fixed_value = factor.fixed_value
            else:
                raise NotImplementedError

            data = [fixed_value, current_min, current_high]
            factors.append(pd.Series(data, index=idx, name=name))

        factors_df = pd.DataFrame(factors)
        logging.info('Saving factor settings to {}'.format(out_file))
        factors_df.to_csv(out_file)

    def update_factors_from_csv(self, csv_file):
        """
        Restore factor settings from a CSV written by write_factor_csv.

        :param csv_file: Source passed to ``pandas.read_csv``; the first
            column must hold the factor names.
        """
        # DataFrame.from_csv no longer exists in current pandas; read_csv
        # with index_col=0 reads the same layout.
        factors_df = pd.read_csv(csv_file, index_col=0)
        logging.info('Reading factor settings from {}'.format(csv_file))
        for name, factor in self.factors.items():
            logging.info('Updating factor {}'.format(name))
            if isinstance(factor, NumericFactor):
                current_low = factors_df.loc[name]['current_low']
                current_high = factors_df.loc[name]['current_high']
                logging.info('Factor: {}. Setting current_low to {}'.format(
                    name, current_low))
                logging.info('Factor: {}. Setting current_high to {}'.format(
                    name, current_high))
                factor.current_low = current_low
                factor.current_high = current_high
            elif isinstance(factor, CategoricalFactor):
                if pd.isnull(factors_df.loc[name]['fixed_value']):
                    fixed_value = None
                    logging.info(
                        'Factor: {}. Had no fixed_value.'.format(name))
                else:
                    fixed_value = factors_df.loc[name]['fixed_value']
                    logging.info(
                        'Factor: {}. Setting fixed_value to {}.'.format(
                            name, fixed_value))
                factor.fixed_value = fixed_value

    def get_optimal_settings(self, response):
        """
        Calculate optimal factor settings given response. Returns calculated
        optimum.

        If the current phase is 'screening': returns the factor settings of
        the best run and updates the current factor settings.

        If the current phase is 'optimization': returns the factor settings of
        the predicted optimum, but doesn't update current factor settings in
        case a validation step is to be run first

        :param pandas.DataFrame response: Response sheet.
        :returns: Calculated optimum.
        :rtype: OptimizationResult
        """
        self._response_values = response.copy()
        response = response.copy()

        # Perform any transformations or weigh together multiple responses:
        treated_response, criterion = self.treat_response(response)

        if self._phase == 'screening':
            # Find the best screening result and update factors accordingly
            self._screening_response = treated_response
            self._screening_criterion = criterion
            return self._evaluate_screening(treated_response, criterion,
                                            self._gsd_span_ratio)
        else:
            # Predict optimal parameter settings, but don't update factors
            return self._predict_optimum_settings(treated_response, criterion)

    def _update_best_experiment(self, result):
        """
        Store ``result`` as the best experiment if it beats the stored one.

        The first result is always stored; later ones only when their
        'weighted_response' improves on the stored 'weighted_y' according to
        the result's 'criterion'.

        :returns: True if the stored best experiment was replaced.
        :rtype: bool
        """
        update = False
        if self._best_experiment['optimal_x'].empty:
            update = True
        elif result['criterion'] == 'maximize':
            if result['weighted_response'] > self._best_experiment[
                    'weighted_y']:
                update = True
        elif result['criterion'] == 'minimize':
            if result['weighted_response'] < self._best_experiment[
                    'weighted_y']:
                update = True

        if update:
            self._best_experiment['optimal_x'] = result['factor_settings']
            self._best_experiment['optimal_y'] = result['response']
            self._best_experiment['weighted_y'] = result['weighted_response']
        return update

    def get_best_experiment(self,
                            experimental_sheet,
                            response_sheet,
                            use_index=1):
        """
        Accepts an experimental design and the corresponding response values.
        Finds the best experiment and updates self._best_experiment.
        Returns the best experiment, to be used in fnc
        update_factors_from_optimum

        :param pandas.DataFrame experimental_sheet: Design sheet; its columns
            must match the factor names.
        :param pandas.DataFrame response_sheet: Response sheet; its columns
            must match the response names.
        :param int use_index: Rank of the experiment to pick, 1 = best.
        :returns: Dict with the keys 'factor_settings', 'weighted_response',
            'response', 'criterion', 'new_best' and 'old_best'.
        :rtype: OrderedDict
        """
        assert isinstance(experimental_sheet, pd.DataFrame), \
            'The input experimental sheet must be a pandas DataFrame'
        assert isinstance(response_sheet, pd.DataFrame), \
            'The input response sheet must be a pandas DataFrame'
        assert sorted(experimental_sheet.columns) == sorted(self.factors), (
            'The factors of the experimental sheet must match those in the '
            'pipeline. You input:\n{}\nThey should be:\n{}'.format(
                list(experimental_sheet.columns), list(self.factors.keys())))
        assert sorted(response_sheet.columns) == sorted(self.responses), (
            'The responses of the response sheet must match those in the '
            'pipeline. You input:\n{}\nThey should be:\n{}'.format(
                list(response_sheet.columns), list(self.responses.keys())))

        response = response_sheet.copy()
        treated_response, criterion = self.treat_response(
            response, perform_transform=False)
        treated_response = treated_response.iloc[:, 0]

        # argsort is ascending: the best is last when maximizing and first
        # when minimizing.
        if criterion == 'maximize':
            optimum_i = treated_response.argsort().iloc[-use_index]
        elif criterion == 'minimize':
            optimum_i = treated_response.argsort().iloc[use_index - 1]
        else:
            raise NotImplementedError

        optimum_settings = experimental_sheet.iloc[optimum_i]
        results = OrderedDict()
        optimal_weighted_response = np.array(treated_response.iloc[optimum_i])
        optimal_response = response_sheet.iloc[optimum_i]
        results['factor_settings'] = optimum_settings
        results['weighted_response'] = optimal_weighted_response
        results['response'] = optimal_response
        results['criterion'] = criterion
        results['new_best'] = False
        # Copy: _update_best_experiment below mutates self._best_experiment,
        # which would otherwise overwrite the "old" values reported here.
        results['old_best'] = dict(self._best_experiment)

        has_multiple_responses = response_sheet.shape[1] > 1
        logging.debug('The best response was found in experiment:\n{}'.format(
            optimum_settings.name))
        logging.debug('The response values were:\n{}'.format(
            response_sheet.iloc[optimum_i]))
        if has_multiple_responses:
            logging.debug('The weighed response was:\n{}'.format(
                treated_response.iloc[optimum_i]))
        logging.debug('Will return optimum settings:\n{}'.format(
            results['factor_settings']))
        logging.debug('And best response:\n{}'.format(results['response']))

        if self._update_best_experiment(results):
            results['new_best'] = True

        return results

    def update_factors_from_optimum(self,
                                    optimal_experiment,
                                    tol=0.25,
                                    recovery=False):
        """
        Updates the factor settings based on how far the current settings are
        from those supplied in optimal_experiment['factor_settings'].

        :param OrderedDict optimal_experiment: Output from get_best_experiment
        :param float tol: Accepted relative distance to design space edge.
        :param bool recovery: If True, no factor is updated and the progress
            logging in this method is skipped.
        :returns: Calculated optimum.
        :rtype: OptimizationResult
        """
        are_numeric = np.array(self._factor_types) != 'categorical'
        numeric_names = np.array(list(self.factors.keys()))[are_numeric]
        numeric_factors = np.array(list(self.factors.values()))[are_numeric]

        optimal_x = optimal_experiment['factor_settings']
        optimal_y = optimal_experiment['weighted_response']
        criterion = optimal_experiment['criterion']

        # Get only numeric factors
        if recovery:
            optimal_x = optimal_x.iloc[optimal_x.index.isin(numeric_names)]

        centers = np.array([f.center for f in numeric_factors])
        spans = np.array([f.span for f in numeric_factors])
        ratios = (optimal_x - centers) / spans
        if not recovery:
            logging.debug(
                'The distance of the factor optimas from the factor centers, '
                'expressed as the ratio of the step length:\n{}'.format(
                    ratios))

        if (abs(ratios) < tol).all():
            converged = True
            if not recovery:
                logging.info('Convergence reached.')
        else:
            converged = False
            if not recovery:
                logging.info('Convergence not reached. Moves design.')

        for ratio, name, factor in zip(ratios, numeric_names,
                                       numeric_factors):
            if abs(ratio) < tol:
                if not recovery:
                    logging.debug(
                        ('Factor {} not updated - within tolerance '
                         'limits.').format(name))
                continue

            if not recovery:
                self._update_numeric_factor(factor, name, ratio)

        converged, reached_limits = self._check_convergence(
            centers,
            converged,
            criterion,
            optimal_y,
            numeric_factors,
            recovery=recovery)

        optimization_results = pd.Series(index=self._design_sheet.columns,
                                         dtype=object)
        for name, factor in self.factors.items():
            if isinstance(factor, CategoricalFactor):
                optimization_results[name] = factor.fixed_value
            else:
                optimization_results[name] = optimal_x[name]

        results = OptimizationResult(optimization_results,
                                     converged,
                                     tol,
                                     reached_limits,
                                     empirically_found=True)
        return results

    def _predict_optimum_settings(self, response, criterion):
        """
        Calculate a model from the response and find the optimum.

        :param pandas.DataFrame response: Treated response; only the first
            column is used.
        :param str criterion: Passed on to ``predict_optimum``.
        :returns: Calculated optimum.
        :rtype: OptimizationResult
        """
        logging.info('Predicting optimum')
        are_numeric = np.array(self._factor_types) != 'categorical'
        numeric_names = np.array(list(self.factors.keys()))[are_numeric]

        optimal_x, model, prediction = predict_optimum(
            self._design_sheet.loc[:, are_numeric],
            response.iloc[:, 0].values,
            numeric_names,
            criterion=criterion,
            n_folds=self.n_folds,
            model_selection=self.model_selection,
            manual_formula=self._formula,
            q2_limit=self.q2_limit)

        optimization_results = pd.Series(index=self._design_sheet.columns,
                                         dtype=object)
        if not optimal_x.empty:
            # If Q2 of model was above the limit and if an optimum was found
            for name, factor in self.factors.items():
                if isinstance(factor, CategoricalFactor):
                    optimization_results[name] = factor.fixed_value
                elif isinstance(factor, OrdinalFactor):
                    optimization_results[name] = int(np.round(optimal_x[name]))
                else:
                    optimization_results[name] = optimal_x[name]

        result = OptimizationResult(optimization_results,
                                    converged=False,
                                    tol=0,
                                    reached_limits=False,
                                    empirically_found=False)
        return result

def treat_response(self, response, perform_transform=True): """ Perform any specified transformations on the response. If several responses are defined, combine them into one. The geometric mean of Derringer and Suich's desirability functions will be used for optimization, see: Derringer, G., and Suich, R., (1980), "Simultaneous Optimization of Several Response Variables," Journal of Quality Technology, 12, 4, 214-219. Returns a single response variable and the associated maximize/minimize criterion. """ has_multiple_responses = response.shape[1] > 1 for name, spec in self.responses.items(): transform = spec.get('transform', None) response_values = response[name] if perform_transform: if transform == 'log': logging.debug('Log-transforming response {}'.format(name)) response_values = np.log(response_values) self._stored_transform = np.log elif transform == 'box-cox': response_values, lambda_ = scipy.stats.boxcox( response_values) logging.debug('Box-cox transforming response {} ' '(lambda={:.4f})'.format(name, lambda_)) self._stored_transform = _make_stored_boxcox(lambda_) else: self._stored_transform = lambda x: x if has_multiple_responses: desirability_function = self._desirabilites[name] response_values = [ desirability_function(value) for value in response_values ] response[name] = response_values if has_multiple_responses: response = np.power(response.product(axis=1), (1 / response.shape[1])) response = response.to_frame('combined_response') criterion = 'maximize' else: criterion = list(self.responses.values())[0]['criterion'] return response, criterion def reevaluate_screening(self): if self._screening_response is None: raise DesignerError('screening must be run before re-evaluation') return self._evaluate_screening(self._screening_response, self._screening_criterion, self._gsd_span_ratio, self._n_screening_evaluations + 1) def _validate_new_factor_limits(self, factor, factor_name, low_limit, high_limit): # If the proposed step change takes us below or above 
min and max: logging.debug('Factor {}: Proposed new factor low is {}.'.format( factor_name, low_limit)) logging.debug('Factor {}: Proposed new factor high is {}.'.format( factor_name, high_limit)) adjusted_settings = False if low_limit < factor.min: nudge = abs(low_limit - factor.min) logging.debug( 'Factor {}: Minimum allowed setting ({}) would be exceeded by ' 'the proposed new factor low.'.format(factor_name, factor.min)) low_limit += nudge high_limit += nudge adjusted_settings = True elif high_limit > factor.max: nudge = abs(high_limit - factor.max) logging.debug( 'Factor {}: Maximum allowed setting ({}) would be exceeded by ' 'the proposed new factor high.'.format(factor_name, factor.max)) low_limit -= nudge high_limit -= nudge adjusted_settings = True if adjusted_settings: logging.debug('Factor {}: Adjusted the proposed new factor ' 'settings by {}.'.format(factor_name, nudge)) logging.debug('Factor {}: New factor low is {}.'.format( factor_name, low_limit)) logging.debug('Factor {}: New factor high is {}.'.format( factor_name, high_limit)) return (low_limit, high_limit) def _evaluate_screening(self, response, criterion, span_ratio, use_index=1): """ :param float span_ratio: The ratio of the span between gsd points that will be used in the following optimization design. """ self._n_screening_evaluations += 1 logging.info('Evaluating screening results.') response_series = response.iloc[:, 0] factor_items = sorted(self.factors.items()) if criterion == 'maximize': optimum_i = response_series.argsort().iloc[-use_index] elif criterion == 'minimize': optimum_i = response_series.argsort().iloc[use_index - 1] else: raise NotImplementedError optimum_design_row = self._design_matrix[optimum_i] optimum_settings = OrderedDict() # Update all factors according to current results. For each factor, # the current_high and current_low will be set to factors level above # and below the point in the screening design with the best response. 
for factor_level, (name, factor) in zip(optimum_design_row, factor_items): if isinstance(factor, CategoricalFactor): factor_levels = np.array(factor.values) factor.fixed_value = factor_levels[factor_level] else: factor_levels = sorted(self._design_sheet[name].unique()) min_ = factor_levels[max([0, factor_level - 1])] max_ = factor_levels[min( [factor_level + 1, len(factor_levels) - 1])] span = max_ - min_ # Shrink the span a bit logging.debug('Factor {} span: {}'.format(name, span)) logging.debug('Factor {}: adjusting span with ' 'gsd_span_ratio {}'.format(name, span_ratio)) span = span * span_ratio if isinstance(factor, OrdinalFactor) and span < 2.0: # Make sure ordinal factors' spans don't shrink to the # point where there's no spread in the exp. design logging.debug('Factor {}: span ({}) too small, adjusting ' 'to minimal span for ordinal factor.'.format( name, span)) span = 2.0 logging.debug('Factor {} span: {}'.format(name, span)) # center around best point best_point = factor_levels[factor_level] new_low = best_point - span / 2 new_high = best_point + span / 2 if isinstance(factor, OrdinalFactor): new_low = int(np.round(new_low)) new_high = int(np.round(new_high)) # nudge new high and low so we don't exceed the limits new_low, new_high = self._validate_new_factor_limits( factor, name, new_low, new_high) # update factors factor.current_low = new_low factor.current_high = new_high optimum_settings[name] = factor_levels[factor_level] logging.info('New settings for factor {}:\n{}'.format( name, factor)) results = OptimizationResult(pd.Series(optimum_settings), converged=False, tol=0, reached_limits=False, empirically_found=True) logging.info('Best screening result was exp no {}'.format(optimum_i)) logging.info('The corresponding response was:\n{}'.format( self._response_values.iloc[optimum_i])) if len(self._response_values.columns) > 1: logging.info('The combined response was:\n{}'.format( response.iloc[optimum_i])) logging.info('The factor settings 
were:\n{}'.format( results.predicted_optimum)) # update current best experiment self.get_best_experiment( self._design_sheet, self._response_values if len(self._response_values.columns) > 1 else response) self._phase = 'optimization' return results def set_phase(self, phase): assert phase in self._allowed_phases, 'phase must be one of {}'.format( self._allowed_phases) self._phase = phase def _update_numeric_factor(self, factor, name, ratio): logging.info('Factor {}: Updating settings.'.format(name)) logging.info('Factor {}: Current settings: {}'.format(name, factor)) step_length = self.step_length if self.step_length is not None \ else abs(ratio) step = factor.span * step_length * np.sign(ratio) logging.debug( 'Factor {}: Step by which settings are adjusted is {}.'.format( name, step)) logging.debug( 'Factor {}: Current span between high and low is {}.'.format( name, factor.span)) logging.debug('Factor {}: Will shrink the span by {}.'.format( name, self.shrinkage)) new_span = factor.span * self.shrinkage logging.debug('Factor {}: New span is {}.'.format(name, new_span)) if isinstance(factor, QuantitativeFactor): current_low_new = factor.center + step - new_span / 2 current_high_new = factor.center + step + new_span / 2 elif isinstance(factor, OrdinalFactor): current_low_new = np.round(factor.center + step - new_span / 2) current_high_new = np.round(factor.center + step + new_span / 2) else: raise NotImplementedError # If the proposed step change takes us below or above min and max: new_low, new_high = self._validate_new_factor_limits( factor, name, current_low_new, current_high_new) factor.current_low = new_low factor.current_high = new_high logging.info('Factor {}: New settings: {}'.format(name, factor)) logging.info('Factor {}: Done updating.'.format(name)) def _new_screening_design(self, reduction='auto'): factor_items = sorted(self.factors.items()) levels = list() names = list() dtypes = list() for name, factor in factor_items: names.append(name) if 
isinstance(factor, CategoricalFactor): levels.append(factor.values) dtypes.append(object) continue num_levels = factor.screening_levels spacing = getattr(factor, 'screening_spacing', 'linear') min_ = factor.min max_ = factor.max if not np.isfinite([min_, max_]).all(): raise ValueError( 'Can\'t perform screening with unbounded factors') space = np.linspace if spacing == 'linear' else np.logspace values = space(min_, max_, num_levels) if isinstance(factor, OrdinalFactor): values = sorted(np.unique(np.round(values))) dtypes.append(int) else: dtypes.append(float) levels.append(values) design_matrix = pyDOE2.gsd( [len(values) for values in levels], reduction if reduction is not 'auto' else len(levels)) factor_matrix = list() for i, (values, dtype) in enumerate(zip(levels, dtypes)): values = np.array(values)[design_matrix[:, i]] series = pd.Series(values, dtype=dtype) factor_matrix.append(series) self._design_matrix = design_matrix self._design_sheet = pd.concat(factor_matrix, axis=1, keys=names) return self._design_sheet def _new_optimization_design(self): matrix_designer = self._matrix_designers[self.design_type.lower()] numeric_factors = [(name, factor) for name, factor in self.factors.items() if isinstance(factor, NumericFactor)] numeric_factor_names = [name for name, factor in numeric_factors] design_matrix = matrix_designer(len(numeric_factors)) mins = np.array([f.min for _, f in numeric_factors]) maxes = np.array([f.max for _, f in numeric_factors]) span = np.array([f.span for _, f in numeric_factors]) centers = np.array([f.center for _, f in numeric_factors]) factor_matrix = design_matrix * (span / 2.0) + centers # Check if current settings are outside allowed design space. 
# Also, for factors that are specified as ordinal, adjust their values # in the design matrix to be rounded floats for i, (factor_name, factor) in enumerate(numeric_factors): if isinstance(factor, OrdinalFactor): factor_matrix[:, i] = np.round(factor_matrix[:, i]) logging.debug('Current setting {}: {}'.format(factor_name, factor)) if (factor_matrix < mins).any() or (factor_matrix > maxes).any(): logging.warning(('Out of design space factors. Adjusts factors' 'by {}.'.format(self._edge_action + 'ing'))) if self._edge_action == 'distort': # Simply cap out-of-boundary values at mins and maxes. capped_mins = np.maximum(factor_matrix, mins) capped_mins_and_maxes = np.minimum(capped_mins, maxes) factor_matrix = capped_mins_and_maxes elif self._edge_action == 'shrink': raise NotImplementedError factors = list() for name, factor in self.factors.items(): if isinstance(factor, CategoricalFactor): values = np.repeat(factor.fixed_value, len(design_matrix)) factors.append(pd.Series(values)) else: i = numeric_factor_names.index(name) dtype = int if isinstance(factor, OrdinalFactor) else float factors.append(pd.Series(factor_matrix[:, i].astype(dtype))) self._design_sheet = pd.concat(factors, axis=1, keys=self.factors.keys()) return self._design_sheet def _check_convergence(self, centers, converged, criterion, prediction, numeric_factors, recovery=False): # It's possible that the optimum is predicted to be at the edge of the allowed # min or max factor setting. This will produce a high 'ratio' and the algorithm # is not considered to have converged (above). However, in this situation we # can't move the space any further and we should stop iterating. new_centers = np.array([f.center for f in numeric_factors]) if (centers == new_centers).all(): if not recovery: logging.info( 'The design has not moved since last iteration. Converged.' 
) converged = True reached_limits = True if len(self.responses) > 1 and prediction < 1: reached_limits = False elif len(self.responses) == 1: r_spec = list(self.responses.values())[0] low_limit = self._stored_transform(r_spec.get('low_limit', 1)) high_limit = self._stored_transform(r_spec.get( 'high_limit', 1)) if criterion == 'maximize' and 'low_limit' in r_spec: reached_limits = prediction >= low_limit elif criterion == 'minimize' and 'high_limit' in r_spec: reached_limits = prediction <= high_limit elif criterion == 'target' and 'low_limit' in r_spec and 'high_limit' in r_spec: reached_limits = low_limit <= prediction <= high_limit else: reached_limits = False return converged, reached_limits
def run(
    self,
    seed,
    n,
    dims,
    sample_type,
    scale,
    scale_factor,
    outfile,
    x0,
    x1,
    n_line,
    hard_bounds,
):
    """Generate a sample set, print it and save it to ``outfile`` via ``np.save``.

    Args:
        seed: Seed passed to ``np.random.seed`` and, for the Latin hypercube
            variants, to ``doe.lhs`` as ``random_state``.
        n: Requested number of samples (used by "random", "grid", "lhs" and
            "lhd"; the stencil designs derive their size from ``dims``).
        dims: Number of dimensions of each sample.
        sample_type: One of "random", "grid", "lhs", "lhd", "star", "ccf",
            "ccc" or "cci".
        scale: Scale specification handed to ``process_scale``. The result is
            treated as a sequence of ``[low, high]`` or
            ``[low, high, "linear" | "log"]`` entries, or ``None``.
        scale_factor: Multiplier applied to the whole (scaled) sample box.
        outfile: Destination passed to ``np.save``.
        x0: Path of a ``.npy`` file with one or more centre points, or
            ``None``. The sample cloud is re-centred on every point.
        x1: Path of a ``.npy`` file with a single end point, or ``None``.
            Only used when ``x0`` is given: ``n_line`` points strictly
            between each ``x0`` point and ``x1`` are appended.
        n_line: Number of interior points on each ``x0`` -> ``x1`` segment.
        hard_bounds: When truthy, clip the final samples to the unit box
            (no scales) or to each dimension's ``[low, high]``.

    Raises:
        ValueError: If ``sample_type`` is not one of the supported names.
    """
    np.random.seed(seed)
    n_samples = n
    n_dims = dims

    if sample_type == "random":
        x = np.random.random((n_samples, n_dims))
    elif sample_type == "grid":
        # Largest per-axis point count whose n_dims-th power fits in n_samples.
        # A bare int(pow(...)) truncates float error on exact powers
        # (1000 ** (1 / 3) == 9.999...), so round first and step back if the
        # rounded value overshoots.
        subdivision = int(round(pow(n_samples, 1 / float(n_dims))))
        if subdivision**n_dims > n_samples:
            subdivision -= 1
        axes = [np.linspace(0, 1.0, subdivision) for _ in range(n_dims)]
        mesh = np.meshgrid(*axes)
        x = np.stack([axis_grid.flatten() for axis_grid in mesh], axis=1)
    elif sample_type == "lhs":
        x = doe.lhs(n_dims, samples=n_samples, random_state=seed)
    elif sample_type == "lhd":
        _x = doe.lhs(n_dims, samples=n_samples, random_state=seed)
        x = norm(loc=0.5, scale=0.125).ppf(_x)
    elif sample_type == "star":
        _x = doe.doe_star.star(n_dims)[0]
        x = 0.5 * (_x + 1.0)  # transform to center at 0.5 (range 0-1)
    elif sample_type in ("ccf", "ccc", "cci"):
        _x = np.unique(doe.ccdesign(n_dims, face=sample_type), axis=0)
        x = 0.5 * (_x + 1.0)
    else:
        raise ValueError(sample_type + " is not a valid choice for sample_type!")

    scales = process_scale(scale)

    if scales is not None:
        limits = []
        do_log = []
        for entry in scales:
            limits.append((entry[0], entry[1]))
            # Entries without an explicit kind default to linear. The append
            # is deliberate: it keeps every entry the same length so that
            # np.array(scales) below is rectangular.
            if len(entry) < 3:
                entry.append("linear")
            do_log.append(entry[2] == "log")
        x = scale_samples(x, limits, do_log=do_log)

    # scale the whole box
    x = scale_factor * x

    # add x0
    if x0 is not None:
        x0 = np.atleast_2d(np.load(x0))
        if scales is not None:
            sa = scale_factor * np.array(scales)[:, :2].astype("float")
            center = np.mean(sa, axis=1)
        else:
            center = scale_factor * 0.5

        # Load the end point once. Doing it inside the loop would hand an
        # ndarray back to np.load on the second x0 point.
        end_point = np.load(x1) if x1 is not None else None

        # Loop over all x0 points
        all_x = []
        for _x0 in x0:
            _x = x + _x0 - center

            if sample_type in ("lhs", "lhd"):
                # replace the first entry with x0 for the random ones
                _x[0] = _x0
            else:
                # add it for the stencil points
                _x = np.insert(_x, 0, _x0, axis=0)

            if end_point is not None:
                # n_line fractions strictly between 0 and 1 (both ends dropped)
                line_range = np.linspace(0, 1, n_line + 1, endpoint=False)[1:]
                line_samples = _x0 + np.outer(line_range, (end_point - _x0))
                _x = np.vstack((_x, line_samples))
            all_x.append(_x)

        x = np.vstack(all_x)

    if hard_bounds:
        if scales is None:
            x = np.clip(x, 0, 1)
        else:
            for i, dim in enumerate(scales):
                x[:, i] = np.clip(x[:, i], dim[0], dim[1])

    print(x)

    np.save(outfile, x)
def plan_matrix5(n, m):
    """Build and print the matrices of a three-factor central composite plan.

    Reads the module-level names ``y_min``, ``y_max`` (integer bounds for the
    random responses) and ``x_range`` (``(low, high)`` per factor; the first
    three entries are used).

    Both design matrices have 11 columns: a constant 1, the three factors,
    the interactions x1*x2, x1*x3, x2*x3, x1*x2*x3 and the squares
    x1**2, x2**2, x3**2.

    Args:
        n: Number of response rows. The design itself has ``14 + max(n - 14, 1)``
            rows, so the two only line up for ``n >= 15``.
        m: Number of response replicates (columns of ``y``).

    Returns:
        tuple: ``(x, y, x_norm)`` where ``x`` is the natural-value design
        (``int64``, so fractional values are truncated), ``y`` is the ``n x m``
        float matrix of random responses and ``x_norm`` is the coded design
        with star points at +/-1.215.
    """
    star_arm = 1.215

    def add_sq_nums(mat):
        """Fill columns 4-10 of ``mat`` in place from its factor columns 1-3."""
        x1, x2, x3 = mat[:, 1], mat[:, 2], mat[:, 3]
        mat[:, 4] = x1 * x2
        mat[:, 5] = x1 * x3
        mat[:, 6] = x2 * x3
        mat[:, 7] = x1 * x3 * x2
        mat[:, 8] = x1**2
        mat[:, 9] = x2**2
        mat[:, 10] = x3**2
        return mat

    # Responses are drawn one by one in row-major order so that a seeded
    # ``random`` produces the same matrix as before.
    y = np.zeros(shape=(n, m))
    for i in range(n):
        for j in range(m):
            y[i, j] = random.randint(y_min, y_max)

    no = max(n - 14, 1)
    design = ccdesign(3, center=(0, no))

    # Coded matrix: constant column, the design, then room for derived terms.
    x_norm = np.insert(design, 0, 1, axis=1)
    x_norm = np.hstack((x_norm, np.zeros((len(x_norm), 7))))
    # Anything outside [-1, 1] is a star point; pin it to the chosen arm.
    x_norm[x_norm < -1] = -star_arm
    x_norm[x_norm > 1] = star_arm
    x_norm = add_sq_nums(x_norm)

    # Natural matrix, derived row by row from the coded one so that both
    # always describe the same run: the star rows keep the sign they have in
    # x_norm, and every factor (including x3) is centred on its own midpoint.
    lows = np.array([x_range[k][0] for k in range(3)], dtype=float)
    highs = np.array([x_range[k][1] for k in range(3)], dtype=float)
    centers = (lows + highs) / 2
    half_ranges = highs - centers

    coded = x_norm[:, 1:4]
    natural = centers + coded * half_ranges
    # Use the exact bounds for the factorial corners to avoid float round-off
    # before the integer truncation below.
    natural = np.where(coded == -1, lows, natural)
    natural = np.where(coded == 1, highs, natural)

    x = np.ones(shape=x_norm.shape, dtype=np.int64)
    x[:, 1:4] = natural  # float -> int64 assignment truncates toward zero
    x = add_sq_nums(x)

    print('\nМатриця X:\n')
    for i in x:
        for j in i:
            print("{0:5}".format(j), end='|')
        print()
    print('\nX нормоване:\n')
    for i in x_norm:
        for j in i:
            print("{0:5.3}".format(j), end='|')
        print()
    print('Y')
    for i in y:
        for j in i:
            print("{0:5.8}".format(j), end='|')
        print()
    return x, y, x_norm