def test_small(self):
    """Test small mixed-integer LP."""

    # Define problem
    c = np.array([-1, -2])
    x = cp.Variable(2, integer=True)
    # x = cp.Variable(2)  # Continuous variant
    cost = c @ x
    constraints = [
        x[1] <= 0.5 * x[0] + 1.5,
        x[1] <= -0.5 * x[0] + 3.5,
        x[1] <= -5.0 * x[0] + 10,
        x >= 0, x <= 1,
    ]
    cvxpy_problem = cp.Problem(cp.Minimize(cost), constraints)
    problem = Problem(cvxpy_problem)

    # Solve and compute strategy
    results = problem.solve()
    # violation1 = problem.infeasibility()

    # Solve again using only the strategy
    results_new = problem.solve(strategy=results["strategy"])
    # violation2 = problem.infeasibility()

    # Verify both solutions are equal
    npt.assert_almost_equal(results["x"],
                            results_new["x"],
                            decimal=TOL)
    npt.assert_almost_equal(results["cost"],
                            results_new["cost"],
                            decimal=TOL)
def test_violation(self):
    """Test problem violation"""
    np.random.seed(1)

    # Define problem
    n = 5
    m = 5
    x = cp.Variable(n)
    c = np.random.randn(n)
    A = np.random.randn(m, n)
    b = np.random.randn(m)

    prob_cvxpy = cp.Problem(cp.Minimize(c @ x), [A @ x <= b])
    mlprob = Problem(prob_cvxpy)
    data, _, _ = prob_cvxpy.get_problem_data(solver=DEFAULT_SOLVER)

    # Set variable value
    x_val = 10 * np.random.randn(n)
    x.value = x_val

    # Check violation
    viol_cvxpy = mlprob.infeasibility(x_val, data)
    viol_manual = np.linalg.norm(np.maximum(A.dot(x_val) - b, 0), np.inf) / \
        (1 + np.linalg.norm(b, np.inf))

    self.assertTrue(abs(viol_cvxpy - viol_manual) <= TOL)
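# The manual check in test_violation encodes the normalized infeasibility
# metric: ||max(A x - b, 0)||_inf / (1 + ||b||_inf). A minimal standalone
# sketch of that computation, assuming plain NumPy inputs (the helper name
# is illustrative and not part of the library API):
def normalized_infeasibility_sketch(A, b, x_val):
    """Normalized infinity-norm violation of A x <= b at x_val."""
    violation = np.maximum(A.dot(x_val) - b, 0)
    return np.linalg.norm(violation, np.inf) / (1 + np.linalg.norm(b, np.inf))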
def test_parallel_vs_serial_learning(self):
    """Test parallel vs serial learning"""

    # Generate data
    np.random.seed(1)
    T = 5
    M = 2.
    h = 1.
    c = 1.
    p = 1.
    x_init = 2.
    radius = 2.
    n_train = 1000   # Number of samples

    # Define problem
    x = cp.Variable(T + 1)
    u = cp.Variable(T)

    # Define parameter and sampling points
    d = cp.Parameter(T, nonneg=True, name="d")
    d_bar = 3. * np.ones(T)
    X_d = uniform_sphere_sample(d_bar, radius, n=n_train)
    df = pd.DataFrame({'d': list(X_d)})

    # Constraints
    constraints = [x[0] == x_init]
    for t in range(T):
        constraints += [x[t + 1] == x[t] + u[t] - d[t]]
    constraints += [u >= 0, u <= M]

    # Objective
    cost = cp.sum(cp.maximum(h * x, -p * x)) + c * cp.sum(u)

    # Define problem
    cvxpy_problem = cp.Problem(cp.Minimize(cost), constraints)
    problem = Problem(cvxpy_problem)

    # Solve for all theta in serial
    results_serial = problem.solve_parametric(df, parallel=False)

    # Solve for all theta in parallel
    results_parallel = problem.solve_parametric(df, parallel=True)

    # Assert all results match
    for i in range(n_train):
        serial = results_serial[i]
        parallel = results_parallel[i]

        # Compare x
        npt.assert_array_almost_equal(serial['x'],
                                      parallel['x'],
                                      decimal=TOL)

        # Compare cost
        npt.assert_array_almost_equal(serial['cost'],
                                      parallel['cost'],
                                      decimal=TOL)

        # Compare strategy
        self.assertTrue(serial['strategy'] == parallel['strategy'])
def test_warm_start(self):
    """Solve with Gurobi twice and check warm start."""
    np.random.seed(0)
    m, n = 80, 30
    A = np.random.rand(m, n)
    b = np.random.randn(m)
    x = cp.Variable(n, integer=True)

    cost = cp.norm(A @ x - b, 1)
    cvxpy_problem = cp.Problem(cp.Minimize(cost))
    problem = Problem(cvxpy_problem)

    results_first = problem.solve()
    results_second = problem.solve()

    npt.assert_array_less(results_second['time'], results_first['time'])
def test_random_integer(self):
    """Mixed-integer random LP test"""

    # Seed for reproducibility
    np.random.seed(1)

    # Define problem
    n = 20
    m = 70

    # Define constraints
    v = np.random.rand(n)   # Solution
    A = spa.random(m, n, density=0.8,
                   data_rvs=np.random.randn,
                   format="csc")
    b = A.dot(v) + 10 * np.random.rand(m)

    # Split in 2 parts
    A1 = A[:int(m / 2), :]
    b1 = b[:int(m / 2)]
    A2 = A[int(m / 2):, :]
    b2 = b[int(m / 2):]

    # Cost
    c = np.random.rand(n)
    x = cp.Variable(n)               # Variable
    y = cp.Variable(integer=True)    # Variable
    cost = c @ x - cp.sum(y) + y

    # Define constraints
    constraints = [A1 @ x - y <= b1,
                   A2 @ x + y <= b2,
                   y >= 2]

    # Problem
    cvxpy_problem = cp.Problem(cp.Minimize(cost), constraints)
    problem = Problem(cvxpy_problem)

    # Solve and compute strategy
    results = problem.solve()

    # Solve just with strategy
    results_new = problem.solve(strategy=results["strategy"])

    # Verify both solutions are equal
    npt.assert_almost_equal(results["x"],
                            results_new["x"],
                            decimal=TOL)
    npt.assert_almost_equal(results["cost"],
                            results_new["cost"],
                            decimal=TOL)
def test_random_cont_qp_reform(self):
    """Random continuous QP reformulation test."""

    # Seed for reproducibility
    np.random.seed(1)

    # Define problem
    n = 100
    m = 250

    # Define constraints
    v = np.random.rand(n)   # Solution
    A = spa.random(m, n, density=0.8,
                   data_rvs=np.random.randn,
                   format="csc")
    b = A.dot(v) + np.random.rand(m)

    # Split in 2 parts
    A1 = A[:int(m / 2), :]
    b1 = b[:int(m / 2)]
    A2 = A[int(m / 2):, :]
    b2 = b[int(m / 2):]

    # Cost
    c = np.random.rand(n)
    x = cp.Variable(n)   # Variable
    cost = cp.sum_squares(c @ x) + cp.norm(x, 1)

    # Define constraints
    constraints = [A1 @ x <= b1,
                   A2 @ x <= b2]

    # Problem
    cvxpy_problem = cp.Problem(cp.Minimize(cost), constraints)
    problem = Problem(cvxpy_problem)

    # Solve and compute strategy
    results = problem.solve()

    # Solve just with strategy
    results_new = problem.solve(strategy=results["strategy"])

    # Verify both solutions are equal
    npt.assert_almost_equal(results["x"],
                            results_new["x"],
                            decimal=TOL)
    npt.assert_almost_equal(results["cost"],
                            results_new["cost"],
                            decimal=TOL)
def test_small_inventory(self):
    """Test small inventory problem with explicit epigraph variables."""

    # Generate data
    np.random.seed(1)
    T = 5
    M = 2.0
    h = 1.0
    c = 1.0
    p = 1.0
    x_init = 2.0

    # Define problem
    x = cp.Variable(T + 1)
    u = cp.Variable(T)
    t = cp.Variable(T + 1)   # Epigraph variable for the max term

    # Explicitly define parameter
    d = np.array([3.94218985, 2.98861724, 2.48309709,
                  1.91226946, 2.33123841])

    # Constraints (use a separate loop index so the epigraph
    # variable t is not shadowed)
    constraints = [x[0] == x_init]
    for i in range(T):
        constraints += [x[i + 1] == x[i] + u[i] - d[i]]
    constraints += [u >= 0, u <= M]

    # Maximum
    constraints += [t >= h * x, t >= -p * x]

    # Objective
    cost = cp.sum(t) + c * cp.sum(u)

    # Define problem
    cvxpy_problem = cp.Problem(cp.Minimize(cost), constraints)
    problem = Problem(cvxpy_problem)
    results = problem.solve()

    # Solve with strategy
    results_strategy = problem.solve(strategy=results["strategy"])

    # Verify both solutions are equal
    npt.assert_almost_equal(results["x"],
                            results_strategy["x"],
                            decimal=TOL)
    npt.assert_almost_equal(results["cost"],
                            results_strategy["cost"],
                            decimal=TOL)
def test_solve_cvxpy(self):
    """Solve cvxpy problem vs optimizer problem. Expect similar solutions."""
    np.random.seed(1)
    n = 5
    m = 15
    x = cp.Variable(n)
    c = np.random.randn(n)
    A = np.random.randn(m, n)
    b = np.random.randn(m)

    cost = c @ x
    constraints = [A @ x <= b]

    cvxpy_problem = cp.Problem(cp.Minimize(cost), constraints)
    cvxpy_problem.solve(solver=DEFAULT_SOLVER)
    x_cvxpy = deepcopy(x.value)
    cost_cvxpy = cost.value

    problem = Problem(cvxpy_problem)
    problem.solve()
    x_problem = x.value
    cost_problem = cost.value

    npt.assert_almost_equal(x_problem, x_cvxpy, decimal=TOL)
    npt.assert_almost_equal(cost_problem, cost_cvxpy, decimal=TOL)
class Optimizer(object):
    """
    Machine Learning Optimizer class.
    """

    def __init__(self,
                 cvxpy_problem,
                 name="problem",
                 log_level=None,
                 parallel=True,
                 tight_constraints=True,
                 **solver_options):
        """
        Initialize optimizer.

        Parameters
        ----------
        cvxpy_problem : cvxpy.Problem
            Problem in CVXPY format.
        name : str
            Problem name.
        solver_options : dict, optional
            A dict of options for the internal solver.
        """
        if log_level is not None:
            stg.logger.setLevel(log_level)

        self._problem = Problem(cvxpy_problem,
                                solver=stg.DEFAULT_SOLVER,
                                tight_constraints=tight_constraints,
                                **solver_options)
        self._solver_cache = None
        self.name = name
        self._learner = None
        self.encoding = None
        self.X_train = None
        self.y_train = None

    @property
    def n_strategies(self):
        """Number of strategies."""
        if self.encoding is None:
            e.value_error("Model has not been trained yet; "
                          "cannot return the number of strategies.")

        return len(self.encoding)

    def variables(self):
        """Problem variables."""
        return self._problem.variables()

    @property
    def parameters(self):
        """Problem parameters."""
        return self._problem.parameters

    @property
    def n_parameters(self):
        """Number of parameters."""
        return self._problem.n_parameters

    def samples_present(self):
        """Check if samples have been generated."""
        return (self.X_train is not None) and \
            (self.y_train is not None) and \
            (self.encoding is not None)

    def sample(self, sampling_fn, parallel=False):
        """Sample parameters."""

        # Create sampler
        self._sampler = Sampler(self._problem, sampling_fn)

        # Sample parameters
        self.X_train, self.y_train, self.obj_train, self.encoding = \
            self._sampler.sample(parallel=parallel)

    def save_training_data(self, file_name, delete_existing=False):
        """
        Save training data to file.

        Avoids the need to recompute the data.

        Parameters
        ----------
        file_name : string
            File name of the compressed optimizer.
        delete_existing : bool, optional
            Delete existing file with the same name? Defaults to False.
        """
        # Check if file already exists
        if os.path.isfile(file_name):
            if not delete_existing:
                p = None
                while p not in ['y', 'n', 'N', '']:
                    p = input("File %s already exists. " % file_name +
                              "Would you like to delete it? [y/N] ")
                if p == 'y':
                    os.remove(file_name)
                else:
                    return
            else:
                os.remove(file_name)

        if not self.samples_present():
            e.value_error("You need to get the strategies "
                          "from the data first by training the model.")

        # Save to file
        with open(file_name, 'wb') as data:
            data_dict = {'X_train': self.X_train,
                         'y_train': self.y_train,
                         'obj_train': self.obj_train,
                         '_problem': self._problem,
                         'encoding': self.encoding}

            # if hasattr(self, '_solver_cache'):
            #     data_dict['_solver_cache'] = self._solver_cache

            # Store strategy filter
            if hasattr(self, '_filter'):
                data_dict['_filter'] = self._filter

            pkl.dump(data_dict, data)

    def load_training_data(self, file_name):
        """
        Load pickled training data from file name.

        Parameters
        ----------
        file_name : string
            File name of the data.
        """
        # Check if file exists
        if not os.path.isfile(file_name):
            e.value_error("File %s does not exist." % file_name)
        # Load optimizer
        with open(file_name, "rb") as f:
            data_dict = pkl.load(f)

        # Store data internally
        self.X_train = data_dict['X_train']
        self.y_train = data_dict['y_train']
        self.obj_train = data_dict['obj_train']
        self._problem = data_dict['_problem']
        self.encoding = data_dict['encoding']

        # Set n_train in learner
        if self._learner is not None:
            self._learner.n_train = len(self.y_train)

        stg.logger.info("Loaded %d points with %d strategies" %
                        (len(self.y_train), len(self.encoding)))

        if '_solver_cache' in data_dict:
            self._solver_cache = data_dict['_solver_cache']

        # Full strategies backup after filtering
        if '_filter' in data_dict:
            self._filter = data_dict['_filter']

        # Compute Good-Turing estimates
        self._sampler = Sampler(self._problem, n_samples=len(self.X_train))
        self._sampler.compute_good_turing(self.y_train)

    def get_samples(self, X=None, sampling_fn=None, parallel=True,
                    filter_strategies=stg.FILTER_STRATEGIES):
        """Get samples either from data or from sampling function."""

        # Assert we have data to train or already trained
        if X is None and sampling_fn is None and not self.samples_present():
            e.value_error("Not enough arguments to train the model")

        if X is not None and sampling_fn is not None:
            e.value_error("You can pass only one value between X "
                          "and sampling_fn")

        # Check if data is passed, otherwise train
        # if (X is not None) and not self.samples_present():
        if X is not None:
            stg.logger.info("Use new data")
            self.X_train = X
            self.y_train = None
            self.encoding = None

            # Encode training strategies by solving
            # the problem for all the points
            results = self._problem.solve_parametric(
                X, parallel=parallel,
                message="Compute tight constraints for training set")

            stg.logger.info("Checking for infeasible points")
            not_feasible_points = {i: x for i, x in tqdm(enumerate(results))
                                   if np.isnan(x['x']).any()}
            if not_feasible_points:
                e.value_error("Infeasible points found. Number of infeasible "
                              "points %d" % len(not_feasible_points))
            stg.logger.info("No infeasible points found.")

            self.obj_train = [r['cost'] for r in results]
            train_strategies = [r['strategy'] for r in results]

            # Check if the problems are solvable
            # for r in results:
            #     assert r['status'] in cps.SOLUTION_PRESENT, \
            #         "The training points must be feasible"

            # Encode strategies
            self.y_train, self.encoding = \
                encode_strategies(train_strategies)

            # Compute Good-Turing estimates
            self._sampler = Sampler(self._problem,
                                    n_samples=len(self.X_train))
            self._sampler.compute_good_turing(self.y_train)

            # Condense strategies
            if filter_strategies:
                self.filter_strategies(parallel=parallel)

        elif sampling_fn is not None and not self.samples_present():
            stg.logger.info("Use iterative sampling")

            # Create X_train, y_train and encoding from
            # the sampling function
            self.sample(sampling_fn, parallel=parallel)

            # Condense strategies
            if filter_strategies:
                self.filter_strategies(parallel=parallel)

        # Add factorization caching if:
        # 1. Problem is MIQP
        # 2. Parameters do not enter in the matrices
        if self._problem.is_qp() and \
                (self._solver_cache is None) and \
                not self._problem.parameters_in_matrices:
            self.cache_factors()

    def filter_strategies(self, parallel=True, **filter_options):
        """Filter strategies to reduce their number."""

        # Store full non-filtered strategies
        self.encoding_full = self.encoding
        self.y_train_full = self.y_train

        # Define strategies filter (do not run it yet)
        self._filter = Filter(X_train=self.X_train,
                              y_train=self.y_train,
                              obj_train=self.obj_train,
                              encoding=self.encoding,
                              problem=self._problem)

        self.y_train, self.encoding = \
            self._filter.filter(parallel=parallel, **filter_options)

    def train(self, X=None,
              sampling_fn=None,
              parallel=True,
              learner=stg.DEFAULT_LEARNER,
              filter_strategies=stg.FILTER_STRATEGIES,
              **learner_options):
        """
        Train optimizer using parameter X.

        This function needs one argument between the data points X
        and the sampling function sampling_fn. It raises an error
        otherwise because there is no way to sample data.

        Parameters
        ----------
        X : pandas dataframe or numpy array, optional
            Data samples. Each row is a new sample point.
        sampling_fn : function, optional
            Function to sample data taking one argument being the number of
            data points to be sampled and returning a structure of the same
            type as X.
        parallel : bool
            Perform training in parallel.
        learner : str
            Learner to use. Learners are defined in :mod:`mlopt.settings`.
        learner_options : dict, optional
            A dict of options for the learner.
        """

        # Get training samples
        self.get_samples(X, sampling_fn,
                         parallel=parallel,
                         filter_strategies=filter_strategies)

        # Define learner
        if learner not in installed_learners():
            e.value_error("Learner specified not installed. "
                          "Available learners are: %s" % installed_learners())
        self._learner = LEARNER_MAP[learner](n_input=n_features(self.X_train),
                                             n_classes=len(self.encoding),
                                             **learner_options)

        # Train learner
        self._learner.train(pandas2array(self.X_train), self.y_train)

    def cache_factors(self):
        """Cache linear system solver factorizations."""

        self._solver_cache = []
        stg.logger.info("Caching KKT solver factors for each strategy")

        for strategy_idx in tqdm(range(self.n_strategies)):

            # Get a parameter giving that strategy
            strategy = self.encoding[strategy_idx]
            idx_param = np.where(self.y_train == strategy_idx)[0]
            theta = self.X_train.iloc[idx_param[0]]

            # Populate
            self._problem.populate(theta)

            # Get problem data
            data, inverse_data, solving_chain = \
                self._problem._get_problem_data()

            # Apply strategy
            strategy.apply(data, inverse_data[-1])

            # Old
            # self._problem.populate(theta)
            #
            # self._problem._relax_disc_var()
            #
            # reduced_problem = \
            #     self._problem._construct_reduced_problem(strategy)
            #
            # data, full_chain, inv_data = \
            #     reduced_problem.get_problem_data(solver=KKT)

            # Get KKT matrix
            KKT_mat = create_kkt_matrix(data)
            solve_kkt = factorize_kkt_matrix(KKT_mat)

            cache = {}
            cache['factors'] = solve_kkt
            # cache['inverse_data'] = inverse_data
            # cache['chain'] = solving_chain

            self._solver_cache += [cache]

    def choose_best(self, problem_data, labels, parallel=False,
                    batch_size=stg.JOBLIB_BATCH_SIZE, use_cache=True):
        """
        Choose best strategy between provided ones.

        Parameters
        ----------
        problem_data : tuple
            Problem data returned by the internal problem's
            _get_problem_data().
        labels : list
            Strategy labels to compare.
        parallel : bool, optional
            Perform `n_best` strategies evaluation in parallel.
            True by default.
        use_cache : bool, optional
            Use solver cache if available. True by default.

        Returns
        -------
        dict
            Results as a dictionary.
        """
""" n_best = self._learner.options['n_best'] # For each n_best classes get x, y, time and store the best one x = [] time = [] infeas = [] cost = [] strategies = [self.encoding[label] for label in labels] # Cache is a list of solver caches to pass cache = [None] * n_best if self._solver_cache and use_cache: cache = [self._solver_cache[label] for label in labels] n_jobs = u.get_n_processes(n_best) if parallel else 1 results = Parallel(n_jobs=n_jobs, batch_size=batch_size)( delayed(self._problem.solve)(problem_data, strategy=strategies[j], cache=cache[j]) for j in range(n_best)) x = [r["x"] for r in results] time = [r["time"] for r in results] infeas = [r["infeasibility"] for r in results] cost = [r["cost"] for r in results] # Pick best class between k ones infeas = np.array(infeas) cost = np.array(cost) idx_filter = np.where(infeas <= stg.INFEAS_TOL)[0] if len(idx_filter) > 0: # Case 1: Feasible points # -> Get solution with best cost # between feasible ones if self._problem.sense() == Minimize: idx_pick = idx_filter[np.argmin(cost[idx_filter])] elif self._problem.sense() == Maximize: idx_pick = idx_filter[np.argmax(cost[idx_filter])] else: e.value_error('Objective type not understood') else: # Case 2: No feasible points # -> Get solution with minimum infeasibility idx_pick = np.argmin(infeas) # Store values we are interested in result = {} result['x'] = x[idx_pick] result['time'] = np.sum(time) result['strategy'] = strategies[idx_pick] result['cost'] = cost[idx_pick] result['infeasibility'] = infeas[idx_pick] return result def solve(self, X, message="Predict optimal solution", use_cache=True, verbose=False, ): """ Predict optimal solution given the parameters X. Parameters ---------- X : pandas DataFrame or Series Data points. use_cache : bool, optional Use solver cache? Defaults to True. Returns ------- list List of result dictionaries. """ if isinstance(X, pd.Series): X = pd.DataFrame(X).transpose() n_points = len(X) if use_cache and not self._solver_cache: e.warning("Solver cache requested but the cache has " "not been computed for this problem. " "Possibly parameters in proble matrices.") # Change verbose setting if verbose: self._problem.verbose = True # Define array of results to return results = [] # Predict best n_best classes for all the points X_pred = pandas2array(X) t_start = time() classes = self._learner.predict(X_pred) t_predict = (time() - t_start) / n_points # Average predict time if n_points > 1: stg.logger.info(message) ran = tqdm(range(n_points)) else: # Do not print anything if just one point ran = range(n_points) for i in ran: # Populate problem with i-th data point self._problem.populate(X.iloc[i]) problem_data = self._problem._get_problem_data() results.append(self.choose_best(problem_data, classes[i, :], use_cache=use_cache)) # Append predict time for r in results: r['pred_time'] = t_predict r['solve_time'] = r['time'] r['time'] = r['pred_time'] + r['solve_time'] if len(results) == 1: results = results[0] return results def save(self, file_name, delete_existing=False): """ Save optimizer to a specific tar.gz file. Parameters ---------- file_name : string File name of the compressed optimizer. delete_existing : bool, optional Delete existing file with the same name? Defaults to False. 
""" if self._learner is None: e.value_error("You cannot save the optimizer without " + "training it before.") # Add .tar.gz if the file has no extension if not file_name.endswith('.tar.gz'): file_name += ".tar.gz" # Check if file already exists if os.path.isfile(file_name): if not delete_existing: p = None while p not in ['y', 'n', 'N', '']: p = input("File %s already exists. " % file_name + "Would you like to delete it? [y/N] ") if p == 'y': os.remove(file_name) else: return else: os.remove(file_name) # Create temporary directory to create the archive # and store relevant files with tempfile.TemporaryDirectory() as tmpdir: # Save learner self._learner.save(os.path.join(tmpdir, "learner")) # Save optimizer with open(os.path.join(tmpdir, "optimizer.pkl"), 'wb') \ as optimizer: file_dict = {'_problem': self._problem, # '_solver_cache': self._solver_cache, # Cannot pickle 'learner_name': self._learner.name, 'learner_options': self._learner.options, 'learner_best_params': self._learner.best_params, 'encoding': self.encoding } pkl.dump(file_dict, optimizer) # Create archive with the files tar = tarfile.open(file_name, "w:gz") for f in glob(os.path.join(tmpdir, "*")): tar.add(f, os.path.basename(f)) tar.close() @classmethod def from_file(cls, file_name): """ Create optimizer from a specific compressed tar.gz file. Parameters ---------- file_name : string File name of the exported optimizer. """ # Add .tar.gz if the file has no extension if not file_name.endswith('.tar.gz'): file_name += ".tar.gz" # Check if file exists if not os.path.isfile(file_name): e.value_error("File %s does not exist." % file_name) # Extract file to temporary directory and read it with tempfile.TemporaryDirectory() as tmpdir: with tarfile.open(file_name) as tar: tar.extractall(path=tmpdir) # Load optimizer optimizer_file_name = os.path.join(tmpdir, "optimizer.pkl") if not optimizer_file_name: e.value_error("Optimizer pkl file does not exist.") with open(optimizer_file_name, "rb") as f: optimizer_dict = pkl.load(f) name = optimizer_dict.get('name', 'problem') # Create optimizer using loaded dict problem = optimizer_dict['_problem'].cvxpy_problem optimizer = cls(problem, name=name) # Assign strategies encoding optimizer.encoding = optimizer_dict['encoding'] optimizer._sampler = optimizer_dict.get('_sampler', None) # Load learner learner_name = optimizer_dict['learner_name'] learner_options = optimizer_dict['learner_options'] learner_best_params = optimizer_dict['learner_best_params'] optimizer._learner = \ LEARNER_MAP[learner_name](n_input=optimizer.n_parameters, n_classes=len(optimizer.encoding), **learner_options) optimizer._learner.best_params = learner_best_params optimizer._learner.load(os.path.join(tmpdir, "learner")) return optimizer def performance(self, theta, results_test=None, results_heuristic=None, parallel=False, use_cache=True): """ Evaluate optimizer performance on data theta by comparing the solution to the optimal one. Parameters ---------- theta : DataFrame Data to predict. parallel : bool, optional Solve problems in parallel? Defaults to True. Returns ------- dict Results summarty. dict Detailed results summary. 
""" stg.logger.info("Performance evaluation") if results_test is None: # Get strategy for each point results_test = self._problem.solve_parametric( theta, parallel=parallel, message="Compute " + "tight constraints " + "for test set") if results_heuristic is None: self._problem.solver_options['MIPGap'] = 0.1 # 10% MIP Gap # Get strategy for each point results_heuristic = self._problem.solve_parametric( theta, parallel=parallel, message="Compute " + "tight constraints " + "with heuristic MIP Gap 10 %%" + "for test set") self._problem.solver_options.pop('MIPGap') # Remove MIP Gap option time_test = [r['time'] for r in results_test] cost_test = [r['cost'] for r in results_test] time_heuristic = [r['time'] for r in results_heuristic] cost_heuristic = [r['cost'] for r in results_heuristic] # Get predicted strategy for each point results_pred = self.solve(theta, message="Predict tight constraints for " + "test set", use_cache=use_cache) time_pred = [r['time'] for r in results_pred] solve_time_pred = [r['solve_time'] for r in results_pred] pred_time_pred = [r['pred_time'] for r in results_pred] cost_pred = [r['cost'] for r in results_pred] infeas = np.array([r['infeasibility'] for r in results_pred]) n_test = len(theta) n_train = self._learner.n_train # Number of training samples n_theta = n_features(theta) # Number of parameters n_strategies = len(self.encoding) # Number of strategies # Compute comparative statistics time_comp = np.array([time_test[i] / time_pred[i] for i in range(n_test)]) time_comp_heuristic = np.array([time_heuristic[i] / time_pred[i] for i in range(n_test)]) subopt = np.array([suboptimality(cost_pred[i], cost_test[i], self._problem.sense()) for i in range(n_test)]) subopt_real = subopt[np.where(infeas <= stg.INFEAS_TOL)[0]] if any(subopt_real): max_subopt = np.max(subopt_real) avg_subopt = np.mean(subopt_real) std_subopt = np.std(subopt_real) else: max_subopt = np.nan avg_subopt = np.nan std_subopt = np.nan subopt_heuristic = np.array([suboptimality(cost_heuristic[i], cost_test[i], self._problem.sense()) for i in range(n_test)]) # accuracy test_accuracy, idx_correct = accuracy(results_pred, results_test, self._problem.sense()) # Create dataframes to return df = pd.Series( { "problem": self.name, "learner": self._learner.name, "n_best": self._learner.options['n_best'], "n_var": self._problem.n_var, "n_constr": self._problem.n_constraints, "n_test": n_test, "n_train": n_train, "n_theta": n_theta, "good_turing": self._sampler.good_turing, "good_turing_smooth": self._sampler.good_turing_smooth, "n_correct": np.sum(idx_correct), "n_strategies": n_strategies, "accuracy": 100 * test_accuracy, "n_infeas": np.sum(infeas >= stg.INFEAS_TOL), "avg_infeas": np.mean(infeas), "std_infeas": np.std(infeas), "max_infeas": np.max(infeas), "avg_subopt": avg_subopt, "std_subopt": std_subopt, "max_subopt": max_subopt, "avg_subopt_heuristic": np.mean(subopt_heuristic), "std_subopt_heuristic": np.std(subopt_heuristic), "max_subopt_heuristic": np.max(subopt_heuristic), "mean_solve_time_pred": np.mean(solve_time_pred), "std_solve_time_pred": np.std(solve_time_pred), "mean_pred_time_pred": np.mean(pred_time_pred), "std_pred_time_pred": np.std(pred_time_pred), "mean_time_pred": np.mean(time_pred), "std_time_pred": np.std(time_pred), "max_time_pred": np.max(time_pred), "mean_time_full": np.mean(time_test), "std_time_full": np.std(time_test), "max_time_full": np.max(time_test), "mean_time_heuristic": np.mean(time_heuristic), "std_time_heuristic": np.std(time_heuristic), "max_time_heuristic": 
np.max(time_heuristic), } ) df_detail = pd.DataFrame( { "problem": [self.name] * n_test, "learner": [self._learner.name] * n_test, "n_best": [self._learner.options['n_best']] * n_test, "correct": idx_correct, "infeas": infeas, "subopt": subopt, "solve_time_pred": solve_time_pred, "pred_time_pred": pred_time_pred, "time_pred": time_pred, "time_full": time_test, "time_heuristic": time_heuristic, "time_improvement": time_comp, "time_improvement_heuristic": time_comp_heuristic, } ) return df, df_detail
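# A minimal end-to-end usage sketch of the Optimizer class above, mirroring
# the inventory example from the tests. It is illustrative only: the
# parameter name "d", the box sampling of the demand and the file name are
# assumptions, and it requires the default learner (stg.DEFAULT_LEARNER)
# to be installed.
if __name__ == "__main__":
    import numpy as np
    import pandas as pd
    import cvxpy as cp

    T, M, h, c, p, x_init = 5, 2.0, 1.0, 1.0, 1.0, 2.0

    x = cp.Variable(T + 1)
    u = cp.Variable(T)
    d = cp.Parameter(T, nonneg=True, name="d")

    constraints = [x[0] == x_init]
    for t in range(T):
        constraints += [x[t + 1] == x[t] + u[t] - d[t]]
    constraints += [u >= 0, u <= M]
    cost = cp.sum(cp.maximum(h * x, -p * x)) + c * cp.sum(u)

    optimizer = Optimizer(cp.Problem(cp.Minimize(cost), constraints),
                          name="inventory")

    # Sample demands around a nominal value (simple box sampling here; any
    # sampler returning a DataFrame keyed by parameter name works).
    n_train = 100
    d_bar = 3.0 * np.ones(T)
    X_d = d_bar + 0.5 * np.random.uniform(-1.0, 1.0, size=(n_train, T))
    df = pd.DataFrame({'d': list(X_d)})

    # Train with the default learner, then predict for a new parameter.
    optimizer.train(X=df)
    result = optimizer.solve(pd.DataFrame({'d': [d_bar]}))
    print(result['cost'], result['infeasibility'], result['time'])

    # Save the trained optimizer and reload it.
    optimizer.save("inventory_optimizer", delete_existing=True)
    optimizer_loaded = Optimizer.from_file("inventory_optimizer")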