def load(self, file_name):
    """
    Load a stored Optimal Tree learner from a json file.

    Parameters
    ----------
    file_name : string
        Path of the stored tree without the ".json" extension.
    """
    json_path = file_name + ".json"

    # Nothing can be read back if the tree was never written to disk
    tree_found = os.path.isfile(json_path)
    if not tree_found:
        e.value_error("Optimal Tree json file does not exist.")

    # Read the tree with the IAI module stored on the instance
    self._lnr = self.iai.read_json(json_path)
def n_strategies(self):
    """
    Number of strategies.

    Returns
    -------
    int
        Length of the strategy encoding.

    Notes
    -----
    An error is reported through ``e.value_error`` when the encoding is
    still ``None``, i.e. no strategies are available yet.
    """
    if self.encoding is None:
        e.value_error("Model has not been trained yet to "
                      "return the number of strategies.")

    return len(self.encoding)
def save(self, file_name, delete_existing=False):
    """
    Save optimizer to a specific tar.gz file.

    The archive contains the files written by the learner's own ``save``
    method and an ``optimizer.pkl`` file with the problem, the learner
    name/options/best parameters and the strategy encoding.

    Parameters
    ----------
    file_name : string
        File name of the compressed optimizer. ".tar.gz" is appended
        when the name does not already end with it.
    delete_existing : bool, optional
        Delete existing file with the same name without asking?
        Defaults to False, in which case the user is prompted.
    """
    if self._learner is None:
        e.value_error("You cannot save the optimizer without " +
                      "training it before.")

    # Add .tar.gz if the file has no extension
    if not file_name.endswith('.tar.gz'):
        file_name += ".tar.gz"

    # Check if file already exists
    if os.path.isfile(file_name):
        if not delete_existing:
            # Keep asking until a recognized answer is given;
            # anything but 'y' leaves the existing file untouched
            p = None
            while p not in ['y', 'n', 'N', '']:
                p = input("File %s already exists. " % file_name +
                          "Would you like to delete it? [y/N] ")
            if p != 'y':
                return
        os.remove(file_name)

    # Create temporary directory to create the archive
    # and store relevant files
    with tempfile.TemporaryDirectory() as tmpdir:

        # Save learner
        self._learner.save(os.path.join(tmpdir, "learner"))

        # Save optimizer
        file_dict = {
            '_problem': self._problem,
            # '_solver_cache' is left out: it cannot be pickled
            'learner_name': self._learner.name,
            'learner_options': self._learner.options,
            'learner_best_params': self._learner.best_params,
            'encoding': self.encoding
        }
        with open(os.path.join(tmpdir, "optimizer.pkl"), 'wb') \
                as optimizer:
            pkl.dump(file_dict, optimizer)

        # Create archive with the files. The context manager closes the
        # archive even if adding one of the files fails.
        with tarfile.open(file_name, "w:gz") as tar:
            for f in glob(os.path.join(tmpdir, "*")):
                tar.add(f, os.path.basename(f))
def load(self, file_name):
    """
    Load the network from a Pytorch checkpoint file.

    Parameters
    ----------
    file_name : string
        Path of the checkpoint without the ".ckpt" extension.
    """
    checkpoint = file_name + ".ckpt"

    # The checkpoint has to be on disk before it can be restored
    if not os.path.isfile(checkpoint):
        e.value_error("Pytorch checkpoint file does not exist.")

    # Restore the network and move it to the configured device
    restored = LightningNet.load_from_checkpoint(checkpoint,
                                                 self.best_params)
    self.model = restored.to(self.device)

    # Both calls are needed to put the model in evaluation mode
    self.model.eval()
    self.model.freeze()
def __init__(self, **options):
    """
    Initialize Pytorch Learner class.

    Parameters
    ----------
    options : dict
        Learner options as a dictionary. 'n_input' and 'n_classes' are
        required. 'bounds', 'n_best' and 'n_train_trials' are optional
        and fall back to the package defaults.
    """
    if not PytorchNeuralNet.is_installed():
        e.value_error("Pytorch not installed")

    # Import torch only once we know it is installed
    import torch
    self.torch = torch

    # Only show errors from the "lightning" logger
    log = logging.getLogger("lightning")
    log.setLevel(logging.ERROR)

    self.name = stg.PYTORCH
    self.n_input = options.pop('n_input')
    self.n_classes = options.pop('n_classes')
    self.options = {}

    # Work on a copy of the bounds: filling in the missing defaults must
    # neither modify the dictionary owned by the caller nor make
    # self.options share the module-level default dictionary.
    bounds = dict(options.pop('bounds', None) or {})
    for name, default in pts.PARAMETER_BOUNDS.items():
        # Assign remaining keys
        bounds.setdefault(name, default)
    self.options['bounds'] = bounds

    # Pick minimum between n_best and n_classes
    self.options['n_best'] = min(options.pop('n_best', stg.N_BEST),
                                 self.n_classes)

    # Pick number of hyperopt_trials
    self.options['n_train_trials'] = options.pop('n_train_trials',
                                                 stg.N_TRAIN_TRIALS)

    # Set optuna verbosity.
    # NOTE(review): the original comment says "Mute optuna", but INFO
    # does not look like a muting level - confirm whether WARNING was
    # intended.
    optuna.logging.set_verbosity(optuna.logging.INFO)

    # Define device: first CUDA GPU when available, CPU otherwise
    self.use_gpu = self.torch.cuda.is_available()
    if self.use_gpu:
        self.device = self.torch.device("cuda:0")
        stg.logger.info("Using CUDA GPU %s with Pytorch" %
                        self.torch.cuda.get_device_name(self.device))
    else:
        self.device = self.torch.device("cpu")
        stg.logger.info("Using CPU with Pytorch")
def save_training_data(self, file_name, delete_existing=False):
    """
    Save training data to file.

    Avoids the need to recompute data.

    Parameters
    ----------
    file_name : string
        File name of the pickled training data.
    delete_existing : bool, optional
        Delete existing file with the same name without asking?
        Defaults to False, in which case the user is prompted.
    """
    # Validate before touching the disk: an existing file must not be
    # deleted when there is nothing to write in its place.
    if not self.samples_present():
        e.value_error("You need to get the strategies " +
                      "from the data first by training the model.")

    # Check if file already exists
    if os.path.isfile(file_name):
        if not delete_existing:
            # Keep asking until a recognized answer is given;
            # anything but 'y' leaves the existing file untouched
            p = None
            while p not in ['y', 'n', 'N', '']:
                p = input("File %s already exists. " % file_name +
                          "Would you like to delete it? [y/N] ")
            if p != 'y':
                return
        os.remove(file_name)

    # Collect everything before opening the file so that a missing
    # attribute does not leave an empty file behind
    data_dict = {
        'X_train': self.X_train,
        'y_train': self.y_train,
        'obj_train': self.obj_train,
        '_problem': self._problem,
        'encoding': self.encoding
    }

    # Store strategy filter
    if hasattr(self, '_filter'):
        data_dict['_filter'] = self._filter

    # Save to file
    with open(file_name, 'wb') as data:
        pkl.dump(data_dict, data)
def from_file(cls, file_name):
    """
    Create optimizer from a specific compressed tar.gz file.

    Parameters
    ----------
    file_name : string
        File name of the exported optimizer. ".tar.gz" is appended
        when the name does not already end with it.

    Returns
    -------
    optimizer
        New instance of ``cls`` with encoding, sampler and learner
        restored from the archive.
    """
    # Add .tar.gz if the file has no extension
    if not file_name.endswith('.tar.gz'):
        file_name += ".tar.gz"

    # Check if file exists
    if not os.path.isfile(file_name):
        e.value_error("File %s does not exist." % file_name)

    # Extract file to temporary directory and read it
    with tempfile.TemporaryDirectory() as tmpdir:
        # NOTE(review): extractall trusts the member paths stored in the
        # archive and pickle can execute code while loading; only open
        # archives coming from a trusted source.
        with tarfile.open(file_name) as tar:
            tar.extractall(path=tmpdir)

        # Load optimizer
        optimizer_file_name = os.path.join(tmpdir, "optimizer.pkl")
        # The path string itself is never empty, so the file has to be
        # looked up on disk to detect an incomplete archive
        if not os.path.isfile(optimizer_file_name):
            e.value_error("Optimizer pkl file does not exist.")
        with open(optimizer_file_name, "rb") as f:
            optimizer_dict = pkl.load(f)

        name = optimizer_dict.get('name', 'problem')

        # Create optimizer using loaded dict
        problem = optimizer_dict['_problem'].cvxpy_problem
        optimizer = cls(problem, name=name)

        # Assign strategies encoding
        optimizer.encoding = optimizer_dict['encoding']
        optimizer._sampler = optimizer_dict.get('_sampler', None)

        # Load learner
        learner_name = optimizer_dict['learner_name']
        learner_options = optimizer_dict['learner_options']
        learner_best_params = optimizer_dict['learner_best_params']
        optimizer._learner = \
            LEARNER_MAP[learner_name](n_input=optimizer.n_parameters,
                                      n_classes=len(optimizer.encoding),
                                      **learner_options)
        optimizer._learner.best_params = learner_best_params
        # The learner files live in the temporary directory, so they
        # must be read before the directory is cleaned up
        optimizer._learner.load(os.path.join(tmpdir, "learner"))

    return optimizer
def assign_to_unique_strategy(strategy, unique_strategies):
    """
    Return the position of `strategy` inside `unique_strategies`.

    Parameters
    ----------
    strategy : object
        Strategy to look for.
    unique_strategies : iterable
        Candidate strategies, compared with ``==`` in order.

    Returns
    -------
    int
        Index of the first candidate equal to `strategy`.
    """
    for position, candidate in enumerate(unique_strategies):
        if strategy == candidate:
            return position

    # No candidate matched
    e.value_error("Strategy not found")
    return -1
def __init__(self,
             cvxpy_problem,
             solver=stg.DEFAULT_SOLVER,
             verbose=False,
             **solver_options):
    """
    Initialize optimization problem.

    Parameters
    ----------
    cvxpy_problem : cvxpy.Problem
        CVXPY problem. It has to pass both the ``is_dcp`` and the
        ``is_qp`` checks.
    solver : str, optional
        Solver to solve internal problem. Defaults to DEFAULT_SOLVER.
    verbose : bool, optional
        Verbosity flag stored on the instance. Defaults to False.
    solver_options : dict, optional
        A dict of options for the internal solver.
    """
    # Solver and verbosity
    self.solver = solver
    self.verbose = verbose

    # Only DCP problems that are also QPs are accepted
    dcp_compliant = cvxpy_problem.is_dcp()
    if not dcp_compliant:
        e.value_error("CVXPY Problem is not DCP")
    qp_compliant = cvxpy_problem.is_qp()
    if not qp_compliant:
        e.value_error("MLOPT supports only MIQP-based problems "
                      "LP/QP/MILP/MIQP")
    self.cvxpy_problem = cvxpy_problem

    # Canonical form of the problem
    self._canonicalize()

    # Computed a single time and stored
    self._parameters_in_matrices = self.check_parameters_in_matrices()

    # Raw solution, not available yet
    self._x = None

    # With the default solver the default options are merged in; they
    # replace caller-supplied options that have the same name
    using_default_solver = (solver == stg.DEFAULT_SOLVER)
    if using_default_solver:
        solver_options.update(stg.DEFAULT_SOLVER_OPTIONS)

    self.solver_options = solver_options
def train(self, X=None, sampling_fn=None,
          parallel=True,
          learner=stg.DEFAULT_LEARNER,
          filter_strategies=stg.FILTER_STRATEGIES,
          **learner_options):
    """
    Train optimizer using parameter X.

    This function needs one argument between data points X
    or sampling function sampling_fn. It will raise an error
    otherwise because there is no way to sample data.

    Parameters
    ----------
    X : pandas dataframe or numpy array, optional
        Data samples. Each row is a new sample points.
    sampling_fn : function, optional
        Function to sample data taking one argument being
        the number of data points to be sampled and returning
        a structure of the same type as X.
    parallel : bool
        Perform training in parallel.
    learner : str
        Learner to use. Learners are defined in :mod:`mlopt.settings`
    filter_strategies : bool, optional
        Forwarded to ``get_samples``. Defaults to FILTER_STRATEGIES.
    learner_options : dict, optional
        A dict of options for the learner.
    """
    # Validate the learner first so that an invalid choice is reported
    # before any time is spent collecting the training samples
    available_learners = installed_learners()
    if learner not in available_learners:
        e.value_error("Learner specified not installed. "
                      "Available learners are: %s" % available_learners)

    # Get training samples
    self.get_samples(X, sampling_fn,
                     parallel=parallel,
                     filter_strategies=filter_strategies)

    # Define learner
    self._learner = LEARNER_MAP[learner](n_input=n_features(self.X_train),
                                         n_classes=len(self.encoding),
                                         **learner_options)

    # Train learner
    self._learner.train(pandas2array(self.X_train),
                        self.y_train)
def solve(self,
          problem_data=None,
          solver_data=None,
          strategy=None,
          cache=None):
    """
    Solve optimization problem.

    Parameters
    ----------
    problem_data : tuple, optional
        Triple (data, inverse_data, solving_chain). It is obtained
        from ``_get_problem_data`` when not given.
    solver_data : optional
        Not used by this method.
    strategy : Strategy, optional
        Strategy to apply. Default none.
    cache : dict, optional
        Solver cache used when a strategy is given. Without a strategy
        the cache stored on the CVXPY problem is used instead.

    Returns
    -------
    dict
        Results as returned by ``_parse_solution``.
    """
    if problem_data is None:
        problem_data = self._get_problem_data()
    data, inverse_data, solving_chain = problem_data

    if strategy is None:
        # Regular solve with the configured options and the cache
        # stored on the CVXPY problem
        solver_options = self.solver_options
        cache = self.cvxpy_problem._solver_cache
    else:
        if not strategy.accepts(data):
            e.value_error("Strategy incompatible for current problem")

        # Apply the strategy and swap the last reduction of the chain
        # for the KKT solver
        strategy.apply(data, inverse_data[-1])
        kkt_reductions = solving_chain.reductions[:-1] + [KKTSolver()]
        solving_chain = SolvingChain(problem=self.cvxpy_problem,
                                     reductions=kkt_reductions)
        solver_options = {}

    raw_solution = solving_chain.solver.solve_via_data(
        data,
        warm_start=True,
        verbose=self.verbose,
        solver_opts=solver_options,
        solver_cache=cache)

    return self._parse_solution(raw_solution, data, self.cvxpy_problem,
                                solving_chain, inverse_data)
def load_training_data(self, file_name):
    """
    Load pickled training data from file name.

    Parameters
    ----------
    file_name : string
        File name of the data.
    """
    # The data file has to be there
    if not os.path.isfile(file_name):
        e.value_error("File %s does not exist." % file_name)

    # Read the stored dictionary
    with open(file_name, "rb") as stored:
        data_dict = pkl.load(stored)

    # Mandatory entries are copied onto the instance as they are
    for key in ('X_train', 'y_train', 'obj_train', '_problem', 'encoding'):
        setattr(self, key, data_dict[key])

    # Tell an existing learner how many training points there are
    n_points = len(self.y_train)
    if self._learner is not None:
        self._learner.n_train = n_points

    stg.logger.info("Loaded %d points with %d strategies" %
                    (n_points, len(self.encoding)))

    # Optional entries: solver cache and the full strategies
    # backup after filtering
    for key in ('_solver_cache', '_filter'):
        if key in data_dict:
            setattr(self, key, data_dict[key])

    # Good turing estimates are recomputed from the loaded labels
    self._sampler = Sampler(self._problem, n_samples=len(self.X_train))
    self._sampler.compute_good_turing(self.y_train)
def __init__(self, **options):
    """
    Initialize XGBoost Learner class.

    Parameters
    ----------
    options : dict
        Learner options as a dictionary. 'n_input' and 'n_classes' are
        required. 'bounds', 'n_best' and 'n_train_trials' are optional
        and fall back to the package defaults.
    """
    if not XGBoost.is_installed():
        e.value_error("XGBoost not installed")

    # Import xgboost only once we know it is installed
    import xgboost as xgb
    self.xgb = xgb

    self.name = stg.XGBOOST
    self.n_input = options.pop('n_input')
    self.n_classes = options.pop('n_classes')
    self.options = {}

    # Work on a copy of the bounds: filling in the missing defaults must
    # neither modify the dictionary owned by the caller nor make
    # self.options share the module-level default dictionary.
    bounds = dict(options.pop('bounds', None) or {})
    for name, default in xgbs.PARAMETER_BOUNDS.items():
        # Assign remaining keys
        bounds.setdefault(name, default)
    self.options['bounds'] = bounds

    # Pick minimum between n_best and n_classes
    self.options['n_best'] = min(options.pop('n_best', stg.N_BEST),
                                 self.n_classes)

    # Pick number of hyperopt_trials
    self.options['n_train_trials'] = options.pop('n_train_trials',
                                                 stg.N_TRAIN_TRIALS)

    # Set optuna verbosity.
    # NOTE(review): the original comment says "Mute optuna", but INFO
    # does not look like a muting level - confirm whether WARNING was
    # intended.
    optuna.logging.set_verbosity(optuna.logging.INFO)
def choose_best(self, problem_data, labels, parallel=False,
                batch_size=stg.JOBLIB_BATCH_SIZE, use_cache=True):
    """
    Choose best strategy between provided ones

    Parameters
    ----------
    problem_data : object
        Problem data forwarded to the problem ``solve`` method.
    labels : list
        Strategy labels to compare.
    parallel : bool, optional
        Perform `n_best` strategies evaluation in parallel.
        False by default.
    batch_size : optional
        Batch size given to joblib. JOBLIB_BATCH_SIZE by default.
    use_cache : bool, optional
        Use solver cache if available. True by default.

    Returns
    -------
    dict
        Results as a dictionary with keys 'x', 'time', 'strategy',
        'cost' and 'infeasibility'.
    """
    n_best = self._learner.options['n_best']

    strategies = [self.encoding[label] for label in labels]

    # One solver cache per evaluated strategy (None when not used)
    if self._solver_cache and use_cache:
        cache = [self._solver_cache[label] for label in labels]
    else:
        cache = [None] * n_best

    # Solve the problem once for each of the n_best strategies
    n_jobs = u.get_n_processes(n_best) if parallel else 1
    runner = Parallel(n_jobs=n_jobs, batch_size=batch_size)
    results = runner(
        delayed(self._problem.solve)(problem_data,
                                     strategy=strategies[j],
                                     cache=cache[j])
        for j in range(n_best))

    x = [res["x"] for res in results]
    time = [res["time"] for res in results]
    infeas = np.array([res["infeasibility"] for res in results])
    cost = np.array([res["cost"] for res in results])

    # Pick best class between k ones
    idx_filter = np.where(infeas <= stg.INFEAS_TOL)[0]
    if len(idx_filter) == 0:
        # No feasible points
        # -> Get solution with minimum infeasibility
        idx_pick = np.argmin(infeas)
    else:
        # Feasible points
        # -> Get solution with best cost between feasible ones
        feasible_cost = cost[idx_filter]
        if self._problem.sense() == Minimize:
            idx_pick = idx_filter[np.argmin(feasible_cost)]
        elif self._problem.sense() == Maximize:
            idx_pick = idx_filter[np.argmax(feasible_cost)]
        else:
            e.value_error('Objective type not understood')

    # Store values we are interested in; time is summed over all
    # the evaluated strategies
    return {
        'x': x[idx_pick],
        'time': np.sum(time),
        'strategy': strategies[idx_pick],
        'cost': cost[idx_pick],
        'infeasibility': infeas[idx_pick],
    }
def get_samples(self, X=None, sampling_fn=None,
                parallel=True,
                filter_strategies=stg.FILTER_STRATEGIES):
    """
    Get samples either from data or from sampling function

    Parameters
    ----------
    X : optional
        Data samples. Takes precedence when given.
    sampling_fn : function, optional
        Sampling function, used only when no samples are present yet.
    parallel : bool, optional
        Forwarded to the solving, sampling and filtering steps.
    filter_strategies : bool, optional
        Filter the strategies after new samples have been obtained.
    """
    have_data = X is not None
    have_sampler = sampling_fn is not None

    # Either some input is given or samples are already stored
    if not have_data and not have_sampler and not self.samples_present():
        e.value_error("Not enough arguments to train the model")

    if have_data and have_sampler:
        e.value_error("You can pass only one value between X "
                      "and sampling_fn")

    if have_data:
        stg.logger.info("Use new data")
        self.X_train = X
        self.y_train = None
        self.encoding = None

        # Solve the problem for every point to get its strategy
        results = self._problem.solve_parametric(
            X, parallel=parallel,
            message="Compute tight constraints for training set")

        # A point is infeasible when its solution contains a NaN
        stg.logger.info("Checking for infeasible points")
        infeasible_idx = [i for i, res in tqdm(enumerate(results))
                          if np.isnan(res['x']).any()]
        if infeasible_idx:
            e.value_error("Infeasible points found. Number of infeasible "
                          "points %d" % len(infeasible_idx))
        stg.logger.info("No infeasible point found.")

        self.obj_train = [res['cost'] for res in results]
        train_strategies = [res['strategy'] for res in results]

        # Encode strategies
        self.y_train, self.encoding = encode_strategies(train_strategies)

        # Compute Good turing estimates
        self._sampler = Sampler(self._problem,
                                n_samples=len(self.X_train))
        self._sampler.compute_good_turing(self.y_train)

        # Condense strategies
        if filter_strategies:
            self.filter_strategies(parallel=parallel)

    elif have_sampler and not self.samples_present():
        stg.logger.info("Use iterative sampling")

        # X_train, y_train and encoding come from the sampling function
        self.sample(sampling_fn, parallel=parallel)

        # Condense strategies
        if filter_strategies:
            self.filter_strategies(parallel=parallel)

    # Add factorization caching if
    # 1. Problem is MIQP
    # 2. No solver cache is stored yet
    # 3. Parameters do not enter in matrices
    can_cache = self._problem.is_qp() and \
        (self._solver_cache is None) and \
        not self._problem.parameters_in_matrices
    if can_cache:
        self.cache_factors()
def solver(self, s):
    """
    Set internal solver.

    Parameters
    ----------
    s : str
        Solver name. It has to be one of INSTALLED_SOLVERS.
    """
    is_installed = s in INSTALLED_SOLVERS
    if not is_installed:
        e.value_error('Solver %s not installed.' % s)

    self._solver = s
def __init__(self, **options):
    """
    Initialize OptimalTrees class.

    Parameters
    ----------
    options : dict
        Learner options as a dictionary. 'n_input' and 'n_classes' are
        required. Optional keys: 'hyperplanes', 'parallel_trees'
        ('parallel' is accepted as well), 'cp', 'max_depth',
        'minbucket', 'n_best', 'save_svg' and 'frac_train'.
    """
    if not OptimalTree.is_installed():
        e.value_error("Interpretable AI not installed")

    # Import julia and IAI module
    from interpretableai import iai
    self.iai = iai
    from julia import Distributed
    self.nprocs = Distributed.nprocs

    # Define name
    self.name = stg.OPTIMAL_TREE

    # Assign settings
    self.n_input = options.pop('n_input')
    self.n_classes = options.pop('n_classes')
    self.options = {}
    self.options['hyperplanes'] = options.pop('hyperplanes', False)

    # The setting is stored under 'parallel', so an options dictionary
    # taken from a previous instance carries that key. Accept it as a
    # fallback; 'parallel_trees' wins when both are given.
    stored_parallel = options.pop('parallel', True)
    self.options['parallel'] = options.pop('parallel_trees',
                                           stored_parallel)

    self.options['cp'] = options.pop('cp', None)
    self.options['max_depth'] = \
        options.pop('max_depth',
                    octstg.DEFAULT_TRAINING_PARAMS['max_depth'])
    self.options['minbucket'] = \
        options.pop('minbucket',
                    octstg.DEFAULT_TRAINING_PARAMS['minbucket'])

    # Pick minimum between n_best and n_classes
    self.options['n_best'] = min(options.pop('n_best', stg.N_BEST),
                                 self.n_classes)
    self.options['save_svg'] = options.pop('save_svg', False)

    # Get fraction between training and validation
    self.options['frac_train'] = options.pop('frac_train', stg.FRAC_TRAIN)

    # Load Julia
    n_cpus = get_n_processes()
    n_cur_procs = self.nprocs()
    if n_cur_procs < n_cpus and self.options['parallel']:
        # Add processors to match number of cpus
        Distributed.addprocs((n_cpus - n_cur_procs))

    # Assign optimaltrees options
    self.optimaltrees_options = {'random_seed': 1}
    self.optimaltrees_options['max_depth'] = self.options['max_depth']
    self.optimaltrees_options['minbucket'] = self.options['minbucket']
    if self.options['hyperplanes']:
        self.optimaltrees_options['hyperplane_config'] = \
            {'sparsity': 'all'}

    # Compare against None: an explicit cp of 0 is a value chosen by
    # the caller and must be forwarded as well
    if self.options['cp'] is not None:
        self.optimaltrees_options['cp'] = self.options['cp']