Code example #1
    def load(self, file_name):
        # Check if file name exists
        if not os.path.isfile(file_name + ".json"):
            e.value_error("Optimal Tree json file does not exist.")

        # Load tree from file
        self._lnr = self.iai.read_json(file_name + ".json")
Code example #2
    def n_strategies(self):
        """Number of strategies."""
        if self.encoding is None:
            e.value_error("Model has been trained yet to " +
                    "return the number of strategies.")

        return len(self.encoding)
Code example #3
File: optimizer.py Project: laf070810/mlopt
    def save(self, file_name, delete_existing=False):
        """
        Save optimizer to a specific tar.gz file.

        Parameters
        ----------
        file_name : string
            File name of the compressed optimizer.
        delete_existing : bool, optional
            Delete existing file with the same name?
            Defaults to False.
        """
        if self._learner is None:
            e.value_error("You cannot save the optimizer without " +
                          "training it before.")

        # Add .tar.gz if the file has no extension
        if not file_name.endswith('.tar.gz'):
            file_name += ".tar.gz"

        # Check if file already exists
        if os.path.isfile(file_name):
            if not delete_existing:
                p = None
                while p not in ['y', 'Y', 'n', 'N', '']:
                    p = input("File %s already exists. " % file_name +
                              "Would you like to delete it? [y/N] ")
                if p in ['y', 'Y']:
                    os.remove(file_name)
                else:
                    return
            else:
                os.remove(file_name)

        # Create temporary directory to create the archive
        # and store relevant files
        with tempfile.TemporaryDirectory() as tmpdir:

            # Save learner
            self._learner.save(os.path.join(tmpdir, "learner"))

            # Save optimizer
            with open(os.path.join(tmpdir, "optimizer.pkl"), 'wb') \
                    as optimizer:
                file_dict = {
                    '_problem': self._problem,
                    # '_solver_cache': self._solver_cache,  # Cannot pickle
                    'learner_name': self._learner.name,
                    'learner_options': self._learner.options,
                    'learner_best_params': self._learner.best_params,
                    'encoding': self.encoding
                }
                pkl.dump(file_dict, optimizer)

            # Create archive with the files
            with tarfile.open(file_name, "w:gz") as tar:
                for f in glob(os.path.join(tmpdir, "*")):
                    tar.add(f, arcname=os.path.basename(f))
Code example #4
    def load(self, file_name):
        path = file_name + ".ckpt"
        # Check if file name exists
        if not os.path.isfile(path):
            e.value_error("Pytorch checkpoint file does not exist.")

        self.model = LightningNet.load_from_checkpoint(
            path, self.best_params).to(self.device)
        self.model.eval()  # Set the model to evaluation mode
        self.model.freeze()  # Freeze parameters (no gradient updates)
Code example #5
    def __init__(self, **options):
        """Initialize Pytorch Learner class.

        Parameters
        ----------
        options : dict
            Learner options as a dictionary.
        """
        if not PytorchNeuralNet.is_installed():
            e.value_error("Pytorch not installed")

        # Import torch only once the learner is instantiated
        import torch
        self.torch = torch

        # Disable logging
        log = logging.getLogger("lightning")
        log.setLevel(logging.ERROR)

        self.name = stg.PYTORCH
        self.n_input = options.pop('n_input')
        self.n_classes = options.pop('n_classes')
        self.options = {}

        self.options['bounds'] = options.pop('bounds', pts.PARAMETER_BOUNDS)

        not_specified_bounds = \
            [x for x in pts.PARAMETER_BOUNDS.keys()
             if x not in self.options['bounds'].keys()]
        for p in not_specified_bounds:  # Assign remaining keys
            self.options['bounds'][p] = pts.PARAMETER_BOUNDS[p]

        # Pick minimum between n_best and n_classes
        self.options['n_best'] = min(options.pop('n_best', stg.N_BEST),
                                     self.n_classes)

        # Pick number of hyperopt_trials
        self.options['n_train_trials'] = options.pop('n_train_trials',
                                                     stg.N_TRAIN_TRIALS)

        # Configure optuna logging verbosity
        # (INFO is optuna's default; WARNING would actually silence it)
        optuna.logging.set_verbosity(optuna.logging.INFO)

        # Define device
        self.use_gpu = self.torch.cuda.is_available()
        if self.use_gpu:
            self.device = self.torch.device("cuda:0")
            stg.logger.info("Using CUDA GPU %s with Pytorch" %
                            self.torch.cuda.get_device_name(self.device))
        else:
            self.device = self.torch.device("cpu")
            stg.logger.info("Using CPU with Pytorch")
Code example #6
File: optimizer.py Project: laf070810/mlopt
    def save_training_data(self, file_name, delete_existing=False):
        """
        Save training data to file.


        Avoids the need to recompute data.

        Parameters
        ----------
        file_name : string
            File name of the compressed optimizer.
        delete_existing : bool, optional
            Delete existing file with the same name?
            Defaults to False.
        """
        # Check if file already exists
        if os.path.isfile(file_name):
            if not delete_existing:
                p = None
                while p not in ['y', 'Y', 'n', 'N', '']:
                    p = input("File %s already exists. " % file_name +
                              "Would you like to delete it? [y/N] ")
                if p in ['y', 'Y']:
                    os.remove(file_name)
                else:
                    return
            else:
                os.remove(file_name)

        if not self.samples_present():
            e.value_error("You need to get the strategies " +
                          "from the data first by training the model.")

        # Save to file
        with open(file_name, 'wb') as data:
            data_dict = {
                'X_train': self.X_train,
                'y_train': self.y_train,
                'obj_train': self.obj_train,
                '_problem': self._problem,
                'encoding': self.encoding
            }

            # if hasattr(self, '_solver_cache'):
            #     data_dict['_solver_cache'] = self._solver_cache

            # Store strategy filter
            if hasattr(self, '_filter'):
                data_dict['_filter'] = self._filter

            pkl.dump(data_dict, data)
Code example #7
    @classmethod
    def from_file(cls, file_name):
        """
        Create optimizer from a specific compressed tar.gz file.

        Parameters
        ----------
        file_name : string
            File name of the exported optimizer.
        """

        # Add .tar.gz if the file has no extension
        if not file_name.endswith('.tar.gz'):
            file_name += ".tar.gz"

        # Check if file exists
        if not os.path.isfile(file_name):
            e.value_error("File %s does not exist." % file_name)

        # Extract file to temporary directory and read it
        with tempfile.TemporaryDirectory() as tmpdir:
            with tarfile.open(file_name) as tar:
                tar.extractall(path=tmpdir)

            # Load optimizer
            optimizer_file_name = os.path.join(tmpdir, "optimizer.pkl")
            if not os.path.isfile(optimizer_file_name):
                e.value_error("Optimizer pkl file does not exist.")
            with open(optimizer_file_name, "rb") as f:
                optimizer_dict = pkl.load(f)

            name = optimizer_dict.get('name', 'problem')

            # Create optimizer using loaded dict
            problem = optimizer_dict['_problem'].cvxpy_problem
            optimizer = cls(problem, name=name)

            # Assign strategies encoding
            optimizer.encoding = optimizer_dict['encoding']
            optimizer._sampler = optimizer_dict.get('_sampler', None)

            # Load learner
            learner_name = optimizer_dict['learner_name']
            learner_options = optimizer_dict['learner_options']
            learner_best_params = optimizer_dict['learner_best_params']
            optimizer._learner = \
                LEARNER_MAP[learner_name](n_input=optimizer.n_parameters,
                                          n_classes=len(optimizer.encoding),
                                          **learner_options)
            optimizer._learner.best_params = learner_best_params
            optimizer._learner.load(os.path.join(tmpdir, "learner"))

        return optimizer
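
Together with save in code example #3, from_file gives a save/load round trip. Below is a hedged usage sketch under a few assumptions: the class is exposed as mlopt.Optimizer, the constructor accepts a CVXPY problem (as the cls(problem, name=name) call above suggests), and training data is passed as a pandas DataFrame with one column per named parameter. Treat the exact data layout as an assumption rather than documented API.

# Usage sketch (assumed API surface; see the caveats above).
import numpy as np
import pandas as pd
import cvxpy as cp
import mlopt

# Tiny parametric QP: the parameter theta shifts the target of x
x = cp.Variable(2)
theta = cp.Parameter(2, name='theta')
problem = cp.Problem(cp.Minimize(cp.sum_squares(x - theta)),
                     [x >= 0, cp.sum(x) <= 1])

m = mlopt.Optimizer(problem)
X_train = pd.DataFrame({'theta': [np.random.rand(2) for _ in range(100)]})
m.train(X=X_train)

m.save("toy_optimizer", delete_existing=True)            # -> toy_optimizer.tar.gz
m_loaded = mlopt.Optimizer.from_file("toy_optimizer")    # restores encoding + learner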
Code example #8
File: strategy.py Project: jiaodaxiaozi/mlopt
def assign_to_unique_strategy(strategy, unique_strategies):
    y = next((index
              for (index, s) in enumerate(unique_strategies) if strategy == s),
             -1)
    #  y = -1
    #  n_unique_strategies = len(unique_strategies)
    #  for j in range(n_unique_strategies):
    #      if unique_strategies[j] == strategy:
    #          y = j
    #          break
    if y == -1:
        e.value_error("Strategy not found")
    return y
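
assign_to_unique_strategy relies only on equality comparison between strategies, so its lookup logic is easy to exercise with simple stand-ins. A self-contained sketch of the same logic, raising ValueError directly instead of going through mlopt's e.value_error helper:

def assign_to_unique(strategy, unique_strategies):
    # Index of the first strategy that compares equal, or -1 if none does
    y = next((index for (index, s) in enumerate(unique_strategies)
              if strategy == s), -1)
    if y == -1:
        raise ValueError("Strategy not found")
    return y


unique = ["strategy_a", "strategy_b", "strategy_c"]   # stand-ins for Strategy objects
print(assign_to_unique("strategy_b", unique))         # 1
# assign_to_unique("strategy_x", unique)              # would raise ValueError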
Code example #9
    def __init__(self,
                 cvxpy_problem,
                 solver=stg.DEFAULT_SOLVER,
                 verbose=False,
                 **solver_options):
        """
        Initialize optimization problem.


        Parameters
        ----------
        problem : cvxpy.Problem
            CVXPY problem.
        solver : str, optional
            Solver to solve internal problem. Defaults to DEFAULT_SOLVER.
        solver_options : dict, optional
            A dict of options for the internal solver.
        """
        # Assign solver
        self.solver = solver
        self.verbose = verbose

        # Define problem
        if not cvxpy_problem.is_dcp():
            e.value_error("CVXPY Problem is not DCP")

        if not cvxpy_problem.is_qp():
            e.value_error("MLOPT supports only MIQP-based problems " +
                          "LP/QP/MILP/MIQP")

        self.cvxpy_problem = cvxpy_problem

        # Canonicalize problem
        self._canonicalize()

        # Check if parameters in matrices (do it only once)
        self._parameters_in_matrices = self.check_parameters_in_matrices()

        self._x = None  # Raw solution

        # Add default solver options to solver options
        if solver == stg.DEFAULT_SOLVER:
            solver_options.update(stg.DEFAULT_SOLVER_OPTIONS)

        # Set options
        self.solver_options = solver_options
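
The two guards above (is_dcp and is_qp) are standard CVXPY problem checks, so they can be verified on a toy problem independently of mlopt. A quick sketch, assuming only cvxpy is installed:

import cvxpy as cp

x = cp.Variable(2)
prob = cp.Problem(cp.Minimize(cp.sum_squares(x)), [cp.sum(x) == 1])
print(prob.is_dcp())   # True: convex objective, affine constraints
print(prob.is_qp())    # True: quadratic objective, affine constraints (QP class)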
Code example #10
File: optimizer.py Project: laf070810/mlopt
    def train(self,
              X=None,
              sampling_fn=None,
              parallel=True,
              learner=stg.DEFAULT_LEARNER,
              filter_strategies=stg.FILTER_STRATEGIES,
              **learner_options):
        """
        Train optimizer using parameter X.

        This function needs one argument between data points X
        or sampling function sampling_fn. It will raise an error
        otherwise because there is no way to sample data.

        Parameters
        ----------
        X : pandas dataframe or numpy array, optional
            Data samples. Each row is a new sample points.
        sampling_fn : function, optional
            Function to sample data taking one argument being
            the number of data points to be sampled and returning
            a structure of the same type as X.
        parallel : bool
            Perform training in parallel.
        learner : str
            Learner to use. Learners are defined in :mod:`mlopt.settings`
        learner_options : dict, optional
            A dict of options for the learner.
        """

        # Get training samples
        self.get_samples(X,
                         sampling_fn,
                         parallel=parallel,
                         filter_strategies=filter_strategies)

        # Define learner
        if learner not in installed_learners():
            e.value_error("Learner specified not installed. "
                          "Available learners are: %s" % installed_learners())
        self._learner = LEARNER_MAP[learner](n_input=n_features(self.X_train),
                                             n_classes=len(self.encoding),
                                             **learner_options)

        # Train learner
        self._learner.train(pandas2array(self.X_train), self.y_train)
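
As the docstring notes, train also accepts a sampling function instead of precomputed data points. A short sketch of that path, reusing the optimizer m from the sketch after code example #7; the DataFrame layout is the same assumption as before:

import numpy as np
import pandas as pd


def sample_parameters(n):
    """Draw n parameter instances, one row per sample (placeholder distribution)."""
    return pd.DataFrame({'theta': [np.random.rand(2) for _ in range(n)]})


m.train(sampling_fn=sample_parameters)   # iterative sampling instead of a fixed X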
Code example #11
    def solve(self,
              problem_data=None,
              solver_data=None,
              strategy=None,
              cache=None):
        """Solve optimization problem.

        Kwargs:
            solver (string): Solver to use. Defaults to
            strategy (Strategy): Strategy to apply. Default none.
            cache (dict): KKT solver cache

        Returns: Dictionary of results

        """

        if problem_data is None:
            data, inverse_data, solving_chain = self._get_problem_data()
        else:
            data, inverse_data, solving_chain = problem_data

        if strategy is not None:
            if not strategy.accepts(data):
                e.value_error("Strategy incompatible for current problem")

        if strategy is not None:
            strategy.apply(data, inverse_data[-1])
            solving_chain = \
                SolvingChain(problem=self.cvxpy_problem,
                             reductions=solving_chain.reductions[:-1] +
                             [KKTSolver()])
            solver_options = {}
        else:
            solver_options = self.solver_options
            cache = self.cvxpy_problem._solver_cache

        raw_solution = solving_chain.solver.solve_via_data(
            data,
            warm_start=True,
            verbose=self.verbose,
            solver_opts=solver_options,
            solver_cache=cache)

        return self._parse_solution(raw_solution, data, self.cvxpy_problem,
                                    solving_chain, inverse_data)
Code example #12
    def load_training_data(self, file_name):
        """
        Load pickled training data from file name.

        Parameters
        ----------
        file_name : string
            File name of the data.
        """

        # Check if file exists
        if not os.path.isfile(file_name):
            e.value_error("File %s does not exist." % file_name)

        # Load optimizer
        with open(file_name, "rb") as f:
            data_dict = pkl.load(f)

        # Store data internally
        self.X_train = data_dict['X_train']
        self.y_train = data_dict['y_train']
        self.obj_train = data_dict['obj_train']
        self._problem = data_dict['_problem']
        self.encoding = data_dict['encoding']

        # Set n_train in learner
        if self._learner is not None:
            self._learner.n_train = len(self.y_train)

        stg.logger.info("Loaded %d points with %d strategies" %
                        (len(self.y_train), len(self.encoding)))

        if ('_solver_cache' in data_dict):
            self._solver_cache = data_dict['_solver_cache']

        # Full strategies backup after filtering
        if ('_filter' in data_dict):
            self._filter = data_dict['_filter']

        # Compute Good-Turing estimates
        self._sampler = Sampler(self._problem, n_samples=len(self.X_train))
        self._sampler.compute_good_turing(self.y_train)
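
Paired with save_training_data in code example #6, this allows the expensive strategy-sampling step to be done once and reused. A hedged round-trip sketch, continuing with the m, problem, and mlopt.Optimizer placeholders from the earlier sketches:

# Persist the sampled strategies once...
m.save_training_data("training_data.pkl", delete_existing=True)

# ...then reuse them in a fresh optimizer without re-solving the training set.
m2 = mlopt.Optimizer(problem)
m2.load_training_data("training_data.pkl")
m2.train()   # samples are already present, so only the learner is (re)trained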
Code example #13
File: xgboost.py Project: jiaodaxiaozi/mlopt
    def __init__(self, **options):
        """
        Initialize XGBoost Learner class.

        Parameters
        ----------
        options : dict
            Learner options as a dictionary.
        """
        if not XGBoost.is_installed():
            e.value_error("XGBoost not installed")

        import xgboost as xgb
        self.xgb = xgb

        self.name = stg.XGBOOST
        self.n_input = options.pop('n_input')
        self.n_classes = options.pop('n_classes')
        self.options = {}

        self.options['bounds'] = options.pop('bounds', xgbs.PARAMETER_BOUNDS)

        not_specified_bounds = \
            [x for x in xgbs.PARAMETER_BOUNDS.keys()
             if x not in self.options['bounds'].keys()]
        for p in not_specified_bounds:  # Assign remaining keys
            self.options['bounds'][p] = xgbs.PARAMETER_BOUNDS[p]

        # Pick minimum between n_best and n_classes
        self.options['n_best'] = min(options.pop('n_best', stg.N_BEST),
                                     self.n_classes)

        # Pick number of hyperopt_trials
        self.options['n_train_trials'] = options.pop('n_train_trials',
                                                     stg.N_TRAIN_TRIALS)

        # Configure optuna logging verbosity
        # (INFO is optuna's default; WARNING would actually silence it)
        optuna.logging.set_verbosity(optuna.logging.INFO)
Code example #14
    def choose_best(self, problem_data, labels, parallel=False,
                    batch_size=stg.JOBLIB_BATCH_SIZE, use_cache=True):
        """
        Choose best strategy between provided ones

        Parameters
        ----------
        labels : list
            Strategy labels to compare.
        parallel : bool, optional
            Perform `n_best` strategies evaluation in parallel.
            True by default.
        use_cache : bool, optional
            Use solver cache if available. True by default.

        Returns
        -------
        dict
            Results as a dictionary.
        """
        n_best = self._learner.options['n_best']

        # For each n_best classes get x, y, time and store the best one
        x = []
        time = []
        infeas = []
        cost = []

        strategies = [self.encoding[label] for label in labels]

        # Cache is a list of solver caches to pass
        cache = [None] * n_best
        if self._solver_cache and use_cache:
            cache = [self._solver_cache[label] for label in labels]

        n_jobs = u.get_n_processes(n_best) if parallel else 1

        results = Parallel(n_jobs=n_jobs, batch_size=batch_size)(
            delayed(self._problem.solve)(problem_data,
                                         strategy=strategies[j],
                                         cache=cache[j])
            for j in range(n_best))

        x = [r["x"] for r in results]
        time = [r["time"] for r in results]
        infeas = [r["infeasibility"] for r in results]
        cost = [r["cost"] for r in results]

        # Pick best class between k ones
        infeas = np.array(infeas)
        cost = np.array(cost)
        idx_filter = np.where(infeas <= stg.INFEAS_TOL)[0]
        if len(idx_filter) > 0:
            # Case 1: Feasible points
            # -> Get solution with best cost
            #    between feasible ones
            if self._problem.sense() == Minimize:
                idx_pick = idx_filter[np.argmin(cost[idx_filter])]
            elif self._problem.sense() == Maximize:
                idx_pick = idx_filter[np.argmax(cost[idx_filter])]
            else:
                e.value_error('Objective type not understood')
        else:
            # Case 2: No feasible points
            # -> Get solution with minimum infeasibility
            idx_pick = np.argmin(infeas)

        # Store values we are interested in
        result = {}
        result['x'] = x[idx_pick]
        result['time'] = np.sum(time)
        result['strategy'] = strategies[idx_pick]
        result['cost'] = cost[idx_pick]
        result['infeasibility'] = infeas[idx_pick]

        return result
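
The selection rule at the end is compact but worth isolating: among candidates whose infeasibility is within tolerance, pick the one with the best cost; if none is within tolerance, fall back to the least infeasible one. A small worked sketch for a minimization problem (the tolerance value is illustrative, standing in for stg.INFEAS_TOL):

import numpy as np

INFEAS_TOL = 1e-6
infeas = np.array([0.0, 3e-7, 0.2, 0.0])
cost = np.array([10.0, 7.5, 1.0, 9.0])

idx_filter = np.where(infeas <= INFEAS_TOL)[0]            # feasible: indices 0, 1, 3
if len(idx_filter) > 0:
    idx_pick = idx_filter[np.argmin(cost[idx_filter])]    # best cost among feasible
else:
    idx_pick = np.argmin(infeas)                          # least infeasible fallback

print(idx_pick)   # 1 -- index 2 has the lowest cost but is infeasible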
Code example #15
    def get_samples(self, X=None, sampling_fn=None,
                    parallel=True,
                    filter_strategies=stg.FILTER_STRATEGIES):
        """Get samples either from data or from sampling function"""

        # Assert we have data to train or already trained
        if X is None and sampling_fn is None and not self.samples_present():
            e.value_error("Not enough arguments to train the model")

        if X is not None and sampling_fn is not None:
            e.value_error("You can pass only one value between X "
                          "and sampling_fn")

        # Check if data is passed, otherwise train
        #  if (X is not None) and not self.samples_present():
        if X is not None:
            stg.logger.info("Use new data")
            self.X_train = X
            self.y_train = None
            self.encoding = None

            # Encode training strategies by solving
            # the problem for all the points
            results = self._problem.solve_parametric(X,
                                                     parallel=parallel,
                                                     message="Compute " +
                                                     "tight constraints " +
                                                     "for training set")

            stg.logger.info("Checking for infeasible points")
            not_feasible_points = {i: x for i, x in tqdm(enumerate(results))
                                   if np.isnan(x['x']).any()}
            if not_feasible_points:
                e.value_error("Infeasible points found. Number of infeasible "
                              "points %d" % len(not_feasible_points))
            stg.logger.info("No infeasible point found.")

            self.obj_train = [r['cost'] for r in results]
            train_strategies = [r['strategy'] for r in results]

            # Check if the problems are solvable
            #  for r in results:
            #      assert r['status'] in cps.SOLUTION_PRESENT, \
            #          "The training points must be feasible"

            # Encode strategies
            self.y_train, self.encoding = \
                encode_strategies(train_strategies)

            # Compute Good-Turing estimates
            self._sampler = Sampler(self._problem, n_samples=len(self.X_train))
            self._sampler.compute_good_turing(self.y_train)

            # Condense strategies
            if filter_strategies:
                self.filter_strategies(parallel=parallel)

        elif sampling_fn is not None and not self.samples_present():
            stg.logger.info("Use iterative sampling")
            # Create X_train, y_train and encoding from
            # sampling function
            self.sample(sampling_fn, parallel=parallel)

            # Condense strategies
            if filter_strategies:
                self.filter_strategies(parallel=parallel)

        # Add factorization caching if
        # 1. Problem is (MI)QP
        # 2. No solver cache exists yet
        # 3. Parameters do not enter the matrices
        if self._problem.is_qp() and \
                (self._solver_cache is None) and \
                not self._problem.parameters_in_matrices:
            self.cache_factors()
Code example #16
    def solver(self, s):
        """Set internal solver."""
        if s not in INSTALLED_SOLVERS:
            e.value_error('Solver %s not installed.' % s)
        self._solver = s
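
This setter appears without its surrounding property plumbing. A minimal sketch of how such a validated setter is usually wired up; the class name, the getter, and the INSTALLED_SOLVERS list below are placeholders, and the decorator placement is an assumption about the original class:

INSTALLED_SOLVERS = ['GUROBI', 'MOSEK']   # placeholder list


class Problem:
    @property
    def solver(self):
        """Internal solver."""
        return self._solver

    @solver.setter
    def solver(self, s):
        """Set internal solver."""
        if s not in INSTALLED_SOLVERS:
            raise ValueError('Solver %s not installed.' % s)
        self._solver = s


p = Problem()
p.solver = 'GUROBI'   # ok
# p.solver = 'ECOS'   # would raise ValueError: not in the placeholder list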
Code example #17
    def __init__(self,
                 **options):
        """
        Initialize OptimalTrees class.

        Parameters
        ----------
        options : dict
            Learner options as a dictionary.
        """
        if not OptimalTree.is_installed():
            e.value_error("Interpretable AI not installed")

        # Import julia and IAI module
        from interpretableai import iai
        self.iai = iai
        from julia import Distributed
        self.nprocs = Distributed.nprocs

        # Define name
        self.name = stg.OPTIMAL_TREE

        # Assign settings
        self.n_input = options.pop('n_input')
        self.n_classes = options.pop('n_classes')
        self.options = {}
        self.options['hyperplanes'] = options.pop('hyperplanes', False)
        #  self.options['fast_num_support_restarts'] = \
        #      options.pop('fast_num_support_restarts', [20])
        self.options['parallel'] = options.pop('parallel_trees', True)
        self.options['cp'] = options.pop('cp', None)
        self.options['max_depth'] = options.pop('max_depth',
                octstg.DEFAULT_TRAINING_PARAMS['max_depth'])
        self.options['minbucket'] = options.pop('minbucket',
                octstg.DEFAULT_TRAINING_PARAMS['minbucket'])
        # Pick minimum between n_best and n_classes
        self.options['n_best'] = min(options.pop('n_best', stg.N_BEST),
                                     self.n_classes)
        self.options['save_svg'] = options.pop('save_svg', False)

        # Get fraction between training and validation
        self.options['frac_train'] = options.pop('frac_train', stg.FRAC_TRAIN)

        # Load Julia
        n_cpus = get_n_processes()

        n_cur_procs = self.nprocs()
        if n_cur_procs < n_cpus and self.options['parallel']:
            # Add processors to match number of cpus
            Distributed.addprocs(n_cpus - n_cur_procs)

        # Assign optimaltrees options
        self.optimaltrees_options = {'random_seed': 1}
        self.optimaltrees_options['max_depth'] = self.options['max_depth']
        self.optimaltrees_options['minbucket'] = self.options['minbucket']
        if self.options['hyperplanes']:
            self.optimaltrees_options['hyperplane_config'] = \
                {'sparsity': 'all'}

        if self.options['cp']:
            self.optimaltrees_options['cp'] = self.options['cp']