import sys
import time

import cvxpy
import numpy as np

# Fitted_Model is assumed to be defined elsewhere in this project.


class Gradient_Descent_Algo:
    def __init__(self, data, settings=None):
        self.data = data
        self.settings = settings

        # Subclasses implement these hooks: they set the descent settings
        # (num_iters, step sizes, thresholds), build self.problem_wrapper,
        # and configure the lambda penalties (lambda_mins, use_boundary, ...).
        self._create_descent_settings()
        self._create_problem_wrapper()
        self._create_lambda_configs()

    def run(self, initial_lambda_set, debug=True, log_file=None):
        self.log_file = log_file
        start_time = time.time()

        self.fmodel = Fitted_Model(initial_lambda_set[0].size)
        best_cost = None
        best_initial_lambdas = None
        for initial_lambdas in initial_lambda_set:
            self.log("%s: initial_lambdas %s" %
                     (self.method_label, initial_lambdas))
            self._run_lambdas(initial_lambdas, debug=debug)
            if best_cost is None or best_cost > self.fmodel.best_cost:
                best_cost = self.fmodel.best_cost
                best_initial_lambdas = initial_lambdas
            self.log("%s: best start lambda %s" %
                     (self.method_label, best_initial_lambdas))

        runtime = time.time() - start_time
        self.log("%s: runtime %s" % (self.method_label, runtime))
        self.fmodel.set_runtime(runtime)

    def _run_lambdas(self, initial_lambdas, debug=True):
        start_history_idx = len(self.fmodel.cost_history)
        # warm up the problem
        self._solve_wrapper(initial_lambdas, quick_run=True)
        # do a real run now
        model_params = self._solve_wrapper(initial_lambdas, quick_run=False)

        # Check that no model params are None
        if self._any_model_params_none(model_params):
            self.log("ERROR: No model params fit for initial lambda values")
            self.fmodel.update(initial_lambdas, None, None)
            return

        current_cost = self.get_validate_cost(model_params)
        self.fmodel.update(initial_lambdas, model_params, current_cost)
        self.log("self.fmodel.current_cost %f" % self.fmodel.current_cost)
        self._print_model_details()
        step_size = self.step_size_init
        for i in range(0, self.num_iters):
            lambda_derivatives = self._get_lambda_derivatives_wrapper()

            potential_lambdas, potential_model_params, potential_cost = self._run_potential_lambdas(
                step_size, lambda_derivatives, quick_run=True)

            self.log(
                "potential_lambdas %s, potential_cost %s, curr cost %s" %
                (potential_lambdas, potential_cost, self.fmodel.current_cost))
            while self._check_should_backtrack(
                    potential_cost, step_size,
                    lambda_derivatives) and step_size > self.step_size_min:
                if potential_cost is None:  # If can't find a solution, shrink faster
                    step_size *= self.shrink_factor**3
                else:
                    step_size *= self.shrink_factor
                potential_lambdas, potential_model_params, potential_cost = self._run_potential_lambdas(
                    step_size, lambda_derivatives, quick_run=True)
                if potential_cost is not None:
                    self.log(
                        "(shrinking) potential_lambdas %s, cost %f, step, %f" %
                        (potential_lambdas, potential_cost, step_size))
                else:
                    self.log("(shrinking) potential_lambdas None!")

            if potential_cost is None or self.fmodel.current_cost < potential_cost:
                # Either the solver failed even at the minimum step size, or
                # the cost went up: stop descending.
                self.log("COST IS INCREASING! %s" % potential_cost)
                break
            else:
                potential_lambdas, potential_model_params, potential_cost = self._run_potential_lambdas(
                    step_size, lambda_derivatives, quick_run=False)

                self.fmodel.update(potential_lambdas, potential_model_params,
                                   potential_cost)

                self.log("%s iter: %d step_size %f" %
                         (self.method_label, i, step_size))
                self.log("current model %s" % self.fmodel)
                self.log("cost_history %s" %
                         self.fmodel.cost_history[start_history_idx:])
                self.log("current test cost %s" %
                         self.get_test_cost(self.fmodel.best_model_params))

                self._print_model_details()

                if self.fmodel.get_cost_diff() < self.decr_enough_threshold:
                    self.log("decrease amount too small %f" %
                             self.fmodel.get_cost_diff())
                    break

            if step_size < self.step_size_min:
                self.log("STEP SIZE TOO SMALL %f" % step_size)
                break

            sys.stdout.flush()

        self.log("TOTAL ITERS %d" % i)
        self.log("full_cost_hist: %s" %
                 self.fmodel.cost_history[start_history_idx:])
        self.log("current_test_cost: %s" %
                 self.get_test_cost(self.fmodel.best_model_params))

    def get_test_cost(self, model):
        return None

    def _print_model_details(self):
        # fill in if you want to print more things
        return

    def _check_should_backtrack(self, potential_cost, step_size,
                                lambda_derivatives):
        # Armijo-style backtracking test: keep shrinking the step while
        # potential_cost > current_cost - alpha * step_size * ||grad||^2,
        # falling back to requiring a plain decrease if that threshold
        # would be negative.
        if potential_cost is None:
            return True
        backtrack_thres_raw = self.fmodel.current_cost - self.backtrack_alpha * step_size * np.linalg.norm(
            lambda_derivatives)**2
        backtrack_thres = self.fmodel.current_cost if backtrack_thres_raw < 0 else backtrack_thres_raw
        return potential_cost > backtrack_thres

    def _run_potential_lambdas(self,
                               step_size,
                               lambda_derivatives,
                               quick_run=False):
        potential_lambdas = self._get_updated_lambdas(step_size,
                                                      lambda_derivatives)
        try:
            potential_model_params = self._solve_wrapper(potential_lambdas,
                                                         quick_run=quick_run)
        except cvxpy.error.SolverError:
            potential_model_params = None

        if self._any_model_params_none(potential_model_params):
            potential_cost = None
        else:
            potential_cost = self.get_validate_cost(potential_model_params)
        return potential_lambdas, potential_model_params, potential_cost

    def _solve_wrapper(self, lambdas, quick_run):
        start_solve_time = time.time()
        model_params = self.problem_wrapper.solve(lambdas, quick_run=quick_run)
        if quick_run is False:
            self.fmodel.incr_num_solves()
        self.log("solve runtime %f" % (time.time() - start_solve_time))
        return model_params

    def _get_lambda_derivatives_wrapper(self):
        start_solve_time = time.time()
        lambda_derivatives = self._get_lambda_derivatives()
        self.log("lambda_derivatives runtime %f" %
                 (time.time() - start_solve_time))
        self.log("lambda_derivatives %s" % lambda_derivatives)
        return lambda_derivatives

    def _get_updated_lambdas(self, method_step_size, lambda_derivatives):
        current_lambdas = self.fmodel.current_lambdas
        new_step_size = method_step_size
        if self.use_boundary:
            # If a full gradient step would push a lambda below its minimum,
            # shrink the step so it only covers boundary_factor of the
            # remaining distance to that minimum.
            potential_lambdas = current_lambdas - method_step_size * lambda_derivatives

            for idx in range(0, current_lambdas.size):
                if current_lambdas[idx] > self.lambda_mins[
                        idx] and potential_lambdas[idx] < self.lambda_mins[idx]:
                    smaller_step_size = self.boundary_factor * (
                        current_lambdas[idx] -
                        self.lambda_mins[idx]) / lambda_derivatives[idx]
                    new_step_size = min(new_step_size, smaller_step_size)
                    self.log("USING THE BOUNDARY %f" % new_step_size)

        # Final safeguard: clip the updated lambdas at their minimum values.
        return np.maximum(current_lambdas - new_step_size * lambda_derivatives,
                          self.lambda_mins)

    def log(self, log_str):
        if self.log_file is None:
            print(log_str)
        else:
            self.log_file.write("%s\n" % log_str)
            self.log_file.flush()

    @staticmethod
    def _any_model_params_none(model_params):
        if model_params is None:
            return True
        return any(m is None for m in model_params)
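# --- Usage sketch (illustrative, not part of the original class) -----------
# A hypothetical subclass fills in the abstract hooks and then calls run()
# with a list of starting lambda vectors. My_Descent_Algo, My_Problem_Wrapper
# and all settings values below are assumptions for illustration only.
class My_Descent_Algo(Gradient_Descent_Algo):
    method_label = "My_Descent_Algo"

    def _create_descent_settings(self):
        self.num_iters = 20
        self.step_size_init = 1.0
        self.step_size_min = 1e-6
        self.shrink_factor = 0.1
        self.decr_enough_threshold = 1e-4
        self.backtrack_alpha = 0.001
        self.use_boundary = False
        self.boundary_factor = 0.8

    def _create_problem_wrapper(self):
        self.problem_wrapper = My_Problem_Wrapper(self.data)  # hypothetical

    def _create_lambda_configs(self):
        self.lambda_mins = np.full(2, 1e-6)

    def _get_lambda_derivatives(self):
        raise NotImplementedError  # gradient of validation cost w.r.t. lambdas

    def get_validate_cost(self, model_params):
        raise NotImplementedError  # validation loss of the fitted parameters


# algo = My_Descent_Algo(my_data)  # my_data: whatever the problem wrapper expects
# algo.run([np.array([1.0, 1.0]), np.array([0.1, 0.1])], debug=False)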
Example #2
import sys
import time

import cvxpy
import numpy as np

# Fitted_Model is assumed to be defined elsewhere in this project.


class Gradient_Descent_Algo:
    def __init__(self, data, settings=None):
        self.data = data
        self.settings = settings

        self._create_descent_settings()
        self._create_problem_wrapper()
        self._create_lambda_configs()

    def run(self, initial_lambda_set, debug=True, log_file=None):
        self.log_file = log_file
        start_time = time.time()

        self.fmodel = Fitted_Model(initial_lambda_set[0].size)
        best_cost = None
        best_initial_lambdas = None
        for initial_lambdas in initial_lambda_set:
            self.log("%s: initial_lambdas %s" %
                     (self.method_label, initial_lambdas))
            self._run_lambdas(
                initial_lambdas, debug=debug
            )  #, max_cost_at_iter=best_cost, check_iter=self.check_iter)
            if best_cost is None or best_cost > self.fmodel.best_cost:
                best_cost = self.fmodel.best_cost
                best_initial_lambdas = initial_lambdas
            self.log("%s: best start lambda %s" %
                     (self.method_label, best_initial_lambdas))

        runtime = time.time() - start_time
        self.log("%s: runtime %s" % (self.method_label, runtime))
        self.fmodel.set_runtime(runtime)
        # self.fmodel.set_num_solves(len(self.fmodel.cost_history))

    def _check_optimality_conditions(self, model_params, lambdas, thres=1):
        # Override in subclasses to verify that the fitted model parameters
        # satisfy the optimality conditions for the given lambdas (used in
        # debug runs).
        return

    def _run_lambdas(self,
                     initial_lambdas,
                     debug=True):  #, max_cost_at_iter=None, check_iter=None):
        start_history_idx = len(self.fmodel.cost_history)
        # warm up the problem
        self._solve_wrapper(initial_lambdas, quick_run=True)
        # do a real run now
        model_params = self._solve_wrapper(initial_lambdas, quick_run=False)
        if debug:
            self._check_optimality_conditions(model_params, initial_lambdas)

        # Check that no model params are None
        if self._any_model_params_none(model_params):
            self.log("ERROR: No model params fit for initial lambda values")
            self.fmodel.update(initial_lambdas, None, None)
            return

        current_cost = self.get_validate_cost(model_params)
        self.fmodel.update(initial_lambdas, model_params, current_cost)
        self.log("self.fmodel.current_cost %f" % self.fmodel.current_cost)
        self._print_model_details()
        step_size = self.step_size_init
        for i in range(0, self.num_iters):
            lambda_derivatives = self._get_lambda_derivatives_wrapper()

            if debug:
                self._double_check_derivative(lambda_derivatives)
                1 / 0  # intentional crash: halts the run right after the derivative check

            potential_lambdas, potential_model_params, potential_cost = self._run_potential_lambdas(
                step_size, lambda_derivatives, quick_run=True)

            self.log(
                "potential_lambdas %s, potential_cost %s, curr cost %s" %
                (potential_lambdas, potential_cost, self.fmodel.current_cost))
            while self._check_should_backtrack(
                    potential_cost, step_size,
                    lambda_derivatives) and step_size > self.step_size_min:
                if potential_cost is None:  # Then cvxpy couldn't find a solution. Shrink faster
                    step_size *= self.shrink_factor**3
                else:
                    step_size *= self.shrink_factor
                potential_lambdas, potential_model_params, potential_cost = self._run_potential_lambdas(
                    step_size, lambda_derivatives, quick_run=True)
                if potential_cost is not None:
                    self.log(
                        "(shrinking) potential_lambdas %s, cost %f, step, %f" %
                        (potential_lambdas, potential_cost, step_size))
                else:
                    self.log("(shrinking) potential_lambdas None!")

            if potential_cost is None or self.fmodel.current_cost < potential_cost:
                # Either the solver failed even at the minimum step size, or
                # the cost went up: stop descending.
                self.log("COST IS INCREASING! %s" % potential_cost)
                break
            else:
                # Note to self: it is possible that solving to a lower accuracy results in lower validation loss
                # but higher accuracy gives higher validation loss.
                potential_lambdas, potential_model_params, potential_cost = self._run_potential_lambdas(
                    step_size, lambda_derivatives, quick_run=False)
                if debug:
                    self._check_optimality_conditions(potential_model_params,
                                                      potential_lambdas)

                self.fmodel.update(potential_lambdas, potential_model_params,
                                   potential_cost)

                self.log("%s iter: %d step_size %f" %
                         (self.method_label, i, step_size))
                self.log("current model %s" % self.fmodel)
                self.log("cost_history %s" %
                         self.fmodel.cost_history[start_history_idx:])
                self.log("current test cost %s" %
                         self.get_test_cost(self.fmodel.best_model_params))

                self._print_model_details()

                if self.fmodel.get_cost_diff() < self.decr_enough_threshold:
                    self.log("decrease amount too small %f" %
                             self.fmodel.get_cost_diff())
                    break

            if step_size < self.step_size_min:
                self.log("STEP SIZE TOO SMALL %f" % step_size)
                break

            # if check_iter is not None and max_cost_at_iter is not None and check_iter == i and max_cost_at_iter < potential_cost:
            #     self.log("Cost %f higher than threshold %f" % (potential_cost, max_cost_at_iter))
            #     break

            sys.stdout.flush()

        self.log("TOTAL ITERS %d" % i)
        self.log("full_cost_hist: %s" %
                 self.fmodel.cost_history[start_history_idx:])
        self.log("current_test_cost: %s" %
                 self.get_test_cost(self.fmodel.best_model_params))

    def get_test_cost(self, model):
        return None

    def _print_model_details(self):
        # fill in if you want to print more things
        return

    def _check_should_backtrack(self, potential_cost, step_size,
                                lambda_derivatives):
        if potential_cost is None:
            return True
        backtrack_thres_raw = self.fmodel.current_cost - self.backtrack_alpha * step_size * np.linalg.norm(
            lambda_derivatives)**2
        backtrack_thres = self.fmodel.current_cost if backtrack_thres_raw < 0 else backtrack_thres_raw
        return potential_cost > backtrack_thres

    def _run_potential_lambdas(self,
                               step_size,
                               lambda_derivatives,
                               quick_run=False):
        potential_lambdas = self._get_updated_lambdas(step_size,
                                                      lambda_derivatives)
        try:
            potential_model_params = self._solve_wrapper(potential_lambdas,
                                                         quick_run=quick_run)
        except cvxpy.error.SolverError:
            potential_model_params = None

        if self._any_model_params_none(potential_model_params):
            potential_cost = None
        else:
            potential_cost = self.get_validate_cost(potential_model_params)
        return potential_lambdas, potential_model_params, potential_cost

    def _solve_wrapper(self, lambdas, quick_run):
        start_solve_time = time.time()
        model_params = self.problem_wrapper.solve(lambdas, quick_run=quick_run)
        if quick_run is False:
            self.fmodel.incr_num_solves()
        self.log("CVX runtime %f" % (time.time() - start_solve_time))
        return model_params

    def _get_lambda_derivatives_wrapper(self):
        start_solve_time = time.time()
        lambda_derivatives = self._get_lambda_derivatives()
        self.log("lambda_derivatives runtime %f" %
                 (time.time() - start_solve_time))
        self.log("lambda_derivatives %s" % lambda_derivatives)
        return lambda_derivatives

    def _get_updated_lambdas(self, method_step_size, lambda_derivatives):
        current_lambdas = self.fmodel.current_lambdas
        new_step_size = method_step_size
        if self.use_boundary:
            potential_lambdas = current_lambdas - method_step_size * lambda_derivatives

            for idx in range(0, current_lambdas.size):
                if current_lambdas[idx] > self.lambda_mins[
                        idx] and potential_lambdas[idx] < self.lambda_mins[idx]:
                    smaller_step_size = self.boundary_factor * (
                        current_lambdas[idx] -
                        self.lambda_mins[idx]) / lambda_derivatives[idx]
                    new_step_size = min(new_step_size, smaller_step_size)
                    self.log("USING THE BOUNDARY %f" % new_step_size)

        return np.maximum(current_lambdas - new_step_size * lambda_derivatives,
                          self.lambda_mins)

    def _double_check_derivative_indepth(self, i, model1, model2, model0, eps):
        # Override this function for more detailed derivative checking,
        # e.g. to also verify the derivatives of the model parameters
        # with respect to the lambda penalty parameters.
        return

    def _double_check_derivative(self,
                                 calculated_derivative,
                                 accept_diff=1e-1,
                                 epsilon=1e-6):
        # Sanity-check the analytic gradient against a symmetric finite
        # difference of the validation error.
        num_lambdas = len(self.fmodel.current_lambdas)
        print("self.fmodel.current_lambdas", self.fmodel.current_lambdas)
        numerical_derivs = []
        for i in range(num_lambdas):
            print("===========CHECK I= %d ===============" % i)
            # Don't let the finite-difference perturbation get too large
            # relative to an already-small lambda value.
            eps = min(epsilon, self.fmodel.current_lambdas[i] / 100)
            reg1 = np.copy(self.fmodel.current_lambdas)
            reg1[i] += eps
            model1 = self.problem_wrapper.solve(np.array(reg1),
                                                quick_run=False,
                                                warm_start=False)

            reg2 = np.copy(self.fmodel.current_lambdas)
            reg2[i] -= eps
            model2 = self.problem_wrapper.solve(np.array(reg2),
                                                quick_run=False,
                                                warm_start=False)

            # Central difference of the validation error
            error1 = self.get_validate_cost(model1)
            error2 = self.get_validate_cost(model2)
            i_deriv = (error1 - error2) / (eps * 2)
            numerical_derivs.append(i_deriv)

            print("********** calculated_derivative[i]",
                  calculated_derivative[i])
            print("********** i_deriv", i_deriv)
            print("np.abs(calculated_derivative[i] - i_deriv)",
                  np.abs(calculated_derivative[i] - i_deriv))
            relative_ok = np.abs(
                (calculated_derivative[i] - i_deriv) / i_deriv) < accept_diff
            absolute_ok = np.abs(calculated_derivative[i] -
                                 i_deriv) < accept_diff

            # model0 = self.problem_wrapper.solve(self.fmodel.current_lambdas, quick_run=False, warm_start=False)
            # error0 = self.get_validate_cost(model0)
            # self._double_check_derivative_indepth(i, model1, model2, model0, eps)
            # self._double_check_derivative_indepth(i, model1, model2, None, eps)
            # assert(relative_ok or absolute_ok)
        print("calculated_derivative", calculated_derivative)
        print("numerical_derivs", numerical_derivs)

    def log(self, log_str):
        if self.log_file is None:
            print(log_str)
        else:
            self.log_file.write("%s\n" % log_str)
            self.log_file.flush()

    @staticmethod
    def _any_model_params_none(model_params):
        if model_params is None:
            return True
        return any(m is None for m in model_params)
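
# --- Finite-difference check, standalone sketch (illustrative only) --------
# _double_check_derivative above compares each analytic gradient entry with
# a symmetric finite difference of the validation error. The self-contained
# helper below (central_difference_grad and the toy cost are hypothetical
# names, not part of the original code) shows the same
# (error1 - error2) / (2 * eps) computation.
import numpy as np


def central_difference_grad(cost, lambdas, epsilon=1e-6):
    grad = []
    for i in range(lambdas.size):
        # Shrink the perturbation when the lambda value is already small,
        # as the method above does.
        eps = min(epsilon, lambdas[i] / 100)
        up, down = np.copy(lambdas), np.copy(lambdas)
        up[i] += eps
        down[i] -= eps
        grad.append((cost(up) - cost(down)) / (2 * eps))
    return np.array(grad)


# Toy check: for cost(l) = ||l||^2 the true gradient is 2 * l.
lam = np.array([0.5, 2.0])
print(central_difference_grad(lambda l: float(l @ l), lam))  # ~ [1.0, 4.0]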