def fit_l1_slsqp(f, score, start_params, args, kwargs, disp=False, maxiter=1000, callback=None, retall=False, full_output=False, hess=None): """ Solve the l1 regularized problem using scipy.optimize.fmin_slsqp(). Specifically: We convert the convex but non-smooth problem .. math:: \\min_\\beta f(\\beta) + \\sum_k\\alpha_k |\\beta_k| via the transformation to the smooth, convex, constrained problem in twice as many variables (adding the "added variables" :math:`u_k`) .. math:: \\min_{\\beta,u} f(\\beta) + \\sum_k\\alpha_k u_k, subject to .. math:: -u_k \\leq \\beta_k \\leq u_k. Parameters ---------- All the usual parameters from LikelhoodModel.fit alpha : non-negative scalar or numpy array (same size as parameters) The weight multiplying the l1 penalty term trim_mode : 'auto, 'size', or 'off' If not 'off', trim (set to zero) parameters that would have been zero if the solver reached the theoretical minimum. If 'auto', trim params using the Theory above. If 'size', trim params if they have very small absolute value size_trim_tol : float or 'auto' (default = 'auto') For use when trim_mode === 'size' auto_trim_tol : float For sue when trim_mode == 'auto'. Use qc_tol : float Print warning and don't allow auto trim when (ii) in "Theory" (above) is violated by this much. qc_verbose : Boolean If true, print out a full QC report upon failure acc : float (default 1e-6) Requested accuracy as used by slsqp """ start_params = np.array(start_params).ravel('F') ### Extract values # k_params is total number of covariates, # possibly including a leading constant. k_params = len(start_params) # The start point x0 = np.append(start_params, np.fabs(start_params)) # alpha is the regularization parameter alpha = np.array(kwargs['alpha_rescaled']).ravel('F') # Make sure it's a vector alpha = alpha * np.ones(k_params) assert alpha.min() >= 0 # Convert display parameters to scipy.optimize form disp_slsqp = _get_disp_slsqp(disp, retall) # Set/retrieve the desired accuracy acc = kwargs.setdefault('acc', 1e-10) ### Wrap up for use in fmin_slsqp func = lambda x_full: _objective_func(f, x_full, k_params, alpha, *args) f_ieqcons_wrap = lambda x_full: _f_ieqcons(x_full, k_params) fprime_wrap = lambda x_full: _fprime(score, x_full, k_params, alpha) fprime_ieqcons_wrap = lambda x_full: _fprime_ieqcons(x_full, k_params) ### Call the solver results = fmin_slsqp(func, x0, f_ieqcons=f_ieqcons_wrap, fprime=fprime_wrap, acc=acc, iter=maxiter, disp=disp_slsqp, full_output=full_output, fprime_ieqcons=fprime_ieqcons_wrap) params = np.asarray(results[0][:k_params]) ### Post-process # QC qc_tol = kwargs['qc_tol'] qc_verbose = kwargs['qc_verbose'] passed = l1_solvers_common.qc_results(params, alpha, score, qc_tol, qc_verbose) # Possibly trim trim_mode = kwargs['trim_mode'] size_trim_tol = kwargs['size_trim_tol'] auto_trim_tol = kwargs['auto_trim_tol'] params, trimmed = l1_solvers_common.do_trim_params(params, k_params, alpha, score, passed, trim_mode, size_trim_tol, auto_trim_tol) ### Pack up return values for statsmodels optimizers # TODO These retvals are returned as mle_retvals...but the fit wasn't ML. # This could be confusing someday. if full_output: x_full, fx, its, imode, smode = results fopt = func(np.asarray(x_full)) converged = (imode == 0) warnflag = str(imode) + ' ' + smode iterations = its gopt = float('nan') # Objective is non-differentiable hopt = float('nan') retvals = { 'fopt': fopt, 'converged': converged, 'iterations': iterations, 'gopt': gopt, 'hopt': hopt, 'trimmed': trimmed, 'warnflag': warnflag } ### Return if full_output: return params, retvals else: return params
def fit_l1_slsqp( f, score, start_params, args, kwargs, disp=False, maxiter=1000, callback=None, retall=False, full_output=False, hess=None): """ Solve the l1 regularized problem using scipy.optimize.fmin_slsqp(). Specifically: We convert the convex but non-smooth problem .. math:: \\min_\\beta f(\\beta) + \\sum_k\\alpha_k |\\beta_k| via the transformation to the smooth, convex, constrained problem in twice as many variables (adding the "added variables" :math:`u_k`) .. math:: \\min_{\\beta,u} f(\\beta) + \\sum_k\\alpha_k u_k, subject to .. math:: -u_k \\leq \\beta_k \\leq u_k. Parameters ---------- All the usual parameters from LikelhoodModel.fit alpha : non-negative scalar or numpy array (same size as parameters) The weight multiplying the l1 penalty term trim_mode : 'auto, 'size', or 'off' If not 'off', trim (set to zero) parameters that would have been zero if the solver reached the theoretical minimum. If 'auto', trim params using the Theory above. If 'size', trim params if they have very small absolute value size_trim_tol : float or 'auto' (default = 'auto') For use when trim_mode === 'size' auto_trim_tol : float For sue when trim_mode == 'auto'. Use qc_tol : float Print warning and don't allow auto trim when (ii) in "Theory" (above) is violated by this much. qc_verbose : Boolean If true, print out a full QC report upon failure acc : float (default 1e-6) Requested accuracy as used by slsqp """ start_params = np.array(start_params).ravel('F') ### Extract values # k_params is total number of covariates, # possibly including a leading constant. k_params = len(start_params) # The start point x0 = np.append(start_params, np.fabs(start_params)) # alpha is the regularization parameter alpha = np.array(kwargs['alpha_rescaled']).ravel('F') # Make sure it's a vector alpha = alpha * np.ones(k_params) assert alpha.min() >= 0 # Convert display parameters to scipy.optimize form disp_slsqp = _get_disp_slsqp(disp, retall) # Set/retrieve the desired accuracy acc = kwargs.setdefault('acc', 1e-10) ### Wrap up for use in fmin_slsqp func = lambda x_full: _objective_func(f, x_full, k_params, alpha, *args) f_ieqcons_wrap = lambda x_full: _f_ieqcons(x_full, k_params) fprime_wrap = lambda x_full: _fprime(score, x_full, k_params, alpha) fprime_ieqcons_wrap = lambda x_full: _fprime_ieqcons(x_full, k_params) ### Call the solver results = fmin_slsqp( func, x0, f_ieqcons=f_ieqcons_wrap, fprime=fprime_wrap, acc=acc, iter=maxiter, disp=disp_slsqp, full_output=full_output, fprime_ieqcons=fprime_ieqcons_wrap) params = np.asarray(results[0][:k_params]) ### Post-process # QC qc_tol = kwargs['qc_tol'] qc_verbose = kwargs['qc_verbose'] passed = l1_solvers_common.qc_results( params, alpha, score, qc_tol, qc_verbose) # Possibly trim trim_mode = kwargs['trim_mode'] size_trim_tol = kwargs['size_trim_tol'] auto_trim_tol = kwargs['auto_trim_tol'] params, trimmed = l1_solvers_common.do_trim_params( params, k_params, alpha, score, passed, trim_mode, size_trim_tol, auto_trim_tol) ### Pack up return values for statsmodels optimizers # TODO These retvals are returned as mle_retvals...but the fit wasn't ML. # This could be confusing someday. if full_output: x_full, fx, its, imode, smode = results fopt = func(np.asarray(x_full)) converged = 'True' if imode == 0 else smode iterations = its gopt = float('nan') # Objective is non-differentiable hopt = float('nan') retvals = { 'fopt': fopt, 'converged': converged, 'iterations': iterations, 'gopt': gopt, 'hopt': hopt, 'trimmed': trimmed} ### Return if full_output: return params, retvals else: return params
def fit_l1_cvxopt_cp( f, score, start_params, args, kwargs, disp=False, maxiter=100, callback=None, retall=False, full_output=False, hess=None): """ Solve the l1 regularized problem using cvxopt.solvers.cp Specifically: We convert the convex but non-smooth problem .. math:: \\min_\\beta f(\\beta) + \\sum_k\\alpha_k |\\beta_k| via the transformation to the smooth, convex, constrained problem in twice as many variables (adding the "added variables" :math:`u_k`) .. math:: \\min_{\\beta,u} f(\\beta) + \\sum_k\\alpha_k u_k, subject to .. math:: -u_k \\leq \\beta_k \\leq u_k. Parameters ---------- All the usual parameters from LikelhoodModel.fit alpha : non-negative scalar or numpy array (same size as parameters) The weight multiplying the l1 penalty term trim_mode : 'auto, 'size', or 'off' If not 'off', trim (set to zero) parameters that would have been zero if the solver reached the theoretical minimum. If 'auto', trim params using the Theory above. If 'size', trim params if they have very small absolute value size_trim_tol : float or 'auto' (default = 'auto') For use when trim_mode === 'size' auto_trim_tol : float For sue when trim_mode == 'auto'. Use qc_tol : float Print warning and don't allow auto trim when (ii) in "Theory" (above) is violated by this much. qc_verbose : Boolean If true, print out a full QC report upon failure abstol : float absolute accuracy (default: 1e-7). reltol : float relative accuracy (default: 1e-6). feastol : float tolerance for feasibility conditions (default: 1e-7). refinement : int number of iterative refinement steps when solving KKT equations (default: 1). """ start_params = np.array(start_params).ravel('F') ## Extract arguments # k_params is total number of covariates, possibly including a leading constant. k_params = len(start_params) # The start point x0 = np.append(start_params, np.fabs(start_params)) x0 = matrix(x0, (2 * k_params, 1)) # The regularization parameter alpha = np.array(kwargs['alpha_rescaled']).ravel('F') # Make sure it's a vector alpha = alpha * np.ones(k_params) assert alpha.min() >= 0 ## Wrap up functions for cvxopt f_0 = lambda x: _objective_func(f, x, k_params, alpha, *args) Df = lambda x: _fprime(score, x, k_params, alpha) G = _get_G(k_params) # Inequality constraint matrix, Gx \leq h h = matrix(0.0, (2 * k_params, 1)) # RHS in inequality constraint H = lambda x, z: _hessian_wrapper(hess, x, z, k_params) ## Define the optimization function def F(x=None, z=None): if x is None: return 0, x0 elif z is None: return f_0(x), Df(x) else: return f_0(x), Df(x), H(x, z) ## Convert optimization settings to cvxopt form solvers.options['show_progress'] = disp solvers.options['maxiters'] = maxiter if 'abstol' in kwargs: solvers.options['abstol'] = kwargs['abstol'] if 'reltol' in kwargs: solvers.options['reltol'] = kwargs['reltol'] if 'feastol' in kwargs: solvers.options['feastol'] = kwargs['feastol'] if 'refinement' in kwargs: solvers.options['refinement'] = kwargs['refinement'] ### Call the optimizer results = solvers.cp(F, G, h) x = np.asarray(results['x']).ravel() params = x[:k_params] ### Post-process # QC qc_tol = kwargs['qc_tol'] qc_verbose = kwargs['qc_verbose'] passed = l1_solvers_common.qc_results( params, alpha, score, qc_tol, qc_verbose) # Possibly trim trim_mode = kwargs['trim_mode'] size_trim_tol = kwargs['size_trim_tol'] auto_trim_tol = kwargs['auto_trim_tol'] params, trimmed = l1_solvers_common.do_trim_params( params, k_params, alpha, score, passed, trim_mode, size_trim_tol, auto_trim_tol) ### Pack up return values for statsmodels # TODO These retvals are returned as mle_retvals...but the fit wasn't ML if full_output: fopt = f_0(x) gopt = float('nan') # Objective is non-differentiable hopt = float('nan') iterations = float('nan') converged = 'True' if results['status'] == 'optimal'\ else results['status'] retvals = { 'fopt': fopt, 'converged': converged, 'iterations': iterations, 'gopt': gopt, 'hopt': hopt, 'trimmed': trimmed} else: x = np.array(results['x']).ravel() params = x[:k_params] ### Return results if full_output: return params, retvals else: return params
def fit_l1_cvxopt_cp(f, score, start_params, args, kwargs, disp=False, maxiter=100, callback=None, retall=False, full_output=False, hess=None): """ Solve the l1 regularized problem using cvxopt.solvers.cp Specifically: We convert the convex but non-smooth problem .. math:: \\min_\\beta f(\\beta) + \\sum_k\\alpha_k |\\beta_k| via the transformation to the smooth, convex, constrained problem in twice as many variables (adding the "added variables" :math:`u_k`) .. math:: \\min_{\\beta,u} f(\\beta) + \\sum_k\\alpha_k u_k, subject to .. math:: -u_k \\leq \\beta_k \\leq u_k. Parameters ---------- All the usual parameters from LikelhoodModel.fit alpha : non-negative scalar or numpy array (same size as parameters) The weight multiplying the l1 penalty term trim_mode : 'auto, 'size', or 'off' If not 'off', trim (set to zero) parameters that would have been zero if the solver reached the theoretical minimum. If 'auto', trim params using the Theory above. If 'size', trim params if they have very small absolute value size_trim_tol : float or 'auto' (default = 'auto') For use when trim_mode === 'size' auto_trim_tol : float For sue when trim_mode == 'auto'. Use qc_tol : float Print warning and do not allow auto trim when (ii) in "Theory" (above) is violated by this much. qc_verbose : Boolean If true, print out a full QC report upon failure abstol : float absolute accuracy (default: 1e-7). reltol : float relative accuracy (default: 1e-6). feastol : float tolerance for feasibility conditions (default: 1e-7). refinement : int number of iterative refinement steps when solving KKT equations (default: 1). """ start_params = np.array(start_params).ravel('F') ## Extract arguments # k_params is total number of covariates, possibly including a leading constant. k_params = len(start_params) # The start point x0 = np.append(start_params, np.fabs(start_params)) x0 = matrix(x0, (2 * k_params, 1)) # The regularization parameter alpha = np.array(kwargs['alpha_rescaled']).ravel('F') # Make sure it's a vector alpha = alpha * np.ones(k_params) assert alpha.min() >= 0 ## Wrap up functions for cvxopt f_0 = lambda x: _objective_func(f, x, k_params, alpha, *args) Df = lambda x: _fprime(score, x, k_params, alpha) G = _get_G(k_params) # Inequality constraint matrix, Gx \leq h h = matrix(0.0, (2 * k_params, 1)) # RHS in inequality constraint H = lambda x, z: _hessian_wrapper(hess, x, z, k_params) ## Define the optimization function def F(x=None, z=None): if x is None: return 0, x0 elif z is None: return f_0(x), Df(x) else: return f_0(x), Df(x), H(x, z) ## Convert optimization settings to cvxopt form solvers.options['show_progress'] = disp solvers.options['maxiters'] = maxiter if 'abstol' in kwargs: solvers.options['abstol'] = kwargs['abstol'] if 'reltol' in kwargs: solvers.options['reltol'] = kwargs['reltol'] if 'feastol' in kwargs: solvers.options['feastol'] = kwargs['feastol'] if 'refinement' in kwargs: solvers.options['refinement'] = kwargs['refinement'] ### Call the optimizer results = solvers.cp(F, G, h) x = np.asarray(results['x']).ravel() params = x[:k_params] ### Post-process # QC qc_tol = kwargs['qc_tol'] qc_verbose = kwargs['qc_verbose'] passed = l1_solvers_common.qc_results(params, alpha, score, qc_tol, qc_verbose) # Possibly trim trim_mode = kwargs['trim_mode'] size_trim_tol = kwargs['size_trim_tol'] auto_trim_tol = kwargs['auto_trim_tol'] params, trimmed = l1_solvers_common.do_trim_params(params, k_params, alpha, score, passed, trim_mode, size_trim_tol, auto_trim_tol) ### Pack up return values for statsmodels # TODO These retvals are returned as mle_retvals...but the fit was not ML if full_output: fopt = f_0(x) gopt = float('nan') # Objective is non-differentiable hopt = float('nan') iterations = float('nan') converged = (results['status'] == 'optimal') warnflag = results['status'] retvals = { 'fopt': fopt, 'converged': converged, 'iterations': iterations, 'gopt': gopt, 'hopt': hopt, 'trimmed': trimmed, 'warnflag': warnflag } else: x = np.array(results['x']).ravel() params = x[:k_params] ### Return results if full_output: return params, retvals else: return params