Example #1
    def test_Lasso_Path(self):
        diabetes = datasets.load_diabetes()
        X = diabetes.data
        y = diabetes.target
        X /= X.std(axis=0)

        df = pdml.ModelFrame(diabetes)
        df.data /= df.data.std(axis=0, ddof=False)

        self.assert_numpy_array_almost_equal(df.data.values, X)

        eps = 5e-3
        expected = lm.lasso_path(X, y, eps, fit_intercept=False)
        result = df.lm.lasso_path(eps=eps, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, fit_intercept=False)
        result = df.lm.enet_path(eps=eps, l1_ratio=0.8, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)
        result = df.lm.enet_path(eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.lars_path(X, y, method='lasso', verbose=True)
        result = df.lm.lars_path(method='lasso', verbose=True)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])
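The assertions above unpack the standard return values: lasso_path and enet_path return (alphas, coefs, dual_gaps), while lars_path returns (alphas, active, coefs). A standalone sketch on synthetic data (shapes shown are the library defaults):

import numpy as np
from sklearn.linear_model import enet_path, lars_path

rng = np.random.RandomState(0)
X = rng.randn(50, 8)
y = X @ rng.randn(8) + 0.1 * rng.randn(50)

# Coordinate-descent paths return (alphas, coefs, dual_gaps)
alphas, coefs, gaps = enet_path(X, y, eps=5e-3, l1_ratio=0.8)
print(alphas.shape, coefs.shape)  # (100,) (8, 100) with the default n_alphas

# LARS-based lasso path returns (alphas, active_feature_indices, coefs)
alphas_lars, active, coefs_lars = lars_path(X, y, method='lasso')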
Example #3
    def enetpath_vs_enet(self, drug_name, alphas=None, l1_ratio=0.5, nfeat=5,
                         max_iter=1000, tol=1e-4, selection="cyclic",
                         fit_intercept=False):
        """
        #if X is not scaled, the enetpath and ElasticNet will give slightly
        # different results
        #if X is scale using::

            from sklearn import preprocessing
            xscaled = preprocessing.scale(X)
            xscaled = pd.DataFrame(xscaled, columns=X.columns)
        """
        # 1. use enet to loop over alphas and then plot the coefficients
        # along alpha for each feature

        # Get the data for the requested drug
        X, Y = self._get_one_drug_data(drug_name)

        # Run elasticnet for a bunch of alphas to get the coefficients
        alphas, coeffs, _ = enet_path(X, Y, l1_ratio=l1_ratio, alphas=alphas)

        # estimate the best alpha for later
        best_alpha = self.runCV(drug_name, verbose=False)['alpha']

        # Sort coeffs by the coefficient values at the smallest alpha (last column)
        coeffs = pd.DataFrame(coeffs, index=list(X.columns))
        N = len(alphas)
        coeffs.sort_values(by=N-1, inplace=True)
        results = {"alphas": alphas, "coeffs": coeffs, "best_alpha": best_alpha}

        # the Viewer
        self._plot_enet(results)

        return results
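A quick way to see the equivalence the docstring above alludes to: refit ElasticNet at a single alpha taken from the path and compare coefficients. A minimal sketch on synthetic, standardized data (not from the original project):

import numpy as np
from sklearn.linear_model import ElasticNet, enet_path
from sklearn.preprocessing import scale

rng = np.random.RandomState(0)
X = scale(rng.randn(40, 6))  # standardized, as the docstring recommends
y = rng.randn(40)

alphas, coefs, _ = enet_path(X, y, l1_ratio=0.5)

# Refit at one alpha from the path; the coefficients should agree with the
# corresponding path column up to solver tolerance.
k = 10
en = ElasticNet(alpha=alphas[k], l1_ratio=0.5, fit_intercept=False).fit(X, y)
print(np.abs(en.coef_ - coefs[:, k]).max())  # small, on the order of the tol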
Example #4
def plot_enet_descent_path(X, y, l1_ratio, plot_file):
    # Compute paths
    eps = 5e-3  # the smaller eps is, the longer the path

    # Reference the global image variable
    global image

    print("Computing regularization path using the elastic net.")
    alphas_enet, coefs_enet, _ = enet_path(X,
                                           y,
                                           eps=eps,
                                           l1_ratio=l1_ratio,
                                           fit_intercept=False)

    # Display results
    fig = plt.figure(1)
    ax = plt.gca()

    colors = cycle(['b', 'r', 'g', 'c', 'k'])
    neg_log_alphas_enet = -np.log10(alphas_enet)
    for coef_e, c in zip(coefs_enet, colors):
        l1 = plt.plot(neg_log_alphas_enet, coef_e, linestyle='--', c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    title = 'ElasticNet Path by alpha for l1_ratio = ' + str(l1_ratio)
    plt.title(title)
    plt.axis('tight')

    image = fig
    fig.savefig(plot_file)
    plt.close(fig)
    return image
Example #5
 def get_paths(self, num=10, n_alphas=10):
     l1_ratio_list = np.linspace(max(0.01, 1.0 / num), 1.0, num=num, endpoint=False)
     space_list = list()
     for l1 in l1_ratio_list:
         alphas, coefs, _ = enet_path(self.X_train,
                                      self.y_train,
                                      n_alphas=n_alphas,
                                      l1_ratio=l1)
         f = pd.DataFrame(
             coefs).transpose()  # cols = features, one row per alpha
         g = f.applymap(lambda x: np.abs(x) > 0.0)
         nz = pd.DataFrame({
             'nz': g.sum(axis=1),
             'alpha': alphas
         })  # number of non-zero coefs and min alpha needed
         nz.sort_values(by='nz', inplace=True)
         nz.reset_index(inplace=True, drop=True)
          alpha_max = nz['alpha'].max()  # if alpha > alpha_max then all coefs are 0
         space_list.append({
             'l1_ratio': l1,
             'alpha_max': alpha_max,
             'alpha_min': 0.0
         })
     return space_list
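Scanning a whole path just to locate alpha_max, as get_paths does above, can be avoided: the elastic net has a closed-form bound, which is also how scikit-learn builds its automatic alpha grid. A sketch (assumes no intercept, i.e. pre-centered data):

import numpy as np

def alpha_max_closed_form(X, y, l1_ratio):
    # Smallest alpha at which every coefficient is exactly zero; matches the
    # top of scikit-learn's automatic alpha grid for centered data.
    n_samples = X.shape[0]
    return np.abs(X.T @ y).max() / (n_samples * l1_ratio)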
Example #6
def worker(
        boot_inds,
        X,
        y,
        X_noise=0.01,
        y_noise=0.5,
        alpha=0.9,
        lambda_path=np.geomspace(1.0, 0.01, num=100),
):

    X_boot = X[boot_inds, :]
    y_boot = y[boot_inds]

    X_boot = scale(
        scale(X_boot +
              np.random.normal(scale=X_noise * 1e-6, size=X_boot.shape)) +
        np.random.normal(scale=X_noise, size=X_boot.shape))
    y_boot = scale(
        scale(y_boot +
              np.random.normal(scale=y_noise * 1e-6, size=len(y_boot))) +
        np.random.normal(scale=y_noise, size=len(y_boot)))

    lambdas_enet, coefs_enet, _ = enet_path(X_boot,
                                            y_boot,
                                            l1_ratio=alpha,
                                            alphas=lambda_path,
                                            fit_intercept=False)

    return {"beta": coefs_enet != 0, "lambda_path": lambdas_enet}
Example #7
def elastic_net_path(x_df, y_se, almax, almin, l1r, amin, a1se, intercept):
    X = x_df.values.copy()
    y = y_se.values.copy()

    labs = x_df.columns
    log10a = np.linspace(almax, almin, 100)
    alps = 10**log10a

    # === Path calculation (by "enet_path") ===
    alphas, coeffs, _ = enet_path(X, y, alphas=alps, l1_ratio=l1r, fit_intercept=intercept)

    # Plotting (path)
    plt.figure(figsize=(8, 6))
    axes = plt.gca()
    for coeff, lab in zip(coeffs, labs):
        plt.plot(log10a, coeff)
        for i in (20, 40, 60, 80, 99):  # label each curve at several points
            axes.text(log10a[i], coeff[i], lab, fontsize=10)
    plt.axvline(np.log10(amin), linestyle="--", color="k", label="alpha min")
    plt.axvline(np.log10(a1se), linestyle=":", color="k", label="alpha 1se")
    plt.title("Regularization path\nenet_path (L1 ratio: " + str(l1r) + ")")
    plt.xlabel('log10(alpha)')
    plt.ylabel('Coefficients')
    #plt.xlim(min(log10a)-0.1, max(log10a)+0.1)
    plt.axhline(0, linewidth=1, color="k")
    plt.grid()
    plt.show()
Example #8
 def plot_results(self):
     """Create the base regression plots as well as a regularization path plot."""
     rc.REG.plot_results(self)
     path = linear_model.enet_path(self.independentVar, self.dependentVar,
                                   l1_ratio=self.l1_ratio, return_models=False,
                                   fit_intercept=False)
     alphas = path[0] #Vector of alphas
     coefs = (path[1]).T #Array of coefficients for each alpha
     viz.plot_regPath(alphas, coefs).plot()
Example #9
def enetModel(Xfillna, yNor, l1_ratio, names):
    alphas, coefs, _ = linear_model.enet_path(Xfillna, yNor, l1_ratio=l1_ratio,
                                              fit_intercept=False, return_models=False)
    plot.plot(alphas,coefs.T)
    plot.xlabel('alpha')
    plot.ylabel('Coefficients')
    plot.title('Coefficient curves for debt classify data')
    plot.axis('tight')
    plot.semilogx()
    ax = plot.gca()
    ax.invert_xaxis()
    plot.show()
    
    nattr, nalpha = coefs.shape    
    #find coefficient ordering
    nzList = []
    for iAlpha in range(1,nalpha):
        coefList = list(coefs[:,iAlpha])
        nzCoef = [index for index in range(nattr) if coefList[index] != 0.0]
        for q in nzCoef:
            if q not in nzList:
                nzList.append(q)
    nameList = [names[nzList[i]] for i in range(len(nzList))]
    print('Attribute Ordered by How Early They Enter the Model')
    print(nameList)
Example #10
def train(A,
          l1_ratio=0.5,
          eps=1e-3,
          n_alphas=100,
          alphas=None,
          positive=True,
          max_iter=1000):
    sample_num, feature_num = A.shape
    W = lil_matrix((feature_num, feature_num))
    A = SparseMatrix(A)

    for j in range(feature_num):
        aj, Aj = A.lil_get_col_to_csc(j)

        if aj.nnz == 0:
            continue

        aj = aj.toarray().ravel()

        alphas, coefs, __ = linear_model.enet_path(Aj,
                                                   aj,
                                                   l1_ratio=l1_ratio,
                                                   eps=eps,
                                                   n_alphas=n_alphas,
                                                   alphas=alphas,
                                                   positive=positive,
                                                   max_iter=max_iter)

        # coefs[:, -1] is the solution at the smallest alpha; its rows skip the
        # held-out column j, so split it around the diagonal when filling W.
        W[:j, j] = coefs[:j, -1].reshape(j, 1)
        W[j + 1:, j] = coefs[j:, -1].reshape(feature_num - j - 1, 1)

    return W
Example #11
    def train(self, trainset):

        # Here again: call base method before doing anything.
        env.AlgoBase.train(self, trainset)

        user_num = self.trainset.n_users
        item_num = self.trainset.n_items
        A = sparse.lil_matrix((user_num, item_num))

        for u, i, r in trainset.all_ratings():
            # A[u, i] = r
            if r > self.threshold:
                A[u, i] = 1

        W = sparse.lil_matrix((item_num, item_num))

        for j in range(item_num):

            aj = A.getcol(j).toarray().ravel()
            Aj = A.copy()
            Aj[:, j] = 0
            Aj = Aj.tocsc()
            __, coefs, __ = linear_model.enet_path(Aj, aj, l1_ratio=self.l1_ratio, eps=self.eps,
                                                   n_alphas=self.n_alphas, alphas=self.alphas,
                                                   positive=self.positive, max_iter=self.max_iter)

            W[:, j] = coefs[:, -1].reshape(item_num, 1)

        # Item-item weight matrix applied to the ratings: a users x items
        # matrix of predicted scores.
        self.estimator = A.tocsc() * W.tocsc()
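Once self.estimator holds the user-by-item score matrix, recommendation reduces to ranking a user's unseen items by score. A hypothetical follow-up (top_n and its masking rule are illustrative, not part of the original class):

import numpy as np

def top_n(estimator, A, user, n=10):
    # Rank items for one user by predicted score, masking items already rated.
    scores = np.asarray(estimator[user].todense()).ravel()
    seen = A[user].toarray().ravel() > 0
    scores[seen] = -np.inf
    return np.argsort(scores)[::-1][:n]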
Example #12
def _fit_bootstrap_sample(X, y, func, L):
    """ Computes the regularization path for the regression y ~ X.
    
    Parameters
    ----------
    X : array, shape (n_samples , n_features)

    y : array, shape (n_samples)

    func : string
         the function used for computing the regularization path
         (either 'lasso', 'elasticnet', or 'lars').
        
    L : int
        length of the path.

    Returns
    -------
    array, shape (n_features , L) 
        0 if the coefficient is null and 1 otherwise.

    """
    if func == 'lasso':
        _, coef_path, _ = lasso_path(X, y, n_alphas=L)
    elif func == 'elasticnet':
        _, coef_path, _ = enet_path(X, y, n_alphas=L)
    elif func == 'lars':
        _, _, coef_path = lars_path(X, y, max_iter=L - 1)

    return 1 * (coef_path != 0)
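The 0/1 supports returned above are typically averaged over bootstrap resamples to get per-feature selection frequencies. A hypothetical aggregation step (B and the resampling scheme are illustrative; note that 'lars' paths may have fewer than L columns, so this sketch assumes 'lasso' or 'elasticnet'):

import numpy as np

def selection_frequencies(X, y, func='lasso', L=30, B=50, seed=0):
    # Average the 0/1 supports from _fit_bootstrap_sample over B bootstrap
    # resamples of the rows of (X, y).
    rng = np.random.RandomState(seed)
    n = X.shape[0]
    freq = np.zeros((X.shape[1], L))
    for _ in range(B):
        idx = rng.randint(0, n, size=n)  # sample rows with replacement
        freq += _fit_bootstrap_sample(X[idx], y[idx], func, L)
    return freq / B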
Example #13
 def _gen_cv_paths(self, alphas):
     """Helper function to generate cv paths."""
     self.alphas, self.coefs_cv, _ = linear_model.enet_path(
         self.x_train,
         self.y_train,
         fit_intercept=self.intercept,
         alphas=alphas)
     self.coefs_cv = self.coefs_cv.T
Example #14
def path_calc(X, y, X_holdout, y_holdout, alphas, paramgrid, colname = 'CV', yname = '', method = 'Elastic Net'):
    #make a copy of the parameters before popping things off
    copy_params = copy.deepcopy(paramgrid)
    fit_intercept = copy_params.pop('fit_intercept')
    precompute = copy_params.pop('precompute')
    copy_X = copy_params.pop('copy_X')
    normalize = False

    # this code adapted from sklearn ElasticNet fit function, which unfortunately doesn't accept multiple alphas at once
    X, y = check_X_y(X, y, accept_sparse='csc',
                     order='F', dtype=[np.float64, np.float32],
                     copy=copy_X and fit_intercept,
                     multi_output=True, y_numeric=True)
    y = check_array(y, order='F', copy=False, dtype=X.dtype.type,
                    ensure_2d=False)

    #this is the step that gives the data to find intercept if fit_intercept is true.
    X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(X, y, None, precompute, normalize,
                                                                 fit_intercept, copy=False)
    y = np.squeeze(y)

    #do the path calculation, and tell how long it took
    print('Calculating path...')
    start_t = time.time()
    if method == 'Elastic Net':
        path_alphas, path_coefs, path_gaps, path_iters = enet_path(X, y, alphas=alphas, return_n_iter = True,
                                                   **copy_params)
    if method == 'LASSO':
        path_alphas, path_coefs, path_gaps, path_iters = lasso_path(X, y, alphas=alphas, return_n_iter=True,
                                                                   **copy_params)
    dt = time.time() - start_t
    print('Took ' + str(dt) + ' seconds')

    #create some empty arrays to store the result
    y_pred_holdouts = np.empty(shape=(len(alphas),len(y_holdout)))
    intercepts = np.empty(shape=(len(alphas)))
    rmses = np.empty(shape=(len(alphas)))
    cvcols = []
    for j in list(range(len(path_alphas))):

        coef_temp = path_coefs[:, j]

        if fit_intercept:
            coef_temp = coef_temp / X_scale
            intercept = y_offset - np.dot(X_offset, coef_temp.T)
        else:
            intercept = 0.

        y_pred_holdouts[j,:] = np.dot(X_holdout, path_coefs[:, j]) + intercept
        intercepts[j] = intercept
        rmses[j] = RMSE(y_pred_holdouts[j,:], y_holdout)
        cvcols.append(('predict','"'+ method + ' - ' + yname + ' - ' + colname + ' - Alpha:' + str(path_alphas[j]) + ' - ' + str(paramgrid) + '"'))

    return path_alphas, path_coefs, intercepts, path_iters, y_pred_holdouts, rmses, cvcols
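The intercept recovery inside the loop above (rescale the coefficients, then intercept = y_offset - X_offset . coef) is the standard identity for models fit on centered data. A self-contained sanity check of that identity with plain least squares:

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X = rng.randn(30, 4)
y = X @ np.array([1.0, -2.0, 0.0, 0.5]) + 3.0

# Fit slopes on centered data, then recover the intercept on the original scale.
w, *_ = np.linalg.lstsq(X - X.mean(axis=0), y - y.mean(), rcond=None)
b = y.mean() - X.mean(axis=0) @ w

lr = LinearRegression().fit(X, y)
print(np.allclose(w, lr.coef_), np.allclose(b, lr.intercept_))  # True True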
Example #15
 def Plot_Enet_Path(self, path_length=5e-3, alphas=None):
     import matplotlib.pyplot as plt
     print("Computing regularization path using the elastic net...")
     alphas_enet, coefs_enet, _ = enet_path(
         self.X, self.Y, eps=path_length, l1_ratio=0.8, fit_intercept=False, alphas=alphas)
     print("Computing regularization path using the positive elastic net...")
     alphas_positive_enet, coefs_positive_enet, _ = enet_path(
         self.X, self.Y, eps=path_length, l1_ratio=0.8, positive=True, fit_intercept=False, alphas=alphas)
     plt.figure()
     ax = plt.gca()
     ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
     l1 = plt.plot(-np.log10(alphas_enet), coefs_enet.T)
     l2 = plt.plot(-np.log10(alphas_positive_enet), coefs_positive_enet.T,
                   linestyle='--')
     plt.xlabel('-Log(alpha)')
     plt.ylabel('coefficients')
     plt.title('Elastic-Net and positive Elastic-Net')
     plt.legend((l1[-1], l2[-1]), ('Elastic-Net', 'positive Elastic-Net'),
                loc='lower left')
     plt.axis('tight')
     plt.show()
Example #16
def coefficient_order(x_scaled, y_scaled):
    # Train on the whole dataset to obtain alphas and coefs
    alphas, coefs, _ = enet_path(x_scaled,
                                 y_scaled,
                                 l1_ratio=0.8,
                                 fit_intercept=False,
                                 return_models=False)
    plt.plot(alphas, coefs.T)
    plt.xlabel('alpha')
    plt.ylabel('coefficients')
    plt.axis('tight')
    plt.semilogx()
    ax = plt.gca()
    ax.invert_xaxis()
    plt.show()

    # find coefficient ordering: the rows that are non-zero in each column
    n_attr, n_alpha = coefs.shape
    nz_list = []
    for i_alpha in range(1, n_alpha):
        coef_list = list(coefs[:, i_alpha])
        nz_coef = [i for i in range(n_attr) if coef_list[i] != 0]
        for q in nz_coef:
            if q not in nz_list:
                nz_list.append(q)

    names = ['V' + str(i) for i in range(n_attr)]
    name_list = [names[nz_list[i]] for i in range(len(nz_list))]
    print('Attributes Ordered by How Early They Enter the Model')
    print(name_list)

    # find coefficients corresponding to the best alpha value (from cross-validation)
    alphaStar = 0.020334883589342503
    index_alphaStar = [
        index for index in range(100) if alphas[index] > alphaStar
    ]
    index_star = max(index_alphaStar)

    coef_star = list(coefs[:, index_star])
    print('Best coefficient values', coef_star)

    print("")

    # The coefficients on normalized attributes give another slightly different ordering
    abs_coef = [abs(a) for a in coef_star]
    coef_sort = sorted(abs_coef, reverse=True)
    idx_coef_size = [abs_coef.index(a) for a in coef_sort if a != 0]
    name_list_2 = [names[idx_coef_size[i]] for i in range(len(idx_coef_size))]

    print("Attributes Ordered by Coef Size at Optimum alpha")
    print(name_list_2)
Example #17
    def train(self, alpha, l1_ratio):
        with mlflow.start_run(source_name=self.current_file) as run:
            run_id = run.info.run_uuid
            print("run_id:", run_id)
            experiment_id = run.info.experiment_id
            print("  experiment_id:", experiment_id)
            clf = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
            clf.fit(self.train_x, self.train_y)

            predicted_qualities = clf.predict(self.test_x)
            (rmse, mae, r2) = self.eval_metrics(self.test_y,
                                                predicted_qualities)

            #print("Parameters:(alpha={}, l1_ratio={}):".format(alpha, l1_ratio))
            print("  Parameters:")
            print("    alpha:", alpha)
            print("    l1_ratio:", l1_ratio)
            print("  Metrics:")
            print("    RMSE:", rmse)
            print("    MAE:", mae)
            print("    R2:", r2)

            mlflow.log_param("alpha", alpha)
            mlflow.log_param("l1_ratio", l1_ratio)

            mlflow.log_metric("rmse", rmse)
            mlflow.log_metric("r2", r2)
            mlflow.log_metric("mae", mae)

            mlflow.set_tag("data_path", self.data_path)
            mlflow.set_tag("exp_id", experiment_id)
            mlflow.set_tag("exp_name", self.experiment_name)
            mlflow.set_tag("run_origin", self.run_origin)
            mlflow.set_tag("platform", platform.system())

            mlflow.sklearn.log_model(clf, "model")

            eps = 5e-3  # the smaller eps is, the longer the path
            alphas_enet, coefs_enet, _ = enet_path(self.X,
                                                   self.y,
                                                   eps=eps,
                                                   l1_ratio=l1_ratio,
                                                   fit_intercept=False)
            plot_file = "wine_ElasticNet-paths.png"
            plot_utils.plot_enet_descent_path(self.X, self.y, l1_ratio,
                                              alphas_enet, coefs_enet,
                                              plot_file)
            mlflow.log_artifact(plot_file)

        return (experiment_id, run_id)
Example #18
def sklearn_liner_model_regressions(xTrain, xTest, yTrain, yTest):
    modelForConsideration: DataFrame = pd.DataFrame()
    LinerModels = \
        [
            linear_model.ARDRegression(), linear_model.BayesianRidge(), linear_model.ElasticNet(),
            linear_model.ElasticNetCV(),
            linear_model.HuberRegressor(), linear_model.Lars(), linear_model.LarsCV(), linear_model.Lasso(),
            linear_model.LassoCV(), linear_model.LassoLars(), linear_model.LassoLarsCV(), linear_model.LassoLarsIC(),
            linear_model.LinearRegression(), linear_model.MultiTaskLasso(),
            linear_model.MultiTaskElasticNet(), linear_model.MultiTaskLassoCV(), linear_model.MultiTaskElasticNetCV(),
            linear_model.OrthogonalMatchingPursuit(),
            linear_model.OrthogonalMatchingPursuitCV(), linear_model.PassiveAggressiveClassifier(),
            linear_model.PassiveAggressiveRegressor(), linear_model.Perceptron(),
            linear_model.RANSACRegressor(), linear_model.Ridge(), linear_model.RidgeClassifier(),
            linear_model.RidgeClassifierCV(),
            linear_model.RidgeCV(), linear_model.SGDClassifier(), linear_model.SGDRegressor(),
            linear_model.TheilSenRegressor(),
            # NOTE: these three return (alphas, coefs, ...) tuples, not fitted
            # estimators; .fit() will fail and the except clause skips them.
            linear_model.enet_path(xTrain, yTrain),
            linear_model.lars_path(xTrain, yTrain), linear_model.lasso_path(xTrain, yTrain),
            # linear_model.LogisticRegression()
            # ,linear_model.LogisticRegressionCV(),linear_model.logistic_regression_path(xTrain, yTrain), linear_model.orthogonal_mp(xTrain, yTrain), linear_model.orthogonal_mp_gram(), linear_model.ridge_regression()
        ]
    for model in LinerModels:
        modelName: str = model.__class__.__name__
        try:
            # print(f"Preparing Model {modelName}")
            if modelName == "LogisticRegression":
                model = linear_model.LogisticRegression(random_state=0)
            model.fit(xTrain, yTrain)
            yTrainPredict = model.predict(xTrain)
            yTestPredict = model.predict(xTest)
            errorList = calculate_prediction_error(modelName, yTestPredict,
                                                   yTest, yTrainPredict,
                                                   yTrain)

            if errorList["Test Average Error"][0] < 30 and errorList[
                    "Train Average Error"][0] < 30:
                try:
                    modelForConsideration = modelForConsideration.append(
                        errorList)
                except (Exception) as e:
                    print(e)

        except (Exception, ArithmeticError) as e:
            print(f"Error occurred while preparing Model {modelName}")
    return modelForConsideration
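Rather than hand-maintaining a list that mixes estimators with tuple-returning path functions, scikit-learn can enumerate regressors itself. A sketch using sklearn.utils.all_estimators (the module filter is illustrative):

from sklearn.utils import all_estimators

# Instantiate every regressor class defined under sklearn.linear_model,
# which avoids the path helpers that have no fit/predict.
linear_regressors = [cls() for name, cls in all_estimators(type_filter='regressor')
                     if cls.__module__.startswith('sklearn.linear_model')]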
Example #19
def dataset_path(X, y, dataset_name=''):
	print("\n\ncoefs path processing... dataset: " + dataset_name)
	
	print("\nprocessing Lasso...")
	eps = 1e-5
	alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept=False)
	plot_coefs_path(dataset_name + ' Lasso coefficient paths', alphas_lasso, coefs_lasso)
	
	print("\nprocessing ElasticNet...")
	eps = 1e-5
	alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=ENET_L1_RATIO, fit_intercept=False)
	plot_coefs_path(dataset_name + ' ElasticNet coefficient paths', alphas_enet, coefs_enet)

	print("\nprocessing Ridge...")
	m_ridge = Ridge(fit_intercept=False)
	alphas = np.logspace(5, -1, 100)
	ridge_alphas, model_coef = model_path(m_ridge, X, y, alphas)
	plot_coefs_path(dataset_name +' Ridge coefficient paths', ridge_alphas, model_coef)
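model_path is a project helper that is not shown in this snippet. A plausible minimal reconstruction (hypothetical, not the original implementation): fit a clone of the estimator at each alpha and stack the coefficients in the same (n_features, n_alphas) layout that lasso_path and enet_path use:

import numpy as np
from sklearn.base import clone

def model_path(model, X, y, alphas):
    # Hypothetical helper: one fit per alpha, coefficients stacked column-wise.
    coefs = [clone(model).set_params(alpha=a).fit(X, y).coef_ for a in alphas]
    return np.asarray(alphas), np.asarray(coefs).T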
Example #20
def enet_plot(l1_ratio):
    """Function plotting enet_path for some tuning parameter."""
    _, theta_enet, _ = linear_model.enet_path(A,
                                              b,
                                              alphas=alphas,
                                              fit_intercept=False,
                                              l1_ratio=l1_ratio,
                                              return_models=False)
    fig1 = plt.figure(figsize=(12, 8))
    ax1 = fig1.add_subplot(111)
    ax1.plot(alphas, np.transpose(theta_enet), linewidth=3)
    ax1.set_xscale('log')
    ax1.set_xlabel(r"$\lambda$")
    ax1.set_ylabel("Coefficient value")
    ax1.set_ylim([-1, 5])
    plt.savefig(save_fig(path, "enet_coeffs", "pdf"))
    plt.show()
    return theta_enet
Example #21
    def enetpath_vs_enet(self,
                         drug_name,
                         alphas=None,
                         l1_ratio=0.5,
                         nfeat=5,
                         max_iter=1000,
                         tol=1e-4,
                         selection="cyclic",
                         fit_intercept=False):
        """
        #if X is not scaled, the enetpath and ElasticNet will give slightly
        # different results
        #if X is scale using::

            from sklearn import preprocessing
            xscaled = preprocessing.scale(X)
            xscaled = pd.DataFrame(xscaled, columns=X.columns)
        """
        # 1. use enet to loop over alphas and then plot the coefficients
        # along alpha for each feature

        # Get the data for the requested drug
        X, Y = self._get_one_drug_data(drug_name)

        # Run elasticnet for a bunch of alphas to get the coefficients
        alphas, coeffs, _ = enet_path(X, Y, l1_ratio=l1_ratio, alphas=alphas)

        # estimate the best alpha for later
        best_alpha = self.runCV(drug_name, verbose=False).alpha

        # Sort coeffs by the coefficient values at the smallest alpha (last column)
        coeffs = pd.DataFrame(coeffs, index=list(X.columns))
        N = len(alphas)
        coeffs.sort_values(by=N - 1, inplace=True)
        results = {
            "alphas": alphas,
            "coeffs": coeffs,
            "best_alpha": best_alpha
        }

        # the Viewer
        self._plot_enet(results)

        return results
Example #22
def crossValENetRocksMines(xNormalized, labelNormalized, nrows, ncols):

    nxval = 10

        
    for ixval in range(nxval):
        
        idxTest = [a for a in range(nrows) if a%nxval == ixval%nxval]
        idxTrain = [a for a in range(nrows) if a%nxval != ixval%nxval]
        
        xTrain = [xNormalized[r] for r in idxTrain]
        xTest = [xNormalized[r] for r in idxTest]
        
        labelTrain = [labelNormalized[r] for r in idxTrain]
        labelTest = [labelNormalized[r] for r in idxTest]
        
        alphas, coefs,_ = enet_path(xTrain, labelTrain, l1_ratio = 0.8, \
        fit_intercept=False, return_models=False)
        
        if ixval == 0:
            pred = numpy.dot(xTest, coefs)
            yOut = labelTest
        else:
            yTemp = numpy.array(yOut)
            yOut = numpy.concatenate((yTemp,labelTest),axis=0)
            
            predTemp = numpy.array(pred)
            pred = numpy.concatenate((predTemp,numpy.dot(xTest,coefs)),axis=0)

    misClassRate = []
    _,nPred = pred.shape
    for iPred in range(1,nPred):
        predList = list(pred[:,iPred])
        errCnt = 0.0
        
        for irow in range(nrows):
            if (predList[irow] < 0.0) and (yOut[irow]>=0.0):
                errCnt += 1.0
            elif (predList[irow] >= 0.0) and (yOut[irow] < 0.0):
                errCnt += 1.0
        misClassRate.append(errCnt/nrows)
        
    return (misClassRate, alphas)
Example #23
def crossValENetRocksMines(xNormalized, labelNormalized, nrows, ncols):

    nxval = 10

    for ixval in range(nxval):

        idxTest = [a for a in range(nrows) if a % nxval == ixval % nxval]
        idxTrain = [a for a in range(nrows) if a % nxval != ixval % nxval]

        xTrain = [xNormalized[r] for r in idxTrain]
        xTest = [xNormalized[r] for r in idxTest]

        labelTrain = [labelNormalized[r] for r in idxTrain]
        labelTest = [labelNormalized[r] for r in idxTest]

        alphas, coefs,_ = enet_path(xTrain, labelTrain, l1_ratio = 0.8, \
        fit_intercept=False, return_models=False)

        if ixval == 0:
            pred = numpy.dot(xTest, coefs)
            yOut = labelTest
        else:
            yTemp = numpy.array(yOut)
            yOut = numpy.concatenate((yTemp, labelTest), axis=0)

            predTemp = numpy.array(pred)
            pred = numpy.concatenate((predTemp, numpy.dot(xTest, coefs)),
                                     axis=0)

    misClassRate = []
    _, nPred = pred.shape
    for iPred in range(1, nPred):
        predList = list(pred[:, iPred])
        errCnt = 0.0

        for irow in range(nrows):
            if (predList[irow] < 0.0) and (yOut[irow] >= 0.0):
                errCnt += 1.0
            elif (predList[irow] >= 0.0) and (yOut[irow] < 0.0):
                errCnt += 1.0
        misClassRate.append(errCnt / nrows)

    return (misClassRate, alphas)
Example #24
def enet_plot(l1_ratio):
    """ function plotting enet_path for some tuning parameter """
    _, theta_enet, _ = enet_path(X, y, alphas=alphas, fit_intercept=False,
                                 l1_ratio=l1_ratio, return_models=False)
    fig1 = plt.figure(figsize=(12, 8))
    plt.title("Enet path: " + r"$p={0}, n={1} $".format(n_features,
              n_samples), fontsize=16)
    ax1 = fig1.add_subplot(111)
    ax1.plot(alphas, np.transpose(theta_enet), linewidth=3)
    ax1.set_xscale('log')
    ax1.set_xlabel(r"$\lambda$")
    ax1.set_ylabel("Coefficient value")
    ax1.set_ylim([-1, 2])
    sns.despine()
    plt.show()
    filename = "Enet_path" + str(l1_ratio)
    filename = filename.replace(".", "")
    my_saving_display(fig1, dirname, filename, imageformat)
    return theta_enet
Example #25
def myenetpath(data, lam, alpha):
    n_samples, n_features = data.shape
    sig = np.ones(n_features)
    tor = np.array([])
    for iter1 in range(500):
        A = np.zeros((n_features * n_samples, n_features * n_features))
        for i in range(1, n_features + 1):
            A[(i - 1) * n_samples:i * n_samples, (i - 1) * n_features:i *
              n_features] = data * np.tile(np.sqrt(sig / sig[i - 1]),
                                           (n_samples, 1))
        temp = np.eye(n_features)
        temp = temp.flatten('F')
        loc = np.array(np.nonzero(temp == 0))
        loc = loc.reshape(loc.shape[1])
        A = A[:, loc]
        y = data.flatten('F')
        _, coef_path, _ = enet_path(A * np.sqrt(2 * n_samples),
                                    y * np.sqrt(2 * n_samples),
                                    alphas=lam,
                                    l1_ratio=alpha)
    return coef_path
Example #26
def main():

    path = '/users/davecwright/documents/kaggle/liberty_fire_cost/'
    path = 'c:\\users\\dwright\\code\\'
    train_name = path + 'train.csv'
    test_name = path + 'test.csv'
    # f = open(path + 'train.csv', 'rb')

    readRows = 2000 #None for all
    print 'loading train_data'
    train_data = pd.read_csv(train_name, nrows=readRows)

    print 'train_data loaded'
    y_data = train_data['target'].values
    train_data = train_data.drop('target', 1)
    train_data = scrub(train_data)
    y_data = y_data.astype(float)

    eps = 5e-3
    X = train_data
    y = y_data
    folds = 10
    kf = cross_validation.KFold(y_data.shape[0], n_folds=folds)
    alphas = range(folds)
    k = 0
    for train, test in kf:
        penalty = (alphas[k] + 1) * 1/folds
        print penalty
        clf = ElasticNet(l1_ratio=penalty)
        clf.fit(X, y)
        k += 1
        # doing our own cv parametrization


    print 'computing lasso path'
    alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept = False)
    print 'computing enet path'
    alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=0.8, fit_intercept=False)

    plt.figure(1)
    ax = plt.gca()
    ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
    l1 = plt.plot(-np.log10(alphas_lasso), coefs_lasso.T)
    l2 = plt.plot(-np.log10(alphas_enet), coefs_enet.T, linestyle='--')

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and Elastic-Net Paths')
    plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')
    plt.axis('tight')
    plt.show()
    sys.exit()



    for train_index, test_index in kf:
        #print train_index, test_index
        print train_data.iloc[train_index], train_data.iloc[test_index]


    # I am implementing the lasso reduction here...
    #for train, test in kf:

    fig = plt.figure(figsize=(12, 9))
    #ax = fig.add_subplot(111)

    #ax.plot(np.sort(y_data))
    #plt.xlabel("Number of Features")
    #plt.ylabel("claims cost")
    #plt.title("claims_cost")
    #ax.set_xscale("log")

    #ax.set_position([box.x0, box.y0 + box.height * 0.3, box.width, box.height * 0.7])
    #ax.legend(**_PLT_LEGEND_OPTIONS)
    #plt.show()


    train_data = train_data.drop('target', 1)

    # A - preprocessing
    # encode the text variables, var1-var9, Z values are NaN

    # fill in missing values in the text variables and in the continuous variables
    # skip continuous for now

    # A3. build new features through the interactions of various items
    # skip for now

    #  A4. dimensionality reduction to take the feature set back down to something more manageable.

    est_clf = svm.SVR(kernel='linear', C=1)
    rks = select_ests(X_train, y_data, 100, est_clf)
    X_train = X_train[:, rks]

    clf = svm.SVR(kernel='linear', C=1)
    acy = cv(X_train, y_data, clf, None, estimator_name(clf))

    # the point here is to understand what accuracy is

    print 'accuracy:', acy

    #select_model(X_train, y_data)

    # B. split out test and fit sets

    test_data = pd.read_csv(test_name, nrows=readRows)
    test_data = encode_impute(test_data)
    X_test = scaler.transform(test_data)
Example #27
eps = 5e-3  # the smaller eps is, the longer the path

print("Computing regularization path using the lasso...")
# The return_models parameter sets that lasso_path will return
# the alphas and the coefficients as output, instead of a list
# of models as it does by default. Returning the list of models
# is deprecated and will eventually be removed in 0.15
alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, return_models=False)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(X, y, eps,
                                                        positive=True,
                                                        return_models=False)
print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=0.8,
                                    return_models=False)

print("Computing regularization path using the positve elastic net...")
alphas_positive_enet, coefs_positive_enet, _ = enet_path(X, y, eps=eps,
                                                      l1_ratio=0.8,
                                                      positive=True,
                                                      return_models=False)

# Display results

pl.figure(1)
ax = pl.gca()
ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
l1 = pl.plot(-np.log10(alphas_lasso), coefs_lasso.T)
l2 = pl.plot(-np.log10(alphas_enet), coefs_enet.T, linestyle='--')
Example #28
def main():
    print(__doc__)

    # Author: Alexandre Gramfort <*****@*****.**>
    # License: BSD 3 clause

    X, y = datasets.load_diabetes(return_X_y=True)

    X /= X.std(axis=0)  # Standardize data (easier to set the l1_ratio parameter)

    # Compute paths

    eps = 5e-3  # the smaller eps is, the longer the path

    print("Computing regularization path using the lasso...")
    alphas_lasso, coefs_lasso, _ = lasso_path(X,
                                              y,
                                              eps=eps,
                                              fit_intercept=False)

    print("Computing regularization path using the positive lasso...")
    alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
        X, y, eps=eps, positive=True, fit_intercept=False)
    print("Computing regularization path using the elastic net...")
    alphas_enet, coefs_enet, _ = enet_path(X,
                                           y,
                                           eps=eps,
                                           l1_ratio=0.8,
                                           fit_intercept=False)

    print("Computing regularization path using the positive elastic net...")
    alphas_positive_enet, coefs_positive_enet, _ = enet_path(
        X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)

    # Display results

    plt.figure(1)
    colors = cycle(['b', 'r', 'g', 'c', 'k'])
    neg_log_alphas_lasso = -np.log10(alphas_lasso)
    neg_log_alphas_enet = -np.log10(alphas_enet)
    for coef_l, coef_e, c in zip(coefs_lasso, coefs_enet, colors):
        l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)
        l2 = plt.plot(neg_log_alphas_enet, coef_e, linestyle='--', c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and Elastic-Net Paths')
    plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')
    plt.axis('tight')

    plt.figure(2)
    neg_log_alphas_positive_lasso = -np.log10(alphas_positive_lasso)
    for coef_l, coef_pl, c in zip(coefs_lasso, coefs_positive_lasso, colors):
        l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)
        l2 = plt.plot(neg_log_alphas_positive_lasso,
                      coef_pl,
                      linestyle='--',
                      c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and positive Lasso')
    plt.legend((l1[-1], l2[-1]), ('Lasso', 'positive Lasso'), loc='lower left')
    plt.axis('tight')

    plt.figure(3)
    neg_log_alphas_positive_enet = -np.log10(alphas_positive_enet)
    for (coef_e, coef_pe, c) in zip(coefs_enet, coefs_positive_enet, colors):
        l1 = plt.plot(neg_log_alphas_enet, coef_e, c=c)
        l2 = plt.plot(neg_log_alphas_positive_enet,
                      coef_pe,
                      linestyle='--',
                      c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Elastic-Net and positive Elastic-Net')
    plt.legend((l1[-1], l2[-1]), ('Elastic-Net', 'positive Elastic-Net'),
               loc='lower left')
    plt.axis('tight')
    plt.show()
Example #29
    idxTrain = [a for a in range(nrow) if a%nxval != ixval%nxval]

    #Define test and training attribute and label sets
    xTrain = numpy.array([xNormalized[r] for r in idxTrain])
    xTest = numpy.array([xNormalized[r] for r in idxTest])
    yTrain = [yNormalized[r] for r in idxTrain]
    yTest = [yNormalized[r] for r in idxTest]
    labelsTest = [labels[r] for r in idxTest]

    #build model for each column in yTrain
    models = []
    lenTrain = len(yTrain)
    lenTest = nrow - lenTrain
    for iModel in range(nlabels):
        yTemp = numpy.array([yTrain[j][iModel] for j in range(lenTrain)])
        models.append(enet_path(xTrain, yTemp,l1_ratio=1.0, fit_intercept=False, eps=0.5e-3, n_alphas=nAlphas , return_models=False))

    for iStep in range(1,nAlphas):
        #Assemble the predictions for all the models, find largest prediction and calc error
        allPredictions = []
        for iModel in range(nlabels):
            _, coefs, _ = models[iModel]
            predTemp = list(numpy.dot(xTest, coefs[:,iStep]))
            #un-normalize the prediction for comparison
            predUnNorm = [(predTemp[j]*ySD[iModel] + yMeans[iModel]) for j in range(len(predTemp))]
            allPredictions.append(predUnNorm)

        predictions = []
        for i in range(lenTest):
            listOfPredictions = [allPredictions[j][i] for j in range(nlabels) ]
            idxMax = listOfPredictions.index(max(listOfPredictions))
Example #30
y = diabetes.target

X /= X.std(0) # Standardize data (easier to set the rho parameter)

################################################################################
# Compute paths

eps = 5e-3  # the smaller eps is, the longer the path

print "Computing regularization path using the lasso..."
models = lasso_path(X, y, eps=eps)
alphas_lasso = np.array([model.alpha for model in models])
coefs_lasso = np.array([model.coef_ for model in models])

print "Computing regularization path using the elastic net..."
models = enet_path(X, y, eps=eps, rho=0.8)
alphas_enet = np.array([model.alpha for model in models])
coefs_enet = np.array([model.coef_ for model in models])

################################################################################
# Display results

ax = pl.gca()
ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
l1 = pl.plot(coefs_lasso)
l2 = pl.plot(coefs_enet, linestyle='--')

pl.xlabel('-Log(lambda)')
pl.ylabel('weights')
pl.title('Lasso and Elastic-Net Paths')
pl.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')
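This snippet uses the very old path API, in which lasso_path and enet_path returned lists of fitted models and rho was the old name for l1_ratio. A sketch of the same computation against the current API:

from sklearn import datasets
from sklearn.linear_model import lasso_path, enet_path

X, y = datasets.load_diabetes(return_X_y=True)
X /= X.std(axis=0)

eps = 5e-3
alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps=eps)
alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=0.8)  # rho -> l1_ratio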
Example #31
#> WRAIN      0.000000
#> DEGREES    0.074101
#> HRAIN     -0.041159
#> TIME_SV    0.024027
#> dtype: float64

my_test = pd.DataFrame([[500, 17, 120, 2]])
my_pipeline.predict(my_test)
#> array([-1.41981616])

### 8.6.2 Relationship between penalty strength and coefficients

As = np.e**np.arange(2, -5.5, -0.1)
B = 0.1

_, my_path, _ = enet_path(zscore(X), zscore(y), alphas=As, l1_ratio=B)

pd.DataFrame(my_path.T, columns=X.columns,
             index=np.log(As)).plot(xlabel='log A ( = log alpha)',
                                    ylabel='Coefficients')

### 8.6.3 Choosing the parameters $A,\,B$

As = np.linspace(0, 0.1, 21)
Bs = np.linspace(0, 0.1, 6)

my_pipeline = Pipeline([('sc', StandardScaler()), ('enet', ElasticNet())])
my_search = GridSearchCV(estimator=my_pipeline,
                         param_grid={
                             'enet__alpha': As,
                             'enet__l1_ratio': Bs
Example #32
    def enetpath_vs_enet(self, drug_name, alphas=None, l1_ratio=0.5, nfeat=5,
                         max_iter=1000, tol=1e-4, selection="cyclic", fit_intercept=False):
        """
        #if X is not scaled, the enetpath and ElasticNet will give slightly differen results
        #if X is scale using::

        from sklearn import preprocessing
        xscaled = preprocessing.scale(X)
        xscaled = pd.DataFrame(xscaled, columns=X.columns)
        """
        # 1. use enet to loop over alphas and then plot the coefficients along alpha
        # for each feature

        # Get the data for the requested drug
        xscaled, Y = self._get_one_drug_data(drug_name)

        alphas, coefs1, _ = enet_path(xscaled, Y, l1_ratio=l1_ratio, alphas=alphas)
        pylab.figure(1)
        pylab.clf()
        for this in coefs1:
            pylab.plot(pylab.log(alphas), this)

        self.alphas1 = alphas
        self.coefs1 = coefs1

        # Identify the first 5

        # 2. should be equivalent to using ElasticNet for each alpha
        coefs2 = []
        # if alphas is None, it will be created automatically from enet_path
        for alpha in alphas:
            # to have same results as in enet_path, normalize must be set to
            # False when X is scaled.
            en = sklearn.linear_model.ElasticNet(l1_ratio=l1_ratio, alpha=alpha,
                            max_iter=max_iter, tol=tol, selection=selection,
                            fit_intercept=fit_intercept)
            res = en.fit(xscaled, Y)
            coefs2.append(res.coef_)
        coefs2 = np.array(coefs2).transpose()
        pylab.figure(2)
        pylab.clf()
        for this in coefs2:
            pylab.plot(pylab.log(alphas), this)

        self.coefs2 = coefs2

        #pylab.plot(-pylab.log(res.coef_))

        pylab.figure(3)
        pylab.clf()
        self.df1 = pd.DataFrame(coefs1.transpose(), columns=xscaled.columns)
        self.df2 = pd.DataFrame(coefs2.transpose(), columns=xscaled.columns)

        (self.df1 == 0).sum().plot()
        (self.df2 == 0).sum().plot()


        self.indices1 = (self.df1 == 0).sum().sort_values().iloc[0:nfeat]
        self.indices2 = (self.df2 == 0).sum().sort_values().iloc[0:nfeat]
        names1 = self.indices1.index
        names2 = self.indices2.index
        print(names2)
Example #33
    xNormalized.append(rowNormalized)

#normalize labels to center

meanLabel = sum(labels)/nrow
sdLabel = sqrt(sum([(labels[i] - meanLabel) * (labels[i] - meanLabel) for i in range(nrow)])/nrow)

labelNormalized = [(labels[i] - meanLabel)/sdLabel for i in range(nrow)]

#Convert normalized labels to numpy array
Y = numpy.array(labelNormalized)

#Convert normalized attributes to numpy array
X = numpy.array(xNormalized)

alphas, coefs, _ = enet_path(X, Y,l1_ratio=0.8, fit_intercept=False, return_models=False)

plot.plot(alphas,coefs.T)

plot.xlabel('alpha')
plot.ylabel('Coefficients')
plot.axis('tight')
plot.semilogx()
ax = plot.gca()
ax.invert_xaxis()
plot.show()

nattr, nalpha = coefs.shape

#find coefficient ordering
nzList = []
Example #34
    xNormalized.append(rowNormalized)

#normalize labels to center

meanLabel = sum(labels)/nrow
sdLabel = sqrt(sum([(labels[i] - meanLabel) * (labels[i] - meanLabel) for i in range(nrow)])/nrow)

labelNormalized = [(labels[i] - meanLabel)/sdLabel for i in range(nrow)]

#Convert normalized labels to numpy array
Y = numpy.array(labelNormalized)

#Convert normalized attributes to numpy array
X = numpy.array(xNormalized)

alphas, coefs, _ = enet_path(X, Y,l1_ratio=0.8, fit_intercept=False, return_models=False)

plot.plot(alphas,coefs.T)

plot.xlabel('alpha')
plot.ylabel('Coefficients')
plot.axis('tight')
plot.semilogx()
ax = plot.gca()
ax.invert_xaxis()
plot.show()

nattr, nalpha = coefs.shape

#find coefficient ordering
nzList = []

# number of cross validation folds
nxval = 10

for ixval in range(nxval):
    # Define test and training index sets
    idxTest = [a for a in range(nrow) if a % nxval == ixval % nxval]
    idxTrain = [a for a in range(nrow) if a % nxval != ixval % nxval]

    # Define test and training attribute and label sets
    xTrain = numpy.array([xNormalized[r] for r in idxTrain])
    xTest = numpy.array([xNormalized[r] for r in idxTest])
    labelTrain = numpy.array([labelNormalized[r] for r in idxTrain])
    labelTest = numpy.array([labelNormalized[r] for r in idxTest])
    alphas, coefs, _ = enet_path(xTrain, labelTrain, l1_ratio=0.8, fit_intercept=False, return_models=False)

    # apply coefs to test data to produce predictions and accumulate
    if ixval == 0:
        pred = numpy.dot(xTest, coefs)
        yOut = labelTest
    else:
        # accumulate predictions
        yTemp = numpy.array(yOut)
        yOut = numpy.concatenate((yTemp, labelTest), axis=0)

        # accumulate predictions
        predTemp = numpy.array(pred)
        pred = numpy.concatenate((predTemp, numpy.dot(xTest, coefs)), axis=0)

Example #36
def train_models(train_x, train_y, n_labels, n_alphas):
    for i_model in range(n_labels):
        current_y = train_y[:, i_model]
        yield enet_path(train_x, current_y, l1_ratio=1.0, eps=0.5e-3,
                        n_alphas=n_alphas, return_models=False)
Example #37
    print("  R2: %s" % r2)

    # Log mlflow attributes for mlflow UI
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    mlflow.sklearn.log_model(lr, "model")


    # Compute paths
    eps = 5e-3  # the smaller eps is, the longer the path

    print("Computing regularization path using the elastic net.")
    alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=l1_ratio, fit_intercept=False)

    # Display results
    fig = plt.figure(1)
    ax = plt.gca()

    colors = cycle(['b', 'r', 'g', 'c', 'k'])
    neg_log_alphas_enet = -np.log10(alphas_enet)
    for coef_e, c in zip(coefs_enet, colors):
        l2 = plt.plot(neg_log_alphas_enet, coef_e, linestyle='--', c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    title = 'ElasticNet Path by alpha for l1_ratio = ' + str(l1_ratio)
    plt.title(title)
    plt.axis('tight')
Example #38
y = diabetes.target

X /= X.std(axis=0)  # Standardize data (easier to set the l1_ratio parameter)

# Compute paths

eps = 5e-3  # the smaller eps is, the longer the path

print("Computing regularization path using the lasso...")
alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept=False)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
    X, y, eps, positive=True, fit_intercept=False)
print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(
    X, y, eps=eps, l1_ratio=0.8, fit_intercept=False)

print("Computing regularization path using the positve elastic net...")
alphas_positive_enet, coefs_positive_enet, _ = enet_path(
    X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)

# Display results

plt.figure(1)
ax = plt.gca()
ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
l1 = plt.plot(-np.log10(alphas_lasso), coefs_lasso.T)
l2 = plt.plot(-np.log10(alphas_enet), coefs_enet.T, linestyle='--')

plt.xlabel('-Log(alpha)')
plt.ylabel('coefficients')
Example #39
File: cv.py Project: nazerat/PySAT
def path_calc(X,
              y,
              X_holdout,
              y_holdout,
              alphas,
              paramgrid,
              colname='CV',
              yname='',
              method='Elastic Net'):
    #make a copy of the parameters before popping things off
    copy_params = copy.deepcopy(paramgrid)
    fit_intercept = copy_params.pop('fit_intercept')
    precompute = copy_params.pop('precompute')
    copy_X = copy_params.pop('copy_X')
    normalize = False

    # this code adapted from sklearn ElasticNet fit function, which unfortunately doesn't accept multiple alphas at once
    X, y = check_X_y(X,
                     y,
                     accept_sparse='csc',
                     order='F',
                     dtype=[np.float64, np.float32],
                     copy=copy_X and fit_intercept,
                     multi_output=True,
                     y_numeric=True)
    y = check_array(y,
                    order='F',
                    copy=False,
                    dtype=X.dtype.type,
                    ensure_2d=False)

    #this is the step that gives the data to find intercept if fit_intercept is true.
    X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(X,
                                                                 y,
                                                                 None,
                                                                 precompute,
                                                                 normalize,
                                                                 fit_intercept,
                                                                 copy=False)
    y = np.squeeze(y)

    #do the path calculation, and tell how long it took
    print('Calculating path...')
    start_t = time.time()
    if method == 'Elastic Net':
        path_alphas, path_coefs, path_gaps, path_iters = enet_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    if method == 'LASSO':
        path_alphas, path_coefs, path_gaps, path_iters = lasso_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    dt = time.time() - start_t
    print('Took ' + str(dt) + ' seconds')

    #create some empty arrays to store the result
    y_pred_holdouts = np.empty(shape=(len(alphas), len(y_holdout)))
    intercepts = np.empty(shape=(len(alphas)))
    rmses = np.empty(shape=(len(alphas)))
    cvcols = []
    for j in list(range(len(path_alphas))):

        coef_temp = path_coefs[:, j]

        if fit_intercept:
            coef_temp = coef_temp / X_scale
            intercept = y_offset - np.dot(X_offset, coef_temp.T)
        else:
            intercept = 0.

        y_pred_holdouts[j, :] = np.dot(X_holdout, path_coefs[:, j]) + intercept
        intercepts[j] = intercept
        rmses[j] = RMSE(y_pred_holdouts[j, :], y_holdout)
        cvcols.append(
            ('predict', '"' + method + ' - ' + yname + ' - ' + colname +
             ' - Alpha:' + str(path_alphas[j]) + ' - ' + str(paramgrid) + '"'))

    return path_alphas, path_coefs, intercepts, path_iters, y_pred_holdouts, rmses, cvcols
Example #40
def compute_paths(X, y, eps, path):
    import os
    from sklearn.linear_model import lasso_path, enet_path
    import matplotlib.pyplot as plt
    import numpy as np
    from itertools import cycle

    print("Computing regularization path using the lasso...")
    alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept=False)
    print("Computing regularization path using the positive lasso...")
    alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
        X, y, eps, positive=True, fit_intercept=False)
    print("Computing regularization path using the elastic net...")
    alphas_enet, coefs_enet, _ = enet_path(X,
                                           y,
                                           eps=eps,
                                           l1_ratio=0.8,
                                           fit_intercept=False)
    print("Computing regularization path using the positive elastic net...")
    alphas_positive_enet, coefs_positive_enet, _ = enet_path(
        X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)

    # Lasso and Elastic-Net Paths
    fig1 = plt.figure(1)
    ax = plt.gca()

    colors = cycle(['b', 'r', 'g', 'c', 'k'])
    neg_log_alphas_lasso = -np.log10(alphas_lasso)
    neg_log_alphas_enet = -np.log10(alphas_enet)
    for coef_l, coef_e, c in zip(coefs_lasso, coefs_enet, colors):
        l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)
        l2 = plt.plot(neg_log_alphas_enet, coef_e, linestyle='--', c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and Elastic-Net Paths')
    plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')
    plt.axis('tight')

    # Lasso and Positive Lasso
    fig2 = plt.figure(2)
    ax = plt.gca()
    neg_log_alphas_positive_lasso = -np.log10(alphas_positive_lasso)
    for coef_l, coef_pl, c in zip(coefs_lasso, coefs_positive_lasso, colors):
        l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)
        l2 = plt.plot(neg_log_alphas_positive_lasso,
                      coef_pl,
                      linestyle='--',
                      c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and positive Lasso')
    plt.legend((l1[-1], l2[-1]), ('Lasso', 'positive Lasso'), loc='lower left')
    plt.axis('tight')

    # Elastic Net and Positive Elastic Net
    fig3 = plt.figure(3)
    ax = plt.gca()
    neg_log_alphas_positive_enet = -np.log10(alphas_positive_enet)
    for (coef_e, coef_pe, c) in zip(coefs_enet, coefs_positive_enet, colors):
        l1 = plt.plot(neg_log_alphas_enet, coef_e, c=c)
        l2 = plt.plot(neg_log_alphas_positive_enet,
                      coef_pe,
                      linestyle='--',
                      c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Elastic-Net and positive Elastic-Net')
    plt.legend((l1[-1], l2[-1]), ('Elastic-Net', 'positive Elastic-Net'),
               loc='lower left')
    plt.axis('tight')

    # Save figures, removing any stale copies first
    for name in ("plot1.png", "plot2.png", "plot3.png"):
        if os.path.isfile(path + name):
            os.remove(path + name)
    fig1.savefig(path + "plot1.png")
    fig2.savefig(path + "plot2.png")
    fig3.savefig(path + "plot3.png")
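For reference, a minimal driver for compute_paths; the diabetes data matches the other examples here, but the call itself is hypothetical and assumes a writable figs/ directory:

import os
from sklearn import datasets

X, y = datasets.load_diabetes(return_X_y=True)
X /= X.std(axis=0)  # standardize features, as in the examples above

os.makedirs("figs", exist_ok=True)  # hypothetical output directory
compute_paths(X, y, eps=5e-3, path="figs/")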
Example #41
0
# Compute paths

eps = 5e-3  # the smaller it is, the longer the path

print("Computing regularization path using the lasso...")
alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps=eps)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
    X, y, eps=eps, positive=True)

print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=0.8)

print("Computing regularization path using the positive elastic net...")
alphas_positive_enet, coefs_positive_enet, _ = enet_path(
    X, y, eps=eps, l1_ratio=0.8, positive=True)

###############################################################################
# Display results

pl.figure(1)
ax = pl.gca()
ax.set_prop_cycle(color=2 * ['b', 'r', 'g', 'c', 'k'])
l1 = pl.plot(-np.log10(alphas_lasso), coefs_lasso.T)
Example #42
0
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    # Log mlflow attributes for mlflow UI
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    mlflow.sklearn.log_model(lr, "model")

    # Compute paths
    eps = 5e-3  # the smaller it is, the longer the path

    print("Computing regularization path using the elastic net.")
    alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=l1_ratio)

    # Display results
    fig = plt.figure(1)
    ax = plt.gca()

    colors = cycle(["b", "r", "g", "c", "k"])
    neg_log_alphas_enet = -np.log10(alphas_enet)
    for coef_e, c in zip(coefs_enet, colors):
        l2 = plt.plot(neg_log_alphas_enet, coef_e, linestyle="--", c=c)

    plt.xlabel("-Log(alpha)")
    plt.ylabel("coefficients")
    title = "ElasticNet Path by alpha for l1_ratio = " + str(l1_ratio)
    plt.title(title)
    plt.axis("tight")
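The snippet builds the path figure but never persists it; assuming an active MLflow run as above, a follow-up along these lines would attach the plot to the run (the file name is a placeholder):

# Hypothetical follow-up: save the figure and log it to the active run.
plot_file = "ElasticNet-paths.png"  # placeholder name
fig.savefig(plot_file)
mlflow.log_artifact(plot_file)
plt.close(fig)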
Example #43
0
def train(data_path, alpha, l1_ratio, run_origin="none"):
    print("run_origin:", run_origin)
    np.random.seed(40)

    # Read the wine-quality csv file
    print("data_path:", data_path)
    data = pd.read_csv(data_path)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    mlflow.set_experiment(experiment_name)
    client = mlflow.tracking.MlflowClient()
    experiment_id = client.get_experiment_by_name(
        experiment_name).experiment_id
    print("experiment_id:", experiment_id)

    current_file = os.path.basename(__file__)
    with mlflow.start_run(source_name=current_file) as run:
        run_id = run.info.run_uuid
        print("run_id:", run_id)
        clf = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        clf.fit(train_x, train_y)

        predicted_qualities = clf.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha={}, l1_ratio={}):".format(
            alpha, l1_ratio))
        print("  RMSE:", rmse)
        print("  MAE:", mae)
        print("  R2:", r2)

        mlflow.log_param("data_path", data_path)
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_param("run_origin", run_origin)

        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        mlflow.set_tag("platform", platform.system())
        mlflow.set_tag("run_origin", run_origin)

        mlflow.sklearn.log_model(clf, "model")

        eps = 5e-3  # the smaller it is, the longer the path
        X = data.drop(["quality"], axis=1).values
        y = data[["quality"]].values.ravel()

        alphas_enet, coefs_enet, _ = enet_path(X,
                                               y,
                                               eps=eps,
                                               l1_ratio=l1_ratio,
                                               fit_intercept=False)
        plot_file = "wine_ElasticNet-paths_{}_{}.png".format(alpha, l1_ratio)
        plot_utils.plot_enet_descent_path(X, y, l1_ratio, alphas_enet,
                                          coefs_enet, plot_file)
        mlflow.log_artifact(plot_file)

    return (experiment_id, run_id)
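A hypothetical invocation of train(); the CSV path and hyperparameters are placeholders, not values from the original source:

experiment_id, run_id = train("wine-quality.csv", alpha=0.5, l1_ratio=0.5)
print("experiment_id:", experiment_id, "run_id:", run_id)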
Example #44
0
    def compute_save_multi_task_elastic_net_path(self,
                                                 phi_tilde,
                                                 X,
                                                 max_iter=100000,
                                                 tol=1e-12,
                                                 l1_ratio=0.99):
        """ computing multi task elastic net and save the coefficient of the path """

        num_alpha = len(self.alpha_range)

        ## 1. normalize the features by scaling each modal amplitude to 1
        if self.normalize_phi_tilde:

            print("EDMD/KDMD dictionary features are normalized...")
            scaling_transform = np.diag(1. / np.abs(phi_tilde[0, :]))
            inverse_scaling = np.linalg.inv(scaling_transform)
            assert np.linalg.norm(scaling_transform @ inverse_scaling -
                                  np.eye(scaling_transform.shape[0])) < 1e-6
            phi_tilde_scaled = phi_tilde @ scaling_transform
            print('norm = ', np.linalg.norm(phi_tilde_scaled, axis=0))
            print(phi_tilde_scaled.shape)
            print(phi_tilde_scaled[0, :])

        else:

            phi_tilde_scaled = phi_tilde

        ## 2. augment the complex AX = B problem into an equivalent AX = B problem
        ##    with real entries, since the current package only supports real arrays

        # -- note: must run sequentially to get the good convergence behavior shown on my GitHub page.

        a = np.hstack([np.real(phi_tilde_scaled), -np.imag(phi_tilde_scaled)])
        b = np.hstack([np.imag(phi_tilde_scaled), np.real(phi_tilde_scaled)])
        phi_tilde_aug = np.vstack([a, b])
        X_aug = np.vstack([X, np.zeros(X.shape)])
        num_data = X.shape[0]
        num_target = X.shape[1]
        alphas_enet, coefs_enet_aug, _ = enet_path(phi_tilde_aug,
                                                   X_aug,
                                                   max_iter=max_iter,
                                                   tol=tol,
                                                   alphas=self.alpha_range,
                                                   l1_ratio=l1_ratio,
                                                   fit_intercept=False,
                                                   check_input=True,
                                                   verbose=0)
        num_total_eigen_func = int(coefs_enet_aug.shape[1] / 2)

        # get the real and image part from the complex solution
        coefs_enet_real = coefs_enet_aug[:, :num_total_eigen_func, :]
        coefs_enet_imag = coefs_enet_aug[:, num_total_eigen_func:, :]
        assert coefs_enet_imag.shape == coefs_enet_real.shape

        # combine them into a complex array for the final results
        coefs_enet_comp = coefs_enet_real + 1j * coefs_enet_imag

        ## 2.5 remove features whose coefficients are smaller than truncation_threshold of the per-alpha max, because most often they are negligible
        for i_alpha in range(coefs_enet_comp.shape[2]):
            for i_target in range(coefs_enet_comp.shape[0]):
                coef_cutoff_value = self.truncation_threshold * np.max(
                    abs(coefs_enet_comp[i_target, :, i_alpha]))
                index_remove = abs(
                    coefs_enet_comp[i_target, :, i_alpha]) < coef_cutoff_value
                coefs_enet_comp[i_target, index_remove, i_alpha] = 0 + 0j

        ## 2.7 given the selected features, do an LS refit to remove the bias introduced by the regularization
        for i_alpha in range(coefs_enet_comp.shape[2]):
            bool_non_zero = np.linalg.norm(coefs_enet_comp[:, :, i_alpha],
                                           axis=0) > 0
            phi_tilde_scaled_reduced = phi_tilde_scaled[:, bool_non_zero]
            coef_enet_comp_reduced_i_alpha = np.linalg.lstsq(
                phi_tilde_scaled_reduced, X, rcond=None)[0]
            coefs_enet_comp[:, bool_non_zero,
                            i_alpha] = coef_enet_comp_reduced_i_alpha.T
            coefs_enet_comp[:, np.invert(bool_non_zero), i_alpha] = 0

        ## 3. compute the residual for the parameter sweep, to draw the trade-off plot of num. non-zero terms vs. reconstruction residual

        # convert complex array into mag.
        coefs_enet_mag = np.abs(coefs_enet_comp)

        def compute_residual_list(i):
            # reconstruction residual over the original (non-augmented) rows,
            # normalized by the L2 norm of X
            residual = np.linalg.norm(
                X - np.matmul(phi_tilde_scaled,
                              coefs_enet_comp[:, :, i].T)[:num_data])
            residual = residual / np.linalg.norm(X)
            return residual

        residual_array = np.array(
            joblib.Parallel(n_jobs=N_CPU)(
                joblib.delayed(compute_residual_list)(i)
                for i in range(num_alpha)))

        # finally, undo the scaling to compute the complex Koopman modes on the kept eigenfunctions
        if self.normalize_phi_tilde:
            phi_tilde_scaled = phi_tilde_scaled @ inverse_scaling

        # save data for trade-off plot
        np.savez(self.save_dir + 'MultiTaskElasticNet_result.npz',
                 alphas_enet=alphas_enet,
                 coefs_enet=coefs_enet_mag,
                 coefs_enet_comp=coefs_enet_comp,
                 residual_array=residual_array)

        ## 4. sweep for each alpha

        # 1. for each alpha, get the non-zero indices of the selected eigenfunctions
        print("evaluation module... start alpha sweeping...")
        mkdir(self.save_dir + 'sweep')

        sweep_index_list = []
        for ii, alpha in enumerate(alphas_enet):

            print("current alpha = ", alpha, " index = ", ii)
            alpha_dir_eval = self.save_dir + 'sweep/sweep_alpha_' + str(alpha)
            mkdir(alpha_dir_eval)

            # compute selected index
            non_zero_index_bool_array = np.linalg.norm(
                coefs_enet_comp[:, :, ii], axis=0) > 0
            sweep_index_list.append(non_zero_index_bool_array)

        return sweep_index_list
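The real-valued augmentation in step 2 is easy to sanity-check in isolation; the standalone sketch below uses plain least squares instead of the elastic net, with all names local to the sketch:

import numpy as np

# Sanity check of the complex-to-real augmentation used above:
# solve A @ K = B (A complex, B real) via a stacked real system.
rng = np.random.default_rng(0)
A = rng.standard_normal((8, 3)) + 1j * rng.standard_normal((8, 3))
B = rng.standard_normal((8, 2))  # real targets, like X above

# Enforce Re(A @ K) = B and Im(A @ K) = 0, as in the code above.
A_aug = np.vstack([
    np.hstack([A.real, -A.imag]),
    np.hstack([A.imag, A.real]),
])
B_aug = np.vstack([B, np.zeros_like(B)])

K_aug, *_ = np.linalg.lstsq(A_aug, B_aug, rcond=None)
K = K_aug[:3] + 1j * K_aug[3:]  # reassemble the complex solution

# Agrees with solving the complex least-squares problem directly.
K_direct, *_ = np.linalg.lstsq(A, B.astype(complex), rcond=None)
assert np.allclose(K, K_direct)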
eps = 5e-3  # the smaller it is, the longer the path

print("Computing regularization path using the lasso...")
alphas_lasso, coefs_lasso, _ = lasso_path(X_train,
                                          y_train,
                                          eps=eps,
                                          fit_intercept=False)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
    X_train, y_train, eps=eps, positive=True, fit_intercept=False)

print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(X_train,
                                       y_train,
                                       eps=eps,
                                       l1_ratio=0.8,
                                       fit_intercept=False)

print("Computing regularization path using the positive elastic net...")
alphas_positive_enet, coefs_positive_enet, _ = enet_path(X_train,
                                                         y_train,
                                                         eps=eps,
                                                         l1_ratio=0.8,
                                                         positive=True,
                                                         fit_intercept=False)

plt.figure(1)
colors = cycle(['b', 'r', 'g', 'c', 'k'])
neg_log_alphas_lasso = -np.log10(alphas_lasso)
neg_log_alphas_enet = -np.log10(alphas_enet)
coefs = []
for a in alphas:
    clf.set_params(alpha=a, max_iter=1000)
    clf.fit(xtrain, ytrain)
    coefs.append(clf.coef_)

# lasso and elastic net path
eps = 5e-3  # the smaller it is, the longer the path

print("Computing regularization path using the lasso...")
alphas_lasso, coefs_lasso, _ = lasso_path(xtrain, ytrain, eps=eps, fit_intercept=False)

print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(
    xtrain, ytrain, eps=eps, l1_ratio=0.8, fit_intercept=False)

# Display results

plt.figure(1)
ax = plt.gca()
ax.set_prop_cycle(color=['b', 'r', 'g', 'c', 'k', 'y', 'm'])

ax.plot(alphas, coefs)
ax.set_xscale('log')
ax.set_xlim(ax.get_xlim()[::-1])  # reverse axis
plt.xlabel('alpha')
plt.ylabel('weights')
plt.title('Ridge coefficients as a function of the regularization')
plt.axis('tight')
plt.show()