def lassoRegression(X,y):

    print("\n### ~~~~~~~~~~~~~~~~~~~~ ###")
    print("Lasso Regression")

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    myDegree = 40
    polynomialFeatures = PolynomialFeatures(degree=myDegree, include_bias=False)
    Xp = polynomialFeatures.fit_transform(X)

    myScaler = StandardScaler()
    scaled_Xp = myScaler.fit_transform(Xp)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    lassoModel = Lasso(alpha=1e-7)  # renamed to avoid shadowing the enclosing function
    lassoModel.fit(scaled_Xp,y)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    dummyX = np.arange(0,2,0.01)
    dummyX = dummyX.reshape((dummyX.shape[0],1))
    dummyXp = polynomialFeatures.transform(dummyX)  # reuse the fitted expansion; do not refit
    scaled_dummyXp = myScaler.transform(dummyXp)
    dummyY = lassoModel.predict(scaled_dummyXp)

    outputFILE = 'plot-lassoRegression.png'
    fig, ax = plt.subplots()
    fig.set_size_inches(h = 6.0, w = 10.0)
    ax.axis([0,2,0,15])
    ax.scatter(X,y,color="black",s=10.0)
    ax.plot(dummyX, dummyY, color='red', linewidth=1.5)
    plt.savefig(outputFILE, bbox_inches='tight', pad_inches=0.2, dpi=600)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    return( None )
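
# A minimal usage sketch for lassoRegression (not in the original source),
# assuming the numpy / matplotlib / sklearn imports it relies on are in scope;
# the synthetic data below is shaped to fit the plot's [0, 2] x [0, 15] axes.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    X = 2.0 * rng.rand(100, 1)            # 100 inputs in [0, 2)
    y = 3.0 + 2.0 * X.ravel() + X.ravel()**2 + rng.randn(100)
    lassoRegression(X, y)                 # writes plot-lassoRegression.png
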
def pred_pH(train, val, test, all_vars, loop):
    data = (val, test, train)
    # variable selection
    pH_lassoed_vars = lass_varselect(train, all_vars, 'pH', .00000001)
    univ_selector = SelectKBest(score_func = f_regression, k = 1200)
    univ_selector.fit(train[all_vars], train['pH'])
    support_mask = univ_selector.get_support()  # boolean mask of selected features, not p-values
    chosen = []
    for x in range(0, len(all_vars)):
        if pH_lassoed_vars[x] | support_mask[x]:
            chosen.append(all_vars[x])
    lass_only =  []
    for x in range(0, len(all_vars)):
        if pH_lassoed_vars[x]:
            lass_only.append(all_vars[x])
    # nearest randomforest
    neigh = RandomForestRegressor(n_estimators=100)
    neigh.fit(train.loc[:, chosen], train['pH'])
    for dset in data:
        dset['pH_for_prds'] = neigh.predict(dset.loc[:, chosen])
    # lasso
    lass = Lasso(alpha=.000000275, positive=True)
    lass.fit(train[all_vars], train['pH'])
    for dset in data:
        dset['pH_las_prds'] = lass.predict(dset[all_vars])
    # ridge
    pH_ridge = RidgeCV(np.array([.6]), normalize=True)
    pH_ridge.fit(train[all_vars], train['pH'])
    for dset in data:
        dset['pH_rdg_prds'] = pH_ridge.predict(dset[all_vars])
    # combination
    models= [ 'pH_rdg_prds', 'pH_las_prds', 
              'pH_for_prds', 'pH_for_prds' ] 
    name = 'pH_prds' + str(loop)
    write_preds(models, name, train, val, test, 'pH')
Example No. 3
    def fit(self, sklearn_alpha=None, **lasso_args):
        """
        Fit the lasso using `Lasso` from `sklearn`.
        This sets the attribute `soln` and
        forms the constraints necessary for post-selection inference
        by calling `form_constraints()`.

        Parameters
        ----------

        sklearn_alpha : float
            Lagrange parameter, in the normalization set by `sklearn`.

        lasso_args : keyword args
             Passed to `sklearn.linear_model.Lasso`_

        Returns
        -------

        soln : np.float
             Solution to lasso with `sklearn_alpha=self.lagrange`.
             
        
        """

        # fit Lasso using scikit-learn
        
        clf = Lasso(alpha = self.lagrange, fit_intercept = False)
        clf.fit(self.X, self.y, **lasso_args)
        self._soln = beta = clf.coef_       
        if not np.all(beta == 0):
            self.form_constraints()
        else:
            self.active = []
        return self._soln
Example No. 4
def lasso_regression(features, solutions, verbose=0):
    columns = solutions.columns

    clf = Lasso(alpha=1e-4, max_iter=5000)

    print('Training Model... ')
    clf.fit(features, solutions)
    
    feature_coeff = clf.coef_
    features_importances = np.zeros((169, 3))
    for idx in range(3):
        features_importance = np.reshape(feature_coeff[idx, :], (169, 8))
        features_importance = np.max(features_importance, axis=1)
        features_importances[:, idx] = features_importance
        
    features_importance_max = np.max(features_importances, axis=1)
    features_importance_max = np.reshape(features_importance_max, (13, 13))
    plt.pcolor(features_importance_max)
    plt.title("Feature importance for HoG")
    plt.colorbar()
    plt.xticks(np.arange(0.5, 13.5), range(1, 14))
    plt.yticks(np.arange(0.5, 13.5), range(1, 14))
    plt.axis([0, 13, 0, 13])
    plt.show()
    
    print('Done Training')
    return (clf, columns)
Example No. 5
    def RunLASSOScikit(q):
      totalTimer = Timer()

      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
      responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

      # Get all the parameters.
      lambda1 = re.search(r"-l (\d+)", options)
      lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))
          
      try:
        with totalTimer:
          # Perform LASSO with the parsed regularization parameter.
          model = Lasso(alpha=lambda1)
          model.fit(inputData, responsesData)
          out = model.coef_
      except Exception as e:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
Example No. 6
    def lasso(self, training, target, feature_index_list):
        clf = Lasso(self.alpha, fit_intercept=False)
        clf.fit(training, target)
        # scatter the sub-model coefficients back into the full-length vector
        coef = np.zeros(self.n_features)
        for index, feature_index in enumerate(feature_index_list):
            coef[feature_index] = clf.coef_[index]
        return coef
def reg_skl_lasso(param, data):
    [X_tr, X_cv, y_class_tr, y_class_cv, y_reg_tr, y_reg_cv] = data
    lasso = Lasso(alpha=param["alpha"], normalize=True)
    lasso.fit(X_tr, y_reg_tr)
    pred = lasso.predict(X_cv)
    RMSEScore = getscoreRMSE(y_reg_cv, pred)
    return RMSEScore, pred
Example No. 8
class SparseSelector(BaseEstimator):
    """
    Sparse L1 based feature selection.  Parameters are passed onto
    sklearn.linear_model.Lasso, which actually does the work.
    """
    def __init__(self, alpha=1.0, fit_intercept=True, 
                 normalize=False):
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.lasso = None

    def fit(self, X, y):
        self.lasso = Lasso(alpha=self.alpha, 
                           fit_intercept=self.fit_intercept,
                           normalize=self.normalize)
        self.lasso.fit(X, y)
        return self
        
    def transform(self, X):
        cols = np.nonzero(self.lasso.sparse_coef_)[1]
        if sp.sparse.issparse(X):
            return X.tocsc()[:, cols]
        else:
            return X[:, cols]

    def fit_transform(self, X, y):
        self.fit(X, y)
        return self.transform(X)
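
# A minimal sketch (not from the original) of SparseSelector in use, assuming
# the numpy / scipy / BaseEstimator imports the class needs are in scope: with
# a suitable alpha the Lasso zeroes out uninformative columns, and transform()
# keeps only the surviving ones. The toy data below is illustrative.
if __name__ == '__main__':
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(200, 20)
    y = 3.0 * X[:, 0] - 2.0 * X[:, 3] + 0.1 * rng.randn(200)
    selector = SparseSelector(alpha=0.05)
    X_reduced = selector.fit_transform(X, y)
    print(X_reduced.shape)   # expect roughly (200, 2): only informative columns survive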
Example No. 9
def traverse_movies_lasso():
    LBMAP = getLBMap()
    DMAP = createEmpty()

    P_ERRORS, ERRORS = [], []

    training_data, training_response = [], []

    for i in range(len(data)):

        movie = data[i]
        m_rev = movie['revenue']

        myvector = vectorizeMovie(movie, LBMAP, DMAP)

        if i > 3695:
            model = Lasso(alpha=.05)
            model.fit(training_data, training_response)
            # predict expects a 2-D array, so wrap the single sample in a list
            raw = math.fabs(model.predict([myvector])[0] - m_rev)
            ERRORS.append(raw)
            #P_ERRORS.append(round(raw/m_rev, 4))

        training_data.append(myvector)
        training_response.append(m_rev)

        DMAP = update(movie, DMAP)

    #print('all', avg_float_list(P_ERRORS))
    print('all', avg_float_list(ERRORS))
Example No. 10
def precision_recall_samples(X, y):
    pr_lasso = precision_recall(support.T[-1], lasso_coefs(X, y))
    stability = stability_selection(X, y, pi=None)
    estimated = []
    for st in np.unique(stability):
        estimated.append(stability > st - 1.e-12)
    pr_ss = precision_recall(support.T[-1], estimated)

    n_samples, n_features = X.shape
    alpha_max = np.max(np.abs(np.dot(y, X))) / n_samples  # smallest penalty that zeroes all coefficients
    alpha = .1 * alpha_max
    clf = Lasso(alpha=alpha)
    abs_coef = np.abs(clf.fit(X, y).coef_)
    estimated = []
    for th in np.unique(abs_coef):
        estimated.append(abs_coef > th - 1.e-12)

    pr_pt = precision_recall(support.T[-1], estimated)
    clf = BootstrapLasso(alpha=alpha, n_bootstraps=n_bootstraps)
    abs_coef = np.abs(clf.fit(X, y).coef_)
    estimated = []
    for th in np.unique(abs_coef):
        estimated.append(abs_coef > th - 1.e-12)

    pr_bpt = precision_recall(support.T[-1], estimated)
    return pr_lasso, pr_ss, pr_pt, pr_bpt
Example No. 11
def lassoreg(a):
    print ("Doing lasso regression")
    clf2 = Lasso(alpha=a)
    clf2.fit(base_X, base_Y)
    print ("Score = %f" % clf2.score(base_X, base_Y))
    clf2_pred = clf2.predict(X_test)
    write_to_file("lasso.csv", clf2_pred)
    def train(self, x, y, param_names, random_search=100, **kwargs):
        start = time.time()
        scaled_x = self._set_and_preprocess(x=x, param_names=param_names)

        # Check that each input is between 0 and 1
        self._check_scaling(scaled_x=scaled_x)

        if self._debug:
            print("Shape of training data: ", scaled_x.shape)
            print("Param names: ", self._used_param_names)
            print("First training sample\n", scaled_x[0])
            print("Encode: ", self._encode)

        # Do a random search (honour the random_search argument instead of a hard-coded 100)
        alpha = self._random_search(random_iter=random_search, x=scaled_x, y=y)

        # Now train model
        lasso = Lasso(alpha=alpha,
                      fit_intercept=True,
                      normalize=False,
                      precompute=False,  # the old 'auto' option is no longer accepted by sklearn
                      copy_X=True,
                      max_iter=1000,
                      tol=0.0001,
                      warm_start=False,
                      positive=False)

        lasso.fit(scaled_x, y)
        self._model = lasso

        duration = time.time() - start
        self._training_finished = True
        return duration
def trainModel(x, y, degree=1):
    """Self designed Explicit method to train the model using linear regression."""
    #poly = PolynomialFeatures(degree)
    #z = poly.fit_transform(x)
    #return np.dot(np.linalg.pinv(z), y)
    clf = Lasso(alpha=.5)
    clf.fit(x, y)
    return clf
Example No. 14
    def classify(self):
        """Perform classification"""
        clf = Lasso(max_iter=10000000)
        #parameters = {'alpha':[0.001,0.005,0.01,0.05,0.1,0.5,1,5.0,10.0]}
        #clf = GridSearchCV(lasso, parameters,scoring='roc_auc')

        clf.fit(self._ClassifyDriver__traindata, self._ClassifyDriver__trainlabels)
        self._ClassifyDriver__y = clf.predict(self._ClassifyDriver__testdata)
def lasso(data, targets):
    """
    Returns a Lasso linear model for predictions with alpha 0.1
    Takes the data and the associated targets as arguments.
    """
    model = Lasso(alpha=0.1)
    model.fit(data, targets)
    return model
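
# A quick illustrative call of the helper above (synthetic data, assuming
# numpy is imported; names are made up for demonstration only):
X_demo = np.random.rand(50, 4)
y_demo = X_demo.dot(np.array([1.0, 0.0, -2.0, 0.0])) + 0.01 * np.random.randn(50)
demo_model = lasso(X_demo, y_demo)
print(demo_model.coef_)   # the zero-weight features are driven toward 0
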
def varselect_w_lass(all_vars_list, selected_vars, alpha_val):
    lass = Lasso(alpha=alpha_val,
                 positive=True, max_iter=100000, tol=.0001)
    lass.fit(np.array(fire_train_TRAIN_smp[all_vars_list]),
             np.array(fire_train_TRAIN_smp.target))
    for x in range(len(all_vars_list)):
        if lass.coef_[x] > .00000001:
            selected_vars.append(all_vars_list[x])
def weight_analysis(verbose=0, stack_option='s'):
  logging.info('starting ensemble weight analysis')

  stack = STACK if stack_option == 's' else MODELS

  pool = multiprocessing.Pool(processes=4)
  drivers = settings.DRIVER_IDS#[:1000]
  CUTOFF = -1
  results = pool.map(
      compute_weights,
      map(lambda x: (x, verbose, stack_option), drivers)
  )

  predictions = {}
  for i, get_data, model, _ in stack:
    predictions[i] = np.array(list(itertools.chain(*[r[1][i] for r in results])))
  testY = list(itertools.chain(*[r[2] for r in results]))

  model_names = [
      ('%s.%s.%s' % (get_data.__name__, model.__name__, i), i)
      for i, get_data, model, repeat in stack
  ]
  model_names.sort(key=lambda x: x[0])
  keys = [x[1] for x in model_names]
  model_names = [x[0] for x in model_names]

  lasso = Lasso(alpha=0.0, positive=True)  # no penalty, just a non-negative least-squares blend
  trainX = []
  for row_id in range(len(testY)):
    train_row = [predictions[i][row_id] for i in keys]
    trainX.append(train_row)

  a, b = trainX[:CUTOFF], trainX[CUTOFF:]
  c, d = testY[:CUTOFF], testY[CUTOFF:]
  lasso.fit(a, c)
  pred = lasso.predict(b)
  pred_train = lasso.predict(a)
  #logging.info('auc: %s' % util.compute_auc(d, pred))

  logging.info('coefficients:')
  weights = {}
  for i, name in enumerate(model_names):
    logging.info('%s: %.3f' % (model_names[i], lasso.coef_[i]))
    weights[keys[i]] = lasso.coef_[i]

  logging.info('individual scores:')
  for i, key in enumerate(keys):
    logging.info('%s: %.3f' % (
        model_names[i],
        util.compute_auc(testY, predictions[key])
    ))

  logging.info('weights dictionary: %s' % weights)

  # and again in the end, so you don't have to scroll
  logging.info('------------')
  #logging.info('auc: %s' % util.compute_auc(d, pred))
  logging.info('auc train: %s' % util.compute_auc(c, pred_train))
Example No. 18
def comparaison_ridge_lasso(X,Y):
    X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.3,random_state=random.seed())
    clf_lasso = Lasso(selection='random', random_state=random.seed())
    clf_ridge = Ridge()
    clf_lasso.fit(X_train,Y_train)
    clf_ridge.fit(X_train,Y_train)
    score_lasso=clf_lasso.score(X_test,Y_test)
    score_ridge=clf_ridge.score(X_test,Y_test)
    print("Precision de Lasso={:3.2f}% \nPrecision de Ridge={:3.2f}%\n".format(score_lasso*100,score_ridge*100))
def trainModel_phase2(x, y, degree=1):
    """Self designed Explicit method to train the model using linear regression."""
    #poly = PolynomialFeatures(degree)
    #z = poly.fit_transform(x)
    #return np.dot(np.linalg.pinv(z), y)
    #clf = BernoulliRBM()
    #clf = LinearRegression()
    clf = Lasso(alpha=.5)
    clf.fit(x.reshape(-1, 1), y)
    return clf
Example No. 20
class Linear():
    def __init__(self, type='Ridge', alpha=3, C=1.0, nu=0.2, limit=None, \
            epsilon=0.1):
        self.limit = limit
        if type == 'Ridge':
            self.model = Ridge(alpha=alpha)
        elif type == 'SVR':
            self.model = SVR(kernel='linear', C=C, epsilon=epsilon)
        elif type == 'NuSVR':
            self.model = NuSVR(C=C, nu=nu, kernel='linear')
        elif type == 'Lasso':
            self.model = Lasso(alpha=alpha)
        
    @staticmethod
    def get_cal(m):
        # get qualitative (categorical) features
        # watch out as indices depend on feature vector!
        return np.hstack((m[:,:23], m[:,24:37], m[:,38:52])) + 1
    
    @staticmethod
    def get_cant(m):
        # get quantitative (numeric) features
        # watch out as indices depend on feature vector!
        return np.hstack((m[:,23:24], m[:,37:38], m[:,52:]))
        
    def fit(self, train_X, train_Y):
        # no fitting done here, just saving data
        if self.limit:
            if len(train_X) > self.limit:
                train_X = train_X[-self.limit:]
                train_Y = train_Y[-self.limit:]
        self.train_X = np.array(train_X)
        self.train_Y = np.array(train_Y)
        
        
    def predict(self, test_X):
        # fitting done here
        # not efficient on the long term
        test_X = np.array(test_X)
        enc = OneHotEncoder()
        scal = MinMaxScaler()
        data = np.vstack((self.train_X, test_X))
        enc.fit(self.get_cal(data))
        scal.fit(self.get_cant(data))
        
        new_train_X1 = enc.transform(self.get_cal(self.train_X))
        new_train_X2 = scal.transform(self.get_cant(self.train_X))
        new_train_X = scipy.sparse.hstack((new_train_X1, new_train_X2))
        new_test_X1 = enc.transform(self.get_cal(test_X))
        new_test_X2 = scal.transform(self.get_cant(test_X))
        new_test_X = scipy.sparse.hstack((new_test_X1, new_test_X2))
        
        self.model.fit(new_train_X, self.train_Y)
        R = self.model.predict(new_test_X)
        return R
Example No. 21
def test_lasso_regression():
    datafile_viper = '../data_viper/viper.pkl'
    viper = loadfile(datafile_viper)

    from sklearn.linear_model import Lasso

    model = Lasso(alpha=1e-3)
    model.fit(viper.train_feat, viper.train_y)

    y_pred = model.predict(viper.test_feat)
    print('testing error {}'.format(abs_error(y_pred, viper.test_y)))
def main(folds=5):
    print("folds: ", folds)
    # read in data, parse into training and target sets
    print("\n ------------------Load file --------------- \n")
    train = np.loadtxt(sys.argv[1]).T
    min_max_scaler = preprocessing.MinMaxScaler()
    train = min_max_scaler.fit_transform(train)
    # test data set
    xtest = train[100:112, :]
    train = train[0:100, :]
    print("Size of read data: ", train.shape)
    #train = imputation_missingValue(train)
    print("After Standardization:")
    print(train)

    target = np.loadtxt(sys.argv[2]).T
    ytest = target[100:112, :]
    target = target[0:100, :]
    print("Size of read data: ", target.shape)

    al = 0.3
    rf = Lasso(alpha=al)

    # Simple K-Fold cross-validation.
    cv = cross_validation.KFold(len(train), folds)
    # iterate through the training and test cross-validation segments and
    # run the regressor on each one, aggregating the results into a list
    results = []
    i = 0
    min_MSE = sys.maxsize
    best_train = -1
    best_test = -1
    for traincv, testcv in cv:
        start = timeit.default_timer()
        i += 1
        print(i, "epoch")
        rf.fit(train[traincv], target[traincv])
        prediction = rf.predict(train[testcv])
        MSE = mean_squared_error(target[testcv], prediction)
        print("MSE: ", MSE, " for ", i)
        if min_MSE > MSE:
            best_train = traincv
            best_test = testcv
            min_MSE = MSE
        results.append(MSE)
        stop = timeit.default_timer()
        print("Program running time: ", stop - start)
    # print out the mean of the cross-validated results
    print("Results: " + str(np.array(results).mean()), "for folds: ", folds)
    print("Results for independent data: ",
          mean_squared_error(rf.fit(train[best_train], target[best_train]).predict(xtest), ytest))
    print("R squared:")
    print("alpha:", al)
def fit_predict_model(l1_penalty):
    RSS = np.zeros((len(l1_penalty)))
    num_nonzero_coeff = np.zeros((len(l1_penalty)))
    idx = 0
    for l1_penalty_choice in l1_penalty:
        model = Lasso(alpha=l1_penalty_choice, normalize=True)
        model.fit(training[all_features], training['price'])
        predicted_price = model.predict(validation[all_features])
        RSS[idx] = np.sum((predicted_price - validation['price'])**2)
        num_nonzero_coeff[idx] = np.count_nonzero(model.coef_) + np.count_nonzero(model.intercept_)
        idx += 1
    return (RSS, num_nonzero_coeff, model)
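
# Hypothetical usage sketch for fit_predict_model (training / validation /
# all_features come from the surrounding script and are not defined here):
# l1_penalty = np.logspace(1, 7, num=13)
# RSS, num_nonzero_coeff, model = fit_predict_model(l1_penalty)
# best_l1 = l1_penalty[np.argmin(RSS)]   # penalty with lowest validation RSS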
def lasso_regression(data, predictors, alpha):
    #Fit the model
    lassoreg = Lasso(alpha=alpha, normalize=True, max_iter=100000)
    lassoreg.fit(data[predictors],data['TransformedLife'])
    y_pred = lassoreg.predict(data[predictors])
    
    #Return the result in pre-defined format
    rss = sum((y_pred-data['TransformedLife'])**2)
    ret = [rss]
    ret.extend([lassoreg.intercept_])
    ret.extend(lassoreg.coef_)
    return ret
Example No. 25
def lasso_regression(alpha):
    #Fit the model
    lassoreg = Lasso(alpha=alpha, normalize=True, max_iter=100000)
    lassoreg.fit(A_x, A_y)
    y_pred = lassoreg.predict(A_x)
    
    #Return the result in pre-defined format
    rss = sum((y_pred-A_y)**2)
    ret = [rss]
    ret.extend([lassoreg.intercept_])
    ret.extend(lassoreg.coef_)
    return ret
def basispursuit(y, F, penalty=0.1):
    """
    solves basic (vanilla) basis pursuit using scikit-learn
    """

    clf = Lasso(alpha=penalty, fit_intercept=False)
    clf.fit(F, y)
    xhat = clf.coef_

    # reconstruct
    yhat = F.dot(xhat)

    return xhat, yhat
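
# A minimal sketch of basispursuit on synthetic data (names F / x_true are
# illustrative only): recover a sparse vector from an overcomplete dictionary.
if __name__ == '__main__':
    import numpy as np
    rng = np.random.RandomState(0)
    F = rng.randn(100, 300)                     # overcomplete dictionary
    x_true = np.zeros(300)
    x_true[[5, 42, 77]] = [1.0, -2.0, 0.5]      # 3-sparse signal
    y = F.dot(x_true)
    xhat, yhat = basispursuit(y, F, penalty=0.01)
    print(np.flatnonzero(np.abs(xhat) > 1e-3))  # recovered support, ideally [5 42 77]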
Example No. 27
def fringeremoval(img_list, ref_list, mask='all', method='svd'):

    nimgs = len(img_list)
    nimgsR = len(ref_list)
    xdim = img_list[0].shape[0]
    ydim = img_list[0].shape[1]
    
    if isinstance(mask, str) and mask == 'all':
        bgmask = np.ones([ydim, xdim])
        # around 2% OD reduction with no mask
    else:
        bgmask = mask
        
    k = (bgmask == 1).flatten('F')  # column-major; modern numpy needs an order string, not flatten(1)
    
    # needs to be >float32 since float16 doesn't work with linalg
    
    R = np.dstack(ref_list).reshape((xdim*ydim, nimgsR)).astype(np.float32)
    A = np.dstack(img_list).reshape((xdim*ydim, nimgs)).astype(np.float32)
     
    # Timings: for 50 ref images lasso is twice as slow
    # lasso 1.00
    # svd 0.54
    # lu 0.54
    
    optref_list = []
    
    for j in range(A.shape[1]):
        
        if method == 'svd':
            b = R[k, :].T.dot(A[k, j])
            Binv = pinv(R[k, :].T.dot(R[k, :])) # svd through pinv
            c = Binv.dot(b)
            # can also try linalg.svd()
            
        elif method == 'lu':
            b = R[k, :].T.dot(A[k, j])
            p, L, U = lu(R[k, :].T.dot(R[k, :]))
            c = solve(U, solve(L, p.T.dot(b)))
            
        elif method == 'lasso':
            lasso = Lasso(alpha=0.01)
            # fit on the masked pixels of the current image, as in the other branches
            lasso.fit(R[k, :], A[k, j])
            c = lasso.coef_
            
        else:
            raise Exception('Invalid method.')
        
        optref_list.append(np.reshape(R.dot(c), (xdim, ydim)))
    
    return optref_list
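
# Hypothetical usage sketch for fringeremoval (synthetic frames, illustrative
# only): img_list and ref_list are lists of same-shaped 2-D arrays.
# refs = [np.random.rand(64, 64) for _ in range(20)]
# imgs = [np.random.rand(64, 64) for _ in range(3)]
# optref_list = fringeremoval(imgs, refs, method='svd')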
Example No. 28
def test_lasso_vs_graph_net():
    # Test for one of the extreme cases of Graph-Net: That is, with
    # l1_ratio = 1 (pure Lasso), we compare Graph-Net's performance with
    # Scikit-Learn lasso
    lasso = Lasso(max_iter=100, tol=1e-8, normalize=False)
    graph_net = BaseSpaceNet(mask=mask, alphas=1. * X_.shape[0],
                             l1_ratios=1, is_classif=False,
                             penalty="graph-net", max_iter=100)
    lasso.fit(X_, y)
    graph_net.fit(X, y)
    lasso_perf = 0.5 / y.size * extmath.norm(np.dot(
        X_, lasso.coef_) - y) ** 2 + np.sum(np.abs(lasso.coef_))
    graph_net_perf = 0.5 * ((graph_net.predict(X) - y) ** 2).mean()
    np.testing.assert_almost_equal(graph_net_perf, lasso_perf, decimal=3)
Example No. 29
def linearReg():
    sl=Lasso(alpha=0.2)

    sl.fit(features_array,values_array)

    predict_val=sl.predict(features_array)

    print(sl.coef_)
    print(sl.score(features_array,values_array))

    fig = plt.figure()
    ax = plt.subplot(111)
    ax.bar(range(0,features.shape[1]),sl.coef_)
    plt.show()
Example No. 30
def comparaison_moindres_carres(X,Y):
    X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.3,random_state=random.seed())
    clf_lasso = Lasso(selection='random', random_state=random.seed())
    clf_ridge = Ridge()
    clf_reg_lin = LinearRegression(n_jobs=-1)
    clf_lasso.fit(X_train,Y_train)
    clf_ridge.fit(X_train,Y_train)
    clf_reg_lin.fit(X_train,Y_train)
    Y_lasso=clf_lasso.predict(X_test)
    Y_ridge=clf_ridge.predict(X_test)
    Y_reg_lin=clf_reg_lin.predict(X_test)
    err_lasso=mean_squared_error(Y_test,Y_lasso)
    err_ridge=mean_squared_error(Y_test,Y_ridge)
    err_reg_lin=mean_squared_error(Y_test,Y_reg_lin)
    print("Erreur de Lasso={:1.2f}\nErreur de Ridge={:1.2f}\nErreur de regression lineaire={:1.2f}\n".format(err_lasso,err_ridge,err_reg_lin))
Example No. 31
import statsmodels.api as sm
X_sm = X = sm.add_constant(X)
model = sm.OLS(y,X_sm)
model.fit().summary()

from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import cross_val_score

lm = LinearRegression()
lm.fit(X_train, y_train)

print(np.mean(cross_val_score(lm, X_train, y_train, scoring='neg_mean_absolute_error', cv=3)))

# lasso regression
lm_l = Lasso(alpha=.13)
lm_l.fit(X_train,y_train)
print(np.mean(cross_val_score(lm_l, X_train, y_train, scoring='neg_mean_absolute_error', cv=3)))

alpha = []
error = []

for i in range(1,100):
    alpha.append(i/100)
    lnl = Lasso(alpha=(i/100))
    error.append(np.mean(cross_val_score(lnl, X_train, y_train, scoring='neg_mean_absolute_error', cv=3)))

# plt.plot(alpha,error)
# plt.show()

err = tuple(zip(alpha,error))
df_err = pd.DataFrame(err, columns=['alpha','error'])
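
# A possible next step (not in the original): read the best alpha off the
# table just built; the scoring is negative MAE, so larger is better.
best_alpha = df_err.loc[df_err['error'].idxmax(), 'alpha']
print(best_alpha)
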
b1 = 1 / RR.intercept_
a1 = -1 * b1 * RR.coef_
#print out the conservation equation of Fractal Dimension and Volatility
print("Conservation Law Generated by Ridge Regression Model")
print(a1[0], "FD + ", b1, "V = 1")
# 0.5052215007413694 FD +  20.979416541132068 V = 1

# ##########################   Lasso Regression  ##############################
print("\n")
print("-----------------Lasso Regression ----------------")
print("\n")

# use sklearn.linear_model Lasso to model the Fractal Dimension and Volatility
La = Lasso()
# fit the Lasso model of Fractal Dimension and Volatility
Lareg = La.fit(x, y)
# store the coefficient of the fitted model
Lw1 = La.coef_
# store the intercept of the fitted model
Lw0 = La.intercept_

print("Lasso Coef: ", Lw1[0])
print("Lasso Intercept: ", Lw0)
# Lasso Coef:  -0.0
# Lasso Intercept:  0.012102210503849494

#use predict_list function to get the Lasso prediction of volatility
V_LS_predict = predict_list(fd_list, Lw1[0], Lw0)
print("Lasso Regression R_square: ", r2_score(v, V_LS_predict))
#R Square: 0.0
Example No. 33
ridge_reg_mape = (np.abs((BL_LT_predicted - BL_LT_labels_test) / BL_LT_labels_test).mean(axis=0))
# print("ridge_reg_mape: "+str(ridge_reg_mape))


ridge_reg_rmsle = np.sqrt(mean_squared_log_error(BL_LT_labels_test, BL_LT_predicted))
# print(ridge_reg_rmsle)





####################################################################################################################
                                              # Lasso #
####################################################################################################################
lasso_reg = Lasso(alpha=0.1,normalize=True)
lasso_reg.fit(BL_LT_prepared_train,BL_LT_labels_train)
BL_LT_predicted = lasso_reg.predict(BL_LT_prepared_test)

lasso_reg_mse = mean_squared_error(BL_LT_labels_test, BL_LT_predicted)
lasso_reg_rmse = np.sqrt(lasso_reg_mse)
# print(lasso_reg_rmse)


lasso_reg_mae = mean_absolute_error(BL_LT_labels_test, BL_LT_predicted)
# print(lasso_reg_mae)

lasso_reg_mape = (np.abs((BL_LT_predicted - BL_LT_labels_test) / BL_LT_labels_test).mean(axis=0))
# print("lasso_reg_mape: "+str(lasso_reg_mape))


lasso_reg_rmsle = np.sqrt(mean_squared_log_error(BL_LT_labels_test, BL_LT_predicted))
Example No. 34
for k in range(0, nbits):
    for cohort in range(1, icohort + 1):

        val = (C[k][cohort - 1] -
               0.5 * nreportspercohort[cohort - 1] * f) / (1 - f)

        if val < 0:
            val = 0
        Y[k][cohort - 1] = val

print(Y)

print(len(ind))
Y = Y.reshape(nbits * icohort, 1)
sparse_lasso = Lasso(alpha=1, fit_intercept=False)
sparse_lasso.fit(X, Y)
#print('---candidates---')
#print(candidates)
print('---client---')
print(client)
print('---')
words = candidates[field]
coefs = sparse_lasso.coef_
# print(coefs)
# # for i in range(0,coefs.shape[0]):
# #     if(coefs[i]>0):
# #        print(words.iloc[i])
#
print('strings selected by lasso: ')
pos_client_selec = candidates[field][coefs > 0.0001]
print(pos_client_selec)
Example No. 35
from sklearn.linear_model import Lasso

def plot():
  plt.figure(figsize=(8,4))
  plt.subplot(121)
  plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1), random_state=42)
  plt.ylabel("$y$", rotation=0, fontsize=18)
  plt.subplot(122)
  plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), tol=1, random_state=42)
  plt.show()

plot()

from sklearn.linear_model import Lasso
lasso_reg = Lasso(alpha=0.1)
lasso_reg.fit(X, y)
lasso_reg.predict([[1.5]])

from sklearn.linear_model import ElasticNet
elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=42)
elastic_net.fit(X, y)
elastic_net.predict([[1.5]])

np.random.seed(42)
m = 100
X = 6 * np.random.rand(m, 1) - 3
y = 2 + X + 0.5 * X**2 + np.random.randn(m, 1)

X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)

poly_scaler = Pipeline([
print("cross validation time:",time2-time1)

explained_variance_score = cross_val_score(lasso, X, y,cv=3,scoring='explained_variance')
r2 = cross_val_score(lasso, X, y, cv=3, scoring='r2')
mean_squared_error = cross_val_score(lasso, X, y, cv=3, scoring='neg_mean_squared_error')
print ("EVS_CV:",explained_variance_score.mean())
print ("r2_CV:",r2.mean())
print ("MSE_CV:",mean_squared_error.mean())


"""
 Test/Evaluation
"""
time3 = time.perf_counter()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state=3)
lasso.fit(X_train, y_train)
y_pred= lasso.predict(X_test)
time4 = time.perf_counter()
print("testing time:",time4-time3)

print ("EVS_test:", metrics.explained_variance_score(y_test, y_pred))
print ("R2_test", metrics.r2_score(y_test, y_pred))
print ("MSE_test:", metrics.mean_squared_error(y_test, y_pred))
print ("The weights are:",lasso.coef_)


"""
Visualization
"""

fig, ax = plt.subplots()
Example No. 37
def dFF_martian(data, rm_window, signal='Gcamp', reference='Isosbestic',
                rm_nans='fill', lambda_=100, porder=1, itermax=15):
    """
    Calculates dF/F for a given signal and reference.
    This method is adapted from Martianova, Aronson, & Proulx
    Multi-Fiber Photometry to Record Neural activity in Freely
    Moving Animal. 2019 JOVE

    inputs
      args:
        data: pandas dataframe containing columns for signal and reference
        rm_window: int representing window for calculating running mean
                   (i.e. sample freq * time window)
      kwargs:
        signal: string containing column name for signal
        reference: string containing column name for reference
        rm_nans: string indicating how NaNs should be handeled after
                 rolling running_mean ('fill' or 'clip')
        lambda_: int for lambda_ value in airPLS (larger values results
                 in smoother baseline estimation)
        porder: int for porder in airPLS
        itermax: int for maximum number of iterations for airPLS

    returns
        data: pandas dataframe containing original data, new columns with
              intermediate calculations, and dFF_signal

    """

    import numpy as np
    import pandas as pd
    from ._steps import z_score, scale_Isos, calc_dF_F
    from ._smooth import running_mean
    from ._baseline_correction import WhittakerSmooth, airPLS
    from sklearn.linear_model import Lasso

    # Calculate running mean
    data['rm_%s' % signal] = running_mean(data[signal], rm_window)
    data['rm_%s' % reference] = running_mean(data[reference], rm_window)

    # Deal with NaN values according to rm_nan specification
    if rm_nans != 'clip' and rm_nans != 'fill':
        rm_nans = 'fill'
        print('Invalid input for rm_nans, defaulting to "fill"')
    if rm_nans == 'clip':
        data = data[pd.notnull(data['rm_%s' % signal])].copy()
    if rm_nans == 'fill':
        data = data.fillna(method='bfill')

    # Calculates baseline using airPLS and subtracts it from each trace.
    # (The subtraction must stay inside one statement; a dangling
    # "- airPLS(...)" line on its own is a no-op.)
    data['blc_%s' % reference] = (data['rm_%s' % reference]
                                  - airPLS(data['rm_%s' % reference],
                                           lambda_=lambda_, porder=porder,
                                           itermax=itermax))
    data['blc_%s' % signal] = (data['rm_%s' % signal]
                               - airPLS(data['rm_%s' % signal],
                                        lambda_=lambda_, porder=porder,
                                        itermax=itermax))

    # Calculates z-scores for each trace
    data['z_%s' % reference] = z_score(data['blc_%s' % reference])
    data['z_%s' % signal] = z_score(data['blc_%s' % signal])

    # Fits a robust non-negative linear regression to reference and signal,
    # then scales reference
    lin = Lasso(alpha=0.0001, precompute=True, max_iter=1000,
                positive=True, random_state=9999, selection='random')
    lin.fit(np.array(data['z_%s' % reference]).reshape(-1, 1),
            np.array(data['z_%s' % signal]).reshape(-1, 1))
    z_reference_fitted = lin.predict(np.array(data['z_%s' % reference]
                                              ).reshape(-1, 1))
    data['scaled_%s' % reference] = list(z_reference_fitted)

    # calculates dF/F as z_signal - scaled_reference
    data['dFF_%s' % signal] = (data['z_%s' % signal]
                               - data['scaled_%s' % reference])

    # returns dataframe with calculations in new columns
    return data
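
# Hypothetical usage sketch for dFF_martian (column names and sample rate are
# illustrative; the helper modules it imports must be on the path):
# import pandas as pd
# df = pd.read_csv('photometry.csv')        # columns include 'Gcamp', 'Isosbestic'
# out = dFF_martian(df, rm_window=30 * 10)  # e.g. 30 Hz sampling, 10 s window
# out['dFF_Gcamp'].plot()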
def test_coef_shape_not_zero():
    est_no_intercept = Lasso(fit_intercept=False)
    est_no_intercept.fit(np.c_[np.ones(3)], np.ones(3))
    assert est_no_intercept.coef_.shape == (1, )
Example No. 39
 X_m = X_m.dropna(axis=1)
 # mutation_names = X_m.columns
 X = X_m
 coef_names = X.columns
 X.to_csv('./data_outputs/Lasso_only_mut/X_' + inhibitors_list[drug_num] +
          '.csv')
 drug_response = drug_response.loc[combined_ids]
 Y = drug_response.sort_index()
 Y.to_csv('./data_outputs/Lasso_only_mut/Y_' + inhibitors_list[drug_num] +
          '.csv')
 x_train, x_test, y_train, y_test = train_test_split(X,
                                                     Y,
                                                     test_size=0.2,
                                                     random_state=0)
 lasso = Lasso()
 lasso.fit(x_train, y_train)
 train_score = lasso.score(x_train, y_train)
 test_score = lasso.score(x_test, y_test)
 lasso_01 = Lasso(alpha=0.1, max_iter=1000000)
 lasso_01.fit(x_train, y_train)
 train_score_01 = lasso_01.score(x_train, y_train)
 test_score_01 = lasso_01.score(x_test, y_test)
 lasso_001 = Lasso(alpha=0.01, max_iter=1000000)
 lasso_001.fit(x_train, y_train)
 train_score_001 = lasso_001.score(x_train, y_train)
 test_score_001 = lasso_001.score(x_test, y_test)
 file = open(
     "./regression_outputs/Lasso_only_mut/" + inhibitors_list[drug_num] +
     ".txt", 'w+')
 print("Lasso: alpha = 1", file=file)
 print("train score: " + str(train_score), file=file)
Example No. 40
train_rmse = np.sqrt(
    1 / X_train.shape[0] *
    np.squeeze(np.dot((trainings - y_train).T, (trainings - y_train))))
test_rmse = np.sqrt(
    1 / X_test.shape[0] *
    np.squeeze(np.dot((predictions - y_test).T, (predictions - y_test))))

print("Training RMSE is: %f" % train_rmse)
print("Testing RMSE is: %f" % test_rmse)

df_rmse['KNN'] = [train_rmse, test_rmse]

# build Lasso regression model
# training
reg_lasso = Lasso(alpha=0.1)
reg_lasso.fit(X_train, y_train)

# testing
trainings = reg_lasso.predict(X_train).reshape(-1, 1)
predictions = reg_lasso.predict(X_test).reshape(-1, 1)

# combine all predictions
all_pred = np.concatenate((trainings, predictions), axis=0)

# transform to dataframe for plotting
df_lasso = pd.DataFrame(all_pred,
                        columns=['Lasso ' + df.columns[-2]],
                        index=df.index)
df_lasso[df.columns[-2]] = y

# plot results and add train/test split timing line
Example No. 41
df = pd.DataFrame({'actual': y_test, 'pred': y_pred})
print(df)

print(pd.DataFrame(boston_rr.coef_))

# errors
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:',
      np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# -------------------------------------

print("LASSO")
# --- LASSO --- #
boston_l = Lasso()
boston_l.fit(X_train, y_train)
print("Coefficients: ", boston_l.coef_)
print("Intercept: ", boston_l.intercept_)

# R for train and test set
print('R2 for train: ', boston_l.score(X_train, y_train))
print('R2 for test: ', boston_l.score(X_test, y_test))

# lasso - prediction
y_pred = boston_l.predict(X_test)
df = pd.DataFrame({'actual': y_test, 'pred': y_pred})
print(df)

print(pd.DataFrame(boston_l.coef_))

# errors
print(model_scores)

# best alpha index for lasso
print(np.argmax(model_scores))
#plus in this index to the list of alphas

#best alpha to use
print(alpha_space[0])
# this is th best alpha to use in the model

from sklearn.linear_model import Lasso

#use the alpha previously found
alpha_user = 0.0001
lasso_model = Lasso(alpha=alpha_user, normalize=True)
lasso_model.fit(X_train, y_train)
lasso_pred = lasso_model.predict(X_test)

#predicition is between 0 and 1 -- round to the nearest integer to predict if the song is a hit
rounded_lasso = np.round(lasso_pred)

print("Lasso Model Accuracy:", metrics.accuracy_score(y_test, rounded_lasso))

print("\nLasso Model Coefficients:", lasso_model.coef_)

cols = list(X.columns.values)
lasso_importance = pd.DataFrame(lasso_model.coef_, index=cols).nlargest(3, [0])
print("\nLargest Lasso coefficients:\n", lasso_importance)
"""The Lasso model score is similar to the linear regression model -- very low and not a good fit for the data. It's coefficients shows that the first feature, danceability, is the most important feature. This is not surprising as we are analyzing music from the 1970s

# **Performance**
Example No. 43
    # get top-level correlation matrix of countries with each-other once
    corr_mat = light_train.transpose().corr()

    # iterate over 'countries' (some are not actually countries, but aggregates)
    for country in light_train.index:

        # do LASSO selection with alpha value
        tmp = light_train.drop(country)
        # narrow LASSO alpha setting for precision
        lasso_countries = []
        lasso_fit = None
        log10_amin = 0
        log10_amax = None
        for i in range(max_alpha_iter):
            lasso_fit = Lasso(alpha=10**log10_alpha, fit_intercept=False)
            lasso_fit.fit(tmp.transpose(), light_train.loc[country])

            # check result
            nz = sum(lasso_fit.coef_ != 0)
            if (nz < n_nonzero):
                log10_amax = log10_alpha
                log10_alpha = (log10_amax + log10_amin) / 2
            elif (nz > n_nonzero):
                log10_amin = log10_alpha
                if log10_amax is None:
                    log10_alpha *= 2
                else:
                    log10_alpha = (log10_amax + log10_amin) / 2
            else:
                break
def Lassos(test_data_list, train_data_list, K_fold_size):
    rmse_list = []
    r_squared_train_list = []
    r_squared_list = []
    coeff_list = []
    F_value = []
    p = []
    #res_list=[]
    pre = []
    act = []
    res = []
    res_list = []
    mse1 = []
    m = []
    k_list = []
    for i in range(0, K_fold_size):

        test_data = test_data_list[i]
        train_data = train_data_list[i]
        y_test = test_data["Commercial-rate"]
        y_train = train_data["Commercial-rate"]
        y_train = y_train.values
        y_test = y_test.values
        test_data = test_data.drop(["Commercial-rate", "Intercept"], axis=1)
        test = test_data.values
        k_list.append(test)
        #print(test_data.shape)
        train_data = train_data.drop(["Commercial-rate", "Intercept"], axis=1)
        train = train_data.values
        r, c = test_data.shape
        reg = Lasso(alpha=10)
        reg = reg.fit(train, y_train)

        y_train_fitted = reg.predict(train)
        r_squared_train = reg.score(train, y_train)
        y_fitted = reg.predict(test)
        r_squared = reg.score(test, y_test)
        mse = metrics.mean_squared_error(y_test, y_fitted)
        mse1.append(mse)
        rmse = math.sqrt(mse)
        mse_train = metrics.mean_squared_error(y_train, y_train_fitted)
        rmse_train = math.sqrt(mse_train)
        r_squared_list.append(r_squared)
        rmse_list.append(rmse)
        means = np.mean(y_test)
        total = 0   # renamed from "sum" so the builtin is not shadowed
        # use j so the outer fold index i is not clobbered
        for j in range(0, len(y_test)):
            res_list.append(y_test[j] - y_fitted[j])
        act.append(y_test)
        pre.append(y_fitted)
        res.append(res_list)

        for j in range(0, len(y_test)):
            total += (y_test[j] - means)**2
        MSR = total / c
        F = MSR / mse
        F_value.append(F)
        p.append(f.pdf(F, c, r - c))
        k = reg.coef_

        l = (reg.intercept_)
        m.append(l)
        coeff_list.append(k)

        #r_s=metrics.r2_score(y_test,y_fitted)
        #print(r_squared,mse,rmse,r_squared_train,rmse_train)
        #print(k)
        #print(k1)
    return (m, coeff_list, rmse_list, r_squared_list, F_value, p, mse1, res,
            pre, act, k_list)
Example No. 45
def LassoPrediction(X_train, X_test, Y_train):
    lasso = Lasso(alpha=0.1, normalize=True, max_iter=100000)
    lasso.fit(X_train, Y_train)
    return lasso
Example No. 46
print(Xtrain.shape)
print(Xtest.shape)


"""
Output:

(354, 13)
(152, 13)

"""

## Build the lasso model with alpha

model_lasso = Lasso(alpha=1)
model_lasso.fit(Xtrain, ytrain)
pred_train_lasso= model_lasso.predict(Xtrain)
pred_test_lasso= model_lasso.predict(Xtest)


## Evaluate the lasso model
print(np.sqrt(mean_squared_error(ytrain,pred_train_lasso)))
print(r2_score(ytrain, pred_train_lasso))
print(np.sqrt(mean_squared_error(ytest,pred_test_lasso)))
print(r2_score(ytest, pred_test_lasso))

"""
Output:

4.887113841773082
0.6657249068677625
Example No. 47
                        reg_alpha=0.9,
                        reg_lambda=0.6,
                        subsample=0.2,
                        seed=42,
                        silent=1)

regr.fit(train_df_munged, label_df)

y_pred = regr.predict(train_df_munged)
y_test = label_df
print("XGBoost score on training set: ", rmse(y_test, y_pred))

y_pred_xgb = regr.predict(test_df_munged)

best_alpha = 0.00099

regr = Lasso(alpha=best_alpha, max_iter=50000)
regr.fit(train_df_munged, label_df)

y_pred = regr.predict(train_df_munged)
y_test = label_df
print("Lasso score on training set: ", rmse(y_test, y_pred))

y_pred_lasso = regr.predict(test_df_munged)

y_pred = (y_pred_xgb + y_pred_lasso) / 2
y_pred = np.exp(y_pred)

pred_df = pd.DataFrame(y_pred, index=test_df["Id"], columns=["SalePrice"])
pred_df.to_csv('output.csv', header=True, index_label='Id')
Example No. 48
    elm = data_miss[i]
    j = index[i]
    if elm[1] == '男':  # male
        miss_nan_x += [[elm[2]] + list(elm[left1:(right1 + 1)]) +
                       list(elm[left:(right + 1)])]
        index_nan.append(j)
    elif elm[1] == '女':  # female
        miss_nv_x += [[elm[2]] + list(elm[left1:(right1 + 1)]) +
                      list(elm[left:(right + 1)])]
        index_nv.append(j)
miss_nan_x = (np.array(miss_nan_x) - mean_nan) / std_nan
miss_nv_x = (np.array(miss_nv_x) - mean_nv) / std_nv

#train model
model_nan = Lasso(max_iter=10000, alpha=0.01)
model_nan.fit(nomiss_nan_x, nomiss_nan_y)
pred_nan = model_nan.predict(miss_nan_x)
pred_nan[np.argwhere(pred_nan < 0)] = 0

model_nv = Lasso(max_iter=10000, alpha=0.01)
model_nv.fit(nomiss_nv_x, nomiss_nv_y)
pred_nv = model_nv.predict(miss_nv_x)
pred_nv[np.argwhere(pred_nv < 0)] = 0
#update data in sqlite3
col_name = '乙肝核心抗体'  # column: hepatitis B core antibody
for i in range(len(pred_nan)):
    query = "update train set %s=%f where id=%d" % (col_name, pred_nan[i],
                                                    index_nan[i])
    curs.execute(query)
for i in range(len(pred_nv)):
    query = "update train set %s=%f where id=%d" % (col_name, pred_nv[i],
Example No. 49
y += 0.01 * np.random.normal(size=n_samples)

# Split data in train set and test set
n_samples = int(X.shape[0] / 2)
print(n_samples)
X_train, y_train = X[:n_samples], y[:n_samples]
X_test, y_test = X[n_samples:], y[n_samples:]

###############################################################################
# Lasso
from sklearn.linear_model import Lasso

alpha = 0.1
lasso = Lasso(alpha=alpha)

y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
r2_score_lasso = r2_score(y_test, y_pred_lasso)
print(lasso)
print("r^2 on test data : %f" % r2_score_lasso)

###############################################################################
# ElasticNet
from sklearn.linear_model import ElasticNet

enet = ElasticNet(alpha=alpha, l1_ratio=0.7)

y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
r2_score_enet = r2_score(y_test, y_pred_enet)
print(enet)
print("r^2 on test data : %f" % r2_score_enet)
Example No. 50
def rmse(y_test, y_pred):
    return np.sqrt(mean_squared_error(y_test, y_pred))


# run prediction on training set to get an idea of how well it does
y_pred = regr.predict(train_new)
y_test = label_df
y_pred_xgb = y_pred
print("XGBoost score on training set: ", rmse(y_test, y_pred))
# XGBoost score on training set: 0.037633322832013358

from sklearn.linear_model import Lasso

#found this best alpha through cross-validation
best_alpha = 0.00099

regr = Lasso(alpha=best_alpha, max_iter=50000)
regr.fit(train_new, label_df)

# run prediction on the training set to get a rough idea of how well it does
y_pred = regr.predict(train_new)
y_pred_lasso = y_pred
y_test = label_df
print("Lasso score on training set: ", rmse(y_test, y_pred))
#<pre class="">('Lasso score on training set: ', 0.10175440647797629)</pre>

#simple average
y_pred = (y_pred_xgb + y_pred_lasso) / 2
y_pred = np.exp(y_pred)
pred_df = pd.DataFrame(y_pred, index=test["Id"], columns=["SalePrice"])
pred_df.to_csv('sample_submission.csv', header=True, index_label='Id')
Example No. 51
#Linear Regression
clfreg = LinearRegression(n_jobs=1)
clfreg.fit(X_train,y_train)
y_pred = clfreg.predict(X_test)
confidencereg = clfreg.score(X_test,y_test)

#Ridge Regression
rr = Ridge(alpha=0.01)
rr.fit(X_train,y_train)
y_pred_ridge = rr.predict(X_test)
confidenceridge = rr.score(X_test,y_test)

#Lasso Regression
ls = Lasso()
ls.fit(X_train,y_train)
y_pred_lasso = ls.predict(X_test)
confidencelasso = ls.score(X_test,y_test)

#plotting actual vs. predicted values for linear regression
import matplotlib.pyplot as plt
plt.plot(y_test[:100])
plt.plot(y_pred[:100])
plt.legend(['Actual', 'Linear Predicted'], loc='upper right')
plt.show()


#plotting actual vs. predicted values for ridge regression
import matplotlib.pyplot as plt
plt.plot(y_test[:100])
plt.plot(y_pred_ridge[:100])
Example No. 52
                                       index=df.index,
                                       columns=df.columns)

from fancyimpute import KNN
knns = {}
for kind in ['dragon', 'mordred']:
    knns[kind] = KNN(k=5)
    df = dfs[kind + '_good']
    imputed = knns[kind].fit_transform(df.values)
    dfs[kind + '_imputed'] = pd.DataFrame(imputed,
                                          index=df.index,
                                          columns=df.columns)

from sklearn.linear_model import Lasso
lasso = Lasso(alpha=0.1)
lasso.fit(dfs['dragon_imputed'].values, dfs['mordred_imputed'].values[:, :])

predicted = lasso.predict(dfs['dragon_imputed'])
observed = dfs['mordred_imputed']
rs = np.zeros(observed.shape[1])
for i, col in enumerate(observed):
    rs[i] = np.corrcoef(observed[col], predicted[:, i])[0, 1]

# %matplotlib inline
import matplotlib.pyplot as plt
plt.plot(sorted(rs))

plt.plot(np.linspace(0, 1, len(lasso.coef_.ravel())),
         sorted(np.abs(lasso.coef_.ravel()))[::-1])
plt.xscale('log')
plt.xlabel('Quantile rank (Top X% of coefficients)')
Example No. 53
    'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 0.4, 1, 5, 10, 20]
}
lasso_regressor = GridSearchCV(lasso,
                               parameters,
                               scoring='neg_mean_squared_error',
                               cv=5)
lasso_regressor.fit(Xs, ys)
print(lasso_regressor.best_params_)
print(lasso_regressor.best_score_)

# In[177]:

lasso = Lasso(alpha=20, normalize=False)

# Fit the regressor to the data
lasso.fit(Xs, ys)

y_pred = ridge.predict(X_test)
# # Compute and print R^2 and RMSE
print("R^2: {}".format(ridge.score(X_test, y_test)))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error: {}".format(rmse))

# In[178]:

y_pred = lasso.predict(X_test)
# # Compute and print R^2 and RMSE
print("R^2: {}".format(lasso.score(X_test, y_test)))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error: {}".format(rmse))
Example No. 54
features[:,1:features.shape[1]] = f_r
tf_r = robust.transform(test_features[:,1:features.shape[1]])
test_features[:,1:features.shape[1]] = tf_r

enc = preprocessing.OneHotEncoder(categorical_features=[0])
enc.fit(features)
fitted = enc.transform(features).toarray()
features = fitted
test_features = enc.transform(test_features).toarray()
min_error = 1
min_idx = 1

labels = labels * 1.32

lasso = Lasso(max_iter=3000, normalize=True)
lasso.fit(features, labels)
guesses = lasso.predict(test_features)
np.savetxt("lasso_guesses.txt", guesses, '%9.2f', newline="\n")

guesses = np.array([guesses]).T

diff = np.subtract(guesses, test_labels)
diff = np.absolute(diff)
diff = np.divide(diff, test_labels)
np.savetxt("lasso_diff.txt", diff, '%9.2f', newline="\n")
avg_error = np.mean(diff)
print "lasso regression, error: %s" % (avg_error)

fig, ax = plt.subplots()
y = test_labels
ax.scatter(y, guesses)
Example No. 55
best_lasso = np.inf


def get_lasso(pred, actual, coef, lambda_):

    lasso_val = np.sum([elem**2 for elem in pred - actual]) + np.sum(
        [lambda_ * np.abs(B) for B in coef])

    return lasso_val
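
# Quick sanity check of the helper above on made-up numbers: with lambda_ = 0
# it reduces to the residual sum of squares.
assert get_lasso(np.array([1.0, 2.0]), np.array([1.5, 1.5]),
                 np.array([0.3]), 0.0) == 0.5   # (-0.5)**2 + (0.5)**2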


for lambda_ in lambdas:

    lasso_reg = Lasso(normalize=True, alpha=lambda_, fit_intercept=False)

    lasso_reg.fit(train_X, train_Y)

    coef = lasso_reg.coef_

    y_pred_lass = lasso_reg.predict(train_X)

    lasso_val = get_lasso(y_pred_lass, train_Y, coef, lambda_)

    lasso_train.append(lasso_val)

    y_pred_lass = lasso_reg.predict(test_X)

    lasso_val = get_lasso(y_pred_lass, test_Y, coef, lambda_)

    lasso_test.append(lasso_val)
Example No. 56
    print(cf[0], cf[1])"""

predict_away = lm_away.predict(X_away_test)
print(
    np.mean(
        cross_val_score(lm_away,
                        X_away_train,
                        y_away_train,
                        scoring='neg_mean_absolute_error',
                        cv=3)))

# Lasso Regression
print("\nLasso")
print("Home")
lm_lasso_home = Lasso(alpha=0.1)
lm_lasso_home.fit(X_home_train, y_home_train)
print(
    np.mean(
        cross_val_score(lm_lasso_home,
                        X_home_train,
                        y_home_train,
                        scoring='neg_mean_absolute_error',
                        cv=3)))

print("\nAway")
lm_lasso_away = Lasso(alpha=0.1)
lm_lasso_away.fit(X_away_train, y_away_train)
print(
    np.mean(
        cross_val_score(lm_lasso_away,
                        X_away_train,
Example No. 57
ridge_reg.predict(x)

# using Stochastic Gradient Descent
from sklearn.linear_model import SGDRegressor
ridge_sgd = SGDRegressor(
    penalty="l2")  ## indicates adding 1/2 * L2 norm of weight vector

# 2. LASSO
# Least Absolute Shrinkage and Selection Operator Regression
# L1 norm of weight vector
# it eliminates the weights of the least important features (sets them to zero)
# it automatically performs feature selection and outputs a sparse model

from sklearn.linear_model import Lasso
lasso_reg = Lasso(alpha=0.1)
lasso_reg.fit(x, y)

lasso_sgd = SGDRegressor(penalty="l1")

# 3. Elastic Net
# the regularization term is a mixture of Lasso and Ridge regularization terms
# the mixture parameter is r
# r = 0 => Ridge
# r = 1 => Lasso

## General Guideline:
# never use linear regression alone
# Ridge is a good starting point with slight regularization
# If only a handful of features are useful, use Lasso or Elastic Net
#
# Elastic net is preferred when multicollinearity or where P > n in the training set
# =============================================================================
# LASSO Regression
# Fit LASSO Regression Model over a range of different alphas and plot cv-R2 
lasso_alpha_space = np.logspace(-4, 0, 50)
lasso_scores = []
lasso_scores_std = []

lasso = Lasso()
for alpha in lasso_alpha_space:
    lasso.alpha = alpha
    lasso_cv_scores = cross_val_score(lasso, X, y, cv=10)
    lasso_scores.append(np.mean(lasso_cv_scores))
    lasso_scores_std.append(np.std(lasso_cv_scores))
display_plot(lasso_scores, lasso_scores_std)

lasso.fit(X_train, y_train).coef_
lasso_y_train_pred = lasso.predict(X_train)
lasso_y_test_pred = lasso.predict(X_test)
lasso.score(X_test, y_test)

# Plot residual vs. predicted values to diagnose the regression model
plt.scatter(lasso_y_train_pred, lasso_y_train_pred - y_train,
            c='steelblue', marker='o', edgecolor='white',
            label='Training data')
plt.scatter(lasso_y_test_pred, lasso_y_test_pred - y_test,
            c='limegreen', marker='s', edgecolor='white',
            label='Test data')
plt.xlabel('Predicted values')
plt.ylabel('Residuals')
plt.legend(loc='upper left')
plt.suptitle('LASSO Regression Diagnostic')
Example No. 59
reg = Lasso(alpha=1)
#reg = LinearRegression()
change_flag = np.zeros(len(vals))

for i in range(windowLength, len(vals)):

    y = vals[i - windowLength:i].reshape(-1, 1)
    X = np.array(list(range(len(y)))).reshape(-1, 1)

    y_train = y[:-1]
    X_train = X[:-1]

    y_test = y[-1]
    X_test = X[-1, :].reshape(1, -1)

    reg = reg.fit(X_train, y_train)

    # reg.coef_ and reg.intercept_ hold the fitted slope and offset if needed

    y_hat = reg.predict(X_test)
    df100.iloc[i, -1] = y_hat

    res_train = y_train - reg.predict(X_train).reshape(-1, 1)
    res_test = y_test - y_hat

    flagIdx = res_test > 2 * np.std(y_train)
    change_flag[i] = res_test > 3 * np.std(res_train)

#    plt.plot(X ,y, 'r-',X_test, y_hat, '*',linewidth=2)
Example No. 60
        print("Mean score: %10.5f" % (mean_score / float(num_of_run)))

        print("Mean  RMSE train: %10.5f" %
              (mean_rms_train / float(num_of_run)))
        print("Mean   MAE train: %10.5f" %
              (mean_mae_train / float(num_of_run)))
        print("Mean MaxAE train: %10.5f" %
              (mean_maxae_train / float(num_of_run)))
        print("Mean    rP train: %10.5f" % (mean_rp_train / float(num_of_run)))
        print("Mean score train: %10.5f" %
              (mean_score_train / float(num_of_run)))

        X_train, X_test, y_train, y_test = train_test_split( \
             features_array, labels, test_size=util.LEAVEPERC)

        for a in [0.001, 0.01, 0.1, 1.0]:
            regressor = Lasso(alpha=a, max_iter=1000000)
            regressor.fit(X_train, y_train)

            train_score = regressor.score(X_train, y_train)
            test_score = regressor.score(X_test, y_test)
            coeff_used = np.sum(regressor.coef_ != 0)

            print("Lasso using alpha %10.5f " % (a))
            print("  score train %10.5f " % (train_score))
            print("  score test  %10.5f " % (test_score))
            print("  number of features used ", coeff_used)
            for cidx in range(len(regressor.coef_)):
                if regressor.coef_[cidx] != 0.0:
                    print("   ", cidx + 1, " => ", featuresselected[cidx])