Example #1
def predict_age():
    mask = ~np.isnan(train["Age"])
    age_train = train[mask]
    age_test = train[~mask]

    features = []
    features.append(embarked_enc.transform(age_train["Embarked"]))
    features.append(sex_enc.transform(age_train["Sex"]))
    features.append(title_enc.transform(age_train["Title"]))
    features.append(pclass_enc.transform(age_train["Pclass"]))

    age_clf = SGDRegressor()
    X = np.hstack(features)
    y = train.loc[mask, "Age"].values
    age_clf.fit(X, y)

    features = []
    features.append(embarked_enc.transform(age_test["Embarked"]))
    features.append(sex_enc.transform(age_test["Sex"]))
    features.append(title_enc.transform(age_test["Title"]))
    features.append(pclass_enc.transform(age_test["Pclass"]))

    ages = age_clf.predict(np.hstack(features))
    j = 0
    for i in range(len(train)):
        if not mask[i]:
            train.loc[i, "Age"] = ages[j]
            j += 1
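
The encode-then-fit steps in predict_age can be folded into a single pipeline. A minimal sketch using scikit-learn's ColumnTransformer and OneHotEncoder (the column names are assumptions carried over from the snippet above; this is not part of the original code):

from sklearn.compose import ColumnTransformer
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder

def predict_age_pipeline(train):
    # hypothetical one-pipeline variant of predict_age()
    mask = train["Age"].notna()
    cols = ["Embarked", "Sex", "Title", "Pclass"]
    model = make_pipeline(
        ColumnTransformer([("onehot", OneHotEncoder(handle_unknown="ignore"), cols)]),
        SGDRegressor(),
    )
    model.fit(train.loc[mask, cols], train.loc[mask, "Age"])
    train.loc[~mask, "Age"] = model.predict(train.loc[~mask, cols])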
def ls_sklearn_sgd(x, y):
    # Parameter estimation by sklearn SGD; x is 1-D, so reshape it to a column vector
    sgd = SGDRegressor(fit_intercept=True)
    sgd.fit(x.reshape((-1, 1)), y)
    beta_0_sk = sgd.intercept_
    beta_1_sk = sgd.coef_[0]
    return beta_0_sk, beta_1_sk
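
A quick way to sanity-check ls_sklearn_sgd is against NumPy's closed-form least-squares fit on synthetic data (illustrative only; the tolerance is loose because SGD is stochastic):

import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-1.0, 1.0, 500)
y = 2.0 + 3.0 * x + rng.normal(scale=0.1, size=500)

b1, b0 = np.polyfit(x, y, deg=1)       # closed-form slope and intercept
beta_0, beta_1 = ls_sklearn_sgd(x, y)
assert abs(float(beta_0) - b0) < 0.1
assert abs(beta_1 - b1) < 0.1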
def sgd(X, y, weight, X_test=None):
    from sklearn.linear_model import SGDRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    #X_train, X_test, y_train, y_test, weight_train, weight_test = train_test_split(
    #        X, y, weight, test_size=0.2, random_state=0)
    clf = SGDRegressor(loss="huber", max_iter=100, penalty="l1")
    #clf = LogisticRegression(max_iter=100)

    X_train = X
    y_train = y

    # with_mean=False keeps sparse inputs sparse (no centering)
    scaler = StandardScaler(with_mean=False)
    scaler.fit(X_train)  # Don't cheat - fit only on training data
    X_train = scaler.transform(X_train)

    X_test = scaler.transform(X_test)  # apply same transformation to test data

    clf.fit(X_train, y_train, sample_weight=weight)

    print(clf.score(X_train, y_train, sample_weight=weight))

    y_pred = clf.predict(X_test)

    import joblib
    import scipy.io as sio
    joblib.dump(clf, 'models/sgd_.pkl')
    sio.savemat('predict_y_forward.mat', {'y': y_pred})
Example #4
    def predict(self, df):

        # get time frame
        time_frame = settings.time_frame
        
        # copy of data
        df_copy = df.copy()

        from sklearn.linear_model import SGDRegressor
        from sklearn.metrics import mean_squared_error
    
        # partition data
        X_train, y_train, X_val, y_val, X_test, y_test = self.partition(df_copy)
        
        # normalize features
        X_train_std, X_val_std, X_test_std = self.feature_scale(X_train, X_val, X_test)
        
        # instance of an SGD-based linear regression model
        lr = SGDRegressor()
        
        # fit model
        lr.fit(X_train_std, y_train)
        
        # predictions on validation set
        predictions = lr.predict(X_val_std)
    
        # R^2 score
        score = lr.score(X_val_std, y_val)
        
        # RMSE on the validation set
        val_error = mean_squared_error(y_val, predictions) ** .5
        print(val_error)
Example #5
def predictScores(trainFeatures,trainTargets,testFeatures,testItemIds,isRegression = False):
    logging.info("Feature preparation done, fitting model...")
    
    predicted_scores = []
    if isRegression:
        clf = SGDRegressor(penalty="l2", alpha=1e-4)
                            
        print("trainFeatures rows::"+str(trainFeatures.shape[0]))
        print("trainTargets rows::"+str(len(trainTargets)))
        clf.fit(trainFeatures,trainTargets)
        logging.info("Predicting...")    
        predicted_scores = clf.predict(testFeatures)
    else:         
        clf = SGDClassifier(loss="log_loss", penalty="l2", alpha=1e-4,
                            class_weight="balanced")
                            
        print("trainFeatures rows::"+str(trainFeatures.shape[0]))
        print("trainTargets rows::"+str(len(trainTargets)))
        clf.fit(trainFeatures,trainTargets)
        logging.info("Predicting...")    
        predicted_scores = clf.predict_proba(testFeatures).T[1]    
    
    logging.info("Write results...")
    output_file = "avito_starter_solution.csv"
    logging.info("Writing submission to %s" % output_file)
    f = open(os.path.join(dataFolder,output_file), "w")
    f.write("id\n")    
    for pred_score, item_id in sorted(zip(predicted_scores, testItemIds), reverse = True):
        f.write("%d\n" % (item_id))
    f.close()
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """
    clf = SGDRegressor(max_iter=20)
    clf.fit(features, values)
    intercept,params = clf.intercept_,clf.coef_ 
    return intercept, params
Example #7
def slim_train(A, l1_reg=0.001, l2_reg=0.0001):
    """
    Computes W matrix of SLIM

    This link is useful to understand the parameters used:

        http://web.stanford.edu/~hastie/glmnet_matlab/intro.html

        Basically, we are using this:

            Sum( yi - B0 - xTB) + ...
        As:
            Sum( aj - 0 - ATwj) + ...

    Remember that we want to learn wj. If you don't understand this
    mathematical notation, I suggest you read section III of:

        http://glaros.dtc.umn.edu/gkhome/slim/overview
    """
    alpha = l1_reg + l2_reg
    l1_ratio = l1_reg / alpha

    model = SGDRegressor(
        penalty='elasticnet',
        fit_intercept=False,
        alpha=alpha,
        l1_ratio=l1_ratio,
    )

    # TODO: get dimensions in the right way
    m, n = A.shape

    # Fit each column of W separately
    W = lil_matrix((n, n))

    for j in range(n):
        if j % 50 == 0:
            print('-> %2.2f%%' % ((j / float(n)) * 100))

        aj = A[:, j].copy()
        # We need to remove the column j before training
        A[:, j] = 0

        model.fit(A, aj.toarray().ravel())
        # We need to reinstate the matrix
        A[:, j] = aj

        w = model.coef_

        # Removing negative values because it makes no sense in our approach
        w[w < 0] = 0

        for el in w.nonzero()[0]:
            W[(el, j)] = w[el]

    return W
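
A minimal usage sketch for slim_train (hypothetical data; assumes A is a SciPy lil_matrix, which supports the in-place column assignments used above):

import numpy as np
from scipy.sparse import lil_matrix

# tiny 4-user x 3-item interaction matrix
A = lil_matrix(np.array([[1., 0., 1.],
                         [0., 1., 1.],
                         [1., 1., 0.],
                         [0., 0., 1.]]))
W = slim_train(A)    # sparse n x n item-item weight matrix
scores = A @ W       # recommendation scores for every user/item pair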
Example #8
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """
    model = SGDRegressor()
    model.fit(features, values)
    intercept = model.intercept_ 
    params = model.coef_ 
    
    return intercept, params
def sgd(pd, pl, qd, ql):
    params = {'loss':['squared_error', 'huber', 'epsilon_insensitive',
                     'squared_epsilon_insensitive'],
                'alpha':expon(scale=1),
                'epsilon':expon(scale=1),
                'l1_ratio':uniform(),
                'penalty':[ 'l2', 'l1', 'elasticnet']}
    clf = SGDRegressor()
    #clf = RandomizedSearchCV(clf, params, n_jobs=2, n_iter=10, verbose=10)
    print("Training Linear SVM Randomly")
    clf.fit(pd, pl)
    print("Score: " + str(clf.score(qd, ql)))
    return clf
Example #10
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """
    
    ###########################
    ### YOUR CODE GOES HERE ###
    ###########################
    classifier = SGDRegressor(max_iter=20)
    classifier.fit(features, values)
    intercept = classifier.intercept_
    params = classifier.coef_
    return intercept, params
def main(train_file, model_file):
    #train_x, train_y = load_sparse_trainingData_memory(train_file, 2 * get_len_vector())
    train_x, train_y = load_long_training_data_memory()
    #train_x, train_y = load_trainingData(train_file)
    logging('len of y: %d' % train_y.shape)
    logging(train_x.shape)
    #LR = LinearRegression(copy_X = False, normalize = True)
    LR = SGDRegressor(verbose=1)
    logging("training model...")
    starttime = datetime.now()
    LR.fit(train_x, train_y)
    logging("training model, eplased time:%s" % str(datetime.now() - starttime))
    logging("saving model")
    joblib.dump(LR, model_file)
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """
    
    y = values
    X = features
    clf = SGDRegressor()
    clf.fit(X, y)
    
    intercept = clf.intercept_
    params = clf.coef_
    
    return intercept, params
Example #13
def sgd_regressor(x, y, alpha):
    kf = KFold(n_splits=3)
    scores = []
    for train_index, test_index in kf.split(x):
        X_train, X_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]
        scaler = StandardScaler()
        scaler.fit(X_train)
        x_train = scaler.transform(X_train)
        x_test = scaler.transform(X_test)
        clf = SGDRegressor(loss='squared_error', alpha=alpha)
        clf.fit(x_train, y_train)
        scores.append(mean_squared_error(clf.predict(x_test), y_test) ** 0.5)
    return np.mean(scores)
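
The manual fold loop above can also be expressed with a Pipeline plus cross_val_score; a sketch that should match the RMSE up to SGD noise (sklearn reports MSE negated under the 'neg_mean_squared_error' scoring name):

import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

def sgd_regressor_cv(x, y, alpha):
    pipe = make_pipeline(StandardScaler(),
                         SGDRegressor(loss='squared_error', alpha=alpha))
    neg_mse = cross_val_score(pipe, x, y, cv=3,
                              scoring='neg_mean_squared_error')
    return np.mean(np.sqrt(-neg_mse))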
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """
    #features = sm.add_constant(features)
    model = SGDRegressor(max_iter=30)
    #normalize_res = normalize_features(features)
    model.fit(features,values)
      
    ###########################
    ### YOUR CODE GOES HERE ###
    ###########################
    intercept = model.intercept_
    params = model.coef_
    return intercept, params
Example #15
def build_sgd_regressor(X_test, X_train_full, y_train_full):

    #print("Building SGD regressor...")

    # note: 'modified_huber' is a classification loss; SGDRegressor uses 'huber'
    rf = SGDRegressor(loss="huber", penalty="elasticnet", max_iter=20000, alpha=0.1, epsilon=0.01)
    probas_rf = rf.fit(X_train_full, y_train_full).predict(X_test)
    return probas_rf
Example #16
def linear_regression_GD(features, values):
    means, std_devs, features = normalized_features(features)
    model = SGDRegressor(eta0=0.001)
    results = model.fit(features, values)
    intercept = results.intercept_
    params = results.coef_
    return intercept, params
def predictLinearRegress(attributeList, starTargetList):

    print("\nLinear Regression")

    starTargetList = np.array(starTargetList)
    Xtrain, Xtest, Ytrain, Ytest = ml.splitData(attributeList, starTargetList, 0.75)

    lr = ml.linear.linearRegress(Xtrain, Ytrain)

    yHatInitial = lr.predict(Xtest)
    print("MSE test: ", mean_squared_error(yHatInitial, Ytest))
    print("RMSE test: ", math.sqrt(mean_squared_error(yHatInitial, Ytest)))


    incorrect = 0
    total = 0
    for i, value in enumerate(yHatInitial):
        if(abs(yHatInitial[i] - Ytest[i]) > 0.5):
            incorrect += 1
        total += 1

    ratioIncorrect = float(float(incorrect) / float(total))
    print("Ratio incorrect: " + str(ratioIncorrect))


    onesCol = np.ones((len(Xtrain),1))
    Xtrain = np.concatenate((onesCol, Xtrain), 1)
    onesCol = np.ones((len(Xtest),1))
    Xtest = np.concatenate((onesCol, Xtest), 1)
    m, n = np.shape(Xtrain)

    clf = SGDRegressor(loss="squared_loss")
    clf.fit(Xtrain, Ytrain)
    yHat = clf.predict(Xtest)

    print("MSE after GD: ", mean_squared_error(yHat, Ytest))
    print("RMSE after GD: ", math.sqrt(mean_squared_error(yHat, Ytest)))

    incorrect = 0
    total = 0
    for i, value in enumerate(yHat):
        if(abs(yHat[i] - Ytest[i]) > 0.5):
            incorrect += 1
        total += 1

    ratioIncorrect = float(float(incorrect) / float(total))
    print("Ratio incorrect: " + str(ratioIncorrect))
Example #18
class EdenRegressor(BaseEstimator, RegressorMixin):
    """Build a regressor for graphs."""

    def __init__(self, r=3, d=8, nbits=16, discrete=True,
                 normalization=True, inner_normalization=True,
                 penalty='elasticnet', loss='squared_error'):
        """construct."""
        self.set_params(r, d, nbits, discrete,
                        normalization, inner_normalization,
                        penalty, loss)

    def set_params(self, r=3, d=8, nbits=16, discrete=True,
                   normalization=True, inner_normalization=True,
                   penalty='elasticnet', loss='squared_error'):
        """setter."""
        self.r = r
        self.d = d
        self.nbits = nbits
        self.normalization = normalization
        self.inner_normalization = inner_normalization
        self.discrete = discrete
        self.model = SGDRegressor(
            loss=loss, penalty=penalty,
            average=True, shuffle=True,
            max_iter=5, tol=None)
        self.vectorizer = Vectorizer(
            r=self.r, d=self.d,
            normalization=self.normalization,
            inner_normalization=self.inner_normalization,
            discrete=self.discrete,
            nbits=self.nbits)
        return self

    def transform(self, graphs):
        """transform."""
        x = self.vectorizer.transform(graphs)
        return x

    @timeit
    def kernel_matrix(self, graphs):
        """kernel_matrix."""
        x = self.transform(graphs)
        return metrics.pairwise.pairwise_kernels(x, metric='linear')

    def fit(self, graphs, targets, randomize=True):
        """fit."""
        x = self.transform(graphs)
        self.model = self.model.fit(x, targets)
        return self

    def predict(self, graphs):
        """predict."""
        x = self.transform(graphs)
        preds = self.model.predict(x)
        return preds

    def decision_function(self, graphs):
        """decision_function."""
        return self.predict(graphs)
def gradientDescent(trainData, testData, trainOuts, testOuts):
	clf = SGDRegressor(loss="squared_error")
	print(clf.fit(trainData,trainOuts))
	print(clf.coef_)
	predictions = clf.predict(testData)
	print(predictions)
	misses,error = sup.crunchTestResults(predictions,testOuts,.5)
	print(1-error)
Example #20
def linear_regression(features, values):

    sgd = SGDRegressor()
    sgd.fit(features, values)   # fit expects (X, y)
    intercept = sgd.intercept_
    params = sgd.coef_

    return intercept, params
    def train(self):
        X_train = np.vstack([self.lang_1_w2v[p[0]] for p in self.bilingual_mappings])
        Z_train = np.vstack([self.lang_2_w2v[p[1]] for p in self.bilingual_mappings])

        # there's a trick here -- train each column of Z separately (as its own SGD
        # problem); the columns are independent, so this is OK
        Z_cols = [Z_train[:,i] for i in range(Z_train.shape[1])]

        # train a model for each row of W, and get the W coefficients
        trained_coef_rows = []
        for z in Z_cols:
            clf = SGDRegressor()
            clf.fit(X_train, z)
            trained_coef_rows.append(clf.coef_)

        # now stack all the rows together to reconstruct W
        self.W = np.vstack(trained_coef_rows)
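
The per-column loop is what sklearn's MultiOutputRegressor automates; an equivalent sketch (assuming the same X_train and Z_train arrays):

import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor

multi = MultiOutputRegressor(SGDRegressor())
multi.fit(X_train, Z_train)   # fits one SGDRegressor per column of Z_train
W = np.vstack([est.coef_ for est in multi.estimators_])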
Example #22
def sgd_text_model(x_train, y_train, x_test, x_valid, cache_name, use_cache=False):
    if use_cache:
        with open(cache_name, 'rb') as fhand:  # pickle files must be opened in binary mode
            data_dict = pickle.load(fhand)
        return data_dict['test_pred'], data_dict['valid_pred']
    np.random.seed(seed=123)
    model = SGDRegressor(eta0=1000, fit_intercept=True, l1_ratio=0.15,
                         learning_rate='invscaling', loss='huber', max_iter=200,
                         penalty='l1', power_t=.1, random_state=123,
                         shuffle=True, verbose=0, warm_start=False)
    model.fit(x_train, y_train)
    test_pred = model.predict(x_test)
    valid_pred = model.predict(x_valid)
    data_dict = {'test_pred': test_pred, 'valid_pred': valid_pred}
    with open(cache_name, 'wb') as fhand:
        pickle.dump(data_dict, fhand)
    return test_pred, valid_pred
Example #23
def fit(self, train_features, train_labels, N, c_val=0.0001, tol_val=0.001):
    # break features into N sets
    feat_dim = train_features.shape[1]
    feat_per_bag = feat_dim // N  # integer division so the slice bounds stay ints
    self.SVRs = []
    for i in range(N):
        if i < N - 1:
            cur_train_feat_bag = train_features[:, i*feat_per_bag:(i+1)*feat_per_bag]
        else:
            cur_train_feat_bag = train_features[:, i*feat_per_bag:]
        # now train individual SVR
        # model = svm.SVR(C=c_val, kernel='linear', tol=tol_val)
        # model = LSVR(C=c_val, tol=tol_val)
        model = SGDR(loss='epsilon_insensitive', alpha=c_val)  # SGDR: SGDRegressor alias
        print('current training on dimensionality: ', cur_train_feat_bag.shape[1], '\n')
        model.fit(cur_train_feat_bag, train_labels)
        self.SVRs.append(model)
    return self.SVRs
def linear_regression(features, values):

    
    model = SGDRegressor(max_iter=1000)
    results = model.fit(features, values)
    intercept = results.intercept_
    params = results.coef_
    
    return intercept, params
def apply_sgd_(X_train, Y_train, alpha=0.0003, shuffle=True):
    # rule of thumb from the sklearn docs: SGD needs roughly 10**6 sample updates to converge
    n_iter = int(np.ceil(10**6 / len(Y_train)))
    model = SGDRegressor(loss='squared_error',
                         penalty='l2',
                         alpha=alpha,
                         epsilon=0.01,
                         fit_intercept=True,
                         max_iter=n_iter, shuffle=shuffle,
                         random_state=int(time.time()*8192) % 8192, warm_start=False,
                         verbose=0,
                         learning_rate='invscaling')
    model.fit(X_train, Y_train, sample_weight=None)
    Theta = [float(model.intercept_)]
    Theta.extend(float(x) for x in model.coef_)
    (model, Theta, J, SCORE) = performance_analysis(model, Theta, X_train, Y_train, debug=1)
    return (model, Theta, J, SCORE)
def SGD_Regression(kf,data,label,k):
	val=0
	for train, test in kf:
		X_train, X_test, y_train, y_test = data[train,:], data[test,:], label[train], label[test]
		log = SGDRegressor(loss='squared_error', penalty='l2', alpha=0.0001, l1_ratio=0.15, max_iter=5)
		logit = log.fit(X_train,y_train)
		y_pred =  logit.predict(X_test)
		val += metrics.mean_squared_error(y_test, y_pred) 
	return val/3 
	# print "SGD_Regression, Mean Squared Error ", "{0:.4f}".format(val/3)
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """
    regressor = SGDRegressor()
    result = regressor.fit(features, values)
    intercept = result.intercept_
    params = result.coef_
   
    return intercept, params
def sgd_cv_id_fold(trn, params={}, tst=None, model_seed=CONST.SEED):
    if tst is not None:
        preds = tst[['Engine']].copy()

    cv_id = utils.get_cv_id(model_seed)

    trn = trn.merge(cv_id, on=['Engine'], how='left')
    assert trn.cv_id.notnull().all()

    valid_preds = pd.DataFrame({
        'preds': [np.nan] * trn.shape[0],
        'actual_RUL': trn.RUL
    })
    features = [c for c in trn.columns if c not in CONST.EX_COLS]

    scaler = preprocessing.StandardScaler()
    trn.loc[:, features] = scaler.fit_transform(trn.loc[:, features])
    if tst is not None:
        tst.loc[:, features] = scaler.transform(tst.loc[:, features])

    for i in list(range(1, utils.get_config()['nfold'] + 1)):
        print(f"CV ID = {i}")
        X_train, y_train = trn.loc[trn.cv_id != i,
                                   features], trn.loc[trn.cv_id != i, 'RUL']
        X_valid, y_valid = trn.loc[trn.cv_id == i,
                                   features], trn.loc[trn.cv_id == i, 'RUL']

        model = SGDRegressor(**params, random_state=model_seed)
        model.fit(X_train, y_train)
        valid_preds.loc[trn.cv_id == i, 'preds'] = model.predict(X_valid)

        if tst is not None:
            preds[f'fold{i}'] = model.predict(tst[features])

    valid_preds.dropna(inplace=True)
    if tst is None:
        print("CV MAE Score :",
              mean_absolute_error(valid_preds.actual_RUL, valid_preds.preds))
        return mean_absolute_error(valid_preds.actual_RUL, valid_preds.preds)
    else:
        return mean_absolute_error(valid_preds.actual_RUL,
                                   valid_preds.preds), preds
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """

    clf = SGDRegressor(alpha=0.1, max_iter=20)
    clf.fit(features, values)

    # note: get_params() returns hyperparameters, not coefficients;
    # the fitted weights live in clf.coef_ and clf.intercept_
    return clf.intercept_, clf.coef_
Example #30
class SGDPlainNystromRegressor:
    def __init__(self,
                 kernel: str = 'rbf',
                 m: int = 100,
                 lambda_reg: float = 0.0,
                 **kwargs):
        self.projector = PlainNystrom(kernel=kernel, m=m)
        self.lambda_reg = lambda_reg
        self.coeffs = None
        self.regressor = SGDRegressor(fit_intercept=False, **kwargs)
        self.kwargs = kwargs

    def fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
        k_nm = self.projector.fit_transform(X=X, y=y, **kwargs)
        self.regressor.fit(k_nm, y)
        return self

    def predict(self, X):
        projection = self.projector.transform(X=X)
        return self.regressor.predict(projection)
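
PlainNystrom is defined elsewhere in that project; scikit-learn's built-in Nystroem approximator gives a comparable, self-contained setup (a sketch, not the original class):

from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline

# approximate an RBF kernel with 100 landmark points, then fit a linear SGD model
nystroem_sgd = make_pipeline(
    Nystroem(kernel='rbf', n_components=100),
    SGDRegressor(fit_intercept=False),
)
# usage: nystroem_sgd.fit(X, y) followed by nystroem_sgd.predict(X_new)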
Example #31
    def fit(self, U, Y):
        self.initialize()
        #learn X
        #X = self.getX(U,Y)
        X = self.getXBatched(U,Y,TSData.batchSize)
        print("Starting to train the model...")

        #clf = ElasticNet(alpha=5,l1_ratio=0.5,max_iter=50000)
        #for x1,y1 in izip(X,Y):
        #    clf.partial_fit(x1[np.newaxis,:], y1)
        #If not using generator
        X = np.array([i for i in X])
        #X = np.array(X)
        print(X.shape)
        print(Y.shape)
        clf = SGDRegressor(max_iter=100)
        clf.fit(X,np.ravel(Y))
        print(metrics.mean_absolute_error(clf.predict(X),Y))
        print(TSData().getScore(Y, clf.predict(X)))
        self.clf = clf
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """

    clf = SGDRegressor(max_iter=20)
    results = clf.fit(features, values)
    intercept = results.intercept_
    params = results.coef_

    return intercept, params
Example #33
def test_not_robust_regression(loss, weighting):
    reg = RobustWeightedRegressor(
        loss=loss,
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    reg_not_rob = SGDRegressor(loss=loss, random_state=rng)
    reg.fit(X_r, y_r)
    reg_not_rob.fit(X_r, y_r)
    pred1 = reg.predict(X_r)
    pred2 = reg_not_rob.predict(X_r)
    difference = [
        np.linalg.norm(pred1[i] - pred2[i]) for i in range(len(pred1))
    ]
    assert np.mean(difference) < 1
    assert_almost_equal(reg.score(X_r, y_r), r2_score(y_r, reg.predict(X_r)))
Example #34
def test_huber_and_sgd_same_results():
    # Test they should converge to same coefficients for same parameters

    X, y = make_regression_with_outliers(n_samples=10, n_features=2)

    # Fit once to find out the scale parameter. Scale down X and y by scale
    # so that the scale parameter is optimized to 1.0
    huber = HuberRegressor(fit_intercept=False, alpha=0.0, max_iter=100,
                           epsilon=1.35)
    huber.fit(X, y)
    X_scale = X / huber.scale_
    y_scale = y / huber.scale_
    huber.fit(X_scale, y_scale)
    assert_almost_equal(huber.scale_, 1.0, 3)

    sgdreg = SGDRegressor(
        alpha=0.0, loss="huber", shuffle=True, random_state=0, max_iter=10000,
        fit_intercept=False, epsilon=1.35, tol=None)
    sgdreg.fit(X_scale, y_scale)
    assert_array_almost_equal(huber.coef_, sgdreg.coef_, 1)
Example #36
def perform_sgd_regression(features, values):
    
    clf = SGDRegressor(max_iter=20)
    clf = clf.fit(features, values)
    intercept = clf.intercept_
    params = clf.coef_
    print("intercept:")
    print(intercept)
    print("params:")
    for i in range(len(params)):
        print("%s: %f" % (features.columns.values[i], params[i]))
Example #37
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """

    model = SGDRegressor()
    sgd = model.fit(features, values)
    intercept = sgd.intercept_
    params = sgd.coef_

    return intercept, params
def early_stopping(X, y):
    from sklearn.base import clone
    from sklearn.linear_model import SGDRegressor
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split
    # With warm_start=True, fit() continues training from where it left off instead of restarting from scratch.
    sgd_reg = SGDRegressor(max_iter=1, warm_start=True, penalty=None, learning_rate="constant", eta0=0.0005)
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    minimum_val_error = float("inf")
    best_epoch = None
    best_model = None
    for epoch in range(1000):
        sgd_reg.fit(X_train,  y_train.ravel())
        y_val_predict = sgd_reg.predict(X_val)
        val_error = mean_squared_error(y_val_predict, y_val)
        if val_error < minimum_val_error:
            minimum_val_error = val_error
            best_epoch = epoch
            best_model = clone(sgd_reg)
    print('stopping in:', best_epoch)
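
One caveat in early_stopping: sklearn.base.clone copies only the hyperparameters, so best_model ends up unfitted. If the trained weights at the best epoch are wanted, snapshot with deepcopy instead (a sketch against the same loop variables):

from copy import deepcopy

# inside the epoch loop:
if val_error < minimum_val_error:
    minimum_val_error = val_error
    best_epoch = epoch
    best_model = deepcopy(sgd_reg)   # keeps the fitted coef_ / intercept_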
def sgd_test():
    # load the data
    lb = load_boston()
    # split the dataset into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(lb.data,
                                                        lb.target,
                                                        test_size=0.25)
    print(y_train, '\n', y_test)

    # standardize the data (features and target each get their own scaler:
    # the feature matrix has many columns, the target only one)
    # feature standardization
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)

    # target standardization
    std_y = StandardScaler()
    # the scaler expects a 2-D array
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))

    # predict via gradient descent
    sg = SGDRegressor()

    sg.fit(x_train, y_train.ravel())

    print(sg.coef_)

    # predict
    x_predict_res = sg.predict(x_test).reshape(-1, 1)
    # invert the standardization to return to the original scale
    stand_pre = std_y.inverse_transform(x_predict_res)
    print(stand_pre)

    # compare the held-out targets with the gradient-descent predictions
    print(
        'mean absolute error:\n',
        mean_absolute_error(std_y.inverse_transform(y_test),
                            std_y.inverse_transform(x_predict_res)))
Example #41
def train(training_pandas_data, test_pandas_data, label_col, 
          feat_cols, alpha, l1_ratio, max_iter, tol, training_data_path, test_data_path):

    print("train:         " + training_data_path)
    print("test:          " + test_data_path)
    print("alpha:        ", alpha)
    print("l1-ratio:     ", l1_ratio)
    print("max_iter:     ", max_iter)
    print("tol:     ", tol)
    print("label-col:     " + label_col)
    for col in feat_cols:
        print("feat-cols:     " + col)

    # Split data into training labels and testing labels.
    trainingLabels = training_pandas_data[label_col].values
    trainingFeatures = training_pandas_data[feat_cols].values

    testLabels = test_pandas_data[label_col].values
    testFeatures = test_pandas_data[feat_cols].values

    #We will use an SGD model.
    en = SGDRegressor(alpha=alpha, l1_ratio=l1_ratio, warm_start=True, max_iter=max_iter, tol=tol)

    # Here we train the model.
    en.fit(trainingFeatures, trainingLabels)

    # Calculating the scores of the model.
    test_rmse = mean_squared_error(testLabels, en.predict(testFeatures))**0.5
    r2_score_training = en.score(trainingFeatures, trainingLabels)
    r2_score_test = en.score(testFeatures, testLabels)
    print("Test RMSE:", test_rmse)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    #Logging the RMSE and r2 scores.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_score_training)
    mlflow.log_metric("Test R2", r2_score_test)

    # Saving the model as an artifact.
    mlflow.sklearn.log_model(en, "model")
Example #42
def main():
    vitals = ['LABEL_RRate', 'LABEL_ABPm', 'LABEL_SpO2', 'LABEL_Heartrate']

    features_train = pd.read_csv('data/train_engineered_4.csv')
    labels_train = pd.read_csv('data/train_labels.csv')
    features_predict = pd.read_csv('data/test_engineered_4.csv')

    # set reduced_size  to reduce batch size
    reduced_size = len(features_predict)
    # reduced_size = 800

    prediction = pd.DataFrame(features_predict['pid']).iloc[0:reduced_size,:]
    metrics_summary = pd.DataFrame(columns=vitals)
    hyperparams = pd.DataFrame(columns=vitals)

    for label in vitals:
        X_train = np.array(features_train)[0:reduced_size]
        y_train = np.array(labels_train[label])[0:reduced_size]
        X_predict = np.array(features_predict)[0:reduced_size]

        # scaling data: fit the scaler on the training data only,
        # then reuse it for the prediction set
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_predict = scaler.transform(X_predict)

        model = SGDRegressor(penalty='elasticnet', alpha=0.05, l1_ratio=0.1)

        print()
        print('learning : ', label)
        model.fit(X_train, y_train)

        #predict on the provided test set
        y_predicted = model.predict(X_predict)
        # y_predicted = grid.predict(X_predict)

        prediction[label] = y_predicted


    print(prediction)

    return prediction
Example #43
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.
    """
    y = values
    X = features
    clf = SGDRegressor(max_iter=100)
    result = clf.fit(X, y)
    params = result.coef_
    intercept = result.intercept_

    return intercept, params
Example #44
def SGD_boston():
    boston = load_boston()
    x = boston.data
    y = boston.target
    train_x, test_x, train_y, test_y = \
        train_test_split(x, y, test_size=.25)
    std_s = StandardScaler()
    train_x = std_s.fit_transform(train_x)
    test_x = std_s.transform(test_x)  # reuse the training-set statistics, don't re-fit

    sgd = SGDRegressor()
    sgd.fit(train_x, train_y)
    score = sgd.score(test_x, test_y)
    predict_y = sgd.predict(test_x)
    print(score)
    print(predict_y[:20])
    print(test_y[:20])
    # print(sgd.coef_)
    # print(sgd.intercept_)

    return None
Example #45
def do_lreg_training_runs(d, cfg):
    times = np.array([])

    for i in range(cfg.num_runs):
        print('***Run #%d***' % i)
        start = time.perf_counter()

        model = SGDRegressor(
            eta0=cfg.learning_rate,  # initial learning rate
            max_iter=cfg.epochs,
            random_state=42)
        model.fit(d.x_train, d.y_train)

        elapsed = time.perf_counter() - start
        times = np.append(times, elapsed)

        trace_model_filename = f'{model_filename(cfg, i)}.joblib'
        dump(model, trace_model_filename)
        print(f"#### dump model [{trace_model_filename}]")

    print_times(times)
Example #46
def test_regression():
    torch.set_default_tensor_type('torch.DoubleTensor')

    net_ctor = lambda: N.Linear(13, 1)
    loss = F.mse_loss

    # Supports fit, predict, and score
    x, y = load_boston(return_X_y=True)
    model = TorchEstimator(net_ctor, loss, opt_ctor='Adam', lr=1e-3)
    model.fit(x, y, epochs=5)
    model.predict(x)
    model.score(x, y)

    # Comparable to sklearn linear regression
    theirs = SGDRegressor(max_iter=5, eta0=1e-3)
    theirs.fit(x, y)
    h_theirs = theirs.predict(x)
    h_ours = model.predict(x)
    mse_theirs = mean_squared_error(y, h_theirs)
    mse_ours = mean_squared_error(y, h_ours)
    assert mse_ours < mse_theirs  # torch is better than sklearn by a lot
Example #47
def test_mbsgd_regressor_default(datatype, nrows,
                                 column_info):
    ncols, n_info = column_info
    X, y = make_regression(n_samples=nrows, n_features=ncols,
                           n_informative=n_info, random_state=0)
    X = X.astype(datatype)
    y = y.astype(datatype)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8,
                                                        random_state=0)

    cu_mbsgd_regressor = cumlMBSGRegressor()
    cu_mbsgd_regressor.fit(X_train, y_train)
    cu_pred = cu_mbsgd_regressor.predict(X_test).to_array()
    cu_r2 = r2_score(cu_pred, y_test, convert_dtype=datatype)

    if nrows < 500000:
        skl_sgd_regressor = SGDRegressor()
        skl_sgd_regressor.fit(X_train, y_train)
        skl_pred = skl_sgd_regressor.predict(X_test)
        skl_r2 = r2_score(skl_pred, y_test, convert_dtype=datatype)
        assert abs(cu_r2 - skl_r2) <= 0.02
Example #48
class TestingExercise3_06(unittest.TestCase):
    def setUp(self) -> None:
        ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

        self.data = pd.read_csv(
            os.path.join(ROOT_DIR, '..', 'Datasets', 'synth_temp.csv'))

    def test_SGD(self):
        self.data = self.data.loc[self.data.Year > 1901]
        self.data_group_year = self.data.groupby(['Year']).agg({'RgnAvTemp': 'mean'})
        self.data_group_year['Year'] = self.data_group_year.index
        self.data_group_year = self.data_group_year.rename(
            columns={'RgnAvTemp': 'AvTemp'})
        self.X_min = self.data_group_year.Year.min()
        self.X_range = self.data_group_year.Year.max() - self.data_group_year.Year.min()
        self.Y_min = self.data_group_year.AvTemp.min()
        self.Y_range = self.data_group_year.AvTemp.max() - self.data_group_year.AvTemp.min()
        self.scale_X = (self.data_group_year.Year - self.X_min) / self.X_range
        self.train_X = self.scale_X.values
        self.train_Y = ((self.data_group_year.AvTemp - self.Y_min) /
                        self.Y_range).values
        np.random.seed(42)
        self.model = SGDRegressor(loss='squared_error',
                                  max_iter=100,
                                  learning_rate='constant',
                                  eta0=0.0005,
                                  tol=0.00009,
                                  penalty=None)
        self.model.fit(self.train_X.reshape((-1, 1)), self.train_Y)
        self.Beta0 = (
            self.Y_min + self.Y_range * self.model.intercept_[0] -
            self.Y_range * self.model.coef_[0] * self.X_min / self.X_range)
        self.Beta1 = self.Y_range * self.model.coef_[0] / self.X_range
        self.pred_X = self.data_group_year['Year']
        self.pred_Y = self.model.predict(self.train_X.reshape((-1, 1)))
        self.r2 = r2_score(self.train_Y, self.pred_Y)
        self.assertEqual(round(self.r2, 3), (0.544))
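
The Beta0/Beta1 expressions above just undo the min-max scaling: with x' = (x - X_min)/X_range and y' = (y - Y_min)/Y_range, the fitted line y' = b0 + b1*x' becomes y = Beta0 + Beta1*x in original units (plain names below stand in for the self. attributes above):

b0, b1 = model.intercept_[0], model.coef_[0]   # coefficients in scaled space
Beta1 = Y_range * b1 / X_range
Beta0 = Y_min + Y_range * b0 - Beta1 * X_min
# check: y = Y_min + Y_range*b0 + (Y_range*b1/X_range)*(x - X_min) = Beta0 + Beta1*x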
Example #49
def myLinear():
    """ Predict house prices directly with linear regression """
    # load the data
    lb = load_boston()
    # split the dataset into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(lb.data,
                                                        lb.target,
                                                        test_size=0.25)

    # standardize; the target values need standardizing too
    # instantiate two scalers, one for features and one for the target
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)  # reuse the training-set statistics
    # target values (the scaler expects a 2-D array)
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))
    # estimator prediction
    # 1: normal-equation solution
    lr = LinearRegression()
    lr.fit(x_train, y_train)
    # learned weights
    print(lr.coef_)
    # predicted prices
    y_lr_predict = std_y.inverse_transform(lr.predict(x_test))
    print('Predicted price of each house in the test set: ', y_lr_predict)
    print('Normal-equation mean squared error:',
          mean_squared_error(std_y.inverse_transform(y_test), y_lr_predict))
    # 2: predict house prices with gradient descent
    sgd = SGDRegressor()
    sgd.fit(x_train, y_train.ravel())
    print(sgd.coef_)
    # predict test-set prices
    y_sgd_predict = std_y.inverse_transform(sgd.predict(x_test).reshape(-1, 1))
    print('Predicted price of each house in the test set: ', y_sgd_predict)
    print('Gradient-descent mean squared error:',
          mean_squared_error(std_y.inverse_transform(y_test), y_sgd_predict))

    return None
def line_lineregression():
    # load and split the data
    data = load_boston()
    x_train, x_test, y_train, y_test = train_test_split(data.data,
                                                        data.target,
                                                        test_size=0.25)

    # standardization (unlike classification, the target is standardized too;
    # for display, obj.inverse_transform() undoes it)
    std1 = StandardScaler()
    std2 = StandardScaler()

    # standardize the features -- for fit_transform() vs transform(), see "0.0.训练集、测试集的特征工程.py"
    x_train = std1.fit_transform(x_train)
    x_test = std1.transform(x_test)

    # standardize the target
    y_train = std2.fit_transform(y_train.reshape((-1, 1)))  # reshape 1-D to 2-D, as the API requires
    # y_test = std2.transform(y_test.reshape((-1,1)))  # as above

    # instantiate the estimators
    lr_1 = LinearRegression()
    sgd_1 = SGDRegressor()  # default learning rate

    # 1. normal-equation solution
    lr_1.fit(x_train, y_train)  # train the model
    print(lr_1.coef_)  # learned model parameters
    result_1 = lr_1.predict(x_test)  # predictions
    result_1 = std2.inverse_transform(result_1)  # undo the standardization
    print("Normal-equation predicted prices:", result_1)
    print("Normal-equation mean squared error:", mean_squared_error(y_test, result_1))

    print("*" * 30)

    # 2. gradient-descent solution
    sgd_1.fit(x_train, y_train.ravel())
    print(sgd_1.coef_)
    result_2 = sgd_1.predict(x_test).reshape((-1, 1))
    result_2 = std2.inverse_transform(result_2)  # undo the standardization
    print("Gradient-descent predicted prices:", result_2)
    print("Gradient-descent mean squared error:", mean_squared_error(y_test, result_2))
Example #51
def get_sgd(X_train, X_test, y_train, y_test):
    temp_max_itr = 100000
    dest_eta = 1e-5
    dest_tol = 1e-3
    temp_coef = 0.01
    dest_coef = temp_coef
    dest_intercept = 0.0
    best_score = -1000  # avoid shadowing the built-in max()
    #    mode = 'w'
    #    cnt = 1
    while temp_coef <= 2.0:
        temp_intercept = 0.0
        while temp_intercept <= 50.0:
            sgd = SGDRegressor(random_state=15,
                               max_iter=temp_max_itr,
                               eta0=dest_eta,
                               tol=dest_tol,
                               n_iter_no_change=6)
            sgd.fit(X_train,
                    y_train,
                    coef_init=temp_coef,
                    intercept_init=temp_intercept)
            scr = sgd.score(X_test, y_test)
            # keep the initialization that scored best so far
            if best_score < scr:
                best_score = scr
                dest_coef = temp_coef
                dest_intercept = temp_intercept
#            if cnt > 1 :
#                mode = 'a'
#            cnt += 1
#            write_to_file(scr,dest_coef, dest_intercept, mode)
            temp_intercept += 1.0
        temp_coef += 0.1
    sgd1 = SGDRegressor(random_state=15,
                        max_iter=temp_max_itr,
                        eta0=dest_eta,
                        tol=dest_tol,
                        n_iter_no_change=6)
    return sgd1, dest_coef, dest_intercept
Example #52
class SGDRegressionModel(RegressionModel):
	def __init__(self, train_data):
		RegressionModel.__init__(self, train_data)
		self.model = SGDRegressor()

	def train(self, x=None, y=None):
		x = x if x is not None else self.train_x
		y = y if y is not None else self.train_y

		self.model.fit(x, y)

	def predict(self, x_in):
		return self.model.predict(x_in)

	def evaluate(self, x_in, y_out):
		return self.model.score(x_in, y_out)

	def save(self, filename):
		joblib.dump(self.model, filename)

	def load(self, filename):
		self.model = joblib.load(filename)
Example #53
def test_sklearndata():
    x, y = data_xy(sklearn_regdata=True)
    # My reg
    lr = SGDLinear_reg(100, eta=0.01, batch_size=10)
    lr.fit(x, y)
    lr.plot_train_loss()

    ## sklearn sgd
    sgd_reg = SGDRegressor(max_iter=100,      # number of iterations
                           penalty=None,      # no regularization term
                           eta0=0.01,         # learning rate
                           early_stopping=True
                           )
    sgd_reg.fit(x, y)

    ## sklearn reg
    lrskt = LinearRegression(fit_intercept=True)
    lrskt.fit(x, y)

    print(f'my linear loss: {lr.cost(y, lr.predict(x)):.2f}')
    print(f'sklearn loss: {lr.cost(y, lrskt.predict(x)):.2f}')
    print(f'sklearn sgd loss: {lr.cost(y, sgd_reg.predict(x)):.2f}')
Example #54
def test_mbsgd_regressor(lrate, penalty, make_dataset):
    nrows, datatype, X_train, X_test, y_train, y_test = make_dataset

    cu_mbsgd_regressor = cumlMBSGRegressor(learning_rate=lrate, eta0=0.005,
                                           epochs=100, fit_intercept=True,
                                           batch_size=2, tol=0.0,
                                           penalty=penalty)

    cu_mbsgd_regressor.fit(X_train, y_train)
    cu_pred = cu_mbsgd_regressor.predict(X_test).to_array()
    cu_r2 = r2_score(cu_pred, y_test, convert_dtype=datatype)

    if nrows < 500000:
        skl_sgd_regressor = SGDRegressor(learning_rate=lrate, eta0=0.005,
                                         max_iter=100, fit_intercept=True,
                                         tol=0.0, penalty=penalty,
                                         random_state=0)

        skl_sgd_regressor.fit(X_train, y_train)
        skl_pred = skl_sgd_regressor.predict(X_test)
        skl_r2 = r2_score(skl_pred, y_test, convert_dtype=datatype)
        assert abs(cu_r2 - skl_r2) <= 0.02
Example #55
def runSGD(X_train, X_test, y_train, y_test, dataname):
    all_epsilon = [0.001, 0.1, 0.5, 0.9]
    best_model = None
    max_score = 0
    for epsilon in all_epsilon:
        regressor = SGDRegressor(loss='epsilon_insensitive', epsilon=epsilon)
        regressor.fit(X_train, y_train)

        y_pred = regressor.predict(X_test)
        plt.scatter(y_test, y_pred)
        plt.plot([y_test.min(), y_test.max()], [y_pred.min(), y_pred.max()], 'r', lw=2)
        score = regressor.score(X_test, y_test)
        if score > max_score:
            max_score = score  # remember the best score along with the model
            best_model = regressor
        plt.title('SGD - {0}\n epsilon ={1} \nScore = {2:.3f} '.format(str(dataname), epsilon, score))
        plt.xlabel('Actual')
        plt.ylabel('Predict')
        # plt.show()
        plt.savefig('runSGD_{}_{}.png'.format(strftime("%H_%M_%S", gmtime()), epsilon))
        plt.close()
    return best_model
Example #56
def regularization():
	"""Plot training/validation RMSE against regularization strength."""
	alphaList = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
	xTrain = db.load('train', 'data')[:, 1:].astype(int)
	yTrain = np.squeeze(db.load('train', 'y')[:, 1:])
	xValid = db.load('valid', 'data')[:, 1:].astype(int)
	yValid = np.squeeze(db.load('valid', 'y')[:, 1:])
	eTrain = np.zeros(len(alphaList))
	eValid = np.zeros(len(alphaList))
	for i in range(len(alphaList)):
		for _ in range(5):
			model = SGDRegressor(penalty='l2', alpha=alphaList[i],
				learning_rate='constant', eta0=0.006, max_iter=100)
			model.fit(xTrain, yTrain)
			eTrain[i] += np.sqrt(mean_squared_error(yTrain, model.predict(xTrain)))
			eValid[i] += np.sqrt(mean_squared_error(yValid, model.predict(xValid)))
		eTrain[i] /= 5
		eValid[i] /= 5
	plt.semilogx(alphaList, eTrain, label='Training')
	plt.semilogx(alphaList, eValid, label='Validation')
	plt.legend()
	plt.show()
Example #57
def linear():
    """
    Predict house prices directly with linear regression
    :return:
    """
    lb = load_boston()
    # split the dataset into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(lb.data, lb.target, test_size=0.25)
    # standardization
    # both the features and the target must be standardized; instantiate two scalers
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # target values
    std_y = StandardScaler()
    # sklearn >= 0.19 requires a 2-D array, hence reshape(-1, 1); -1 lets the sample count be inferred
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))
    print(y_train)
    # estimator prediction
    # normal-equation solution
    lr = LinearRegression()
    lr.fit(x_train, y_train)
    print(lr.coef_)
    # predict test-set house prices
    y_predict = lr.predict(x_test)
    # convert back to the pre-standardization scale
    y_predict = std_y.inverse_transform(y_predict)
    print("Normal-equation predicted price per house:", y_predict)
    print("Normal-equation mean squared error:", mean_squared_error(std_y.inverse_transform(y_test), y_predict))
    # gradient descent
    sgd = SGDRegressor()
    sgd.fit(x_train, y_train.ravel())
    print(sgd.coef_)
    # ridge regression
    # alpha is the regularization strength
    rd = Ridge(alpha=1.0)
    rd.fit(x_train, y_train)
    print(rd.coef_)
def Linear():
    # load the data
    lb = load_boston()
    # split the data into training and test sets; returns (train/test features), (train/test targets)
    x_train, x_test, y_train, y_test = tts(lb.data, lb.target, test_size=0.25)
    # standardize (both features and targets); two scaler instances are needed
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)

    std_y = StandardScaler()
    # must be converted to a 2-D array
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))

    # normal equation
    lr = LinearRegression()
    lr.fit(x_train, y_train)
    print("LR feature weights", lr.coef_)
    predict = lr.predict(x_test)
    # invert the standardization to recover prices
    print("LR predicted test-set house prices", std_y.inverse_transform(predict))
    # arguments are the true values and the predictions
    print(
        "Normal-equation mean squared error",
        err(std_y.inverse_transform(y_test), std_y.inverse_transform(predict)))
    # gradient descent
    SGD = SGDRegressor()
    SGD.fit(x_train, y_train.ravel())
    print("SGD feature weights", SGD.coef_)
    _predict = SGD.predict(x_test).reshape(-1, 1)
    # invert the standardization to recover prices
    print("SGD predicted test-set house prices", std_y.inverse_transform(_predict))
    print(
        "Gradient-descent mean squared error",
        err(std_y.inverse_transform(y_test),
            std_y.inverse_transform(_predict)))
    return None
def linear_model2():
    # 1. load the data
    data = load_boston()
    # 2. split into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(data.data,
                                                        data.target,
                                                        test_size=0.2,
                                                        random_state=22)
    # 3. standardize the features
    Stan = StandardScaler()
    x_train = Stan.fit_transform(x_train)
    x_test = Stan.transform(x_test)  # reuse the training-set statistics

    # 4. fit with gradient descent
    linear = SGDRegressor(max_iter=1000)  # maximum number of iterations
    linear.fit(x_train, y_train)

    # 5. evaluate
    ss = linear.predict(x_test)
    # print('Gradient-descent predictions:', ss)
    print('Gradient-descent mean squared error:', mean_squared_error(y_test, ss))
    print('Gradient-descent coefficients:', linear.coef_)
Example #60
def test_both_fit_and_score_contain_sample_weight(sample_weight_passed_as):
    mlflow.sklearn.autolog()

    from sklearn.linear_model import SGDRegressor

    # ensure that we use an appropriate model for this test
    assert "sample_weight" in _get_arg_names(SGDRegressor.fit)
    assert "sample_weight" in _get_arg_names(SGDRegressor.score)

    mock_obj = mock.Mock()

    def mock_score(self, X, y, sample_weight=None):  # pylint: disable=unused-argument
        mock_obj(X, y, sample_weight)
        return 0

    assert inspect.signature(
        SGDRegressor.score) == inspect.signature(mock_score)

    SGDRegressor.score = mock_score
    model = SGDRegressor()
    X, y = get_iris()
    sample_weight = abs(np.random.randn(len(X)))

    with mlflow.start_run() as run:
        if sample_weight_passed_as == "positional":
            model.fit(X, y, None, None, sample_weight)
        elif sample_weight_passed_as == "keyword":
            model.fit(X, y, sample_weight=sample_weight)
        mock_obj.assert_called_once_with(X, y, sample_weight)

    run_id = run.info.run_id
    params, metrics, tags, artifacts = get_run_data(run_id)
    assert params == truncate_dict(
        stringify_dict_values(model.get_params(deep=True)))
    assert {TRAINING_SCORE: model.score(X, y)}.items() <= metrics.items()
    assert tags == get_expected_class_tags(model)
    assert MODEL_DIR in artifacts
    assert_predict_equal(load_model_by_run_id(run_id), model, X)