Example #1
from lightgbm import LGBMRegressor


def poly_regression_lgbmr(X_train_confirmed, y_train_confirmed, future_forcast):
    # Fit a gradient-boosted regressor on the confirmed-case series and
    # predict over the full forecast range.
    linear_model = LGBMRegressor(n_estimators=1300)
    linear_model.fit(X_train_confirmed, y_train_confirmed)
    poly_pred_future_forcast = linear_model.predict(future_forcast)
    return poly_pred_future_forcast
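
A minimal usage sketch with synthetic data (the shapes and values are assumptions; the original calling code is not shown):

# Hypothetical usage of poly_regression_lgbmr with synthetic case counts.
import numpy as np

days = np.arange(60).reshape(-1, 1)            # day index as the only feature
cases = (100 * np.exp(0.08 * days)).ravel()    # synthetic cumulative counts
future = np.arange(90).reshape(-1, 1)          # forecast 30 days past training

pred = poly_regression_lgbmr(days, cases, future)
print(pred[-5:])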
Example #2
    def getModel_deprecated(self, x_train, y_train, x_test, y_test):
        # use the validation process to pick the most important linear features
        # scale the data to [0, 1]
        min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
        scaler = min_max_scaler.fit(x_train)
        x_train_scaled = scaler.transform(x_train)
        # iterate through all candidate models
        selected_features = []
        selected_poly = False
        min_mse = float('inf')
        selected_model_name = ''
        selected_x_test = None
        for model_name, model in PModel.CANDIDATE_MODELS.items():
            rfe = RFE(model, n_features_to_select=5)
            fit = rfe.fit(x_train_scaled, y_train)
            # keep the features RFE ranked first
            feature_names = list(
                filter(lambda f: fit.ranking_[self.features.index(f)] == 1,
                       self.features))
            selected_x_train = x_train[feature_names]
            # fit the polynomial expansion on the training data only,
            # then apply the same transform to the test data
            poly = PolynomialFeatures(degree=2)
            selected_x_train_poly = poly.fit_transform(selected_x_train)
            selected_x_test_poly = poly.transform(self.x_test[feature_names])
            # train the 1st-order model
            linear_model = clone(model)
            linear_model.fit(selected_x_train, y_train)
            diff, mse = self.validate(self.x_test[feature_names], linear_model)
            if mse < min_mse:
                selected_features = feature_names
                selected_poly = False
                selected_model_name = model_name
                min_mse = mse
                self.model = linear_model
                selected_x_test = self.x_test[feature_names]
            # train the 2nd-order model
            high_model = clone(model)
            high_model.fit(selected_x_train_poly, y_train)
            diff, mse = self.validate(selected_x_test_poly, high_model)
            if mse < min_mse:
                selected_features = feature_names
                selected_poly = True
                selected_model_name = model_name
                min_mse = mse
                self.model = high_model
                selected_x_test = selected_x_test_poly

        # set all members from the winning model
        self.polyFeature = selected_poly
        self.features = selected_features
        self.modelType = selected_model_name
        self.x_test_selected = selected_x_test
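
PModel.CANDIDATE_MODELS is not defined in this snippet; a plausible registry consistent with the clone(model) and RFE calls (an assumption, not the repository's actual dictionary) could look like:

# Hypothetical candidate registry; the original definition is not shown.
# RFE needs estimators that expose coef_ or feature_importances_.
from sklearn.linear_model import LinearRegression, Ridge, Lasso

CANDIDATE_MODELS = {
    'linear': LinearRegression(),
    'ridge': Ridge(alpha=1.0),
    'lasso': Lasso(alpha=0.1),
}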
Example #3
def poly_regression_2(X_train_confirmed, y_train_confirmed, future_forcast, country_cases):
    # The pipeline performs the polynomial expansion itself, so the raw
    # inputs are passed straight in; HuberRegressor keeps the fit robust
    # to outliers in the case counts.
    linear_model = make_pipeline(PolynomialFeatures(degree=2), HuberRegressor())
    linear_model.fit(X_train_confirmed, y_train_confirmed)
    poly_pred_future_forcast = linear_model.predict(future_forcast)
    print('MSE:', mean_squared_error(country_cases[:len(future_forcast)],
                                     poly_pred_future_forcast))
    return poly_pred_future_forcast
Example #4
def poly_regression_regr(X_train_confirmed, country_cases, future_forcast):
    # `regr` is a regressor class aliased at import time (see the note below).
    linear_model = regr(n_estimators=100, n_jobs=-1)
    linear_model.fit(X_train_confirmed, country_cases)
    poly_pred_future_forcast = linear_model.predict(future_forcast)
    return poly_pred_future_forcast
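
The regr name is never imported in the snippet; given the n_jobs and n_estimators parameters it is presumably an ensemble regressor class aliased at import time. One plausible import (an assumption, not confirmed by the source):

# Hypothetical alias; the original import statement is not shown.
from sklearn.ensemble import RandomForestRegressor as regr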
Example #5
import numpy
import sklearn.linear_model
import matplotlib.pyplot as plot


def linear_regression(train_features, train_targets, test_features, test_targets):
    # Train
    linear_model = sklearn.linear_model.LinearRegression()
    linear_model.fit(train_features, train_targets)
    predict_targets = linear_model.predict(test_features)
    n_test_sample = len(test_targets)
    X = range(n_test_sample)
    # Validation: mean absolute error via the L1 norm
    error = numpy.linalg.norm(predict_targets - test_targets, ord=1) / n_test_sample
    print("Linear Regression Error: %.2f" % error)
    # Draw
    plot.plot(X, predict_targets, 'r--', label='Predict Price')
    plot.plot(X, test_targets, 'g', label='True Price')
    plot.legend()
    plot.title('Linear Regression')
    plot.show()
Example #6
    def linear_regression(self):
        linear_model = LinearRegression()
        linear_model.fit(self.x_train, self.y_train)
        y_pred_test = linear_model.predict(self.x_test)
        print('Linear Regression')
        print(self.big_line)
        print('Mean squared error (MSE): %.2f' %
              mean_squared_error(self.y_test, y_pred_test))
        print('Mean absolute error (MAE): %.2f' %
              mean_absolute_error(self.y_test, y_pred_test))
        print('Coefficient of determination (R^2): %.2f' %
              r2_score(self.y_test, y_pred_test))
        total_correct = 0
        for i in range(len(y_pred_test)):
            if abs(y_pred_test[i] - self.y_test[i]) <= 1:
                total_correct += 1
        print(
            f'Percent Correct within 1 Yard: {total_correct/len(y_pred_test) * 100}%'
        )
        print()
Example #7
    def SVRlinear(self, normalization, df, testSize):
        X, y = df.drop(['Project', 'ActualDuration'],
                       axis=1), df['ActualDuration'].astype('int')
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=testSize)

        linear_model = svm.SVR(kernel="linear")
        linear_model.fit(X_train, y_train)
        train_prediction = linear_model.predict(X_train)
        test_prediction = linear_model.predict(X_test)

        print(f"SVR linear prediction with "
              f"{normalization.percentageOfWeeksRegression * 100}% of the data "
              f"in each of the {len(normalization.allProjectDataFrames)} projects,\n"
              f"and a train/test ratio of "
              f"{int((1 - testSize) * 100)}/{int(testSize * 100)} resulted in:")
        print("MAE:\t{0:.3f}".format(
            metrics.mean_absolute_error(y_test, test_prediction)))
        # squared=False yields RMSE (scikit-learn >= 1.4 offers
        # metrics.root_mean_squared_error instead)
        print("RMSE:\t{0:.3f}".format(
            metrics.mean_squared_error(y_test, test_prediction,
                                       squared=False)))
        print("R^2:\t{0:>1.3f}\n".format(
            metrics.r2_score(y_test, test_prediction)))
        return ""
Example #8
    def abs_reg_coeffs(self, linear_model):
        """Return the top k variables ranked by absolute coefficient size.

        Args:
        linear_model (Class):  A scikit-learn linear model instance

        Attributes used:
        df:  Pandas dataframe containing the target and feature variables
        target_var:  The target variable
        k_features:  The number of features to return

        Returns:
        Pandas Dataframe with the top k variables by absolute coefficient size
        """

        df = self.df

        cat_features = df.loc[:, df.dtypes == object]

        if not cat_features.empty:
            df = self.prep_cat_vars(df)

        X = df.drop([self.target_var], axis=1)
        y = df[self.target_var]

        # fit the model
        lm = linear_model.fit(X, y)

        # get the coefficients and features into a data frame and create the rank;
        # ravel() flattens coef_ whether it comes back 1-D or 2-D
        lm_coeff = pd.DataFrame(lm.coef_.ravel())
        feat_labels = pd.DataFrame(X.columns)
        lm_reg_coeff = feat_labels.merge(lm_coeff,
                                         left_index=True,
                                         right_index=True)
        lm_reg_coeff.columns = ['features', 'coeff']
        lm_reg_coeff['coeff_abs'] = lm_reg_coeff['coeff'].abs()
        lm_reg_coeff.sort_values('coeff_abs', ascending=False, inplace=True)
        lm_reg_coeff['coeff_rank'] = range(1, len(lm_reg_coeff) + 1)

        lm_reg_coeff = lm_reg_coeff[lm_reg_coeff.coeff_rank <= self.k_features]

        return lm_reg_coeff
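
self.prep_cat_vars is referenced but not shown; a minimal sketch of what it presumably does, written here as a free function for brevity (an assumption, not the author's actual helper):

# Hypothetical helper; the original implementation is not shown.
import pandas as pd

def prep_cat_vars(df):
    """One-hot encode object-typed columns so a linear model can use them."""
    return pd.get_dummies(df, drop_first=True)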
Example #9
### list the features you want to look at--first item in the
### list will be the "target" feature
features_list = ["bonus", "salary"]
data = featureFormat(dictionary, features_list, remove_any_zeroes=True)
target, features = targetFeatureSplit(data)

### training-testing split needed in regression, just like classification
from sklearn.model_selection import train_test_split
feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=0.5, random_state=42)
train_color = "b"
test_color = "r"

from sklearn import linear_model as lm
reg = lm.LinearRegression()
reg.fit(feature_train, target_train)

### draw the scatterplot, with color-coded training and testing points
import matplotlib.pyplot as plt
for feature, target in zip(feature_test, target_test):
    plt.scatter(feature, target, color=test_color)
for feature, target in zip(feature_train, target_train):
    plt.scatter(feature, target, color=train_color)

### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_train[0], target_train[0], color=train_color, label="train")
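
The excerpt stops before drawing the fitted line or rendering the figure; a short completion sketch (the axis labels follow the features_list convention above):

# Draw the regression line over the test points and render the plot.
plt.plot(feature_test, reg.predict(feature_test), color="k")
plt.xlabel(features_list[1])
plt.ylabel(features_list[0])
plt.legend()
plt.show()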
Example #10
linear_model.predict(train_features[:10])

linear_model.layers[1].kernel

"""### Training model configuration 
Mean absolute error will be optimized using Adam algorithm. Adam optimization is a stochastic gradient descent method that is based on adaptive estimation of first-order and second-order moments.
"""

linear_model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

history = linear_model.fit(
    train_features, train_labels, 
    epochs=200,
    verbose=0,
    # Calculate validation results on 20% of the training data
    validation_split = 0.2)

hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

def plot_loss(history):
  plt.plot(history.history['loss'], label='loss')
  plt.plot(history.history['val_loss'], label='val_loss')
  plt.ylim([0, 900000])
  plt.xlabel('Epoch')
  plt.ylabel('Error [medianCompexValue]')
  plt.legend()
  plt.grid(True)
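
plot_loss is defined but never called in the excerpt; rendering the training and validation curves takes one call:

plot_loss(history)
plt.show()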
Example #11
sns.regplot(x="T2", y="T6", data=slr_df)

# In[47]:

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

# In[48]:

from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()

# In[49]:

model = linear_model.fit(df[['T2']], df.T6)

# In[51]:

from sklearn.model_selection import train_test_split

# model

# In[55]:

predicted_values = linear_model.predict(df[['T2']])

# In[56]:

from sklearn.metrics import mean_absolute_error
# compare the true T6 values (not the T2 feature) with the predictions
mae = mean_absolute_error(df['T6'], predicted_values)
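
The train_test_split import at In[51] is never used in the excerpt; a sketch of how the split would normally slot in before fitting (parameter values are assumptions):

# Hypothetical split; the original cell that used this import is not shown.
X_train, X_test, y_train, y_test = train_test_split(
    df[['T2']], df['T6'], test_size=0.3, random_state=42)
linear_model.fit(X_train, y_train)
test_mae = mean_absolute_error(y_test, linear_model.predict(X_test))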
Example #12
# Optional standardization, commented out in the original source:
# inputs = StandardScaler().fit_transform(inputs)
# output = StandardScaler().fit_transform(output)

# create models
nn_model = sklearn.neural_network.MLPRegressor(solver='lbfgs',
                                               alpha=1e-5,
                                               hidden_layer_sizes=hl_size,
                                               activation=act)
linear_model = sklearn.linear_model.LinearRegression()
rtree = sklearn.tree.DecisionTreeRegressor(min_samples_leaf=10,
                                           max_features=0.50)

# fit models to data
nn_model.fit(inputs[train_indx, :], output[train_indx])
linear_model.fit(inputs[train_indx, :], output[train_indx])
rtree.fit(inputs[train_indx, :], output[train_indx])

# predict with model
nn_predicted = nn_model.predict(inputs[test_indx])
linear_predicted = linear_model.predict(inputs[test_indx])
rtree_predicted = rtree.predict(inputs[test_indx])

nn_known_predicted = nn_model.predict(inputs[train_indx])
linear_known_predicted = linear_model.predict(inputs[train_indx])
rtree_known_predicted = rtree.predict(inputs[train_indx])

# target vs predicted
plt.figure()
plt.plot(output[test_indx], nn_predicted, '.', label='Neural net', color='r')
plt.plot(output[test_indx],
         linear_predicted, '.', label='Linear model', color='b')
Example #13
# In[10]:

from sklearn import linear_model

# Choosing the linear model (note: this rebinds the name `linear_model`
# from the module to the estimator instance)
linear_model = linear_model.Lasso(alpha=0.01)

#Preparing the model by setting up target and features
features = [
    "Cement", "Blast_Furnace_Slag", "Fly_Ash", "Water", "Superplasticizer",
    "Coarse_Aggregate", "Fine_Aggregate", "Age"
]
target = "Concrete_Compressive_Strength"

# Fitting the linear model
linear_model.fit(df[features], df[target])

#Coefficients of the linear model
pd.DataFrame([dict(zip(features, linear_model.coef_))])

# ## 7.Generate predictions for all the observations and a scatterplot comparing the predicted compressive strengths to the actual values.
#

# In[11]:

preds = linear_model.predict(df[features])
predictions_df = df.assign(predictions=preds)
predictions_df[["Concrete_Compressive_Strength", "predictions"]]
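
The heading in section 7 promises a scatterplot of predicted versus actual strengths, but the excerpt ends before drawing it; a minimal sketch (the plt import and the y = x reference line are additions):

# Hypothetical scatterplot of predicted vs. actual compressive strength.
import matplotlib.pyplot as plt

plt.scatter(df[target], preds, alpha=0.5)
lims = [df[target].min(), df[target].max()]
plt.plot(lims, lims, 'k--')  # y = x reference line
plt.xlabel('Actual compressive strength')
plt.ylabel('Predicted compressive strength')
plt.show()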

Example #14
linear_Y = policy_claim_train_after_preprocessing["Next_Premium"]
# Chinese column names: 竊盜 = theft, 車損 = vehicle damage, 女 = female,
# 法人 = corporate policyholder, 國產車 = domestically produced vehicle
linear_X = policy_claim_train_after_preprocessing[["Last_Renewal",
                                                   "竊盜", "車損",
                                                   "女", "法人",
                                                   "國產車",
                                                   "lia_class", "plia_acc",
                                                   "claim",
                                                   "Engine_Displacement_(Cubic_Centimeter)",
                                                   "Manafactured_Year_and_Month",
                                                   "age",
                                                   "Replacement_cost_of_insured_vehicle",
                                                   "Coverage_Deductible_if_applied",
                                                   "Insured_Amount2",
                                                   "Insured_Amount3"]]
linear_model = sm.OLS(linear_Y, linear_X)
result = linear_model.fit()
print(result.summary())

#                             OLS Regression Results                            
# ==============================================================================
# Dep. Variable:           Next_Premium   R-squared:                       0.541
# Model:                            OLS   Adj. R-squared:                  0.541
# Method:                 Least Squares   F-statistic:                 7.807e+04
# Date:                Sun, 09 Sep 2018   Prob (F-statistic):               0.00
# Time:                        17:16:27   Log-Likelihood:            -1.1010e+07
# No. Observations:             1061148   AIC:                         2.202e+07
# Df Residuals:                 1061132   BIC:                         2.202e+07
# Df Model:                          16                                         
# Covariance Type:            nonrobust                                         
# ==========================================================================================================
#                                              coef    std err          t      P>|t|      [0.025      0.975]
# ... (coefficient table truncated)
Example #15
X, y, coef = datasets.make_regression(n_samples=100,
                                      n_features=2,
                                      n_informative=2,
                                      n_targets=1,
                                      coef=True,
                                      random_state=1)
print('Actual coefficients: {}\n'.format(coef))

ones = np.ones((len(X), 1))
X = np.append(ones, X, axis=1)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=.2)

print('\nTraining data (X/y): \n{}\n{}\n'.format(X_train[:5], y_train[:5]))

linear_model = linear_model.LinearRegression()
linear_model.fit(X_train, y_train)

print('Coefficients (scikit-learn): {}\n'.format(linear_model.coef_))
print('Intercept (scikit-learn): {}\n'.format(linear_model.intercept_))
print('Accuracy (scikit-learn train): {}\n'.format(
    metrics.r2_score(y_train, linear_model.predict(X_train))))
print('Accuracy (scikit-learn test): {}\n'.format(
    metrics.r2_score(y_test, linear_model.predict(X_test))))

initial_weights = np.zeros(X_train.shape[1])
step_size = 1e-7
tolerance = 1e-3
weights = regression_gradient_descent(X_train, y_train, initial_weights,
                                      step_size, tolerance)

print('Coefficients (from scratch): {}\n'.format(weights))
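
regression_gradient_descent (and the predict_output helper used in Example #19 below) is not shown in the excerpt; a minimal sketch consistent with the calls above, assuming a plain squared-error gradient over the already intercept-augmented feature matrix:

# Hypothetical helpers; the original implementations are not shown.
import numpy as np

def predict_output(feature_matrix, weights):
    # Predictions are the dot product of the feature matrix and the weights.
    return np.dot(feature_matrix, weights)

def regression_gradient_descent(feature_matrix, output, initial_weights,
                                step_size, tolerance):
    # Plain batch gradient descent on the squared-error cost.
    weights = np.array(initial_weights, dtype=float)
    while True:
        errors = predict_output(feature_matrix, weights) - output
        gradient = 2 * np.dot(feature_matrix.T, errors)  # d/dw ||Xw - y||^2
        weights -= step_size * gradient
        if np.linalg.norm(gradient) < tolerance:
            return weights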
Example #16
plt.xlabel('F (N)')
plt.ylabel('a (m/s2)')
plt.legend()
plt.title('Data = Signal + Noise')


# **The aim of machine learning is to find the signal**

# In[36]:


# We know the signal, let's see if ML can find it
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()
linear_model.fit(F, a)


# In[37]:


# Signal vs. Fit
plt.figure(dpi=200)
plt.scatter(F, a, label='data')
plt.plot(F, F/m, label='signal', color='orange')
plt.plot(F, linear_model.predict(F), label='fit')

for i in range(len(F)):
    plt.plot([F[i], F[i]], [F[i]/m, a[i]], 'k--', label='noise' if i == 0 else None)

plt.xlabel('F (N)')
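
F, a, and the mass m come from earlier cells that are not shown; a synthetic setup consistent with the plots above (all values are assumptions):

# Hypothetical data generation for the F = m*a demo above.
import numpy as np

m = 2.0                                      # assumed mass in kg
F = np.linspace(0.1, 10, 25).reshape(-1, 1)  # force in N, 2-D for scikit-learn
a = (F / m).ravel() + np.random.normal(scale=0.3, size=len(F))  # signal + noise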
Example #17
X = data.X.values
X = X.reshape(-1, 1)
y = data.Y.values
y = y.reshape(-1, 1)

#%%

from sklearn import linear_model

lm = linear_model.LinearRegression()
model1 = lm.fit(X, y)
print(model1)
model1.score(X, y)  # R^2

# Coefficients
model1.coef_       # b1 (slope) coefficient
model1.intercept_  # b0 (intercept) coefficient
y_pred1 = model1.predict(X)
y_pred1
Example #18
scoring = 'accuracy'
# Spot Check Algorithms

lr = LogisticRegression(C=6)  # 42,44,44,45,45,45
#lr = SVC(C=6.0, kernel='linear')

#models.append(('SVM', SVC(C=6.0, kernel='linear')))  # 43,43,43,43,43,43

# evaluate each model in turn

kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
cv_results = model_selection.cross_val_score(lr,
                                             X_train,
                                             Y_train,
                                             cv=kfold,
                                             scoring=scoring)
msg = "logistic R: mean = %f standard deviation = (%f)\n" % (cv_results.mean(),
                                                             cv_results.std())
print(msg)

lr.fit(X_train, Y_train)
predictions = lr.predict(X_validation)
print("accuracy: ", accuracy_score(Y_validation, predictions))
print("confusion matrix:\n ", confusion_matrix(Y_validation,
                                               predictions))
print("classification results:\n",
      classification_report(Y_validation, predictions))

prediction_video = lr.predict([ext])
print("video prediction label:", prediction_video)
Example #19
X, y, coef = datasets.make_regression(n_samples=100, n_features=2, n_informative=2, n_targets=1, coef=True, random_state=1)
print('Actual coefficients: {}\n'.format(coef))

ones = np.ones((len(X),1))
X = np.append(ones, X, axis=1)

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=.2)

print('\nTraining data (X/y): \n{}\n{}\n'.format(X_train[:5], y_train[:5]))

linear_model = linear_model.LinearRegression()
linear_model.fit(X_train, y_train)

print('Coefficients (scikit-learn): {}\n'.format(linear_model.coef_))
print('Intercept (scikit-learn): {}\n'.format(linear_model.intercept_))
print('Accuracy (scikit-learn train): {}\n'.format(metrics.r2_score(y_train, linear_model.predict(X_train))))
print('Accuracy (scikit-learn test): {}\n'.format(metrics.r2_score(y_test, linear_model.predict(X_test))))

initial_weights = np.zeros(X_train.shape[1])
step_size = 1e-7
tolerance = 1e-3
weights = regression_gradient_descent(X_train, y_train, initial_weights, step_size, tolerance)

print('Coefficients (from scratch): {}\n'.format(weights))
print('Accuracy (from scratch train): {}\n'.format(metrics.r2_score(y_train, predict_output(X_train, weights))))
print('Accuracy (from scratch test): {}\n'.format(metrics.r2_score(y_test, predict_output(X_test, weights))))
Example #20
    #joining the data of two tables
    consolidated_data = pd.merge(country_satisfacton_value,
                                 coutry_gdp_values,
                                 on='Country')
    consolidated_data.columns = [
        'Country', 'Life Satisfaction', 'GDP per Capita'
    ]
    print(consolidated_data)
    return consolidated_data


consolidated_data = prepare_country_stats(oecd_bli, gdp_per_capita)

X = np.c_[consolidated_data["GDP per Capita"]]
Y = np.c_[consolidated_data["Life Satisfaction"]]

#Visualize the Data
consolidated_data.plot(kind='scatter',
                       x='GDP per Capita',
                       y='Life Satisfaction')
plt.show()

#Linear Model
linear_model = sklearn.linear_model.LinearRegression()

#Training the Model
linear_model.fit(X, Y)

#Make a prediction for Cyprus
X_new = [[22587]]
print(linear_model.predict(X_new))