def train_model(variables):
    """Fit a linear regression of ``y`` on the selected columns of ``X``.

    ``X`` and ``y`` are read from the enclosing module scope.

    Args:
        variables: Collection of column labels of ``X`` to use as predictors.

    Returns:
        The fitted ``LinearRegression`` instance, or ``None`` when
        ``variables`` is empty.
    """
    # Explicit length check (not truthiness) so array-like inputs are
    # handled as well as plain lists.
    if len(variables) > 0:
        regressor = LinearRegression()
        regressor.fit(X[variables], y)
        return regressor
    return None


def score_model(model, variables):
    """Return the ``AIC_score`` of ``model`` for the given predictor columns.

    ``X`` and ``y`` are read from the enclosing module scope.

    Args:
        model: Fitted estimator exposing ``predict``; it is only passed
            through to ``AIC_score`` when ``variables`` is empty.
        variables: Collection of column labels of ``X`` used by ``model``.

    Returns:
        Whatever ``AIC_score`` returns for the computed predictions.
    """
    if len(variables) > 0:
        predictions = model.predict(X[variables])
        return AIC_score(y, predictions, model)

    # No predictors: score a constant prediction equal to the mean of y,
    # with df given explicitly (presumably because there is no fitted
    # model to derive it from — confirm against AIC_score).
    constant_prediction = [y.mean()] * len(y)
    return AIC_score(y, constant_prediction, model, df=1)


# Search over every column of X, using the train/score callbacks above.
best_model, best_variables = stepwise_selection(
    X.columns, train_model, score_model, verbose=True
)

# Report the selected model: intercept first, then one line per
# selected variable paired with its coefficient.
print()
print(f'Intercept: {best_model.intercept_:.3f}')
print('Coefficients:')
for name, coef in zip(best_variables, best_model.coef_):
    print(f' {name}: {coef}')

### Weighted regression
# We can calculate the Year from the date column using either a list comprehension or the data frame's `apply` method.

# Both assignments compute the same value per row: the text before the first
# '-' in DocumentDate, converted to int.
# Variant 1: list comprehension over the column.
house['Year'] = [int(date.split('-')[0]) for date in house.DocumentDate]
# Variant 2: element-wise `apply`; this overwrites the column assigned above.
house['Year'] = house.DocumentDate.apply(lambda d: int(d.split('-')[0]))
# Weight is the Year offset from 2005.
house['Weight'] = house.Year - 2005
# Example #2
# 0
def score_model(model, variables):
    """Return the ``AIC_score`` of ``model`` on the training data.

    ``train_X`` and ``train_y`` are read from the enclosing module scope.

    Args:
        model: Fitted estimator exposing ``predict``; it is only passed
            through to ``AIC_score`` when ``variables`` is empty.
        variables: Collection of column labels of ``train_X`` used by
            ``model``.

    Returns:
        Whatever ``AIC_score`` returns for the computed predictions.
    """
    has_predictors = len(variables) != 0
    if has_predictors:
        fitted_values = model.predict(train_X[variables])
        return AIC_score(train_y, fitted_values, model)

    # Empty variable set: score a constant prediction equal to the mean of
    # train_y, passing df=1 explicitly.
    mean_prediction = [train_y.mean()] * len(train_y)
    return AIC_score(train_y, mean_prediction, model, df=1)


# Candidate predictors: every column of the training frame.
allVars = train_X.columns

# Run three selection routines on the same candidates, trainer and scorer.
# Each result is kept under its own pair of names.
best_model, best_vars = backward_elimination(
    allVars, train_model, score_model, verbose=True
)

best_model1, best_vars1 = stepwise_selection(
    allVars, train_model, score_model, verbose=True
)

best_model2, best_vars2 = forward_selection(
    allVars, train_model, score_model, verbose=True
)

## REGULARIZATION
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0
# and removed in 1.2 — confirm the pinned scikit-learn version before running.
# Fit on the training split; `fit` returns the estimator itself.
bayR = BayesianRidge(normalize=True).fit(train_X, train_y)
# Summarize prediction quality on the validation split.
regressionSummary(valid_y, bayR.predict(valid_X))
# Ratio of the fitted model's lambda_ to alpha_, reported below as the
# chosen regularization.
alpha = bayR.lambda_ / bayR.alpha_
print('Bayesian ridge chosen Regularization: ', alpha)