Ejemplo n.º 1
0
def LinearReg(file1, file2):
    """Fit a linear regression on ``file1`` and evaluate it on ``file2``.

    Both files are parsed with ``file2matrix``, which is expected to yield
    a ``(features, labels)`` pair (it is unpacked that way here).

    Returns a 3-tuple ``(y_true, y_score, y_pred)``: the labels read from
    ``file2``, the model's ``decision_function`` output and its ``predict``
    output for the features of ``file2``.
    """
    train_features, train_labels = file2matrix(file1)
    model = LinearRegression()
    model.fit(train_features, train_labels)

    test_features, test_labels = file2matrix(file2)
    scores = model.decision_function(test_features)
    predictions = model.predict(test_features)
    return test_labels, scores, predictions
class LinearRegression():
    """Thin wrapper that delegates every call to an underlying ``LR`` estimator.

    The wrapped estimator is stored in ``self.LR`` and is built from the
    constructor arguments, passed positionally in the order
    ``fit_intercept, normalize, copy_X, n_jobs``.
    """

    def __init__(self, fit_intercept=True, normalize=False, copy_X=True, n_jobs=1):
        """Create the wrapped ``LR`` estimator with the given settings."""
        self.LR = LR(fit_intercept, normalize, copy_X, n_jobs)

    def decision_function(self, x):
        """Return the wrapped estimator's ``decision_function`` output for ``x``."""
        return self.LR.decision_function(x)

    def fit(self, x, y):
        """Fit the wrapped estimator on ``(x, y)`` and return whatever it returns."""
        return self.LR.fit(x, y)

    def get_params(self):
        """Return the wrapped estimator's parameters."""
        return self.LR.get_params()

    def predict(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        return self.LR.predict(x)

    def set_params(self, **params):
        """Forward keyword parameters to the wrapped estimator.

        The keywords are unpacked again with ``**`` so that the wrapped
        ``set_params`` receives them as keyword arguments rather than as a
        single positional dict.
        """
        self.LR.set_params(**params)
Ejemplo n.º 3
0
import numpy as np
def prediction_error(predict,test):
    """Return the element-wise absolute error ``|predict - test|`` as an ndarray.

    ``predict`` and ``test`` must support subtraction with each other
    (e.g. two NumPy arrays of the same shape).  The absolute value is
    computed in one vectorised call instead of a Python-level loop.
    """
    # np.asarray keeps the return type an ndarray even when the subtraction
    # yields some other array-like container.
    return np.asarray(np.abs(predict - test))

import load_data
loader = load_data.bikeshare_loader()
loader.preprocess()
# Training set: the same feature columns (0-8) paired with two different
# target columns (9 and 10).
(X,y_c)= loader.training_data(range(9),9)
(X,y_r)= loader.training_data(range(9),10)
# Test set, split the same way.  The column-10 target gets its own name so it
# no longer overwrites the column-9 target that the model below is scored on.
(test_X,test_y_c)= loader.test_data(range(9),9)
(test_X,test_y_r)= loader.test_data(range(9),10)

from sklearn.linear_model import LinearRegression
# NOTE(review): the `normalize` argument was removed in scikit-learn 1.2;
# this call needs an older release (or an explicit scaling step) to run.
linreg=LinearRegression(fit_intercept=True, normalize=True)
linreg.fit(X,y_c)
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("Linear coefficients:")
# Print the fitted coefficients, matching the label above (the previous code
# printed decision_function(X), i.e. the training predictions).
print(linreg.coef_)

predict_y_c=linreg.predict(test_X)
error_y_c=prediction_error(predict_y_c,test_y_c)
print("Max Value: {}, Average error: {}".format (test_y_c.max(),error_y_c.mean()))


import matplotlib.pyplot as plt
# Blue: predictions, green: true targets, red: absolute error, all plotted
# against the first feature column of the test set.
plt.figure()
plt.title("Linear Regression")
plt.plot(test_X[:,0],predict_y_c,'b')
plt.plot(test_X[:,0],test_y_c,'g')
plt.plot(test_X[:,0],error_y_c,'r')
Ejemplo n.º 4
0
# 3.7 Plot the ROC curve


def plot_roc_curve(fpr, tpr, label=None):
    """Plot a ROC curve and display it.

    Args:
        fpr: x-values of the curve (false positive rates).
        tpr: y-values of the curve (true positive rates).
        label: optional legend label for the curve.  When given, a legend
            is drawn so the label is actually visible; the figure is shown
            inside this function, so the caller cannot add one afterwards.
    """
    import matplotlib.pyplot as plt
    plt.plot(fpr, tpr, linewidth=2, label=label)
    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.axis([0, 1, 0, 1])
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.title("ROC Curve")
    if label is not None:
        plt.legend()
    plt.show()


# Score every sample with the fitted model, derive the ROC points from those
# scores and the true labels, then plot them.
# NOTE(review): `model`, `X`, `y` and `metrics` are defined outside this
# excerpt; `metrics` is presumably sklearn.metrics - confirm.
y_score = model.decision_function(X)
(fpr, tpr, thresholds) = metrics.roc_curve(y, y_score)
plot_roc_curve(fpr, tpr)

# 3.8 Compute the probability of each classification and compare the
# classification rule, the predicted class and the actual class

p = model.predict_proba(X)  # versus y_pred versus y

# 3.9 Perform the multiclass classification (8 classes) using the OneVsRest criterion
# Compute the predicted probability, its consistency with the effective classification and the actual class

# Allocate the 'target' column with uninitialised values (np.empty); the
# loop that follows presumably fills it in row by row - confirm.
data_ret['target'] = np.empty((len(data_ret), 1))

for i in range(0, len(data_ret)):
    if (data_ret.iloc[i, 0] < -0.01):
Ejemplo n.º 5
0
class Model:
    """Linear-regression model over lagged closing prices of one stock.

    The model wraps a ``LinearRegression`` estimator, builds its features
    with ``TimeLag`` and selects the ``lag`` parameter by k-fold validation
    over ``param_ranges``.  Per-``alpha`` results (performance, investments,
    yields, cash/stock) are stored in dictionaries keyed by ``alpha``.
    """

    def __init__(self,
                 stock,
                 params=None,
                 param_ranges=None,
                 debug=False):
        """Initialize the model.

        Args:
            stock: object providing ``data``, ``testData`` and
                ``n_days_test`` (these are the attributes read by this class).
            params: parameter dict; defaults to ``{'lag': 5}``.
            param_ranges: dict mapping each parameter name to the iterable
                of values to try during validation; defaults to
                ``{'lag': range(2, 20, 2)}``.
            debug: when true, progress information is printed.
        """
        self.mod = LinearRegression()
        self.name = 'LINREG'
        # Defaults are created per instance so that instances never share
        # (and accidentally mutate) one module-level dict.
        self.params = {'lag': 5} if params is None else params
        self.param_ranges = ({'lag': range(2, 20, 2)}
                             if param_ranges is None else param_ranges)
        self.debug = debug
        self.investments = {}
        self.performance = {}
        self.stock = stock
        self.yields = {}
        self.predictedYs = []
        self.actualYs = []
        self.pYields = []
        self.cashStock = {}
        self.classification = False

    def __str__(self):
        return "Linear Regression Model"

    def addPerformance(self, alpha, performance):
        """Store ``performance`` under key ``alpha``."""
        self.performance[alpha] = performance

    def addCashStock(self, alpha, cashStock):
        """Store ``cashStock`` under key ``alpha``."""
        self.cashStock[alpha] = cashStock

    def addInvestments(self, alpha, investments):
        """Store ``investments`` under key ``alpha``."""
        self.investments[alpha] = investments

    def addYield(self, alpha, pyield):
        """Store ``pyield`` under key ``alpha``."""
        self.yields[alpha] = pyield

    def fit(self, X, y):
        """Fit the wrapped estimator on ``(X, y)``."""
        self.mod.fit(X, y)

    def score(self, X, y):
        """Return the wrapped estimator's score on ``(X, y)``."""
        return self.mod.score(X, y)

    def initMod(self, data, params):
        """Adopt ``params`` and rebuild the lagged feature set from ``data``.

        Sets ``self.params``, ``self.lag_n`` (``params['lag']``),
        ``self.lag`` (a ``TimeLag`` of that size) and ``self.laggedData``.
        """
        self.params = params
        self.lag_n = params['lag']
        self.lag = TimeLag(self.lag_n)
        self.laggedData = self.lag.transform(data)

    def _fitOnHistory(self, X, params, cat, dayBefore):
        """Rebuild the lagged data with ``params`` and fit on everything up to ``dayBefore``."""
        self.initMod(X, params)
        # The first ``lag_n`` targets are dropped to line up with the lagged rows.
        y = self.stock.data[cat][:dayBefore].iloc[self.lag_n:]
        self.fit(self.laggedData[:dayBefore], y)

    def validate(self, day, n_splits=2, kfold=True):
        """Fit the model for ``day``, optionally re-selecting parameters first.

        Args:
            day: the day to predict; training targets stop the day before.
            n_splits: number of k-fold splits used for parameter selection.
            kfold: when false, skip parameter selection and simply refit
                with the current ``self.params``.

        Raises:
            ValueError: if no parameter combination produced a comparable
                average score (e.g. every score was NaN).
        """
        dayBefore = day - datetime.timedelta(days=1)
        X = self.stock.data['Close'][:day]
        cat = 'Classification' if self.classification else 'Close'
        if self.debug:
            print("input for model " + str(X.tail()))
        if not kfold:
            self._fitOnHistory(X, self.params, cat, dayBefore)
            return

        # Use the requested number of folds; the average below divides by
        # the same n_splits, so the two must agree.
        kf = KFold(n_splits=n_splits)
        bestParams = None
        bestScore = float('-inf')
        for combo in self.generateCombinations(self.param_ranges):
            total = 0
            self.initMod(X, combo)
            y = self.stock.data[cat][:dayBefore].iloc[self.lag_n:]
            for train_index, test_index in kf.split(
                    self.laggedData[:dayBefore]):
                X_train, X_test = self.laggedData.iloc[
                    train_index], self.laggedData.iloc[test_index]
                y_train, y_test = y.iloc[train_index], y.iloc[test_index]
                self.fit(X_train, y_train)
                total += self.score(X_test, y_test)
            if self.debug:
                print("total score: " + str(total) + "   for params: " +
                      str(combo) + "   avg score: " + str(total / n_splits))
            average = total / n_splits
            if average > bestScore:
                bestScore = average
                bestParams = combo
        if bestParams is None:
            raise ValueError(
                "validation produced no usable score for any parameter "
                "combination")
        if self.debug:
            print("model validated, chosing params: " + str(bestParams))
        self._fitOnHistory(X, bestParams, cat, dayBefore)

    def numValidations(self, freq):
        """Return the test-day positions at which parameter validation runs.

        ``[0]`` when ``freq`` is 0, otherwise every ``freq``-th position in
        ``range(0, self.stock.n_days_test, freq)``.
        """
        if freq == 0:
            return [0]
        return range(0, self.stock.n_days_test, freq)

    def generateCombinations(self, params):
        """Return every combination of the given parameter values.

        Args:
            params: dict mapping parameter name to an iterable of values.

        Returns:
            A list of dicts, one per element of the Cartesian product of
            the value iterables, each mapping every name to one value.
        """
        keys = list(params)
        return [
            dict(zip(keys, combo))
            for combo in itertools.product(*params.values())
        ]

    def getYields(self, validationFreq=0):
        """Predict every test day and return the list of predicted yields.

        For each day in ``self.stock.testData`` the model is re-fitted via
        ``validate`` (with parameter selection on the positions given by
        ``numValidations(validationFreq)``) and asked for a prediction.
        Also stores ``predictedYs``, ``actualYs``, ``pYields`` and
        ``meanError`` (mean absolute difference between predicted and
        actual values) on the instance.
        """
        pYields = []
        predictedYs = []
        actualYs = []
        validationDays = self.numValidations(validationFreq)
        for i, day in enumerate(self.stock.testData.index):
            self.validate(day, kfold=(i in validationDays))
            todaysLags = self.laggedData[day:day]
            if self.debug:
                print("training model for day " + str(day))
                print("Lagged data for day " + str(day) + " : " +
                      str(todaysLags))
            predictY = self.mod.predict(todaysLags)
            row = self.stock.testData.iloc[i]
            oldY = row['Open']
            actualY = row['Close']

            if self.classification:
                # In classification mode the yield is the classifier's
                # confidence, and the class is turned into a price that is
                # 5 above or below the actual close.
                pYield = self.mod.decision_function(todaysLags)
                predictY = [actualY + 5] if predictY == 1 else [actualY - 5]
            else:
                pYield = (predictY - oldY) / oldY
            pYields.append(pYield[0])
            predictedYs.append(predictY[0])
            actualYs.append(actualY)
        self.predictedYs = predictedYs
        self.actualYs = actualYs
        self.pYields = pYields
        self.meanError = sum(
            abs(predicted - actual)
            for predicted, actual in zip(predictedYs, actualYs)
        ) / len(predictedYs)
        return pYields