Esempio n. 1
0
 def modFirst(self, paramF="train.json"):
     """Return the default model: the bagging regressor wrapping the tree.

     Both estimators are read from the ``regL`` mapping of the
     ``modL.modelList`` object built from ``paramF``.
     """
     registry = modL.modelList(paramF).regL
     bagging = registry['bagReg']['mod']
     tree = registry['decTree']['mod']
     # Plug the decision tree in as the estimator the bagging model wraps.
     bagging.set_params(base_estimator=tree)
     return bagging
Esempio n. 2
0
 def modFirst(self, paramF):
     """Return the model stored under ``self.modName``.

     The model is read from the ``regL`` mapping of the ``modL.modelList``
     object built from ``paramF``.
     """
     registry = modL.modelList(paramF).regL
     entry = registry[self.modName]
     # NOTE: a perceptron-specific ``hidden_layer_sizes`` override used to be
     # applied here; it is disabled.
     return entry['mod']
Esempio n. 3
0
def regressorSingle(X, y, nXval=6, isShuffle=True, paramF="train.json"):
    """Fit the bagging regressor on several folds and return the best fit.

    For each of the ``nXval`` folds one contiguous slice of the (optionally
    shuffled) sample order is left out, the model is trained on the remaining
    samples and scored by the Pearson correlation between ``y`` and the
    prediction on the full ``X``.

    Args:
        X: 2-D array-like of features; NaNs are replaced by ``np.nan_to_num``.
        y: array-like of targets, one per row of ``X``.
        nXval: number of folds.
        isShuffle: shuffle the sample order before slicing the folds.
        paramF: kept for interface compatibility; not used here, the model
            list is built with its own defaults.

    Returns:
        ``(fit_q, corrL)``: the fitted model with the highest correlation and
        the list of per-fold correlations.
    """
    import copy

    tml = modL.modelList()
    clf = tml.regL['bagReg']['mod']
    decT = tml.regL['decTree']['mod']
    clf.set_params(base_estimator=decT)
    N = len(X)
    shuffleL = random.sample(range(N), N) if isShuffle else list(range(N))
    X = np.nan_to_num(np.array(X))
    y = np.array(y)
    corrL = []
    fitL = []
    for j in range(nXval):  # cross validation
        lo, hi = int(j / nXval * N), int((j + 1) / nXval * N)
        idL = shuffleL[:lo] + shuffleL[hi:]
        # ``fit`` returns the estimator itself: train a private copy per fold,
        # otherwise every entry of fitL would be the same (last) fit.
        fit_q = copy.deepcopy(clf).fit(X[idL, :], y[idL])
        y_pred = fit_q.predict(X)
        corrL.append(sp.stats.pearsonr(y, y_pred)[0])
        fitL.append(fit_q)
    if np.all(np.isnan(corrL)):
        # No usable score (e.g. constant predictions): fall back to the last fit.
        best = len(fitL) - 1
    else:
        # NaN scores are ignored; ties resolve to the earliest fold.
        best = int(np.nanargmax(corrL))
    return fitL[best], corrL
Esempio n. 4
0
def linLeastSq(X, y):
    """Fit the ``'elastic_cv'`` model of the model list and return its coefficients.

    Args:
        X: feature matrix handed to ``fit``.
        y: target values handed to ``fit``.

    Returns:
        The ``coef_`` attribute of the fitted model.
    """
    # The alternative least-squares implementations that used to follow the
    # return statement were unreachable and have been removed.
    clf = modL.modelList().regL['elastic_cv']['mod']
    return clf.fit(X, y).coef_
Esempio n. 5
0
def regressor(X, y, nXval=6, isShuffle=True, paramF="train.json"):
    """Fit a bagging regressor of decision trees with fold-wise evaluation.

    For each of the ``nXval`` folds one contiguous slice of the (optionally
    shuffled) sample order is left out, a model is trained on the remaining
    samples and scored with ``t_s.calcMetrics`` on the prediction over the
    full ``X``. One of the fold models is then returned at random.

    Args:
        X: 2-D array-like of features; NaNs are replaced by ``np.nan_to_num``.
        y: array-like of targets, one per row of ``X``.
        nXval: number of folds; ``1`` fits once on all samples.
        isShuffle: shuffle the sample order before slicing the folds.
        paramF: kept for interface compatibility; not used, the estimators
            are built explicitly below.

    Returns:
        ``(fit_q, metrics)``: a fitted model and a ``pd.DataFrame`` with one
        row of metrics per fold (an empty dict when ``nXval == 1``).
    """
    import copy
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.ensemble import BaggingRegressor

    # ``criterion`` and ``min_impurity_split`` stay at the library defaults:
    # the values spelled out before ('mse', None) were those defaults and
    # are rejected by recent scikit-learn releases.
    decT = DecisionTreeRegressor(max_depth=None,
                                 max_features=None,
                                 max_leaf_nodes=None,
                                 min_impurity_decrease=0.0,
                                 min_samples_leaf=1,
                                 min_samples_split=2,
                                 min_weight_fraction_leaf=0.0,
                                 random_state=None,
                                 splitter='best')
    # The wrapped estimator is passed positionally because its keyword was
    # renamed (base_estimator -> estimator) between scikit-learn versions.
    clf = BaggingRegressor(decT,
                           bootstrap=True,
                           bootstrap_features=False,
                           max_features=1.0,
                           max_samples=1.0,
                           n_estimators=10,
                           n_jobs=1,
                           oob_score=False,
                           random_state=None,
                           verbose=0,
                           warm_start=False)
    N = len(X)
    X = np.nan_to_num(np.array(X))
    y = np.array(y)
    shuffleL = random.sample(range(N), N) if isShuffle else list(range(N))
    if nXval == 1:
        return clf.fit(X, y), {}

    corrL = []
    fitL = []
    for j in range(nXval):  # cross validation
        lo, hi = int(j / nXval * N), int((j + 1) / nXval * N)
        idL = shuffleL[:lo] + shuffleL[hi:]
        # ``fit`` returns the estimator itself: train a private copy per fold,
        # otherwise every entry of fitL would be the same (last) fit.
        fit_q = copy.deepcopy(clf).fit(X[idL, :], y[idL])
        y_pred = fit_q.predict(X)
        corrL.append(t_s.calcMetrics(y, y_pred))
        fitL.append(fit_q)
    # pick a random model among the folds
    nRandom = int(nXval * np.random.uniform())
    return fitL[nRandom], pd.DataFrame(corrL)
Esempio n. 6
0
 def modPick(self, clf):
     """Re-draw one hyper-parameter of ``clf`` at random from the grid.

     The grid is the ``gridL`` entry of the model list for ``self.modName``.
     One key and one of its candidate values are drawn, written into the
     current parameter set of ``clf`` and applied.

     Returns:
         ``(clf, params)``: the updated model and the parameter set applied.
     """
     grid = modL.modelList().gridL[self.modName]
     settings = clf.get_params()
     name = random.choice(list(grid))
     settings[name] = random.choice(grid[name])
     clf.set_params(**settings)
     return clf, settings
Esempio n. 7
0
 def loopMod(self, paramF="train.json", test_size=0.4):
     """Train every active model of the list and return the best one.

     The samples are shuffled and split into a train and a test part, each
     active model is evaluated with ``self.perfCla`` and the models are
     ranked by ``acc * auc``.

     Args:
         paramF: parameter file handed to ``modL.modelList``.
         test_size: fraction of the samples kept for testing.

     Returns:
         ``(mod, trainR)``: the best-ranked model and the score table
         sorted by decreasing ``perf``.

     Raises:
         ValueError: if no model of the list is active.

     Side effects:
         Sets ``self.rocC``, ``self.trainR`` and ``self.y_pred`` (prediction
         of the best model on ``self.X``).
     """
     N = len(self.y)
     shuffleL = random.sample(range(N), N)
     nTrain = int(N * (1. - test_size))
     trainL = shuffleL[:nTrain]
     testL = shuffleL[nTrain:]
     trainR = []
     model = []
     rocC = []
     tml = modL.modelList(paramF)
     tml.set_params()
     for index in range(tml.nCat()):
         clf = tml.retCat(index)
         if not clf['active']:
             continue
         mod, trainS, testS, t_diff, x_pr, y_pr, auc, fsc, acc, _cv = self.perfCla(
             clf, trainL, testL)
         trainR.append([
             clf['name'], trainS, testS, t_diff, auc, fsc, acc, clf["type"]
         ])
         model.append(mod)
         rocC.append([x_pr, y_pr])
     if not model:
         # Fail with a clear message instead of a pandas length mismatch.
         raise ValueError("no active model in " + str(paramF))
     trainR = pd.DataFrame(trainR,
                           columns=[
                               "model", "train_score", "test_score", "time",
                               "auc", "fsc", "acc", "type"
                           ])
     trainR.loc[:, 'perf'] = trainR['acc'] * trainR['auc']
     trainR = trainR.sort_values(['perf'], ascending=False)
     # The row labels are the positions in ``model``: the first label after
     # sorting identifies the best model.
     mod = model[trainR.index.values[0]]
     self.rocC = rocC
     self.trainR = trainR
     self.y_pred = mod.predict(self.X)
     return mod, trainR
Esempio n. 8
0
 def modPick(self, clf):
     """Re-draw one tree and one bagging hyper-parameter at random.

     One key/value pair is drawn from the ``'decTree'`` grid for the wrapped
     estimator of ``clf`` and one from the ``'bagging'`` grid for ``clf``
     itself; both are applied.

     Returns:
         ``(clf, s)``: the updated model and the merged parameter dict of
         the tree and the bagging model.
     """
     tml = modL.modelList()
     pDecT = tml.gridL['decTree']
     pBag = tml.gridL['bagging']
     paraB = clf.get_params()
     decT = paraB.pop('base_estimator')
     paraS = decT.get_params()
     k = random.choice(list(pDecT))
     v = random.choice(pDecT[k])
     paraS[k] = v
     # get_params() is deep: paraB also holds 'base_estimator__<name>' copies
     # of the tree parameters. Keep the copy of the drawn one in sync so the
     # returned dict does not report a stale value.
     nested = 'base_estimator__' + k
     if nested in paraB:
         paraB[nested] = v
     k = random.choice(list(pBag))
     v = random.choice(pBag[k])
     paraB[k] = v
     # Apply the bagging parameters first: their nested tree entries would
     # otherwise overwrite the tree parameters set just before.
     clf.set_params(**paraB)
     decT.set_params(**paraS)
     clf.set_params(base_estimator=decT)
     s = {**paraS, **paraB}
     return clf, s
Esempio n. 9
0
    def tune(self, paramF="train.json", tuneF="train_tune.json"):
        """Grid-search every active entry of ``tuneF`` and save the best parameters.

        Each active entry of the tuning file is matched by position with the
        model returned by ``retCat`` and with the parameter set of the model
        list. The best parameters found by a 5-fold ``GridSearchCV`` on
        ``self.X`` / ``self.y`` are merged into that set and the whole list
        is written back to ``paramF``.

        Args:
            paramF: parameter file read by ``modL.modelList`` and overwritten
                with the tuned parameters.
            tuneF: JSON file holding, per model, ``active``, ``name`` and
                ``param_grid``.
        """
        tml = modL.modelList(paramF)
        params = tml.get_params()
        with open(tuneF) as f:
            pgrid = json.load(f)
        for idx, entry in enumerate(pgrid):
            if not entry['active']:
                continue
            print("tuning: " + entry['name'])
            clf = tml.retCat(idx)['mod']
            CV_rfc = GridSearchCV(estimator=clf,
                                  param_grid=entry['param_grid'],
                                  cv=5,
                                  return_train_score=False)
            CV_rfc.fit(self.X, self.y)
            for k, v in CV_rfc.best_params_.items():
                params[idx][k] = v

        # Serialize before opening: opening with 'w' truncates the file, so a
        # serialization error must not happen while it is open.
        payload = json.dumps(params)
        with open(paramF, 'w') as f:
            f.write(payload)
Esempio n. 10
0
def regressor(X, vf, vg, nXval=False, isShuffle=True, paramF="train.json"):
    """Fit the bagging regressor to the quotient ``vg / vf``.

    The target is ``vg / vf`` with non-finite entries (NaN, +/-inf) replaced
    by 1. For each fold one contiguous slice of the (optionally shuffled)
    sample order is left out, the model is trained on the remaining samples
    and scored by the Pearson correlation between ``vf * prediction`` and
    ``vg`` over all samples.

    Args:
        X: 2-D array-like of features; NaNs are replaced by ``np.nan_to_num``.
        vf: values forming the denominator of the target quotient.
        vg: values forming the numerator of the target quotient.
        nXval: number of folds; values below 2 (including the default
            ``False``) fit once on all samples.
        isShuffle: shuffle the sample order before slicing the folds.
        paramF: parameter file handed to ``modL.modelList``.

    Returns:
        ``(fit_q, corrL)``: the fitted model with the highest correlation and
        the per-fold correlations, or the last fit and ``[0]`` when the first
        correlation is NaN.
    """
    import copy

    tml = modL.modelList(paramF)
    clf = tml.regL['bagReg']['mod']
    decT = tml.regL['decTree']['mod']
    clf.set_params(base_estimator=decT)
    y = np.array(vg / vf, dtype=float)
    # 0/0 and x/0 carry no information on the quotient: treat them as 1.
    y[~np.isfinite(y)] = 1.
    N = len(X)
    shuffleL = random.sample(range(N), N) if isShuffle else list(range(N))
    X = np.nan_to_num(np.array(X))
    nFold = int(nXval) if nXval else 0
    if nFold < 2:
        # Nothing can be held out: a single fit on every sample.
        foldL = [shuffleL]
    else:
        foldL = [
            shuffleL[:int(j / nFold * N)] + shuffleL[int((j + 1) / nFold * N):]
            for j in range(nFold)
        ]
    corrL = []
    fitL = []
    for idL in foldL:  # cross validation
        # ``fit`` returns the estimator itself: train a private copy per fold,
        # otherwise every entry of fitL would be the same (last) fit.
        fit_q = copy.deepcopy(clf).fit(X[idL, :], y[idL])
        r_quot = fit_q.predict(X)
        corr = vf * r_quot
        corrL.append(sp.stats.pearsonr(corr, vg)[0])
        fitL.append(fit_q)
    if np.isnan(corrL[0]):
        return fit_q, [0]
    # pick the best; NaN scores are ignored, ties resolve to the earliest fold
    best = int(np.nanargmax(corrL))
    return fitL[best], corrL
Esempio n. 11
0
    tL = [
        'temperature', 'apparentTemperature', 'dewPoint', 'humidity',
        'windSpeed', 'windGust', 'windBearing', 'cloudCover', 'uvIndex',
        'visibility', 'precipAccumulation', 'pressure', 'ozone'
    ]

    tL = [
        'temperature', 'apparentTemperature', 'humidity', 'ozone', 'pressure',
        'windSpeed', 'windBearing', 'cloudCover', 'precipAccumulation'
    ]
    m = "rain"
    X = t_s.interpMissing(timeL[tL])
    y = s_s.interpMissing(hourL[m])
    #y, _ = t_r.binVector(y,nBin=7,threshold=0.5)

    mod = t_l.modelList(paramF=baseDir + "train/weath_" + m + ".json")
    mod.get_params()

    importlib.reload(t_l)
    importlib.reload(tlib)
    tMod = tlib.trainMod(X, y)
    mod, trainR = tMod.loopMod(paramF=baseDir + "train/weath_" + m + ".json",
                               test_size=.4)
    tMod.plotRoc()

if False:
    print('----------------------feature-importance------------------------')
    tL = [
        'temperature', 'apparentTemperature', 'humidity', 'ozone', 'pressure',
        'windSpeed', 'windBearing', 'cloudCover', 'precipAccumulation'
    ]