Esempio n. 1
0
class Simple:
    """Linear model ``y = a * x + b`` backed by a Theil-Sen regressor.

    The constructor arguments and the ``c``/``d`` hooks are accepted but
    unused by this variant (presumably they exist to match a shared model
    interface -- TODO confirm against the callers).
    """

    def __init__(self, a, b, c, d):
        # None of a, b, c, d is stored; the regressor starts out unfitted.
        self.model = TheilSenRegressor()

    def update_a_b(self, x, y):
        """Refit the regressor on ``x`` (reshaped to one column) and ``y``."""
        column = x.reshape(-1, 1)
        self.model.fit(column, y)

    def set_c_d(self, c, d):
        """Do nothing; this model keeps no ``c``/``d`` state."""
        return None

    def get_y(self, x):
        """Return the regressor's prediction for ``x`` (reshaped to one column)."""
        column = x.reshape(-1, 1)
        return self.model.predict(column)

    def get_likelihood(self, x, y):
        """Return the summed absolute error of the fit divided by ``x.shape[0]``."""
        scale = 1 / float(x.shape[0])
        absolute_errors = np.abs(y - self.get_y(x))
        return scale * np.sum(absolute_errors)

    def to_string(self):
        """Return the fitted coefficient and intercept as ``"a:..., b:..."``."""
        slope, offset = self.model.coef_, self.model.intercept_
        return "a:{}, b:{}".format(slope, offset)

    def get_a_b(self):
        """Return ``(coef_, intercept_)`` of the fitted regressor."""
        return (self.model.coef_, self.model.intercept_)

    @staticmethod
    def var_to_weight(v):
        """Return the constant weight 1, whatever ``v`` is."""
        return 1

    @staticmethod
    def get_c_d(x, r):
        """Return ``(None, None)``; nothing is estimated for ``c``/``d``."""
        return (None, None)
Esempio n. 2
0
def robust_cor(x, y):
    """Compute a signed, robust correlation-like score from a Theil-Sen fit.

    Parameters
    ----------
    x : sequence
        Either a list of per-feature lists (transposed here so that each row
        is one sample) or a flat sequence treated as a single feature.
    y : sequence
        Target values.

    Returns
    -------
    list
        ``[corr_val, coef_, intercept_, breakdown_]`` where ``corr_val`` is
        ``sign(coef_[0]) * sqrt(adjusted R^2)`` when the adjusted R^2 is
        positive and ``0`` otherwise.
    """
    if isinstance(x[0], list):
        # Feature-major input: flip it to one row per sample.
        x = [list(sample) for sample in zip(*x)]
    else:
        x = np.array(x).reshape(-1, 1)
    X = np.array(x)
    Y = np.array(y)

    estimator = TheilSenRegressor(random_state=42)
    estimator.fit(X, Y)

    residuals = estimator.predict(X) - y
    centered = y - np.mean(y)
    ss_res = np.dot(residuals, residuals)
    ss_tot = np.dot(centered, centered)

    n_samples, n_features = X.shape[0], X.shape[1]
    # Adjusted R^2 penalises the number of features used by the fit.
    adj_r2 = 1 - (ss_res / ss_tot) * (n_samples - 1) / (n_samples -
                                                        n_features - 1)

    direction = np.sign(estimator.coef_)[0]
    corr_val = direction * np.sqrt(adj_r2) if adj_r2 > 0 else 0

    return [
        corr_val,
        estimator.coef_,
        estimator.intercept_,
        estimator.breakdown_,
    ]
Esempio n. 3
0
def getscore_getnext(df, days_ahead, coin):
    """Fit a Theil-Sen model on ``df`` and predict the 'close' value ahead.

    The label for each row is the 'close' value ``days_ahead`` rows later.
    All columns except 'label' and 'date' are used as (standardised)
    features.

    Side effects on the caller's ``df``: missing values are replaced by
    -99999 in place, a 'label' column is added, and the trailing rows that
    have no label are dropped in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'close' and 'date' columns.
    days_ahead : int
        Forecast horizon in rows; must be positive, otherwise the training
        slice ``X[:-days_ahead]`` is empty.
    coin : str
        Unused by the current implementation.

    Returns
    -------
    tuple
        ``(confidence, futureval)``: the model's ``score`` on a random 15%
        hold-out split and the prediction for the most recent row.
    """
    forecast_val = days_ahead
    forecast_col = 'close'

    df.fillna(value=-99999, inplace=True)
    df['label'] = df[forecast_col].shift(-forecast_val)

    # `axis` must be passed by keyword: the positional form was removed in
    # pandas 2.0.
    X = np.array(df.drop(['label', 'date'], axis=1))
    X = preprocessing.scale(X)

    # The newest row has no label yet; keep it aside as the forecast input.
    futureX = X[-1:]
    X = X[:-forecast_val]
    # Drops the last `forecast_val` rows, whose shifted label is NaN.
    df.dropna(inplace=True)

    y = np.array(df['label'])

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.15)

    # A previously pickled model ('%s.pickle' % coin) could be loaded here
    # instead of training from scratch; that path is currently disabled.
    clf = TheilSenRegressor()
    clf.fit(X_train, y_train)

    confidence = clf.score(X_test, y_test)
    futureval = clf.predict(futureX)
    return (confidence, futureval)
Esempio n. 4
0
class Regressor(BaseEstimator):
    """Wrapper that selects one regressor by a hard-coded name.

    ``regressorName`` is fixed to "linear" in ``__init__``; the other
    branches are kept so the selection can be switched by editing that
    single line.
    """

    def __init__(self):
        self.regressorName = "linear"
        if self.regressorName == "rf":
            self.clf = RandomForestRegressor(n_estimators=30, max_depth=63, max_features=50, n_jobs=-1)
        elif self.regressorName == "gb":
            self.clf = GradientBoostingRegressor(alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                                                 max_features=None, min_samples_leaf=1, min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                                                 random_state=None, subsample=1.0, verbose=0, warm_start=True)
        elif self.regressorName == "ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5,
                               gcv_mode=None, store_cv_values=False)
        elif self.regressorName == "linear":
            self.clf = LinearRegression()
        elif self.regressorName == "lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName == "svr":
            self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
        elif self.regressorName == "knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance', n_jobs=-1)
        elif self.regressorName == "gauss":
            # NOTE(review): the name "gauss" maps to a Theil-Sen regressor.
            self.clf = TheilSenRegressor()

    def fit(self, X, y):
        """Convert ``X`` to CSC sparse format and train the wrapped regressor.

        Returns ``self`` so that calls can be chained.
        """
        X = csc_matrix(X)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Training Algorithm")
        self.clf.fit(X, y)
        return self

    def predict(self, X):
        """Convert ``X`` to CSR sparse format and return the predictions."""
        X = csr_matrix(X)
        print("Testing Algorithm")
        return self.clf.predict(X)

    def getRegressor(self):
        """Return the wrapped estimator instance."""
        return self.clf

    def getRegressorName(self):
        """Return the name used to select the wrapped estimator."""
        return self.regressorName

    def getParamGrid(self):
        """Return the grid-search parameter grid for the selected regressor.

        Only "rf" and "gb" define a grid. Every other name yields an empty
        dict; previously this raised ``UnboundLocalError``, including for the
        default "linear".
        """
        if self.regressorName == "rf":
            return {'max_features': [None]}
        if self.regressorName == "gb":
            return {'alpha': [0.9]}
        return {}
Esempio n. 5
0
class Regressor(BaseEstimator):
    """Wrapper that selects one regressor by a hard-coded name.

    ``regressorName`` is fixed to "gb" in ``__init__``; the other branches
    are kept so the selection can be switched by editing that single line.
    """

    def __init__(self):
        self.regressorName = "gb"
        if self.regressorName == "rf":
            self.clf = RandomForestRegressor(n_estimators=400, max_depth=63, max_features=50, n_jobs=-1)
        elif self.regressorName == "gb":
            self.clf = GradientBoostingRegressor(alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                                                 max_features=None, min_samples_leaf=1, min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                                                 random_state=None, subsample=1.0, verbose=0, warm_start=True)
        elif self.regressorName == "ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5,
                               gcv_mode=None, store_cv_values=False)
        elif self.regressorName == "linear":
            # LinearRegression accepts neither `alpha` nor `max_iter`; the
            # previous call with those keywords would raise TypeError as soon
            # as this branch was selected.
            self.clf = LinearRegression()
        elif self.regressorName == "lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName == "svr":
            self.clf = SVR(kernel='rbf', C=0.2, gamma=0.01)
        elif self.regressorName == "knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance', n_jobs=-1)
        elif self.regressorName == "gauss":
            # NOTE(review): the name "gauss" maps to a Theil-Sen regressor.
            self.clf = TheilSenRegressor()

    def fit(self, X, y):
        """Train the wrapped regressor on ``X``/``y`` and return ``self``."""
        self.clf.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped regressor's predictions for ``X``."""
        return self.clf.predict(X)

    def getRegressor(self):
        """Return the wrapped estimator instance."""
        return self.clf

    def getRegressorName(self):
        """Return the name used to select the wrapped estimator."""
        return self.regressorName

    def getParamGrid(self):
        """Return the grid-search parameter grid for the selected regressor.

        Only "rf" and "gb" define a grid. Every other name yields an empty
        dict; previously this raised ``UnboundLocalError``.
        """
        if self.regressorName == "rf":
            return {'max_features': [None]}
        if self.regressorName == "gb":
            return {'alpha': [0.9]}
        return {}
Esempio n. 6
0
def _fit_robust_line(shifts):
    """Return the Theil-Sen line fitted to ``shifts`` against sample index.

    The i-th value of ``shifts`` is regressed on the index ``i``; the result
    is the fitted line evaluated at those same indices.
    """
    from sklearn.linear_model import TheilSenRegressor

    # One feature column: 0, 1, ..., len(shifts) - 1.
    positions = np.arange(len(shifts)).reshape(-1, 1)
    estimator = TheilSenRegressor()  # robust regression
    return estimator.fit(positions, shifts).predict(positions)
Esempio n. 7
0
class _TheilSenRegressorImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Esempio n. 8
0
def theilsen_regress_predict(var):
    """
    Fit a Theil-Sen line to ``var`` against its sample index.

    Input:-
    var: 1-D array of values; element i is regressed on the index i

    Output: 1-D array with the fitted line evaluated at every index
            (the predictions, not the regression coefficient)

    """
    regressor = TheilSenRegressor()
    # Pass the target as a flat vector: a column vector makes scikit-learn
    # emit a DataConversionWarning before it flattens the data itself.
    y = np.asarray(var).ravel()
    X = np.arange(len(y)).reshape(-1, 1)
    regressor.fit(X, y)
    return regressor.predict(X)
Esempio n. 9
0
def test_less_samples_than_features():
    """Theil-Sen must still fit when there are fewer samples than features."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = rng.normal(size=(n_samples, n_features))
    y = rng.normal(size=n_samples)

    # Without an intercept, Theil-Sen falls back to Least Squares.
    ts_no_intercept = TheilSenRegressor(fit_intercept=False, random_state=0)
    ts_no_intercept.fit(X, y)
    ols = LinearRegression(fit_intercept=False)
    ols.fit(X, y)
    assert_array_almost_equal(ts_no_intercept.coef_, ols.coef_, 12)

    # With an intercept the coefficients differ from Least Squares (the
    # intercept is calculated differently), but the training targets must
    # still be reproduced.
    ts_with_intercept = TheilSenRegressor(fit_intercept=True, random_state=0)
    ts_with_intercept.fit(X, y)
    assert_array_almost_equal(ts_with_intercept.predict(X), y, 12)
def test_less_samples_than_features():
    """Check the under-determined case (10 samples, 20 features)."""
    n_samples, n_features = 10, 20
    random_state = np.random.RandomState(0)
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)

    def fitted_theil_sen(with_intercept):
        # Same seed for both variants so only `fit_intercept` differs.
        estimator = TheilSenRegressor(fit_intercept=with_intercept,
                                      random_state=0)
        return estimator.fit(X, y)

    # fit_intercept=False: Theil-Sen falls back to Least Squares.
    theil_sen = fitted_theil_sen(False)
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 12)

    # fit_intercept=True: not equal to the Least Squares solution because
    # the intercept is calculated differently; compare predictions instead.
    theil_sen = fitted_theil_sen(True)
    assert_array_almost_equal(theil_sen.predict(X), y, 12)
class r07522507_TheilSenRegressor(regression):
    """Theil-Sen regression configured from ``self.param``."""

    def trainAlgo(self):
        """Build the regressor from ``self.param`` and fit it.

        Reads ``self.inputData['X']`` and ``self.outputData['Y']``; stores the
        fitted estimator in ``self.model``.
        """
        # Hyperparameters forwarded verbatim, looked up in this order.
        hyperparameter_names = (
            'fit_intercept',
            'copy_X',
            'max_subpopulation',
            'n_subsamples',
            'max_iter',
            'tol',
            'random_state',
            'verbose',
        )
        settings = {name: self.param[name] for name in hyperparameter_names}
        self.model = TheilSenRegressor(**settings)
        self.model.fit(self.inputData['X'], self.outputData['Y'])

    def predictAlgo(self):
        """Store the predictions for ``self.inputData['X']`` in ``self.result['Y']``."""
        predictions = self.model.predict(self.inputData['X'])
        self.result['Y'] = predictions
Esempio n. 12
0
    def train_and_return_model_replicas(self, host, port, username, password,
                                        appType, appNames, folderNames):
        """Fit total CPU utilisation as a Theil-Sen function of 'requests'.

        The data comes from ``self.getAndCombineAllDbs``. Total utilisation is
        the per-pod average multiplied by the number of pods. The model is
        trained on a 67% split and evaluated on the remaining 33%.

        ``appType`` is accepted but not used here.

        Returns
        -------
        tuple
            ``(regr, rms)``: the fitted regressor and the root-mean-square
            error on the held-out split.
        """
        df = self.getAndCombineAllDbs(host, port, username, password, appNames,
                                      folderNames)
        df['total_cpu_util'] = df['pod_util_cpu_avg'] * df['num_pods']
        df['total_mem_util'] = df['pod_util_mem_avg'] * df['num_pods']
        df_X = df[['requests']].values
        # 1-D target: a column vector makes scikit-learn warn and flatten it.
        df_Y = df['total_cpu_util'].values
        X_train, X_test, y_train, y_test = train_test_split(df_X,
                                                            df_Y,
                                                            test_size=0.33,
                                                            random_state=42)
        # The previous version also scored the model against synthetic
        # make_regression data and threw the result away; that step had no
        # effect on the returned values and has been removed.
        regr = TheilSenRegressor(random_state=0).fit(X_train, y_train)
        y_pred = regr.predict(X_test)

        rms = sqrt(mean_squared_error(y_test, y_pred))
        print('RMs score: %.2f' % rms)
        return regr, rms
Esempio n. 13
0
          fontsize=18)
plt.show()
# -

from sklearn.linear_model import TheilSenRegressor

# +
# Fit the baseline model on the full data set.
# NOTE(review): `lr` is created outside this excerpt -- presumably a
# LinearRegression, judging by the 'Lineal' plot label; confirm.
lr.fit(X, y)

# Fit the Theil-Sen model.
# NOTE(review): the original comment here said "RANSAC", but this line fits a
# TheilSenRegressor.
theil_model = TheilSenRegressor(random_state=42).fit(X, y)

# Predicted values, to be plotted afterwards
line_X = np.arange(X.min(), X.max())[:, np.newaxis]
line_y = lr.predict(line_X)
line_y_theil = theil_model.predict(line_X)

lw = 2
fig = plt.figure(figsize=(12, 6), dpi=100)

# Raw data plus one fitted line per model.
plt.scatter(X, y, marker='.')
plt.plot(line_X, line_y, color='navy', linewidth=lw, label='Lineal')
plt.plot(line_X, line_y_theil, color='green', linewidth=lw, label='Theil Sen')
# NOTE(review): `line_y_ransac` is not defined in this excerpt; it must come
# from an earlier cell.
plt.plot(line_X, line_y_ransac, color='tomato', linewidth=lw, label='RANSAC')
plt.xlabel('X', weight="bold", fontsize=16)
plt.ylabel('Y', weight="bold", fontsize=16)

plt.text(
    -1,
    300,
    "y = {:.2f}x + {:.2f}".format(lr.coef_[0], lr.intercept_),
Esempio n. 14
0
# plt.show()


# Theil-Sen estimator:
# General info: https://en.wikipedia.org/wiki/Theil%E2%80%93Sen_estimator
# Good ONLY for LINEAR REGRESSION
# scikit-learn example: http://scikit-learn.org/stable/auto_examples/linear_model/plot_theilsen.html

# Create the Theil-Sen estimator instance with default settings
theil = TheilSenRegressor()

# Fit the estimator.
# NOTE(review): `x` and `line_data` are defined outside this excerpt; `x` is
# presumably already a 2-D (n_samples, n_features) array -- confirm.
theil.fit(x, line_data)

# Evaluate the fitted model at the same `x` values used for training
line_theil = theil.predict(x)

# Plot the Theil-Sen result on the current figure
plt.plot(x, line_theil, color='red', label='Theil-Sen')

plt.legend(loc='lower right')

plt.show()

# Clear the current figure so later plots start from a blank canvas
plt.clf()

###################################

# Minimization - e.g. how to find a minimum of a function?

def f1(x):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import classification_report, confusion_matrix

# Load the training and test sets.
# NOTE(review): the paths are hard-coded to one user's Windows desktop.
train = pd.read_csv("C:/Users/HP/Desktop/train (1).csv")
test = pd.read_csv("C:/Users/HP/Desktop/test (2).csv")
# Drop every row that contains a missing value.
train = train.dropna()
test = test.dropna()
# Has no visible effect when run as a script (the returned preview is discarded).
train.head()

# Features: every column except the last. Target: the column at index 1.
# NOTE(review): these two selections only describe disjoint columns when the
# CSV has exactly two columns; with more columns the target would also be
# among the features -- confirm the file layout.
X_train = np.array(train.iloc[:, :-1].values)
y_train = np.array(train.iloc[:, 1].values)
X_test = np.array(test.iloc[:, :-1].values)
y_test = np.array(test.iloc[:, 1].values)

# Theil-Sen regressor
from sklearn.linear_model import TheilSenRegressor
model = TheilSenRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# NOTE(review): for a regressor, `score` is the coefficient of determination
# (R^2), not a classification accuracy, despite the variable name.
accuracy = model.score(X_test, y_test)
# Plot the fitted values over the training inputs.
plt.plot(X_train, model.predict(X_train), color='b')
plt.show()
print(accuracy)

# NOTE(review): the score is printed a second time here; looks accidental.
print(accuracy)
def computeLR(data: pd.DataFrame, dimensions, record_id):
    """Flag which rows of ``data`` lie close to an iteratively refitted line.

    The last column of ``data`` is the target and the preceding column(s) the
    predictor. A Theil-Sen line is fitted, rows whose absolute residual
    exceeds five times the median inlier residual are filtered out, and the
    fit is repeated on the remaining rows. This runs for at most 10 rounds
    and stops early once the number of kept rows no longer changes.

    NOTE(review): the predictor block is stored in a single DataFrame column,
    so this appears to support exactly one predictor column -- confirm.

    Parameters
    ----------
    data : pd.DataFrame
        Input table; it is copied, never modified.
    dimensions, record_id
        Passed through unchanged to the ``LR`` record.

    Returns
    -------
    list
        A single ``LR`` whose ``output`` is a comma-separated 0/1 flag per
        row and whose ``info`` is a JSON string with the threshold,
        coefficients, intercept and the type "within".
    """
    reg = TheilSenRegressor(random_state=1, max_subpopulation=50)
    values = data.values

    numDims = np.size(values, 1)

    X = values[:, 0:numDims - 1]
    Y = values[:, numDims - 1].reshape(-1, 1)

    ndf = data.copy(deep=True)
    ndf.reset_index(drop=True, inplace=True)

    ndf["X"] = X
    ndf["Y"] = Y
    ndf["Filter"] = True
    prev_length = 0
    m = 0

    for _ in range(10):
        curr_idx = ndf.index[ndf.loc[:, "Filter"]]  # type: ignore
        curr = ndf.iloc[curr_idx, :]

        # Converged: the previous round kept exactly the same number of rows.
        if prev_length == curr.shape[0]:
            break

        prev_length = curr.shape[0]

        x, y = curr["X"].values.reshape(-1, 1), curr["Y"].values

        reg.fit(x, y)

        # Residuals of ALL rows against the line fitted on the kept rows.
        residuals = abs(reg.predict(X) - ndf["Y"].values)

        # Median residual of the rows the line was actually fitted on.
        m = np.median(abs(reg.predict(x) - y))

        # `<=` rather than `<`: when the median residual is exactly 0 (more
        # than half of the rows lie on the line) a strict comparison rejects
        # every row, and the next round would fit on empty data.
        ndf["Filter"] = residuals <= (5 * m)

    within = ndf["Filter"].astype(int)  # type: ignore

    coeffs = reg.coef_.tolist()
    intercept = reg.intercept_

    # NOTE(review): the reported threshold is the median residual itself,
    # while the filter above uses 5 * m -- confirm which one consumers expect.
    threshold = m  # type: ignore

    return [
        LR(
            dimensions=dimensions,
            output=",".join(map(str, within.tolist())),
            info=json.dumps({
                "threshold": threshold,
                "coeff": coeffs,
                "intercept": intercept,
                "type": "within",
            }),
            record_id=record_id,
        )
    ]
Esempio n. 17
0
# Keep only the rows whose fluid is not "Oli".
df=df[df["Fluid"]!="Oli"]

# Divide the frequency by 60 and store it as float.
# NOTE(review): presumably a per-minute to per-second conversion (e.g. rpm to
# Hz) -- confirm the source unit.
df["frequency"]=df["frequency"]/60
df["frequency"]=df["frequency"].astype(float)

# Exclude measurement series "A1.1"; the other exclusions are switched off.
#df=df[df["Serie"]!="B1.1"]
df=df[df["Serie"]!="A1.1"]
#df=df[df["Serie"]!="B1.2"]


### Power-frequency correlation

# Theil-Sen fit of power against frequency (with intercept); the fitted
# values are stored next to the data.
ts=TheilSenRegressor(fit_intercept=True)
ts.fit(X=df[["frequency"]],y=df["power"])
df["ts-estimated"]=ts.predict(df[["frequency"]])

# Ordinary least-squares fit of the same relation, for comparison.
lsq=LinearRegression()
lsq.fit(X=df[["frequency"]],y=df["power"])
df["lsq-estimated"]=lsq.predict(df[["frequency"]])
# Report slope, intercept and the model's score on the training data ...
print('Least Squares: P={}·n +{}, Rsq{}'.format(lsq.coef_, lsq.intercept_, lsq.score(X=df[["frequency"]],y=df["power"])))
# ... followed by the mean squared error of the least-squares fit.
print(mean_squared_error(df["power"],df["lsq-estimated"]))

#Get confidence
conf_max=[]
conf_min=[]
frequencydummy=[]
for freq in df["frequency"].unique():
    if freq != 1150/60 and freq != 1250/60:
        serie=df[df["frequency"]==freq]["power"]
Esempio n. 18
0
def DumpTimestamps(video,
                   vid_boundary_frames,
                   output_fig_path,
                   output_csv_path,
                   input_vid_fname_stem,
                   debug=True):
    """Takes video as input and dumps hex-encoded timestamps to file.

    The timestamp of each frame is read from the first 14 values of row 0 of
    that frame (``video[:, 0, :14]``). The decoded times are then smoothed
    with a Theil-Sen line fitted against the frame index.

    Parameters
    ----------
    video : array
        Indexed as ``video[frame, row, column]``.
    vid_boundary_frames : iterable of int
        Frame indices at which the 2nd, 3rd, ... source video starts in the
        concatenated ``video``.
    output_fig_path, output_csv_path : path-like
        Destinations for the timestamp plot and the per-frame table.
    input_vid_fname_stem : str
        Used only in the plot title.
    debug : bool
        Currently unused.

    Returns
    -------
    float
        Adjusted (fitted) time of the last frame minus that of the first.
    """

    import pandas as pd
    import numpy as np

    # Convert pixel integers to strings with hexadecimal representation,
    # e.g. integer 1 gives '0x1'. Crop the '0x' off, pad each value to two
    # characters, then join them into one string per frame.
    all_timestamps = [
        "".join(hex(value)[2:].zfill(2) for value in pixels)
        for pixels in video[:, 0, :14]
    ]

    df = pd.DataFrame(all_timestamps, columns=['raw'])

    def FormatTimestamp(s):
        # Insert separators into the raw digit string to make it readable.
        tstr = list(s)
        tstr.insert(8, ' ')
        tstr.insert(13, '-')
        tstr.insert(16, '-')
        tstr.insert(19, ' ')
        tstr.insert(22, ':')
        tstr.insert(25, ':')
        tstr.insert(28, '.')
        return "".join(tstr)

    # Assign each frame in the concatenated video the video part it came from.
    # Label slicing marks every frame from the boundary onwards without
    # building an explicit list of row indices.
    df['video'] = 1
    for i, frame_i in enumerate(vid_boundary_frames, start=2):
        df.loc[frame_i:, 'video'] = i

    # Parse timestamp
    df['timestamp'] = df['raw'].apply(FormatTimestamp)
    df['frame_index'] = df['raw'].str[:8]
    df['date'] = df['raw'].str[8:16]
    df['hour'] = df['raw'].str[16:18]
    df['min'] = df['raw'].str[18:20]
    df['rawsec'] = df['raw'].str[20:22]

    # Unparseable seconds become NaN and inherit the previous frame's value.
    # `.ffill()` replaces the deprecated `fillna(method='ffill')`.
    df['sec'] = pd.to_numeric(df['rawsec'],
                              errors='coerce').ffill().astype(int)

    # Adjust for seconds rolling over to the next minute.
    # NOTE(review): every frame is compared with the FIRST frame's seconds
    # value (as in the original loop), so at most one rollover is corrected;
    # recordings that run well past a minute are not fully unwrapped here.
    sec_values = df['sec'].values.copy()
    sec_values[sec_values < sec_values[0]] += 60
    df['sec'] = sec_values
    df['sec'] = df['sec'].astype(str)

    df['sec_fraction'] = df['raw'].str[22:].str.extract(r'^(\d+)')
    # Frames whose time cannot be parsed take the next valid value.
    # `.bfill()` replaces the deprecated `fillna(method='bfill')`.
    df['raw_time'] = pd.to_numeric(df['sec'] + '.' + df['sec_fraction'],
                                   errors='coerce').bfill()

    from sklearn.linear_model import TheilSenRegressor
    model = TheilSenRegressor()
    X = np.array(df.index).reshape(-1, 1)
    Y = df['raw_time'].values
    model.fit(X, Y)
    print("Fitting timestamps with slope and intercept.")
    # Index the single coefficient explicitly: calling float() on a
    # one-element array is deprecated in recent NumPy.
    intercept = float(model.intercept_)
    slope = float(model.coef_[0])
    print(
        f"Timestamp estimated coefficients: intercept={intercept:0.2f}, slope={slope*1000:0.3f}ms/frame"
    )

    Y_pred = model.predict(X)
    df['adj_time'] = Y_pred
    df['delta t (ms)'] = (df['raw_time'] -
                          df['raw_time'].shift()).fillna(0) * 1000
    df['delta t %-ile'] = df['delta t (ms)'].rank(pct=True)

    import matplotlib.pyplot as plt
    fig, ax1 = plt.subplots(dpi=300)
    df.plot(y=['raw_time', 'adj_time'], ax=ax1)
    ax1.set_ylabel("time (s)")
    ax1.set_xlabel("frame index")
    ax1.set_title(f'"{input_vid_fname_stem}" raw and adjusted timestamps')
    # Mark where each concatenated source video begins.
    for x in vid_boundary_frames:
        ax1.axvline(x, linestyle='dashed', color='black')

    fig.savefig(str(output_fig_path))
    plt.close(fig)  # Explicitly close to free memory and avoid warning
    df.to_csv(str(output_csv_path))
    print(
        f"Wrote \"{str( output_fig_path )}\" and \"{str( output_csv_path ) }\" to disk"
    )

    return (df.loc[len(video) - 1, 'adj_time'] - df.loc[0, 'adj_time'])
Esempio n. 19
0
def fix_ecg_peaks(ecg, plt=None):
    """Refine detected ECG beat positions by intersecting fitted peak flanks.

    Works on a copy of ``ecg``. Each beat in ``ecg.ibeats`` is first moved to
    the local extreme within ``slopesize`` samples (the signal is inverted if
    the negative peaks are larger on average). A Theil-Sen line is then
    fitted to the rising and to the falling flank of every peak, and the
    intersection of the two lines is taken as the beat time. If the
    intersection is implausible, the plain local maximum is used instead.

    Parameters
    ----------
    ecg : object
        Must provide ``copy()``, ``fps``, ``x`` (samples), ``t`` (times) and
        ``ibeats`` (beat sample indices).
    plt : optional
        Plotting interface (e.g. ``matplotlib.pyplot``); when given, the
        signal, the flanks and the fitted lines are drawn for debugging.

    Returns
    -------
    object
        The copy, with ``tbeats`` (refined times) and ``ibeats`` (refined
        integer indices) replaced.
    """
    ecg = ecg.copy()

    slopesize = int(ecg.fps / 45.0)

    def _local_extreme(center, locate):
        # Index of the extreme (`locate` is np.argmax or np.argmin) within
        # `slopesize` samples around `center`, clipped to the signal.
        # NOTE(review): the upper bound `len(ecg.x) - 1` excludes the very
        # last sample from the search, exactly as in the original code.
        lo = max(center - slopesize, 0)
        hi = min(center + slopesize, len(ecg.x) - 1)
        return lo + locate(ecg.x[lo:hi])

    # climb to maxima, and invert if necessary
    ecgidx = [_local_extreme(i, np.argmax) for i in ecg.ibeats]
    # average detected beat amplitude
    beatheight = np.mean(ecg.x[ecgidx]) - np.mean(ecg.x)
    negecgidx = [_local_extreme(i, np.argmin) for i in ecg.ibeats]
    # average detected beat amplitude in the other direction
    negbeatheight = np.mean(ecg.x[negecgidx]) - np.mean(ecg.x)
    # if the other direction has "higher" peaks, invert signal
    if np.abs(negbeatheight) > np.abs(beatheight):
        ecg.x *= -1
        ecgidx = negecgidx

    if plt is not None:
        plt.plot(ecg.t, ecg.x)
        plt.scatter(ecg.t[ecg.ibeats], ecg.x[ecg.ibeats], 30, 'y')

    # Floor division keeps `window` an integer on Python 3 as well; true
    # division would give a float, which cannot be used for slicing/indexing.
    window = slopesize // 2
    fixed_indices, fixed_times = [], []
    # loop through and linearly interpolate peak flanks
    for i in ecgidx:
        up_start = i
        # make sure start is in trough, not still on peak / plateau
        while ecg.x[up_start] >= ecg.x[i] and up_start > i - slopesize:
            up_start -= 1
        up_start -= slopesize
        # climb past noise (need to go up)
        while ecg.x[up_start + 1] <= ecg.x[up_start] and up_start < i - 1:
            up_start += 1
        up_end = i + 2
        # climb past noise (need to go up)
        while ecg.x[up_end - 1] >= ecg.x[up_end] and up_end > i + 1:
            up_end -= 1
        upidx = np.arange(up_start, up_end)  # indices of upslope

        down_start = i
        down_end = i
        # make sure end is in trough, not still on peak / plateau
        while ecg.x[down_end] >= ecg.x[i] and down_end < i + slopesize:
            down_end += 1
        down_end += slopesize
        # climb past noise (need to go down)
        # NOTE(review): `and` binds tighter than `or`, so the bound check
        # `down_start < down_end` guards only the second comparison. The
        # parentheses below make the existing behaviour explicit; confirm
        # whether the bound was meant to apply to both comparisons.
        while (ecg.x[down_start + 1] >= ecg.x[down_start]
               or (ecg.x[down_start + 2] >= ecg.x[down_start]
                   and down_start < down_end)):
            down_start += 1
        # climb past noise (need to go down)
        while ecg.x[down_end - 1] <= ecg.x[down_end] and down_end > down_start:
            down_end -= 1
        downidx = np.arange(down_start, down_end)  # indices of downslope

        if len(ecg.t[upidx]) <= 1 or len(ecg.t[downidx]) <= 1:
            # one or both flanks missing. just use max
            reali = i
            bestt = ecg.t[i]
        else:
            # interpolate flanks
            model1 = TheilSenRegressor().fit(ecg.t[upidx].reshape(-1, 1),
                                             ecg.x[upidx])
            model2 = TheilSenRegressor().fit(ecg.t[downidx].reshape(-1, 1),
                                             ecg.x[downidx])
            k1, d1 = model1.coef_[0], model1.intercept_
            k2, d2 = model2.coef_[0], model2.intercept_
            angle1, angle2 = np.arctan(k1), np.arctan(k2)

            # obtain intersection point (noise robust peak)
            bestt = (d2 - d1) / (k1 - k2)

            if (np.abs(bestt - ecg.t[i]) > slopesize
                    or np.abs(angle1) < 0.1
                    or np.abs(angle2) < 0.1):
                # calculated intersection point is very far from max -
                # something went wrong - reset
                print(
                    "fix_ecg_peaks WARNING: fixed beat is very far from actual maximum, or slopes suspiciously unsteep. Taking actual maximum to be safe"
                )
                i = _local_extreme(i, np.argmax)
                if plt is not None:
                    reali = i - window + np.argmin(
                        np.abs(ecg.t[(i - window):(i + window)] - bestt))
                    plt.scatter(bestt, ecg.x[reali], 200, 'y')
                    plt.scatter(ecg.t[i], ecg.x[i], 200, 'g')
                    plt.plot([bestt, ecg.t[i]], [ecg.x[reali], ecg.x[i]],
                             'r',
                             linewidth=2)

                reali = i
                bestt = ecg.t[i]
            else:
                # Sample nearest to the intersection time, searched within
                # `window` samples around the maximum.
                reali = i - window + np.argmin(
                    np.abs(ecg.t[(i - window):(i + window)] - bestt))

        # store fixed times and indices
        fixed_indices.append(reali)
        fixed_times.append(bestt)

        if plt is not None:
            # plot
            plt.plot(ecg.t[upidx], ecg.x[upidx], 'g')
            plt.plot(ecg.t[downidx], ecg.x[downidx], 'm')

            # The models exist only when both flanks had more than one point.
            if len(upidx) > 1 and len(downidx) > 1:
                plt.plot(ecg.t[upidx],
                         model1.predict(ecg.t[upidx].reshape(-1, 1)), '--k')
                plt.plot(ecg.t[downidx],
                         model2.predict(ecg.t[downidx].reshape(-1, 1)), '--y')

            plt.scatter(ecg.t[reali], ecg.x[reali], 60, 'r')
            plt.scatter(bestt, ecg.x[reali], 90, 'k')

    ecg.tbeats = np.ravel(fixed_times)
    ecg.ibeats = np.ravel(fixed_indices).astype(int)

    return ecg
Esempio n. 20
0
# Vectorise the training records and take CLOSE (truncated to int) as target.
# NOTE(review): `vec`, `x_train` and `train` are defined outside this excerpt.
X = vec.fit_transform(x_train).toarray()
Y = np.asarray(train.CLOSE)
Y = Y.astype('int')

# Pre-process the test data the same way
X_test = test[['HIGH', 'LOW', 'OPEN', 'TOTTRDQTY', 'TOTTRDVAL', 'TOTALTRADES']]
x_test = X_test.to_dict(orient='records')
# NOTE(review): a new vectorizer is fitted on the test records instead of
# reusing the one fitted on the training records; the column layout only
# matches if both sets expose the same feature names -- confirm.
vec = DictVectorizer()
x = vec.fit_transform(x_test).toarray()
y = np.asarray(test.CLOSE)
y = y.astype('int')

# Model: Theil-Sen regressor (a regressor, although the variable is `clf`)
clf = TheilSenRegressor()
clf.fit(X, Y)
# NOTE(review): the printed value is the regressor's `score` on the test
# set, which the message labels "Accuracy".
print("Accuracy of this Statistical Arbitrage model is: ", clf.score(x, y))
predict = clf.predict(x)

# Store the predictions next to the test rows
test['predict'] = predict

# Plot actual train/test closing prices and the predicted prices over Date
train.index = train.Date
test.index = test.Date
train['CLOSE'].plot()
test['CLOSE'].plot()
test['predict'].plot()
plt.legend(loc='best')
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
Esempio n. 21
0
def fit_TheilSen(features_train, labels_train, features_pred):
    """Fit a Theil-Sen regressor and predict labels for ``features_pred``.

    Also prints the model's R^2 score, computed on the TRAINING data.

    Parameters
    ----------
    features_train, labels_train
        Training features and targets.
    features_pred
        Features to predict for.

    Returns
    -------
    The predicted labels for ``features_pred``.
    """
    model = TheilSenRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)
    # A single pre-formatted string prints identically on Python 2 and 3;
    # the former `print a, b` statement is a SyntaxError on Python 3. The two
    # spaces reproduce the separator the print statement inserted.
    print("TheilSen - coefficient of determination R^2 of the prediction:  %s"
          % model.score(features_train, labels_train))
    return labels_pred
Esempio n. 22
0
# author: David Ruddell
# contact: [email protected], [email protected]

import pandas as pd
from sklearn import svm
from sklearn.linear_model import TheilSenRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data set: features start at column 16, the two regression targets
# are in columns 5 and 6.
hyper_data = pd.read_csv('../Data/headers3mgperml.csv', sep=',')

X = hyper_data.values[:, 16:]
y1 = hyper_data.values[:, 5]
y2 = hyper_data.values[:, 6]

# --- First target -----------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y1, random_state=100, test_size=0.3)

clf = TheilSenRegressor()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
# Report R^2 via the regressor's own `score`. `accuracy_score` is a
# classification metric and raises on continuous predictions.
print(clf.score(X_test, y_test))

# --- Second target ----------------------------------------------------------
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y2, random_state=100, test_size=0.3)

clf2 = TheilSenRegressor()
clf2.fit(X_train2, y_train2)

# Evaluate on the split that belongs to this model (previously `X_test`,
# which only matched because both splits share the same random_state).
y_pred2 = clf2.predict(X_test2)
print(clf2.score(X_test2, y_test2))
def main():
    """Pick the best regressor per sliding window and plot its hold-out error.

    The table returned by ``getTableVidrieria()`` is restricted to product 38
    and to dates before 2018-03-01, then aggregated per
    (mes, cuatrimestre, anho) into mean/max/min price and total quantity.
    For every window of ``ventana`` consecutive rows, six regressors are
    scored by cross-validation (negative mean absolute error) on all rows but
    the last one; the best-scoring regressor is refitted on those rows and its
    relative error on the last row is recorded.

    The recorded errors, the chosen model names and the variance of the errors
    are printed, and the errors are plotted together with their mean.
    """
    df = getTableVidrieria()
    fechas = pd.to_datetime(df['fecha'], format='%Y-%m-%d')
    filtrado = df[(df['idproducto'] == 38) & (fechas < '2018-03-01')]

    # Named aggregation produces flat, explicitly named columns. The nested
    # renaming dict formerly passed to aggregate() was removed in pandas 1.0.
    agrupado = filtrado.groupby(['mes', 'cuatrimestre', 'anho']).agg(
        precioproducto_mean=('precioproducto', 'mean'),
        precioproducto_max=('precioproducto', 'max'),
        precioproducto_min=('precioproducto', 'min'),
        cantidad_sum=('cantidad', 'sum'),
    ).reset_index()
    agrupado = agrupado.sort_values(by=['anho', 'mes'])

    x = pd.DataFrame(agrupado, columns=['precioproducto_mean', 'precioproducto_min', 'precioproducto_max'])
    y = pd.DataFrame(agrupado, columns=['cantidad_sum'])

    # Notes kept from earlier runs (translated).
    # NOTE(review): entries look like [window, feature set, error variance] -- confirm.
    # window of 22
    # window of 15
    # hasta = 23 - ventana + 1
    # [14, ('anho',), 0.03849277569925645]
    # [[19, ('precioproducto_mean', 'precioproducto_min', 'precioproducto_max'), 0.020839253014839243],
    #  [16, ('precioproducto_mean', 'precioproducto_min', 'precioproducto_max'), 0.023759777216876814],
    #  [17, ('anho', 'precioproducto_mean', 'precioproducto_min', 'precioproducto_max'), 0.028666478132123124],
    #  [17, ('anho', 'precioproducto_min', 'precioproducto_max'), 0.03180598120259058],
    #  [15, ('anho', 'precioproducto_mean', 'precioproducto_min', 'precioproducto_max'), 0.03220899665300666]]

    # (display name, zero-argument factory) for every candidate model, in the
    # order used to break ties between equal cross-validation scores.
    candidates = (
        ('Linear Regression', LinearRegression),
        ('Theil-Sen Regression', TheilSenRegressor),
        ('Gradient Boosting Regression',
         lambda: GradientBoostingRegressor(n_estimators=N_ESTIMATORS)),
        ('Extra Trees Regression',
         lambda: ExtraTreesRegressor(n_estimators=N_ESTIMATORS)),
        ('Ada Boost Regression',
         lambda: AdaBoostRegressor(n_estimators=N_ESTIMATORS)),
        ('Bagging Regression',
         lambda: BaggingRegressor(n_estimators=N_ESTIMATORS)),
    )

    aList = []      # relative hold-out error of the chosen model, per window
    nameList = []   # display name of the chosen model, per window
    ventana = 17
    # NOTE(review): 23 is presumably the number of aggregated rows -- confirm.
    hasta = 23 - ventana + 1
    # One fold per training row (the training part has ventana - 1 rows).
    CV = ventana - 1
    for i in range(hasta):
        x_new = x[i:(i + ventana)]
        y_new = y[i:(i + ventana)]

        # All rows but the last are used for training; the last is held out.
        x_train, x_test = x_new[:CV], x_new[CV:]
        y_train, y_test = y_new[:CV], y_new[CV:]
        # Estimators expect a 1-D target.
        target = y_train.values.ravel()

        cv_scores = [
            np.mean(cross_val_score(make_model(), x_train, target, cv=CV,
                                    scoring='neg_mean_absolute_error'))
            for _, make_model in candidates
        ]
        # Scores are negated errors, so the maximum is the best model; index()
        # returns the first maximum, i.e. the earliest candidate wins a tie.
        name, make_model = candidates[cv_scores.index(max(cv_scores))]

        actual = y_test.iloc[0]['cantidad_sum']
        predicted = make_model().fit(x_train, target).predict(x_test)[0]
        nameList.append(name)
        aList.append(np.absolute(actual - predicted) / actual)

    print(aList)
    print(nameList)
    print(np.var(aList))

    _, ax = plt.subplots()
    ax.plot(aList, label='% Error', marker='o')
    ax.plot([np.mean(aList)] * len(aList), label='Media', linestyle='--')
    ax.legend(loc='upper right')
    plt.show()
Esempio n. 24
0
def _fit_and_report(title, regressor):
    """Fit ``regressor`` on the training split, then print and plot its test results.

    R2 and MSE are computed between ``Y_expected`` and the predictions for
    ``X_test``. Returns ``(predictions, r2, mse)``.
    """
    regressor.fit(X_train, Y_train)
    predictions = regressor.predict(X_test)
    r2 = r2_score(Y_expected, predictions)
    mse = mean_squared_error(Y_expected, predictions)
    print(title + "\n", "R2: ", r2, "MSE:", mse)
    plot_prediction(title, predictions, test['close'])
    return predictions, r2, mse


# Lasso Lars

lassolars_reg = LassoLars()
Y_pred, lassolars_r2, lassolars_mse = _fit_and_report(
    "Lasso Lars Regression", lassolars_reg)

# Theil Sen Regressor

theil_reg = TheilSenRegressor()
Y_pred, theil_r2, theil_mse = _fit_and_report(
    "Theil Sen Regression", theil_reg)

# Bayesian Ridge

bayesian_reg = BayesianRidge()
Y_pred, bayesian_r2, bayesian_mse = _fit_and_report(
    "Bayesian Ridge Regression", bayesian_reg)
Esempio n. 25
0
# One subplot per input file: plot the recorded offsets, the Theil-Sen fit of
# offset against time, and print a prediction for the current time.
for document, csv_path in enumerate(documents):
    plt.subplot(3, 1, document + 1)

    # Only the first 5001 records of each file are used.
    result = pd.read_csv(csv_path).iloc[:5001]
    time_offset = result['TimeOffset']
    timestamp = numpy.array(result['Time']).reshape(-1, 1)
    # Human-readable HH:MM labels for the x axis.
    time_result = [
        datetime.datetime.fromtimestamp(each).strftime('%H:%M')
        for each in list(result['Time'])
    ]

    estimators = TheilSenRegressor()
    estimators.fit(timestamp, time_offset)

    # Label every 1000th record on the x axis.
    tick_positions = range(0, len(time_result), 1000)
    plt.xticks(tick_positions, [time_result[each] for each in tick_positions])
    # Quarter-unit y ticks spanning the floored minimum to the floored maximum + 1.
    lowest_tick = min(time_offset) // 1.0
    highest_tick = max(time_offset) // 1.0 + 1
    plt.yticks(np.arange(lowest_tick, highest_tick, 0.25))
    plt.plot(time_offset, label='NTP Records')
    plt.plot(estimators.predict(timestamp), label='Regression Result')

    # NOTE(review): get_time_offset() appears to return (time, NTP offset) -- confirm.
    time_predicted = time_shift.get_time_offset()
    query = numpy.array(time_predicted[0]).reshape(-1, 1)
    result = estimators.predict(query)
    print('Prediction(Predicted, NTPlib):', result, time_predicted[1])
    # Slope per time unit scaled by the number of seconds in a day.
    print('Daily time shifting', estimators.coef_[0] * 60 * 60 * 24)
    plt.legend()

plt.show()
Esempio n. 26
0
    def quantify_beat(self, beatnumber):
        """Characterise the downslope around one detected beat.

        Locates the signal peak preceding and the trough following beat
        ``beatnumber``, fits a Theil-Sen line through the samples between
        them and derives quality measures from that fit.

        Args:
            beatnumber: position of the beat in ``self.ibeats``.

        Returns:
            A tuple ``(ok_slope_length, ok_slope_angle,
            beat_downslope_orthogonal_distance, beat_downslope_peak_distance,
            iscrap)`` where ``iscrap`` is True when the fit's R^2 or the
            fraction of samples close to the fitted line falls below the
            ``BeatQuality`` thresholds.
        """
        beatindex = self.ibeats[beatnumber]
        # approx expected ibi
        meanibi = np.mean(np.diff(self.tbeats))
        # downslope is less than half of full beat. look for peaks on either side
        downslopewindow = int((meanibi / 2.5) * self.fps)
        window_start = beatindex - downslopewindow
        # pick preceding maximum: the last local maximum in the window, or the
        # plain argmax when that lookup fails (e.g. no local maximum found)
        try:
            maxindex = np.where(
                heartbeat_localmax(self.x[window_start:beatindex]))[0][-1]
        except Exception:
            maxindex = np.argmax(self.x[window_start:beatindex])
        peaki = window_start + maxindex
        # double check we didn't go beyond prev. beat
        if beatnumber > 0 and peaki <= self.ibeats[beatnumber - 1]:
            search_start = self.ibeats[beatnumber - 1] + downslopewindow
            peaki = search_start + np.argmax(self.x[search_start:beatindex])
        # pick succeeding minimum
        troughi = beatindex + np.argmin(
            self.x[beatindex:(beatindex + downslopewindow)])
        # double check we didn't go beyond next beat
        if (beatnumber < len(self.ibeats) - 1
                and troughi >= self.ibeats[beatnumber + 1]):
            troughi = beatindex + np.argmin(
                self.x[beatindex:(self.ibeats[beatnumber + 1] - 1)])

        # robust regression on downslope
        slope_t = self.t[peaki:troughi]
        slope_x = self.x[peaki:troughi]
        downslopemodel = TheilSenRegressor().fit(slope_t.reshape(-1, 1),
                                                 slope_x)
        r2 = downslopemodel.score(slope_t.reshape(-1, 1), slope_x)

        # count which points are close enough to the fitted line: distance of
        # each sample to the line, as a percentage of the peak-trough amplitude
        amplitude = self.x[peaki] - self.x[troughi]
        m, k = downslopemodel.coef_[0], downslopemodel.intercept_
        point_to_line_distances = np.abs(k + m * slope_t -
                                         slope_x) / np.sqrt(1 + m * m)
        point_to_line_distance_percentages = 100.0 / amplitude * point_to_line_distances
        ok_points = np.where(point_to_line_distance_percentages <
                             BeatQuality.ACCEPTED_DEVIATION_PERCENTAGE)[0]
        fraction_acceptable = 1.0 / (troughi - peaki) * len(ok_points)

        # numerically characterize the acceptable portion of the slope
        ok_slope_length = fraction_acceptable * np.sqrt(
            (troughi - peaki)**2 + (self.x[peaki] - self.x[troughi])**2)
        ok_slope_angle = np.arctan(downslopemodel.coef_[0])

        # numerically characterize beat placement, relative to the acceptable
        # slope length (both measures are 0 when that length is 0)
        if ok_slope_length == 0:
            beat_downslope_orthogonal_distance = 0
            beat_downslope_peak_distance = 0
        else:
            beat_downslope_orthogonal_distance = 1.0 / ok_slope_length * (
                np.abs(k + m * self.t[beatindex] - self.x[beatindex]) /
                np.sqrt(1 + m * m))
            beat_downslope_peak_distance = 1.0 / ok_slope_length * np.sqrt(
                (beatindex - peaki)**2 +
                (self.x[peaki] - self.x[beatindex])**2)

        # check if certain to be bad fit
        iscrap = False
        if (np.abs(r2) < BeatQuality.MINIMUM_R2
                or fraction_acceptable < BeatQuality.MINIMUM_LINEARITY):
            # Single formatted string: prints identically on Python 2 and 3.
            print("crap!  %s %s %s" % (beatnumber, r2, fraction_acceptable))
            iscrap = True

        return ok_slope_length, ok_slope_angle, beat_downslope_orthogonal_distance, beat_downslope_peak_distance, iscrap
Esempio n. 27
0
# Remaining feature columns, added in the order given here.
for column_name, column_values in (
        ('medHighIncome', incomeDF['50to75k']),
        # ('highIncome', incomeDF['above75k']),  # overspecified
        ('P_married', marriageDF['marriedPercent']),
        ('P_noCar', carDF['TotalNoVehicle']),
        ('P_1Car', carDF['Total1Vehicle']),
        # ('P_2+Car', carDF['Total2orMoreVehicle']),  # overspecified
        ('P_homeOwner', homeOwnerDF['OWN']),
):
    xValues[column_name] = column_values
# make sure all feature vectors are the same length
# for col in xValues.columns:
#     print(xValues[col].shape)
xValues.head(1)

# %%
# predict values with every fitted model (BARD, EN and log models are disabled)
linPredictions = linModel.predict(xValues)
tsPredictions = tsModel.predict(xValues)
hrPredictions = hrModel.predict(xValues)
# bardPredictions = bardModel.predict(xValues)
brPredictions = brModel.predict(xValues)
# enPredictions = enModel.predict(xValues)
ridgePredictions = ridgeModel.predict(xValues)
# logPredictions = logModel.predict(xValues)

print('Features:')
print(xTrain.columns.values, '\n')
# Report coefficients and intercept of each active model.
for model_label, fitted_model in (
        ("Linear", linModel),
        ("TS", tsModel),
        ("HR", hrModel),
        # ("BARD", bardModel),
        ("BR", brModel),
        # ("EN", enModel),
        ("Ridge", ridgeModel),
):
    print(model_label + " coefficients:", '\n', fitted_model.coef_,
          '...Intercept:', fitted_model.intercept_, '\n')
# 5.1.5.1 RANSAC regression
ransac = RANSACRegressor()
# train the algorithm on training data and predict using the testing data
pred_ransac = ransac.fit(X_train, y_train).predict(X_test)
y_predransac = ransac.predict(X_test)
# RANSACRegressor itself has no coef_/intercept_; they belong to the final
# estimator it fitted, which is exposed as `estimator_`.
# NOTE(review): zip(..., X) presumably pairs each coefficient with a column
# name of X (iterating a DataFrame yields its column labels) -- confirm.
print('Betas: ', list(zip(ransac.estimator_.coef_, X)))
print('Beta0: %.2f' % ransac.estimator_.intercept_)  # Beta0

# 5.1.5.2 Theil-Sen regression
ts = TheilSenRegressor()
# train the algorithm on training data and predict using the testing data
pred_ts = ts.fit(X_train, y_train).predict(X_test)
y_predts = ts.predict(X_test)
print('Betas: ', list(zip(ts.coef_, X)))
print('Beta0: %.2f' % ts.intercept_)  # Beta0

# 5.1.5.3 Huber regression
huber = HuberRegressor(alpha=0.0)
# train the algorithm on training data and predict using the testing data
pred_huber = huber.fit(X_train, y_train).predict(X_test)
y_predhuber = huber.predict(X_test)
print('Betas: ', list(zip(huber.coef_, X)))
print('Beta0: %.2f' % huber.intercept_)  # Beta0
"""# Regression Model selection
After calculating different regression models it is necessary to compare models and evaluate which is the best given the database.
- MAE
- MSE
Esempio n. 29
0
                                epsilons)  # assumes 3 dims
        results.append({
            'epsilon':
            epsilon,
            'num_boxes':
            len(all_floors),
            'filled_boxes':
            count_boxes(points, all_floors, epsilons)
        })
    return results


if __name__ == "__main__":
    # Fit a line through log(filled boxes) against log(1/epsilon); its slope
    # is printed and shown in the plot legend.
    data = pd.read_csv('CapDimData.dat', header=None)
    data = get_capacity_dimension(data)
    print(data)
    y = [log(i['filled_boxes']) for i in data]
    x = [log(1 / i['epsilon']) for i in data]
    # scikit-learn estimators take a 2-D (n_samples, n_features) matrix, both
    # for fit() and for predict().
    features = np.array(x)[:, np.newaxis]
    regressor = TheilSenRegressor(random_state=42)
    regressor.fit(features, y)
    print(regressor.coef_)

    plt.plot(x, y)
    # Predict all points in one call; predict() rejects the bare scalars the
    # original passed one at a time.
    plt.plot(x, regressor.predict(features), color='red')
    plt.xlabel('log(1/epsilon)')
    plt.ylabel('log(num boxes)')
    plt.legend(['Data', 'Fit: slope {:.2}'.format(regressor.coef_[0])])
    plt.show()

    # 2 dims - slope 1.7

#########
# Theil sen model


from sklearn.linear_model import TheilSenRegressor # Theil Sen Regressor Model

# Build the seeded Theil-Sen model and train it on the training split
# (fit() hands back the estimator, so both steps chain).
ts_reg = TheilSenRegressor(random_state=508).fit(X_train, y_train)

# Predictions for the test split
y_pred = ts_reg.predict(X_test)

# Model score on the test split
y_score_ts = ts_reg.score(X_test, y_test)
print(y_score_ts)

#############
# Regression tree

from sklearn.tree import DecisionTreeRegressor # Regression trees

# Configure the regression tree: MSE split criterion, at least 14 samples per
# leaf, fixed seed.
# NOTE(review): newer scikit-learn releases spell this criterion
# 'squared_error' -- confirm against the installed version.
tree_settings = {
    'criterion': 'mse',
    'min_samples_leaf': 14,
    'random_state': 508,
}
tree_reg = DecisionTreeRegressor(**tree_settings)
def main():
    """Pick the best regressor per sliding window and plot its hold-out error.

    The table returned by ``getTableVidrieria()`` is restricted to product 38
    and aggregated per (cuatrimestre, anho) into mean/max/min price and total
    quantity. For each of four windows of five consecutive rows, six
    regressors are scored by cross-validation (negative mean absolute error)
    on the first four rows; the best-scoring regressor is refitted on those
    rows and its mean absolute error on the fifth row is recorded.

    The recorded errors, the chosen model names and the variance of the errors
    are printed, and the errors are plotted together with their mean.
    ``CV`` and ``N_ESTIMATORS`` are read from the enclosing module.
    """
    df = getTableVidrieria()
    filtrado = df.loc[df['idproducto'] == 38]

    # Named aggregation produces flat, explicitly named columns. The nested
    # renaming dict formerly passed to aggregate() was removed in pandas 1.0.
    agrupado = filtrado.groupby(['cuatrimestre', 'anho']).agg(
        precioproducto_mean=('precioproducto', 'mean'),
        precioproducto_max=('precioproducto', 'max'),
        precioproducto_min=('precioproducto', 'min'),
        cantidad_sum=('cantidad', 'sum'),
    ).reset_index()
    agrupado = agrupado.sort_values(by=['anho', 'cuatrimestre'])

    x = pd.DataFrame(agrupado, columns=['cuatrimestre', 'precioproducto_min'])
    y = pd.DataFrame(agrupado, columns=['cantidad_sum'])

    # (display name, zero-argument factory) for every candidate model, in the
    # order used to break ties between equal cross-validation scores.
    candidates = (
        ('Linear Regression', LinearRegression),
        ('Theil-Sen Regression', TheilSenRegressor),
        ('Gradient Boosting Regression',
         lambda: GradientBoostingRegressor(n_estimators=N_ESTIMATORS)),
        ('Extra Trees Regression',
         lambda: ExtraTreesRegressor(n_estimators=N_ESTIMATORS)),
        ('Ada Boost Regression',
         lambda: AdaBoostRegressor(n_estimators=N_ESTIMATORS)),
        ('Bagging Regression',
         lambda: BaggingRegressor(n_estimators=N_ESTIMATORS)),
    )

    aList = []      # hold-out mean absolute error of the chosen model, per window
    nameList = []   # display name of the chosen model, per window
    window_size = 5
    train_size = 4
    for i in range(4):
        x_new = x[i:(i + window_size)]
        y_new = y[i:(i + window_size)]

        # First four rows train the models; the remaining row is held out.
        x_train, x_test = x_new[:train_size], x_new[train_size:]
        y_train, y_test = y_new[:train_size], y_new[train_size:]
        # Estimators expect a 1-D target.
        target = y_train.values.ravel()

        cv_scores = [
            np.mean(cross_val_score(make_model(), x_train, target, cv=CV,
                                    scoring='neg_mean_absolute_error'))
            for _, make_model in candidates
        ]
        # Scores are negated errors, so the maximum is the best model; index()
        # returns the first maximum, i.e. the earliest candidate wins a tie.
        name, make_model = candidates[cv_scores.index(max(cv_scores))]

        model = make_model().fit(x_train, target)
        nameList.append(name)
        aList.append(mean_absolute_error(y_test, model.predict(x_test)))

    print(aList)
    print(nameList)
    print(np.var(aList))

    _, ax = plt.subplots()
    ax.plot(aList, label='Data', marker='o')
    ax.plot([np.mean(aList)] * len(aList), label='Mean', linestyle='--')
    ax.legend(loc='upper right')
    plt.show()