Example 1
class Simple:
    def __init__(self, a, b, c, d):
        self.model = TheilSenRegressor()

    def update_a_b(self, x, y):
        self.model.fit(x.reshape(-1, 1), y)

    def set_c_d(self, c, d):
        pass

    def get_y(self, x):
        return self.model.predict(x.reshape(-1, 1))

    def get_likelihood(self, x, y):
        return 1 / float(x.shape[0]) * np.sum(np.abs(y - self.get_y(x)))

    def to_string(self):
        return "a:{}, b:{}".format(self.model.coef_, self.model.intercept_)

    def get_a_b(self):
        return self.model.coef_, self.model.intercept_

    @staticmethod
    def var_to_weight(v):
        return 1

    @staticmethod
    def get_c_d(x, r):
        return None, None
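A minimal usage sketch (not from the source): the constructor ignores its arguments, and the wrapped TheilSenRegressor recovers the slope and intercept of noisy linear data. Assumes the snippet's implied imports, numpy as np and sklearn.linear_model.TheilSenRegressor.

import numpy as np

x = np.linspace(0, 10, 100)
y = 2.0 * x + 1.0 + np.random.normal(scale=0.1, size=x.shape)

s = Simple(None, None, None, None)  # a, b, c, d are unused by __init__
s.update_a_b(x, y)
print(s.to_string())           # a close to 2, b close to 1
print(s.get_likelihood(x, y))  # mean absolute error of the fit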
Example 2
def getscore_getnext(df, days_ahead, coin):

    forecast_val = days_ahead

    forecast_col = 'close'
    df.fillna(value=-99999, inplace=True)
    df['label'] = df[forecast_col].shift(-forecast_val)

    #X = X[:-forecast_val]

    X = np.array(df.drop(['label', 'date'], axis=1))

    X = preprocessing.scale(X)

    futureX = X[-1:]
    X = X[:-forecast_val]
    df.dropna(inplace=True)

    y = np.array(df['label'])

    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.15)  # sklearn.cross_validation was removed; use sklearn.model_selection
    '''
    inPickle = open('%s.pickle' %(coin), 'rb')
    clf = pickle.load(inPickle)
    '''
    clf = TheilSenRegressor()

    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    #print "accuracy with 1.0 being perfect:", (confidence)
    futureval = clf.predict(futureX)
    return (confidence, futureval)
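A hedged usage sketch with a hypothetical price frame; it assumes the imports the snippet relies on (numpy as np, pandas as pd, and sklearn's preprocessing and model_selection modules alongside TheilSenRegressor).

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'date': pd.date_range('2021-01-01', periods=120).astype(str),
    'close': np.linspace(100.0, 160.0, 120),
})
confidence, futureval = getscore_getnext(df, days_ahead=5, coin='BTC')
print(confidence, futureval)  # R^2 on the held-out 15% and the 5-day-ahead prediction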
Example 3
def compute_quantal_size(scan):
    """ Estimate the unit change in calcium response corresponding to a unit change in
    pixel intensity (dubbed quantal size, lower is better).

    Assumes images are stationary from one timestep to the next. Uses it to calculate a
    measure of noise per bright intensity (which increases linearly given that imaging
    noise is poisson), fits a line to it and uses the slope as the estimate.

    :param np.array scan: 3-dimensional scan (image_height, image_width, num_frames).

    :returns: int minimum pixel value in the scan (that appears a min number of times)
    :returns: int maximum pixel value in the scan (that appears a min number of times)
    :returns: np.array pixel intensities used for the estimation.
    :returns: np.array noise variances used for the estimation.
    :returns: float the estimated quantal size
    :returns: float the estimated zero value
    """
    # Set some params
    num_frames = scan.shape[2]
    min_count = num_frames * 0.1  # pixel values with fewer appearances will be ignored
    max_acceptable_intensity = 3000  # pixel values higher than this will be ignored

    # Make sure field is at least 32 bits (int16 overflows if summed to itself)
    scan = scan.astype(np.float32, copy=False)

    # Create pixel values at each position in field
    eps = 1e-4 # needed for np.round to not be biased towards even numbers (0.5 -> 1, 1.5 -> 2, 2.5 -> 3, etc.)
    pixels = np.round((scan[:, :, :-1] + scan[:, :, 1:]) / 2 + eps)
    pixels = pixels.astype(np.int16 if np.max(abs(pixels)) < 2 ** 15 else np.int32)

    # Compute a good range of pixel values (common, not too bright values)
    unique_pixels, counts = np.unique(pixels, return_counts=True)
    min_intensity = min(unique_pixels[counts > min_count])
    max_intensity = max(unique_pixels[counts > min_count])
    max_acceptable_intensity = min(max_intensity, max_acceptable_intensity)
    pixels_mask = np.logical_and(pixels >= min_intensity, pixels <= max_acceptable_intensity)

    # Select pixels in good range
    pixels = pixels[pixels_mask]
    unique_pixels, counts = np.unique(pixels, return_counts=True)

    # Compute noise variance
    variances = ((scan[:, :, :-1] - scan[:, :, 1:]) ** 2 / 2)[pixels_mask]
    pixels -= min_intensity
    variance_sum = np.zeros(len(unique_pixels)) # sum of variances per pixel value
    for i in range(0, len(pixels), int(1e8)):  # chunk it for memory efficiency
        variance_sum += np.bincount(pixels[i: i + int(1e8)], weights=variances[i: i + int(1e8)],
                                    minlength=len(unique_pixels))[unique_pixels - min_intensity]
    unique_variances = variance_sum / counts # average variance per intensity

    # Compute quantal size (by fitting a linear regressor to predict the variance from intensity)
    X = unique_pixels.reshape(-1, 1)
    y = unique_variances
    model = TheilSenRegressor() # robust regression
    model.fit(X, y)
    quantal_size = model.coef_[0]
    zero_level = - model.intercept_ / model.coef_[0]

    return (min_intensity, max_intensity, unique_pixels, unique_variances,
           quantal_size, zero_level)
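A small synthetic check (a sketch, not from the source repo): simulate Poisson photon counts with a known gain and offset, and the fitted slope and zero level should recover them.

import numpy as np

rng = np.random.default_rng(0)
photons = rng.poisson(lam=50, size=(64, 64, 200))  # hypothetical 64x64 field, 200 frames
scan = 10.0 * photons + 100.0                      # simulated gain (quantal size) 10, offset 100

(min_i, max_i, intensities, variances,
 quantal_size, zero_level) = compute_quantal_size(scan)
print(quantal_size)  # close to the simulated gain of 10
print(zero_level)    # close to the simulated offset of 100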
Example 4
def robust_cor(x, y):
    if isinstance(x[0], list):
        x = list(map(list, zip(*x)))
    else:
        x = np.array(x).reshape(-1, 1)
    X = np.array(x)
    Y = np.array(y)
    theil_regr = TheilSenRegressor(random_state=42)
    theil_regr.fit(X, Y)
    y_pred = theil_regr.predict(X)
    res = y_pred - y
    tot_dev = y - np.mean(y)
    SSres = np.dot(res, res)
    SStot = np.dot(tot_dev, tot_dev)
    adjR2 = 1 - (SSres / SStot) * (X.shape[0] - 1) / (X.shape[0] - X.shape[1] - 1)
    sgn = np.sign(theil_regr.coef_)[0]
    if adjR2 > 0:
        corr_val = sgn * np.sqrt(adjR2)
    else:
        corr_val = 0
    return [
        corr_val, theil_regr.coef_, theil_regr.intercept_,
        theil_regr.breakdown_
    ]
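A minimal sketch of how robust_cor might be called: a 1-D list of x values gives a single-feature fit, while a list of lists is treated as multiple features (note the transpose at the top of the function).

x = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
y = [0.1, 1.1, 1.9, 3.2, 3.9, 5.1]
corr_val, coef, intercept, breakdown = robust_cor(x, y)
print(corr_val)  # near 1 for this almost perfectly linear relationship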
Example 6
def _fit_theil_sen_one_track(x_coords_metres, y_coords_metres,
                             valid_times_unix_sec):
    """Fits Theil-Sen model for one storm track.

    P = number of points in track

    :param x_coords_metres: length-P numpy array of x-coordinates.
    :param y_coords_metres: length-P numpy array of y-coordinates.
    :param valid_times_unix_sec: length-P numpy array of times.
    :return: theil_sen_dict: Dictionary with the following keys.
    theil_sen_dict['x_intercept_metres']: x-intercept.
    theil_sen_dict['x_velocity_m_s01']: x-velocity (metres per second).
    theil_sen_dict['y_intercept_metres']: y-intercept.
    theil_sen_dict['y_velocity_m_s01']: y-velocity (metres per second).
    """

    num_points = len(x_coords_metres)
    valid_times_unix_sec = numpy.reshape(valid_times_unix_sec, (num_points, 1))

    model_object_for_x = TheilSenRegressor(fit_intercept=True)
    model_object_for_x.fit(valid_times_unix_sec, x_coords_metres)
    model_object_for_y = TheilSenRegressor(fit_intercept=True)
    model_object_for_y.fit(valid_times_unix_sec, y_coords_metres)

    return {
        X_INTERCEPT_KEY: model_object_for_x.intercept_,
        X_VELOCITY_KEY: model_object_for_x.coef_,
        Y_INTERCEPT_KEY: model_object_for_y.intercept_,
        Y_VELOCITY_KEY: model_object_for_y.coef_
    }
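A sketch with synthetic data; the *_KEY names are module-level constants in the source, so this just prints whatever dictionary comes back. Assumes numpy is imported as numpy, as in the snippet.

times = numpy.arange(0, 600, 60, dtype=float)  # one point per minute
x = 5.0 * times + 1000.0                       # 5 m/s in x
y = -2.0 * times + 2000.0                      # -2 m/s in y
theil_sen_dict = _fit_theil_sen_one_track(x, y, times)
print(theil_sen_dict)                          # velocities near 5 and -2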
Example 7
def calculate_scaling_params(events, kmer_mean_levels):
    events = pd.DataFrame(events)
    events['pos'] = events['move'].cumsum()
    jump_positions = events[events['move'] > 1]['pos']
    jump_positions = set(jump_positions - 1) | set(jump_positions)
    nonjump_positions = set(events['pos']) - jump_positions
    if len(nonjump_positions) < MINIMUM_NONJUMP_POSITIONS:
        return

    statelevels = []
    statelevels_jump = []
    for pos, posevents in events.groupby('pos'):
        state = posevents['model_state'].iloc[0]
        if '_' in state:
            continue

        medlevel = posevents['mean'].median()
        if pos in nonjump_positions:
            statelevels.append([medlevel, kmer_mean_levels[state]])
        else:
            statelevels_jump.append([medlevel, kmer_mean_levels[state]])

    statelevels_jump = np.array(statelevels_jump)
    statelevels = np.array(statelevels)
    regr = TheilSenRegressor(random_state=922)
    regr.fit(statelevels[:, 0][:, np.newaxis], statelevels[:, 1])

    return regr.coef_[0], regr.intercept_
Example 8
def estimate_txty(cluster, k=20):
    xs = []
    ys = []
    zs = []
    tx = []
    ty = []

    for i, node in cluster.nodes(data=True):
        xs.append(node['features']['SX'])
        ys.append(node['features']['SY'])
        zs.append(node['features']['SZ'])
        tx.append(node['features']['TX'])
        ty.append(node['features']['TY'])

    xs = np.array(xs)
    ys = np.array(ys)
    zs = np.array(zs)
    tx = np.array(tx)
    ty = np.array(ty)

    argosorted_z = np.argsort(zs)

    lr = TheilSenRegressor()
    lr.fit(zs[argosorted_z][:k].reshape((-1, 1)), xs[argosorted_z][:k])
    TX = lr.coef_[0]

    lr.fit(zs[argosorted_z][:k].reshape((-1, 1)), ys[argosorted_z][:k])
    TY = lr.coef_[0]

    return TX, TY
Example 9
def test_checksubparams_n_subsamples_if_less_samples_than_features():
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0)
    with pytest.raises(ValueError):
        theil_sen.fit(X, y)
Example 10
class Regressor(BaseEstimator):
    def __init__(self):

        self.regressorName="linear"
        if self.regressorName=="rf":
            self.clf= RandomForestRegressor(n_estimators=30, max_depth=63,max_features=50, n_jobs=-1)
        elif self.regressorName=="gb":

            self.clf = GradientBoostingRegressor(
                alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                max_features=None, min_samples_leaf=1, min_samples_split=2,
                min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                random_state=None, subsample=1.0, verbose=0, warm_start=True)
            #self.clf =GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),scoring='mean_squared_error',cv=3,n_jobs=-1)
            #self.clf=gb
        elif self.regressorName=="ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName=="linear":
            self.clf = LinearRegression()
        elif self.regressorName=="lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName=="svr":
             self.clf = SVR(kernel='rbf',C=0.2, gamma=0.01)
        elif self.regressorName=="knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance',n_jobs=-1)
        elif self.regressorName=="gauss":
            self.clf = TheilSenRegressor()

    def fit(self, X, y):
        X = csc_matrix(X)
        print("Training Algorithm")
        self.clf.fit(X, y)
        #print(self.clf.best_estimator_)

    def predict(self, X):
        X = csr_matrix(X)
        print("Testing Algorithm")
        return self.clf.predict(X)

    def getRegressor(self):
        return self.clf

    def getRegressorName(self):
        return self.regressorName

    def getParamGrid(self):
        if self.regressorName=="rf":
            defaultGrid=[None]
            maxDepthGrid=np.arange(10,70,7)
            maxFeaturesGrid=["sqrt","log2",None]
            maxTreesGrid=np.arange(10,100,10)
            param_grid = {'max_features': defaultGrid}
        elif self.regressorName == "gb":
            #maxDepthGrid=np.arange(3,20,5)
            learningRateGrid=np.arange(50,100,10)
            #param_grid = {'max_depth': maxDepthGrid}
            #param_grid={'loss':['ls', 'lad', 'huber', 'quantile']}
            param_grid={'alpha':[0.9]}
        return param_grid
Example 11
class Regressor(BaseEstimator):
    def __init__(self):
 
        self.regressorName="gb"
        if self.regressorName=="rf":
            self.clf= RandomForestRegressor(n_estimators=400, max_depth=63,max_features=50, n_jobs=-1)
        elif self.regressorName=="gb":
 
            self.clf = GradientBoostingRegressor(
                alpha=0.9, init=None, max_depth=3, learning_rate=0.2, loss='ls',
                max_features=None, min_samples_leaf=1, min_samples_split=2,
                min_weight_fraction_leaf=0.0, n_estimators=2500, presort='auto',
                random_state=None, subsample=1.0, verbose=0, warm_start=True)
            #self.clf =GridSearchCV(estimator=gb, param_grid=self.getParamGrid(),scoring='mean_squared_error',cv=3,n_jobs=-1)
            #self.clf=gb
        elif self.regressorName=="ridge":
            self.clf = RidgeCV(alphas=(0.01, 0.1), fit_intercept=True, normalize=False, scoring=None, cv=5, gcv_mode=None, store_cv_values=False)
        elif self.regressorName=="linear":
            self.clf = LinearRegression()  # LinearRegression takes no alpha/max_iter; those belong to Ridge or Lasso
        elif self.regressorName=="lasso":
            self.clf = LassoCV(cv=10)
        elif self.regressorName=="svr":
             self.clf = SVR(kernel='rbf',C=0.2, gamma=0.01)
        elif self.regressorName=="knn":
            self.clf = neighbors.KNeighborsRegressor(1, weights='distance',n_jobs=-1)
        elif self.regressorName=="gauss":
            self.clf = TheilSenRegressor()
 
    def fit(self, X, y):
        #X=csc_matrix(X)
        self.clf.fit(X, y)
        #print self.clf.best_estimator_
 
    def predict(self, X):
        #X=csr_matrix(X)
        return self.clf.predict(X)
 
    def getRegressor(self):
        return self.clf
 
    def getRegressorName(self):
        return self.regressorName
 
    def getParamGrid(self):
        if self.regressorName=="rf":
            defaultGrid=[None]
            maxDepthGrid=np.arange(10,70,7)
            maxFeaturesGrid=["sqrt","log2",None]
            maxTreesGrid=np.arange(10,100,10)
            param_grid = {'max_features': defaultGrid}
        elif self.regressorName == "gb":
            #maxDepthGrid=np.arange(3,20,5)
            learningRateGrid=np.arange(50,100,10)
            #param_grid = {'max_depth': maxDepthGrid}
            #param_grid={'loss':['ls', 'lad', 'huber', 'quantile']}
            param_grid={'alpha':[0.9]}
        return param_grid
Example 12
def _fit_robust_line(shifts):
    """ Use a robust linear regression algorithm to fit a line to the data."""
    from sklearn.linear_model import TheilSenRegressor

    X = np.arange(len(shifts)).reshape(-1, 1)
    y = shifts
    model = TheilSenRegressor() # robust regression
    model.fit(X, y)
    line = model.predict(X)

    return line
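A short sketch: fit a robust line to per-frame shifts containing one gross outlier, which the Theil-Sen fit should largely ignore (assumes numpy as np).

import numpy as np

shifts = 0.5 * np.arange(50) + np.random.normal(scale=0.05, size=50)
shifts[10] += 20.0  # a spike that would drag an ordinary least-squares line
line = _fit_robust_line(shifts)
print(line[0], line[-1])  # approximately 0 and 24.5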
Example 13
class _TheilSenRegressorImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
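A sketch assuming Op is an alias for sklearn.linear_model.TheilSenRegressor, which is how wrappers of this shape are typically generated; the alias here is an assumption, not taken from the source.

import numpy as np
from sklearn.linear_model import TheilSenRegressor as Op  # assumed alias

X = np.arange(20, dtype=float).reshape(-1, 1)
y = 3.0 * X.ravel() + 2.0
impl = _TheilSenRegressorImpl(random_state=0)
impl.fit(X, y)
print(impl.predict(X[:3]))  # approximately [2, 5, 8]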
Example 14
def theilsen_regress_predict(var):
    """
    Input:-
    var: 1-D array var
    regressortype = LinearRegression, TheilSenRegressor

    Output: regression coefficient

    """
    regressor = TheilSenRegressor()
    y = np.asarray(var).ravel()  # flatten to 1-D to avoid a DataConversionWarning
    X = np.arange(len(y)).reshape(-1, 1)
    regressor.fit(X, y)
    return regressor.predict(X)
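Sketch of a typical call, regressing a series against its own index to extract a robust trend line (assumes numpy as np as in the snippet).

trend = theilsen_regress_predict(np.arange(30) * 0.2 + 1.0)
print(trend[:3])  # approximately [1.0, 1.2, 1.4]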
Example 15
def theilsen_regress_coeff(var, a):
    """
    Input:-
    var: 1-D array var
    a: 1-D array index
    regressortype = LinearRegression, TheilSenRegressor

    Output: regression coefficient

    """
    regressor = TheilSenRegressor()
    y = np.asarray(var).ravel()  # flatten to 1-D to avoid a DataConversionWarning
    X = a.reshape(-1, 1)
    regressor.fit(X, y)
    return np.array([regressor.coef_])
Example 16
    def _regress_a(X, y, robust, n_jobs):
        """
        Calculates the slope and intercept
        """

        if robust:
            model = TheilSenRegressor(n_jobs=n_jobs)
        else:
            model = LinearRegression(n_jobs=n_jobs)

        model.fit(X, y)

        slope_m = model.coef_[0]
        intercept_b = model.intercept_

        return slope_m, intercept_b
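A sketch treating _regress_a as a standalone helper (in the source it sits inside a class; the call below assumes it is reachable as a plain function or staticmethod, with numpy as np and the sklearn regressors imported).

X = np.arange(25, dtype=float).reshape(-1, 1)
y = 1.5 * X.ravel() - 4.0
slope_m, intercept_b = _regress_a(X, y, robust=True, n_jobs=1)
print(slope_m, intercept_b)  # close to 1.5 and -4.0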
Example 17
    def _cfunc_theilsen(x, y):
        """
        Get Theil-Sen regression score for data set.

        Args:
            x: (list<float>) independent property (x-axis)
            y: (list<float>) dependent property (y-axis)

        Returns: (float) Theil-Sen score

        """
        from sklearn.linear_model import TheilSenRegressor
        r = TheilSenRegressor(random_state=21)
        x_coeff = np.array(x)[:, np.newaxis]
        r.fit(x_coeff, y)
        return r.score(x_coeff, y)
Example 18
class r07522507_TheilSenRegressor(regression):
    def trainAlgo(self):
        self.model = TheilSenRegressor(
            fit_intercept=self.param['fit_intercept'],
            copy_X=self.param['copy_X'],
            max_subpopulation=self.param['max_subpopulation'],
            n_subsamples=self.param['n_subsamples'],
            max_iter=self.param['max_iter'],
            tol=self.param['tol'],
            random_state=self.param['random_state'],
            verbose=self.param['verbose'],
        )
        self.model.fit(self.inputData['X'], self.outputData['Y'])

    def predictAlgo(self):
        self.result['Y'] = self.model.predict(self.inputData['X'])
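A hypothetical param dict for trainAlgo, filled with scikit-learn's documented defaults for TheilSenRegressor (the regression base class and the inputData/outputData containers are not shown in the snippet).

param = {
    'fit_intercept': True,
    'copy_X': True,
    'max_subpopulation': 10000,
    'n_subsamples': None,
    'max_iter': 300,
    'tol': 1e-3,
    'random_state': None,
    'verbose': False,
}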
Example 19
    def fit(self, X, y, random_state=None):
        """
        Train ENOLS on the given training set.

        Parameters
        ----------
        X: an input array of shape (n_sample, n_features)
    y: an array of shape (n_sample,) containing the target values for the input examples

        Return
        ------
        self: the fitted model
        """

        # use random instead of np.random to sample random numbers below
        random = check_random_state(random_state)

        estimators = [('lr', LinearRegression())]

        if isinstance(self.sample_size, int):
            self.sample_size = 'reservoir_sampling'

        # add all the trained OLS models to this list
        self.estimators_lr, self.estimators_TSR, self.estimators_enols = [], [], []
        for i in range(self.n_estimators):
            samples = sample_without_replacement(n_population=random.choice([50, 100]),
                                                 n_samples=random.choice([10, 20]),
                                                 random_state=random_state, method=self.sample_size)

            X_train, y_train = [], []
            for idx in samples:  # avoid shadowing the outer loop variable
                X_train.append(X[idx]), y_train.append(y[idx])

            reg = LinearRegression()
            reg.fit(np.array(X_train), np.array(y_train))

            tsr = TheilSenRegressor()
            tsr.fit(np.array(X_train), np.array(y_train))

            enol = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())
            enol.fit(np.array(X_train), np.array(y_train))

            self.estimators_lr.append(reg), self.estimators_TSR.append(tsr), self.estimators_enols.append(enol)

        return self
Example 20
    def learn_a_b(x, y, lamb, alpha, a0=-0.5, b0=3.4):
        (c, d) = lamb
        (e, f) = alpha
        if (a0 == 0.0) or (b0 == 0.0):
            model = TheilSenRegressor()
            model.fit(x.reshape(-1, 1), y)
            a0 = model.coef_[0]
            b0 = model.intercept_
        if (d == 0) and (c == 0):
            r = a0 * x + b0 - y
            d = np.log(np.min(np.abs(r))) - 1e-8

        r = minimize(Pareto2.obj_a_b, [a0, b0],
                     args=(x, y, (c, d), (e, f)),
                     method='Nelder-Mead',
                     options={
                         'maxiter': 10000,
                         'disp': False
                     })
        # print(r)
        if not r.success:
            print("Optimization Failed", r)
        return r.x[0], r.x[1]
Example 21
def test_checksubparams_too_many_subsamples():
    X, y, w, c = gen_toy_problem_1d()
    theil_sen = TheilSenRegressor(n_subsamples=101, random_state=0)
    with pytest.raises(ValueError):
        theil_sen.fit(X, y)
Example 22
def test_checksubparams_negative_subpopulation():
    X, y, w, c = gen_toy_problem_1d()
    theil_sen = TheilSenRegressor(max_subpopulation=-1, random_state=0)

    with pytest.raises(ValueError):
        theil_sen.fit(X, y)
Example 23
    def theilsen_regressor(self):
        x_train, x_test, y_train, y_test = self.preprocessing()
        model = TheilSenRegressor()
        y_pred = model.fit(x_train, y_train).predict(x_test)
        self.printing(y_test, y_pred, 'Theilsen')
Example 24
def TSReg(X, Y):
    model = TheilSenRegressor()
    trained_model = model.fit(X, Y)
    return trained_model
Example 25

# 5.1.5.1 RANSAC regression
ransac = RANSACRegressor()
pred_ransac = ransac.fit(X_train, y_train).predict(
    X_test
)  #train the algorithm on training data and predict using the testing data
y_predransac = ransac.predict(X_test)
print('Betas: ', list(zip(ransac.estimator_.coef_, X)))  # RANSACRegressor exposes the fitted base model via estimator_
print('Beta0: %.2f' % ransac.estimator_.intercept_)  #Beta0

# 5.1.5.2 Theil-Sen regression
ts = TheilSenRegressor()
pred_ts = ts.fit(X_train, y_train).predict(
    X_test
)  #train the algorithm on training data and predict using the testing data
y_predts = ts.predict(X_test)
print('Betas: ', list(zip(ts.coef_, X)))
print('Beta0: %.2f' % ts.intercept_)  #Beta0

# 5.1.5.3 Huber regression
huber = HuberRegressor(alpha=0.0)
pred_huber = huber.fit(X_train, y_train).predict(
    X_test
)  #train the algorithm on training data and predict using the testing data
y_predhuber = huber.predict(X_test)
print('Betas: ', list(zip(huber.coef_, X)))
print('Beta0: %.2f' % huber.intercept_)  #Beta0
"""# Regression Model selection
After calculating different regression models it is necessary to compare models and evaluate which is the best given the database.
# Model via linear regression
from sklearn.linear_model import TheilSenRegressor

reg = TheilSenRegressor()
reg.fit(X_train, y_train)
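One hedged way to do the comparison described above: score several linear models with the same cross-validation splits. X_train and y_train are assumed to exist from the surrounding notebook.

from sklearn.linear_model import LinearRegression, TheilSenRegressor
from sklearn.model_selection import cross_val_score

for name, est in [('ols', LinearRegression()),
                  ('theil-sen', TheilSenRegressor(random_state=0))]:
    scores = cross_val_score(est, X_train, y_train, cv=5, scoring='r2')
    print(name, scores.mean())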
Example 27
    data_train = pd.read_csv('Data/train_copy.csv')
    train = normalize(preprocess(data_train))

    Xdata = train.drop(columns='Survived')

    ydata = train['Survived']

    X_train, X_test, y_train, y_true = train_test_split(Xdata, ydata, test_size=0.1, random_state=42, stratify=ydata)
#New classifiers (regression models on a binary target; predictions are rounded below so accuracy_score receives discrete labels)
    Class1 = RANSACRegressor(random_state=42)
    Class1.fit(X_train, y_train)
    Class1_predictions = Class1.predict(X_test).round()
    Class1_accuracy = accuracy_score(y_true, Class1_predictions, normalize=True, sample_weight=None)

    Class2 = TheilSenRegressor(random_state=42)
    Class2.fit(X_train, y_train)
    Class2_predictions = Class2.predict(X_test).round()
    Class2_accuracy = accuracy_score(y_true, Class2_predictions, normalize=True, sample_weight=None)

    Class3 = LinearRegression()
    Class3.fit(X_train, y_train)
    Class3_predictions = Class3.predict(X_test).round()
    Class3_accuracy = accuracy_score(y_true, Class3_predictions, normalize=True, sample_weight=None)

    Class4 = HuberRegressor(alpha=0.0, epsilon=1.35)  # epsilon was undefined in the source; 1.35 is scikit-learn's default
    Class4.fit(X_train, y_train)
    Class4_predictions = Class4.predict(X_test).round()
    Class4_accuracy = accuracy_score(y_true, Class4_predictions, normalize=True, sample_weight=None)

#Print different accuracies
Example 28
#!/usr/bin/env python

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import TheilSenRegressor

data = pd.read_csv("dataset.csv",header=0)

X = data.loc[:,["Commune","Etage","Superficie","Piece"]].values
Y = data.loc[:,"Prix"].values

X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2)

regressor = TheilSenRegressor(random_state=0)
regressor.fit(X_train,Y_train)
score = regressor.score(X_test,Y_test)
print(score)
Example 29
def fit_TheilSen(features_train, labels_train, features_pred):
	model = TheilSenRegressor()
	model.fit(features_train, labels_train)
	labels_pred = model.predict(features_pred)
	print "TheilSen - coefficient of determination R^2 of the prediction: ", model.score(features_train, labels_train)
	return labels_pred
Example 30
plot_prediction("Linear Regression", Y_pred, test['close'])

# Lasso Lars

lassolars_reg = LassoLars()
lassolars_reg.fit(X_train, Y_train)
Y_pred = lassolars_reg.predict(X_test)
lassolars_r2 = r2_score(Y_expected, Y_pred)
lassolars_mse = mean_squared_error(Y_expected, Y_pred)
print("Lasso Lars Regression\n", "R2: ", lassolars_r2, "MSE:", lassolars_mse)
plot_prediction("Lasso Lars Regression", Y_pred, test['close'])

# Theil Sen Regressor

theil_reg = TheilSenRegressor()
theil_reg.fit(X_train, Y_train)
Y_pred = theil_reg.predict(X_test)
theil_r2 = r2_score(Y_expected, Y_pred)
theil_mse = mean_squared_error(Y_expected, Y_pred)
print("Theil Sen Regression\n", "R2: ", theil_r2, "MSE:", theil_mse)
plot_prediction("Theil Sen Regression", Y_pred, test['close'])

# Bayesian Ridge

bayesian_reg = BayesianRidge()
bayesian_reg.fit(X_train, Y_train)
Y_pred = bayesian_reg.predict(X_test)
bayesian_r2 = r2_score(Y_expected, Y_pred)
bayesian_mse = mean_squared_error(Y_expected, Y_pred)
print("Bayesian Ridge Regression\n", "R2: ", bayesian_r2, "MSE:", bayesian_mse)
plot_prediction("Bayesian Ridge Regression", Y_pred, test['close'])
Example 31
vec = DictVectorizer()
X = vec.fit_transform(x_train).toarray()
Y = np.asarray(train.CLOSE)
Y = Y.astype('int')

#Pre-Processing Test data
X_test = test[['HIGH', 'LOW', 'OPEN', 'TOTTRDQTY', 'TOTTRDVAL', 'TOTALTRADES']]
x_test = X_test.to_dict(orient='records')
vec = DictVectorizer()
x = vec.fit_transform(x_test).toarray()
y = np.asarray(test.CLOSE)
y = y.astype('int')

#Regressor (TheilSenRegressor is a regression model, despite the clf name)
clf = TheilSenRegressor()
clf.fit(X, Y)
print("Accuracy of this Statistical Arbitrage model is: ", clf.score(x, y))
predict = clf.predict(x)

test['predict'] = predict

#Ploting
train.index = train.Date
test.index = test.Date
train['CLOSE'].plot()
test['CLOSE'].plot()
test['predict'].plot()
plt.legend(loc='best')
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
Example 32
# The score is directly comparable to R-Square
print(y_score)



#########
# Theil sen model


from sklearn.linear_model import TheilSenRegressor # Theil Sen Regressor Model

# Instantiate
ts_reg = TheilSenRegressor(random_state = 508)

# Fit
ts_reg.fit(X_train, y_train)

# Predict
y_pred = ts_reg.predict(X_test)

# Score
y_score_ts = ts_reg.score(X_test, y_test)

print(y_score_ts)

#############
# Regression tree

from sklearn.tree import DecisionTreeRegressor # Regression trees

# Instantiate
Example 33
# Show the RANSAC fit
plt.plot(x, line_ransac, color='yellow', label='RANSAC')

# plt.show()


# Theil-Sen estimator: 
# General info: https://en.wikipedia.org/wiki/Theil%E2%80%93Sen_estimator
# Good ONLY for LINEAR REGRESSION
# Sci-kit learn implementation: http://scikit-learn.org/stable/auto_examples/linear_model/plot_theilsen.html

# Init the Theil-Sen estimator instance
theil = TheilSenRegressor()

# Fit with the Theil-Sen estimator
theil.fit(x, line_data)

# Get the fitted data result
line_theil = theil.predict(x)

# Plot Theil-Sen results
plt.plot(x, line_theil, color='red', label='Theil-Sen')

plt.legend(loc='lower right')

plt.show()

plt.clf()

###################################
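A self-contained illustration of the robustness claim above (a sketch, not part of the original script): with 10% gross outliers, Theil-Sen stays near the true slope while ordinary least squares is pulled away.

import numpy as np
from sklearn.linear_model import LinearRegression, TheilSenRegressor

rng = np.random.default_rng(0)
x = np.linspace(0, 10, 100).reshape(-1, 1)
y = 3.0 * x.ravel() + rng.normal(scale=0.3, size=100)
y[:10] += 40.0  # corrupt the first 10% of the points

print(LinearRegression().fit(x, y).coef_[0])                 # biased by the outliers
print(TheilSenRegressor(random_state=0).fit(x, y).coef_[0])  # close to 3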