Ejemplo n.º 1
0
def createPassiveAggressiveRegressor(params):
    """Build a PassiveAggressiveRegressor from merged user and tuning params.

    Returns a dict with the configured estimator and the tuning-parameter
    spec so callers can run a hyperparameter search over it.
    """
    info("Creating Passive Aggressive Regressor", ind=4)

    # Merge caller-supplied params with the estimator defaults,
    # then resolve each value against the tuning grid.
    params = mergeParams(PassiveAggressiveRegressor(), params)
    tuneParams = getPassiveAggressiveRegressorParams()
    grid = tuneParams['grid']

    info("With Parameters", ind=4)
    resolved = {}
    for pname in ('C', 'loss', 'max_iter', 'tol'):
        resolved[pname] = setParam(pname, params, grid, force=False)
        info("Param: {0} = {1}".format(pname, resolved[pname]), ind=6)

    reg = PassiveAggressiveRegressor(C=resolved['C'],
                                     loss=resolved['loss'],
                                     max_iter=resolved['max_iter'],
                                     tol=resolved['tol'])

    return {"estimator": reg, "params": tuneParams}
Ejemplo n.º 2
0
def mcFadden_R2(y_true, y_pred):
    """McFadden pseudo-R^2: 1 - LL(model predictions) / LL(null model).

    The null model is fit on a constant (intercept-only) feature.
    NOTE(review): despite the original naming, the null model here is a
    PassiveAggressiveRegressor, not a logistic regression.
    """
    constant_feature = pd.DataFrame(np.full(len(y_true), 1))
    null_model = PassiveAggressiveRegressor()
    null_model.fit(constant_feature, y_true)
    null_pred = null_model.predict(constant_feature)
    print('avg log-likelihood null-model: {}'.format(
        log_likelihood(y_true, null_pred)))

    numerator = log_likelihood(y_true, y_pred)
    denominator = log_likelihood(y_true, null_pred)
    return 1 - numerator / denominator
Ejemplo n.º 3
0
def test_regressor_mse():
    """PA regressor reaches MSE < 1.7 on dense and sparse inputs."""
    targets = y.copy()
    targets[y != 1] = -1

    for use_intercept in (True, False):
        for matrix in (X, X_csr):
            model = PassiveAggressiveRegressor(C=1.0, n_iter=50,
                                               fit_intercept=use_intercept,
                                               random_state=0)
            model.fit(matrix, targets)
            residual = model.predict(matrix) - targets
            assert_less(np.mean(residual ** 2), 1.7)
def test_regressor_mse():
    """Mean squared error stays below 1.7 for every data/intercept combo."""
    y_signed = y.copy()
    y_signed[y != 1] = -1

    combos = [(d, fi) for d in (X, X_csr) for fi in (True, False)]
    for d, fi in combos:
        estimator = PassiveAggressiveRegressor(C=1.0, n_iter=50,
                                               fit_intercept=fi,
                                               random_state=0)
        estimator.fit(d, y_signed)
        errors = estimator.predict(d) - y_signed
        assert_less(np.mean(errors ** 2), 1.7)
def test_regressor_partial_fit():
    """Fifty rounds of partial_fit converge to a low-MSE model."""
    labels = y.copy()
    labels[y != 1] = -1

    for features in (X, X_csr):
        model = PassiveAggressiveRegressor(C=1.0,
                                           fit_intercept=True,
                                           random_state=0)
        for _ in range(50):
            model.partial_fit(features, labels)
        deviation = model.predict(features) - labels
        assert_less(np.mean(deviation ** 2), 1.7)
Ejemplo n.º 6
0
def test_regressor_partial_fit():
    """Incremental training via partial_fit keeps MSE below 1.7.

    Fix: replaced Python-2-only ``xrange`` with ``range`` so the test
    runs under Python 3; the iteration count is unchanged.
    """
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        reg = PassiveAggressiveRegressor(C=1.0,
                                         fit_intercept=True,
                                         random_state=0)
        for t in range(50):
            reg.partial_fit(data, y_bin)
        pred = reg.predict(data)
        assert_less(np.mean((pred - y_bin)**2), 1.7)
Ejemplo n.º 7
0
def test_regressor_correctness(loss):
    """Reference PA implementation and sklearn's agree on learned weights."""
    y_signed = y.copy()
    y_signed[y != 1] = -1

    reference = MyPassiveAggressive(loss=loss, n_iter=2)
    reference.fit(X, y_signed)

    for matrix in (X, X_csr):
        candidate = PassiveAggressiveRegressor(tol=None, loss=loss,
                                               max_iter=2, shuffle=False)
        candidate.fit(matrix, y_signed)

        assert_array_almost_equal(reference.w, candidate.coef_.ravel(),
                                  decimal=2)
class _PassiveAggressiveRegressorImpl:
    """Thin wrapper delegating fit/predict to the underlying ``Op`` model."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; ``y`` is forwarded only when provided."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped model."""
        return self._wrapped_model.predict(X)
def test_regressor_correctness(loss):
    """Hand-rolled PA and sklearn PA learn (nearly) identical weights."""
    target = y.copy()
    target[y != 1] = -1

    baseline = MyPassiveAggressive(
        C=1.0, loss=loss, fit_intercept=True, n_iter=2)
    baseline.fit(X, target)

    for inputs in (X, X_csr):
        sk_model = PassiveAggressiveRegressor(
            C=1.0, tol=None, loss=loss, fit_intercept=True, max_iter=2,
            shuffle=False)
        sk_model.fit(inputs, target)

        assert_array_almost_equal(baseline.w, sk_model.coef_.ravel(),
                                  decimal=2)
Ejemplo n.º 10
0
def fancy_text_model(x_train, y_train, x_test, x_valid, cache_name, use_cache=False):
    """Fit a PassiveAggressiveRegressor and predict on test/valid sets.

    Predictions are cached with pickle under ``cache_name``; with
    ``use_cache=True`` the cached predictions are returned directly.

    Fixes: pickle files must be opened in *binary* mode ('rb'/'wb'), and
    file handles are now closed via context managers (the original read
    path leaked its handle and text mode breaks pickle on Python 3).
    """
    if use_cache:
        with open(cache_name, 'rb') as fhand:
            data_dict = pickle.load(fhand)
        return data_dict['test_pred'], data_dict['valid_pred']
    np.random.seed(seed=123)
    model = PassiveAggressiveRegressor(n_iter=100, C=1, shuffle=True, random_state=123)
    model.fit(x_train, y_train)
    test_pred = model.predict(x_test)
    valid_pred = model.predict(x_valid)
    data_dict = {'test_pred': test_pred, 'valid_pred': valid_pred}
    with open(cache_name, 'wb') as fhand:
        pickle.dump(data_dict, fhand)
    return test_pred, valid_pred
Ejemplo n.º 11
0
    def __init__(self, spar_type, spar_penalty):
        """Build one regression model per action in the environment.

        :param spar_type: penalty type forwarded to SGDRegressor
            ('l1', 'l2', 'elasticnet', or 'none').
        :param spar_penalty: l1_ratio forwarded to SGDRegressor.
        """
        # We create a separate model for each action in the environment's
        # action space. Alternatively we could somehow encode the action
        # into the features, but this way it's easier to code up.
        self.models = []
        for _ in range(env.action_space.n):
            #model=Lasso(alpha=0.01)
            model = SGDRegressor(learning_rate='constant',
                                 penalty=spar_type,
                                 l1_ratio=spar_penalty,
                                 max_iter=1000)
            # NOTE(review): model1/model2/model3 are constructed but never
            # used — they look like leftovers from experimenting with
            # alternative learners; candidates for removal.
            model1 = PassiveAggressiveRegressor()
            model2 = Lasso(alpha=0.1, normalize=True, warm_start=True)
            model3 = FTRL(alpha=1.0,
                          beta=1.0,
                          L1=0.00001,
                          L2=1.0,
                          D=2**25,
                          iters=1)
            #l2,l1,none,elasticnet
            #,penalty='l1',l1_ratio=0)
            #learning_rate="constant"

            # We need to call partial_fit once to initialize the model
            # or we get a NotFittedError when trying to make a prediction
            # This is quite hacky.
            #model2.fit([self.featurize_state(env.reset())], [0])
            #X = np.array([self.featurize_state(env.reset())])
            #Y = np.array([0])
            #print X.shape, Y.shape
            #model.partial_fit(X,Y)

            model.partial_fit([self.featurize_state(env.reset())], [0])
            self.models.append(model)
Ejemplo n.º 12
0
def get_model_from_name(model_name):
    """Return a freshly constructed estimator for ``model_name``.

    Raises KeyError when the name is unknown.
    """
    classifiers = {
        'LogisticRegression': LogisticRegression(n_jobs=-2),
        'RandomForestClassifier': RandomForestClassifier(n_jobs=-2),
        'RidgeClassifier': RidgeClassifier(),
        'XGBClassifier': xgb.XGBClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'SGDClassifier': SGDClassifier(n_jobs=-1),
        'Perceptron': Perceptron(n_jobs=-1),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
    }
    regressors = {
        'LinearRegression': LinearRegression(n_jobs=-2),
        'RandomForestRegressor': RandomForestRegressor(n_jobs=-2),
        'Ridge': Ridge(),
        'XGBRegressor': xgb.XGBRegressor(),
        'ExtraTreesRegressor': ExtraTreesRegressor(n_jobs=-1),
        'AdaBoostRegressor': AdaBoostRegressor(n_estimators=5),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(presort=False),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),
        'SGDRegressor': SGDRegressor(shuffle=False),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(shuffle=False),
    }
    clusterers = {
        'MiniBatchKMeans': MiniBatchKMeans(n_clusters=8),
    }

    model_map = {}
    model_map.update(classifiers)
    model_map.update(regressors)
    model_map.update(clusterers)
    return model_map[model_name]
Ejemplo n.º 13
0
def runmodel_sklearn(chromosome, train, test, modelname, feature, label):
    """Train the named sklearn model on chromosome-selected training rows
    and return its RMSE on the test set, normalized by the label's RMS.

    Returns the sentinel 1000000000 when the chromosome selects no rows.
    """
    model = {
        'GBRT': GradientBoostingRegressor(max_depth=7, loss='huber'),
        'SVR': SVR(),
        'Lasso': Lasso(),
        'Linear': LinearRegression(),
        'DecisionTree': DecisionTreeRegressor(max_depth=6),
        'RandomForest': RandomForestRegressor(random_state=1, n_jobs=12),
        'Ridge': Ridge(),
        'AdaBoost': AdaBoostRegressor(),
        'BayesianRidge': BayesianRidge(compute_score=True),
        'KNN': KNeighborsRegressor(n_neighbors=12),
        'ExtraTrees': ExtraTreesRegressor(random_state=1, n_jobs=12),
        'SGD': SGDRegressor(loss='huber', penalty='elasticnet',
                            random_state=1),
        'PassiveAggressive': PassiveAggressiveRegressor(),
        'ElasticNet': ElasticNet(),
        'Lars': Lars(),
    }

    newtrain = make_dataframe(chromosome, train)
    if len(newtrain) == 0:
        # No usable rows: return a huge penalty so this chromosome loses.
        return 1000000000

    estimator = model[modelname]
    estimator.fit(newtrain[feature], newtrain[label])
    residual = estimator.predict(test[feature]) - test[label]
    rmse = np.sqrt(np.power(residual, 2).mean())
    label_rms = np.sqrt(np.power(test[label], 2).mean())
    return rmse / label_rms
Ejemplo n.º 14
0
 def __init__(self, env, feature_transformer):
     """One PA regressor per available action, plus eligibility traces."""
     self.env = env
     self.feature_transformer = feature_transformer
     self.models = {
         action: PassiveAggressiveRegressor(C=1.0, fit_intercept=True, n_iter=10)
         for action in env.actions_available
     }
     self.eligibilities = np.zeros((env.n_actions, feature_transformer.dimensions))
Ejemplo n.º 15
0
 def __init__(self, env, feature_transformer):
     """Store env/transformer and build an n_iter=10 PA model per action."""
     self.env = env
     self.feature_transformer = feature_transformer
     self.models = {}
     for action in env.actions_available:
         self.models[action] = PassiveAggressiveRegressor(
             C=1.0, fit_intercept=True, n_iter=10)
def cross_validate(params):
    """Return the negated mean 3-fold R^2 of a PA regressor.

    ``params`` is a (C, epsilon) pair; lower return value is better,
    suitable for a minimizer.
    """
    global test_data_products, model_products

    _C, _epsilon = params

    data = test_data_products[1].dropna()
    feature_cols = [
        'amount_of_all_competitors', 'average_price_on_market',
        'distance_to_cheapest_competitor', 'price_rank', 'quality_rank'
    ]
    X = data[feature_cols]
    # Binarize the target: anything sold more than once counts as 1.
    y = data['sold'].copy()
    y[y > 1] = 1

    model = PassiveAggressiveRegressor(max_iter=1000, tol=0.0001)
    model.set_params(C=_C, epsilon=_epsilon)

    return -np.mean(cross_val_score(model, X, y, cv=3, scoring='r2'))
Ejemplo n.º 17
0
def test_regressor_partial_fit():
    """partial_fit converges; average=True exposes averaging attributes."""
    signed = y.copy()
    signed[y != 1] = -1

    for averaged in (False, True):
        for matrix in (X, X_csr):
            model = PassiveAggressiveRegressor(random_state=0,
                                               average=averaged,
                                               max_iter=100)
            for _ in range(50):
                model.partial_fit(matrix, signed)
            gap = model.predict(matrix) - signed
            assert np.mean(gap ** 2) < 1.7
            if averaged:
                for attr in ('average_coef_', 'average_intercept_',
                             'standard_intercept_', 'standard_coef_'):
                    assert hasattr(model, attr)
Ejemplo n.º 18
0
 def __init__(self, X, y, par_params, nfolds=3, n_jobs=1, scoring=None,random_grid=False, n_iter=10, verbose=True):
     """Configure a PassiveAggressiveRegressor-based predictive model."""
     self._code = "par"

     if verbose:
         print("Constructed PassiveAggressiveRegressor: " + self._code)

     AbstractRegressorPredictiveModel.__init__(self, "regressor", X, y,
                                               par_params, nfolds, n_jobs,
                                               scoring, random_grid, n_iter,
                                               verbose)
     self._model = self.constructRegressor(PassiveAggressiveRegressor(),
                                           self._random_grid)
Ejemplo n.º 19
0
	def go(self, all_data, totalCols, test_ID, colsP, RFEcv, XGBestCols):
		"""Train an averaged ensemble (XGB, BayesianRidge, PA) and return
		a submission DataFrame with predicted SalePrice per test id.

		assumes all_data uses SalePrice==0 to mark test rows — TODO confirm.
		"""
		# Split rows: SalePrice>0 are training rows, SalePrice==0 are test rows.
		train = all_data.loc[all_data.SalePrice>0 , list(totalCols)].reset_index(drop=True, inplace=False)
		y_train = all_data.SalePrice[all_data.SalePrice>0].reset_index(drop=True, inplace=False)
		test = all_data.loc[all_data.SalePrice==0 , list(totalCols)].reset_index(drop=True, inplace=False)

		# PCA diagnostics on robust-scaled training data (informational only).
		scale = RobustScaler() 
		df = scale.fit_transform(train)

		pca = PCA().fit(df) # whiten=True
		print('With only 120 features: {:6.4%}'.format(sum(pca.explained_variance_ratio_[:120])),"%\n")

		print('After PCA, {:3} features only not explained {:6.4%} of variance ratio from the original {:3}'.format(120,
																							(sum(pca.explained_variance_ratio_[120:])),
																							df.shape[1]))
		

		# NOTE(review): expm1 suggests SalePrice arrives log1p-transformed — confirm upstream.
		y_train = np.expm1(y_train)

		#Common parameters
		unionedColumns = list(set(RFEcv).union(set(colsP)))
		lengthOfUnionedColumns = len(unionedColumns)

		#XGBRegressor
		model = Pipeline([('pca', PCA(random_state = self.randomState)), ('model', XGBRegressor(random_state = self.randomState, silent=True))])
		gridSearch = self.createGridSearch(model, "XGB", lengthOfUnionedColumns)
		xgbRegressor = Pipeline([('sel', select_fetaures(select_cols = unionedColumns)), ('scl', RobustScaler()), ('gs', gridSearch)])
		xgbRegressor.fit(train, y_train)


		#bayesian ridge
		model = Pipeline([('pca', PCA(random_state = self.randomState)), ('model', BayesianRidge())])
		gridSearch = self.createGridSearch(model, "Bayesian", lengthOfUnionedColumns)
		bayesianRidge = Pipeline([('sel', select_fetaures(select_cols = unionedColumns)), ('scl', RobustScaler()), ('gs', gridSearch)])
		bayesianRidge.fit(train, y_train)

		#Passive Aggressive Regressor
		model = Pipeline([('pca', PCA(random_state = self.randomState)), ('model', PassiveAggressiveRegressor(random_state = self.randomState))])
		gridSearch = self.createGridSearch(model, "PassiveAggressive", lengthOfUnionedColumns)
		passiveAggressiveRegressor = Pipeline([('sel', select_fetaures(select_cols = unionedColumns)), ('scl', RobustScaler()), ('gs', gridSearch)])
		passiveAggressiveRegressor.fit(train, y_train)
		
		# Average the three fitted pipelines and score on the training data.
		averagingModels = AveragingModels(models = (xgbRegressor, bayesianRidge, passiveAggressiveRegressor))
		averagingModels.fit(train, y_train) 
		averagedModelTrainingDataPredictions = averagingModels.predict(train)
		averagedModelTestDataPredictions = (averagingModels.predict(test))
		meanSquaredError = (np.sqrt(mean_squared_error(y_train, averagedModelTrainingDataPredictions)))
		averageModelScore = averagingModels.score(train, y_train)
		
		print('RMSLE score on the train data: {:.4f}'.format(meanSquaredError))
		print('Accuracy score: {:.6%}'.format(averageModelScore))
		
		# Build the submission frame from the test-set predictions.
		ensemble = averagedModelTestDataPredictions *1
		submit = pd.DataFrame()
		submit['id'] = test_ID
		submit['SalePrice'] = ensemble
		
		return(submit)
Ejemplo n.º 20
0
def test_regressor_correctness():
    """Both PA implementations learn matching weights for each loss."""
    y_pm1 = y.copy()
    y_pm1[y != 1] = -1

    for loss_name in ("epsilon_insensitive", "squared_epsilon_insensitive"):
        mine = MyPassiveAggressive(C=1.0, loss=loss_name,
                                   fit_intercept=True, n_iter=2)
        mine.fit(X, y_pm1)

        theirs = PassiveAggressiveRegressor(C=1.0, loss=loss_name,
                                            fit_intercept=True, n_iter=2)
        theirs.fit(X, y_pm1)

        assert_array_almost_equal(mine.w, theirs.coef_.ravel(), decimal=2)
def test_regressor_mse():
    """MSE stays under 1.7; averaged runs expose the extra attributes."""
    flipped = y.copy()
    flipped[y != 1] = -1

    combos = [(d, fi, av)
              for d in (X, X_csr)
              for fi in (True, False)
              for av in (False, True)]
    for d, fi, av in combos:
        reg = PassiveAggressiveRegressor(
            C=1.0, fit_intercept=fi,
            random_state=0, average=av, max_iter=5)
        reg.fit(d, flipped)
        assert_less(np.mean((reg.predict(d) - flipped) ** 2), 1.7)
        if av:
            assert_true(hasattr(reg, 'average_coef_'))
            assert_true(hasattr(reg, 'average_intercept_'))
            assert_true(hasattr(reg, 'standard_intercept_'))
            assert_true(hasattr(reg, 'standard_coef_'))
def test_regressor_partial_fit():
    """Fifty partial_fit rounds; averaged model exposes averaging attrs."""
    neg = y.copy()
    neg[y != 1] = -1

    for data in (X, X_csr):
        for use_avg in (False, True):
            learner = PassiveAggressiveRegressor(
                C=1.0, fit_intercept=True, random_state=0,
                average=use_avg, max_iter=100)
            for _ in range(50):
                learner.partial_fit(data, neg)
            assert_less(np.mean((learner.predict(data) - neg) ** 2), 1.7)
            if use_avg:
                for attr in ('average_coef_', 'average_intercept_',
                             'standard_intercept_', 'standard_coef_'):
                    assert hasattr(learner, attr)
Ejemplo n.º 23
0
def test_regressor_mse():
    """All fit variants keep MSE < 1.7; average=True adds extra attrs."""
    bin_y = y.copy()
    bin_y[y != 1] = -1

    for avg in (False, True):
        for intercept in (True, False):
            for mat in (X, X_csr):
                est = PassiveAggressiveRegressor(
                    C=1.0, fit_intercept=intercept,
                    random_state=0, average=avg, max_iter=5)
                est.fit(mat, bin_y)
                assert np.mean((est.predict(mat) - bin_y) ** 2) < 1.7
                if avg:
                    assert hasattr(est, 'average_coef_')
                    assert hasattr(est, 'average_intercept_')
                    assert hasattr(est, 'standard_intercept_')
                    assert hasattr(est, 'standard_coef_')
def test_regressor_correctness():
    """Per-loss agreement between MyPassiveAggressive and sklearn's PA."""
    ref_y = y.copy()
    ref_y[y != 1] = -1

    losses = ("epsilon_insensitive", "squared_epsilon_insensitive")
    for cur_loss in losses:
        homemade = MyPassiveAggressive(C=1.0, loss=cur_loss,
                                       fit_intercept=True, n_iter=2)
        homemade.fit(X, ref_y)

        sklearn_pa = PassiveAggressiveRegressor(C=1.0, loss=cur_loss,
                                                fit_intercept=True, n_iter=2)
        sklearn_pa.fit(X, ref_y)

        assert_array_almost_equal(homemade.w, sklearn_pa.coef_.ravel(),
                                  decimal=2)
Ejemplo n.º 25
0
 def __init__(self,
              C=1.0,
              fit_intercept=True,
              max_iter=1000,
              tol=None,
              shuffle=True,
              verbose=0,
              loss="epsilon_insensitive",
              epsilon=DEFAULT_EPSILON,
              random_state=None,
              warm_start=False,
              average=False,
              n_iter=None):
     """Initialize the wrapped regressor.

     Forwards every hyperparameter positionally to the underlying
     ``_PassiveAggressiveRegressor.__init__`` (argument order must
     match that base class), then initializes the wrapper base.
     """
     _PassiveAggressiveRegressor.__init__(self, C, fit_intercept, max_iter,
                                          tol, shuffle, verbose, loss,
                                          epsilon, random_state, warm_start,
                                          average, n_iter)
     BaseWrapperReg.__init__(self)
Ejemplo n.º 26
0
 def __init__(self, env, feature_transformer):
     """Non-shuffling PA model per action plus a bloom filter of states."""
     self.env = env
     self.feature_transformer = feature_transformer
     self.models = {
         act: PassiveAggressiveRegressor(C=1.0, fit_intercept=True,
                                         shuffle=False)
         for act in env.actions_available
     }
     self.bloom_states = BloomFilter(max_elements=256**2)
     self.nonseen_states = 0
Ejemplo n.º 27
0
    def test_isclassifier(self):
        """Regressors must be rejected by classifier-only visualizers."""
        regressor = PassiveAggressiveRegressor()
        message = ('This estimator is not a classifier; '
                   'try a regression or clustering score visualizer instead!')

        with pytest.raises(yb.exceptions.YellowbrickError, match=message):
            ConfusionMatrix(regressor)
Ejemplo n.º 28
0
 def __init__(self, env, feature_transformer):
     """Maintain paired regular/elite PA models for every action."""
     self.env = env
     self.feature_transformer = feature_transformer
     self.models = {}
     self.models_elite = {}

     def _fresh_model():
         # Both model families share the same hyperparameters.
         return PassiveAggressiveRegressor(
             C=1.0,
             fit_intercept=True,
             shuffle=False,
             loss='epsilon_insensitive',
             epsilon=0.1)

     for act in env.actions_available:
         self.models[act] = _fresh_model()
         self.models_elite[act] = _fresh_model()
     self.bloom_states = BloomFilter(max_elements=256**2)
Ejemplo n.º 29
0
def test_regressor_partial_fit():
    """Default-iteration partial_fit with and without averaging."""
    tgt = y.copy()
    tgt[y != 1] = -1

    for avg_mode in (False, True):
        for dataset in (X, X_csr):
            par = PassiveAggressiveRegressor(C=1.0,
                                             fit_intercept=True,
                                             random_state=0,
                                             average=avg_mode)
            for _ in range(50):
                par.partial_fit(dataset, tgt)
            assert_less(np.mean((par.predict(dataset) - tgt) ** 2), 1.7)
            if avg_mode:
                assert_true(hasattr(par, 'average_coef_'))
                assert_true(hasattr(par, 'average_intercept_'))
                assert_true(hasattr(par, 'standard_intercept_'))
                assert_true(hasattr(par, 'standard_coef_'))
Ejemplo n.º 30
0
    def test_isclassifier(self):
        """ConfusionMatrix must reject estimators that are not classifiers."""
        regressor = PassiveAggressiveRegressor()
        message = 'This estimator is not a classifier; try a regression or clustering score visualizer instead!'
        digit_names = [
            'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
            'eight', 'nine'
        ]

        with self.assertRaisesRegexp(yellowbrick.exceptions.YellowbrickError,
                                     message):
            ConfusionMatrix(regressor, classes=digit_names)
Ejemplo n.º 31
0
 def test_model_passive_aggressive_regressor(self):
     """A fitted PA regressor converts to ONNX and round-trips correctly."""
     model, X = fit_regression_model(PassiveAggressiveRegressor())
     onnx_model = convert_sklearn(
         model, "passive aggressive regressor",
         [("input", FloatTensorType([None, X.shape[1]]))])
     self.assertIsNotNone(onnx_model)
     dump_data_and_model(
         X, model, onnx_model, verbose=False,
         basename="SklearnPassiveAggressiveRegressor-Dec4")
Ejemplo n.º 32
0
def get_latent_matrix(_x, _y, _z):
    """Stack per-model predictions on ``_z`` into a (n_samples, 6) matrix.

    Each model is fit on (_x, _y) and predicts on _z; one column per model.

    Fix: on Python 3 ``zip`` is a lazy iterator, so ``np.array(zip(...))``
    produced a useless 0-d object array; materializing with ``list`` is
    correct on both Python 2 and 3.
    """
    latent_matrix = np.array(
        list(zip(LinearRegression().fit(_x, _y).predict(_z),
                 BayesianRidge(compute_score=True).fit(_x, _y).predict(_z),
                 ElasticNet().fit(_x, _y).predict(_z),
                 PassiveAggressiveRegressor().fit(_x, _y).predict(_z),
                 RANSACRegressor().fit(_x, _y).predict(_z),
                 LogisticRegression().fit(_x, _y).predict(_z))))
    #SVR(kernel='linear', C=1e3).fit(_x,_y).predict(_z),
    #SVR(kernel='poly', C=1e3, degree=2).fit(_x,_y).predict(_z),
    #SVR(kernel='rbf', C=1e3, gamma=0.1).fit(_x,_y).predict(_z)))
    return latent_matrix
Ejemplo n.º 33
0
def select_regressor(X, y, scoring='neg_mean_squared_error', show=True):
    """Cross-validate a battery of sklearn regressors and pick the best.

    Returns (best_name, best_estimator, scores_dataframe), where scores are
    RMSE-style values (sqrt of the negated CV score), sorted ascending.
    NOTE(review): the `regressors[scores.iloc[0].name]` lookup relies on the
    scores dict preserving insertion order matching `regressors` — holds on
    Python 3.7+.
    """
    regressors = [
        AdaBoostRegressor(),
        # ARDRegression(),
        BaggingRegressor(),
        DecisionTreeRegressor(),
        ElasticNet(),
        ExtraTreeRegressor(),
        ExtraTreesRegressor(),
        # GaussianProcessRegressor(),
        GradientBoostingRegressor(),
        HuberRegressor(),
        KNeighborsRegressor(),
        Lasso(),
        LinearRegression(),
        # LogisticRegression(),
        MLPRegressor(),
        PassiveAggressiveRegressor(),
        PLSRegression(),
        # RadiusNeighborsRegressor(),
        RandomForestRegressor(),
        RANSACRegressor(),
        Ridge(),
        SGDRegressor(),
        TheilSenRegressor(),
    ]
    names = [reg.__class__.__name__ for reg in regressors]
    # cv = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=random_state)
    scores = {}
    for i, (name, reg) in enumerate(zip(names, regressors)):
        print('Processing {}...'.format(name))
        # 10-fold CV; scoring is negated, so sqrt(-s) below yields RMSE.
        ss = cross_val_score(reg, X, y, scoring=scoring, cv=10)
        scores[name] = ss
        # for train_index, test_index in cv.split(X, y):
        #     X_train, X_test = X[train_index], X[test_index]
        #     y_train, y_test = y[train_index], y[test_index]
        #     try:
        #         clf.fit(X_train, y_train)
        #         train_predictions = clf.predict(X_test)
        #         rmse = np.sqrt(mean_squared_error(y_test, train_predictions))
        #     except:
        #         rmse = 0
        #     s = scores.get(name, [])
        #     s.append(acc)
        #     scores[name] = s
    scores = [[n, np.sqrt(-s).mean()] for n, s in scores.items()]
    scores = pd.DataFrame(scores,
                          columns=['Regressor',
                                   'Score']).sort_values(by='Score',
                                                         ascending=True)
    if show:
        print(scores)
    return scores.iloc[0, 0], regressors[scores.iloc[0].name], scores
def get_hyperparameters_model():
    """Return the PA regressor wrapped in the hyperparameter-search schema.

    The param distribution is intentionally empty: the model is searched
    with its defaults only.
    """
    estimator = PassiveAggressiveRegressor()
    param_dist = {}

    return {
        'passive_aggressive_regressor': {
            'model': estimator,
            'param_distributions': param_dist,
        }
    }
Ejemplo n.º 35
0
def get_models(models=None):
    """Populate (or extend) a dict of linear regression models.

    Fix: the original signature used a mutable default argument
    (``models=dict()``), which is evaluated once and shared across calls,
    so repeated calls kept mutating the same dictionary. A ``None``
    sentinel preserves the call interface while giving each call a
    fresh dict.
    """
    if models is None:
        models = dict()
    # linear models
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    print('Defined %d models' % len(models))
    return models
Ejemplo n.º 36
0
    def ensure_many_models(self, clip_min=None, clip_max=None):
        """Smoke-test the estimator across many propensity/outcome learners.

        For every (classifier, regressor) pairing, builds an IPW weight
        model and a Standardization outcome model, fits the doubly-robust
        estimator, and predicts individual outcomes. Passing means nothing
        crashed; no numeric accuracy is asserted.
        """
        from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
        from sklearn.neural_network import MLPRegressor
        from sklearn.linear_model import ElasticNet, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVR, LinearSVR

        from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
        from sklearn.neural_network import MLPClassifier
        from sklearn.neighbors import KNeighborsClassifier

        # Small learners are used throughout, so convergence warnings are
        # expected noise — silence them for the duration of the test.
        from sklearn.exceptions import ConvergenceWarning
        warnings.filterwarnings('ignore', category=ConvergenceWarning)

        data = self.create_uninformative_ox_dataset()
        for propensity_learner in [
                GradientBoostingClassifier(n_estimators=10),
                RandomForestClassifier(n_estimators=100),
                MLPClassifier(hidden_layer_sizes=(5, )),
                KNeighborsClassifier(n_neighbors=20)
        ]:
            weight_model = IPW(propensity_learner,
                               clip_min=clip_min,
                               clip_max=clip_max)
            # Extract the class name from repr() for subtest labeling.
            propensity_learner_name = str(propensity_learner).split(
                "(", maxsplit=1)[0]
            for outcome_learner in [
                    GradientBoostingRegressor(n_estimators=10),
                    RandomForestRegressor(n_estimators=10),
                    MLPRegressor(hidden_layer_sizes=(5, )),
                    ElasticNet(),
                    RANSACRegressor(),
                    HuberRegressor(),
                    PassiveAggressiveRegressor(),
                    KNeighborsRegressor(),
                    SVR(),
                    LinearSVR()
            ]:
                outcome_learner_name = str(outcome_learner).split(
                    "(", maxsplit=1)[0]
                outcome_model = Standardization(outcome_learner)

                with self.subTest("Test fit & predict using {} & {}".format(
                        propensity_learner_name, outcome_learner_name)):
                    model = self.estimator.__class__(outcome_model,
                                                     weight_model)
                    # Weight model is shared across outcome learners;
                    # refitting it each time would be redundant.
                    model.fit(data["X"],
                              data["a"],
                              data["y"],
                              refit_weight_model=False)
                    model.estimate_individual_outcome(data["X"], data["a"])
                    self.assertTrue(True)  # Fit did not crash
Ejemplo n.º 37
0
def build_linear_model():
	"""Assemble a feature-transformer -> scaler -> PA(C=0.1) pipeline."""
	steps = [
		('ft', FeatureTransformer()),
		('scaler', StandardScaler()),
		('reg', PassiveAggressiveRegressor(C=0.1)),
	]
	return Pipeline(steps)
Ejemplo n.º 38
0
    def refit_from_scratch(self):
        """Rebuild the model and tag encoder from every database record.

        Fix: the incremental-training loop previously passed ``[Y[0]]``
        (the *first* target) for every row, so the model was trained on
        a single constant label; it now trains row ``i`` on its own
        target ``Y[i]``.
        """
        temp_model = PassiveAggressiveRegressor()
        temp_enc = CountVectorizer()
        X = []   # tag strings, later vectorized to a binary presence matrix
        Z = []   # additional numerical data
        Y = []   # target (to predict) values
        # NOTE(review): db_size is unused; kept in case db.size() has side
        # effects — candidate for removal.
        db_size = self.db.size()
        for data in self.db.yield_all():
            feedback = data["feedback"]
            tags = data["tags"]
            # Only records with both a target and tags are usable.
            if feedback and tags:
                Y.append(feedback)
                X.append(" ".join(tags))
                Z.append(self.fmt_numerical(data))

        X = temp_enc.fit_transform(X)
        X = hstack((X, coo_matrix(Z)))
        self.allX = X
        for i in range(X.shape[0]):
            temp_model.partial_fit(X.getrow(i), [Y[i]])
        self.model = temp_model
        self.enc = temp_enc
Ejemplo n.º 39
0
# Filter features by coefficient of variation (Python 2 script chunk).
var_thres = VarianceThreshold(best_var).fit(X_train_pre)
X_train_pre = var_thres.transform(X_train_pre)
X_test_pre = var_thres.transform(X_test_pre)

# For each gene: select top-k features, fit a PA regressor, store predictions.
for gene in genes:
    # Assemble prediction variables
    X_train = X_train_pre
    y_train = train_ess.ix[:, gene]
    X_test = X_test_pre

    # Feature selection: univariate F-test keeps the best_k features.
    fs = SelectKBest(f_regression, k=best_k).fit(X_train, y_train)
    X_train = fs.transform(X_train)
    X_test = fs.transform(X_test)

    # Estimation with pre-tuned epsilon / iteration count.
    clf = PassiveAggressiveRegressor(epsilon=best_epsilon, n_iter=best_n_iter).fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Store results
    predictions.ix[gene] = y_pred

    print gene

# Persist predictions and submit (challenge-specific helpers).
filename = save_gct_data(predictions, submission_filename_prefix)
print '[DONE]: Saved to file ' + filename

submit_solution(filename, filename.split('/')[1], ev_code_sc1)
print '[SUBMITED]'
Ejemplo n.º 40
0
# Scorer based on Spearman correlation (Python 2 script chunk).
spearman = make_scorer(spearm_cor_func, greater_is_better=True)

# Assemble prediction variables restricted to the top-100 features.
X_train = X_train_pre.loc[:, important_features_top_100]
X_test = X_test_pre.loc[:, important_features_top_100]

for gene in prioritized_genes:
    y_train = train_ess.ix[:, gene]

    y_preds_test = []
    y_preds_scores = []

    # Training: 5 shuffle-split rounds; keep per-round test predictions
    # and the Spearman score of each round's held-out fit.
    cv = ShuffleSplit(len(y_train), n_iter=5)
    for train_i, test_i in cv:
        clf = PassiveAggressiveRegressor(epsilon=0.01, n_iter=7).fit(X_train.ix[train_i, :], y_train[train_i])
        y_preds_scores.append(spearm_cor_func(clf.predict(X_train.ix[test_i, :]), y_train[test_i]))
        y_preds_test.append(clf.predict(X_test))

    y_preds_scores = Series(y_preds_scores)
    y_preds_test = DataFrame(y_preds_test)

    # Predict: average the rounds whose score is not NaN.
    y_pred = np.mean(y_preds_test[y_preds_scores.notnull()], axis=0).values

    print gene, X_train.shape

    # Store results
    predictions.ix[gene] = y_pred

filename_gct = save_gct_data(predictions, submission_filename_prefix)
Ejemplo n.º 41
0
# Model-comparison script chunk (Python 2): fit several regressors on
# (x, y) and score the held-out xt. `br` is defined earlier in the file.
br.fit(x, y)
br_sts_scores = br.predict(xt)


# Elastic Net
print 'elastic net'
enr = ElasticNet()
#enr.fit(x[:, np.newaxis], y)
#enr_sts_scores = enr.predict(xt[:, np.newaxis])
enr.fit(x, y)
enr_sts_scores = enr.predict(xt)


# Passive Aggressive Regression
print 'passive aggressive'
par = PassiveAggressiveRegressor()
par.fit(x, y)
par_sts_scores = par.predict(xt)
#par.fit(x[:, np.newaxis], y)
#par_sts_scores = par.predict(xt[:, np.newaxis])

# RANSAC Regression
print 'ransac'
ransac = RANSACRegressor()
#ransac.fit(x[:, np.newaxis], y)
#ransac_sts_scores = ransac.predict(xt[:, np.newaxis])
ransac.fit(x, y)
ransac_sts_scores = ransac.predict(xt)


# Logistic Regression
Ejemplo n.º 42
0
def main():
    """Benchmark custom PA regressors against scikit-learn's implementation.

    Builds a synthetic regression task, then fits and prints the test-set
    MAE for: a custom linear PA-II model (PARegressor), kernel PA models
    with linear and RBF kernels (KernelPARegressor), and scikit-learn's
    PassiveAggressiveRegressor trained one sample at a time via
    partial_fit.  Results are printed; nothing is returned.
    """
    # 1000 samples, 200 features (10 informative); also return true coefs.
    X, y, coef = make_regression(1000, 200, 10, 1, noise=0.05, coef=True,
                                 random_state=42)

    # X = np.column_stack((X, np.ones(X.shape[0])))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        random_state=42)

    # sca = StandardScaler()
    # sca.fit(X_train)
    # X_train = sca.transform(X_train)
    # X_test = sca.transform(X_test)

    # print X.shape
    # print y.shape
    # print coef.shape

    # NOTE: these grids are only consumed by the commented-out grid
    # searches below; the active code uses fixed hyper-parameters.
    param_grid = {
        "C": [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 10,
              100, 1000],
        "epsilon": [0.0001, 0.001, 0.01, 0.1]}

    param_grid_kern = {
        "C": [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 10,
              100, 1000],
        "epsilon": [0.0001, 0.001, 0.01, 0.1],
        "gamma": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100]}
    # "loss": ["pa", "pai", "paii"]}}

    # Custom linear PA-II regressor (project-local class).
    my_pa = PARegressor(loss="paii", C=1, epsilon=0.001, n_iter=1,
                        fit_intercept=False)
    #
    # search = GridSearchCV(my_pa, param_grid,
    #                       scoring='mean_absolute_error', n_jobs=8, iid=True, refit=True, cv=5,
    #                       verbose=1)
    # search.fit(X_train, y_train)
    # print search.best_params_

    my_pa.fit(X_train, y_train)
    print my_pa.coef_

    # y_preds = search.predict(X_test)
    y_preds = my_pa.predict(X_test)

    mae_my_pa = mean_absolute_error(y_test, y_preds)
    print "My PA MAE = %2.4f" % mae_my_pa

    # Kernel PA with a linear kernel (project-local class).
    my_kpa_linear = KernelPARegressor(kernel="linear", loss="paii", C=1, epsilon=0.001, n_iter=1, fit_intercept=False)
    my_kpa_linear.fit(X_train, y_train)
    print "alphas", len(my_kpa_linear.alphas_), my_kpa_linear.alphas_
    y_preds = my_kpa_linear.predict(X_test)
    mae_kpa_linear = mean_absolute_error(y_test, y_preds)
    print "My KPA linear MAE = %2.4f" % mae_kpa_linear

    # RBF-kernel PA; gamma fixed here rather than grid-searched.
    my_kpa_rbf = KernelPARegressor(kernel="rbf", loss="paii", gamma=0.001, C=1, epsilon=0.001, n_iter=1, fit_intercept=False)
    # search = GridSearchCV(my_kpa_rbf, param_grid_kern,
    #                       scoring='mean_absolute_error', n_jobs=8, iid=True, refit=True, cv=5,
    #                       verbose=1)
    # search.fit(X_train, y_train)

    my_kpa_rbf.fit(X_train, y_train)
    print "alphas", len(my_kpa_rbf.alphas_), my_kpa_rbf.alphas_
    print "support", len(my_kpa_rbf.support_)
    # print "alphas", len(search.best_estimator_.alphas_)  # , my_kpa_rbf.alphas_
    # print "support", len(search.best_estimator_.support_)
    # print search.best_params_
    y_preds = my_kpa_rbf.predict(X_test)
    # y_preds = search.predict(X_test)
    mae_my_kpa = mean_absolute_error(y_test, y_preds)
    print "My Kernel PA MAE = %2.4f" % mae_my_kpa

    # print search.best_estimator_
    # print np.corrcoef(search.best_estimator_.coef_, coef)

    # param_grid = {
    # "C": [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 10,
    #           100, 1000, 10000],
    #     "epsilon": [0.0001, 0.001, 0.01, 0.1],
    #     # "loss": ["epsilon_insensitive", "squared_epsilon_insensitive"]}
    #     "loss": ["squared_epsilon_insensitive"]}


    # search = GridSearchCV(PassiveAggressiveRegressor(fit_intercept=True),
    # param_grid, scoring='mean_absolute_error', n_jobs=8, iid=True,
    # refit=True, cv=5, verbose=1)
    # search.fit(X_train, y_train)

    # scikit-learn's PA, driven sample-by-sample via partial_fit
    # (warm_start=True keeps the weights between calls) to mimic the
    # online setting the custom models use.
    sk_pa = PassiveAggressiveRegressor(loss="squared_epsilon_insensitive", C=1,
                                       epsilon=0.001, n_iter=1,
                                       fit_intercept=False,
                                       warm_start=True)
    for i in xrange(X_train.shape[0]):
        # for x_i, y_i in zip(X_train, y_train):
        # NOTE(review): `x`/`y` shadow the dataset variables above; harmless
        # here since X/y are no longer read after the split, but fragile.
        x = np.array(X_train[i], ndmin=2)
        y = np.array(y_train[i], ndmin=1)
        # print x.shape
        # print y
        sk_pa.partial_fit(x, y)

    # sk_pa.fit(X_train, y_train)

    # y_preds = search.predict(X_test)
    y_preds = sk_pa.predict(X_test)
    mae_sk_pa = mean_absolute_error(y_preds, y_test)
    print "Sklearn PA MAE = %2.4f" % mae_sk_pa
Ejemplo n.º 43
0
# Append sentiment (polarity, subjectivity) columns to the sparse feature
# matrices, mirroring the training construction for the test set.
Xtrain = sp.hstack((Xtrain, sp.csr_matrix(sent_df[['polarity', 'subjectivity']].values)))

Xtest = sp.hstack((sp.coo_matrix(test_category_df.values), comm_test))
Xtest = sp.hstack((Xtest, sp.csr_matrix(test_sent_df[['polarity', 'subjectivity']].values)))
Ytrain = np.ravel(quality_df['quality'])
#Ytest = np.ravel(test_quality_df['quality'])
Xtr, Xte, Ytr, Yte = train_test_split(Xtrain, Ytrain, test_size=.25, random_state=0)

ids = test_ids.id

print("Training Models")

# Five linear models with hand-picked hyper-parameters.
m1 = Ridge(normalize=True, alpha=0.001, solver='auto')
m2 = Lasso(normalize=False, alpha=0.0001, selection='cyclic', positive=False)
m3 = ElasticNet(normalize=False, alpha=0.0001, positive=False, l1_ratio=0.2)
m4 = PassiveAggressiveRegressor(epsilon=0.001, C=100, shuffle=True)
m5 = LinearRegression()

models = [m1, m2, m3, m4, m5]

# Fit every model on the full training set, reporting progress as each
# one finishes.
for model_no, model in enumerate(models, start=1):
    model.fit(Xtrain, Ytrain)
    print("Model %d Finished" % model_no)
Ejemplo n.º 44
0
# Fit a scaler for the topic-follower counts on the training data.
tfscaler = preprocessing.StandardScaler().fit(topicsfollowers)


# Vectorize question/topic text and scale follower counts (training set).
quesparse = quevectorizer.fit_transform(question)
topsparse = topvectorizer.fit_transform(topics)
cfscaled = cfscaler.transform(contextfollowers)
tfscaled = tfscaler.transform(topicsfollowers)

# Test-set ("t"-prefixed) versions, transformed with the already-fitted
# vectorizers/scalers only (no refitting).
tquesparse = quevectorizer.transform(tquestion)
ttopsparse = topvectorizer.transform(ttopics)
tcfscaled = cfscaler.transform(tcontextfollowers)
ttfscaled = tfscaler.transform(ttopicsfollowers)



# Fit a PA regressor on the topic features alone and clamp negative
# predictions to zero — presumably because the target is non-negative;
# confirm against how y is built upstream.
par = PassiveAggressiveRegressor()
par.fit(topsparse,y)
pred = par.predict(ttopsparse)
pred[pred<0] = 0


# Histogram of training targets: raw (top) and log10-transformed (bottom);
# zeros are mapped to 1 before the log so log10 is defined.
temp = pl.figure("train y")
temp = pl.subplot(2,1,1)
temp = pl.hist(y,1000)
temp = pl.subplot(2,1,2)
yy = y.copy()
yy[yy==0] = 1
temp = pl.hist(np.log10(yy),1000)

# Companion figure for the test-set predictions (continues past this chunk).
temp = pl.figure("test y")
temp = pl.subplot(4,1,1)