def createPassiveAggressiveRegressor(params):
    """Build a PassiveAggressiveRegressor from merged/tuned parameters.

    Merges caller params with the estimator defaults, resolves each
    hyperparameter against the tuning grid, and returns a dict with the
    configured estimator plus the tuning parameters.

    Improvement: the four identical setParam/info pairs are collapsed
    into one loop over the hyperparameter names.
    """
    info("Creating Passive Aggressive Regressor", ind=4)
    ## Params
    params = mergeParams(PassiveAggressiveRegressor(), params)
    tuneParams = getPassiveAggressiveRegressorParams()
    grid = tuneParams['grid']
    info("With Parameters", ind=4)
    # Resolve each hyperparameter once; same order and logging as before.
    resolved = {}
    for name in ('C', 'loss', 'max_iter', 'tol'):
        resolved[name] = setParam(name, params, grid, force=False)
        info("Param: {0} = {1}".format(name, resolved[name]), ind=6)
    ## Estimator
    reg = PassiveAggressiveRegressor(**resolved)
    return {"estimator": reg, "params": tuneParams}
def mcFadden_R2(y_true, y_pred):
    """Return McFadden's pseudo R-squared: 1 - L(model) / L(null)."""
    # Null model: regress on a single constant (all-ones) feature.
    ones = pd.DataFrame(np.full(len(y_true), 1))
    null_model = PassiveAggressiveRegressor()
    null_model.fit(ones, y_true)
    null_pred = null_model.predict(ones)
    print('avg log-likelihood null-model: {}'.format(
        log_likelihood(y_true, null_pred)))
    model_ll = log_likelihood(y_true, y_pred)
    null_ll = log_likelihood(y_true, null_pred)
    return 1 - model_ll / null_ll
def test_regressor_mse():
    """Fitted PA regressor should reach MSE < 1.7 on dense and sparse X."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    for fit_intercept in (True, False):
        for data in (X, X_csr):
            reg = PassiveAggressiveRegressor(
                C=1.0, n_iter=50, fit_intercept=fit_intercept, random_state=0)
            reg.fit(data, y_bin)
            residual = reg.predict(data) - y_bin
            assert_less(np.mean(residual ** 2), 1.7)
def test_regressor_partial_fit():
    """Fifty partial_fit passes should drive MSE below 1.7."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    for data in (X, X_csr):
        reg = PassiveAggressiveRegressor(C=1.0, fit_intercept=True,
                                         random_state=0)
        for _ in range(50):
            reg.partial_fit(data, y_bin)
        err = reg.predict(data) - y_bin
        assert_less(np.mean(err ** 2), 1.7)
def test_regressor_partial_fit():
    """Fifty partial_fit passes should drive MSE below 1.7.

    Bug fix: `xrange` exists only in Python 2 and raises NameError on
    Python 3; `range` behaves identically here.
    """
    y_bin = y.copy()
    y_bin[y != 1] = -1
    for data in (X, X_csr):
        reg = PassiveAggressiveRegressor(C=1.0, fit_intercept=True,
                                         random_state=0)
        for t in range(50):
            reg.partial_fit(data, y_bin)
        pred = reg.predict(data)
        assert_less(np.mean((pred - y_bin) ** 2), 1.7)
def test_regressor_correctness(loss):
    """Sklearn PA regressor must match the reference implementation."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    reference = MyPassiveAggressive(loss=loss, n_iter=2)
    reference.fit(X, y_bin)
    for data in (X, X_csr):
        candidate = PassiveAggressiveRegressor(tol=None, loss=loss,
                                               max_iter=2, shuffle=False)
        candidate.fit(data, y_bin)
        assert_array_almost_equal(reference.w, candidate.coef_.ravel(),
                                  decimal=2)
class _PassiveAggressiveRegressorImpl: def __init__(self, **hyperparams): self._hyperparams = hyperparams self._wrapped_model = Op(**self._hyperparams) def fit(self, X, y=None): if y is not None: self._wrapped_model.fit(X, y) else: self._wrapped_model.fit(X) return self def predict(self, X): return self._wrapped_model.predict(X)
def test_regressor_correctness(loss):
    """Sklearn PA regressor (2 epochs, no shuffle) matches the reference."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    reference = MyPassiveAggressive(C=1.0, loss=loss, fit_intercept=True,
                                    n_iter=2)
    reference.fit(X, y_bin)
    for data in (X, X_csr):
        candidate = PassiveAggressiveRegressor(C=1.0, tol=None, loss=loss,
                                               fit_intercept=True,
                                               max_iter=2, shuffle=False)
        candidate.fit(data, y_bin)
        assert_array_almost_equal(reference.w, candidate.coef_.ravel(),
                                  decimal=2)
def fancy_text_model(x_train, y_train, x_test, x_valid, cache_name, use_cache=False):
    """Fit a PassiveAggressiveRegressor and predict test/valid targets.

    Predictions are cached to `cache_name` via pickle; when `use_cache`
    is True the cached predictions are returned without refitting.

    Bug fix: pickle requires binary file modes ('rb'/'wb'); the original
    text modes crash on Python 3 and risk corruption. Files are now
    closed deterministically via context managers (the read handle was
    previously leaked).
    """
    if use_cache:
        with open(cache_name, 'rb') as fhand:
            data_dict = pickle.load(fhand)
        return data_dict['test_pred'], data_dict['valid_pred']
    np.random.seed(seed=123)
    model = PassiveAggressiveRegressor(n_iter=100, C=1, shuffle=True,
                                       random_state=123)
    model.fit(x_train, y_train)
    test_pred = model.predict(x_test)
    valid_pred = model.predict(x_valid)
    with open(cache_name, 'wb') as fhand:
        pickle.dump({'test_pred': test_pred, 'valid_pred': valid_pred}, fhand)
    return test_pred, valid_pred
def __init__(self, spar_type, spar_penalty):
    """Create one online SGD regressor per discrete action.

    spar_type: sklearn penalty name passed to SGDRegressor ('l1', 'l2',
        'none', 'elasticnet').
    spar_penalty: l1_ratio forwarded to SGDRegressor (only meaningful
        for the elasticnet penalty).
    NOTE(review): relies on a module/global `env` being in scope at
    construction time — confirm where it is defined.
    """
    # We create a separate model for each action in the environment's
    # action space. Alternatively we could somehow encode the action
    # into the features, but this way it's easier to code up.
    self.models = []
    for _ in range(env.action_space.n):
        #model=Lasso(alpha=0.01)
        model = SGDRegressor(learning_rate='constant', penalty=spar_type,
                             l1_ratio=spar_penalty, max_iter=1000)
        # model1/model2/model3 are alternative learners that are built
        # here but never stored or used; only `model` is kept below.
        model1 = PassiveAggressiveRegressor()
        model2 = Lasso(alpha=0.1, normalize=True, warm_start=True)
        model3 = FTRL(alpha=1.0, beta=1.0, L1=0.00001, L2=1.0, D=2**25,
                      iters=1)
        #l2,l1,none,elasticnet
        #,penalty='l1',l1_ratio=0) #learning_rate="constant"
        # We need to call partial_fit once to initialize the model
        # or we get a NotFittedError when trying to make a prediction
        # This is quite hacky.
        #model2.fit([self.featurize_state(env.reset())], [0])
        #X = np.array([self.featurize_state(env.reset())])
        #Y = np.array([0])
        #print X.shape, Y.shape
        #model.partial_fit(X,Y)
        model.partial_fit([self.featurize_state(env.reset())], [0])
        self.models.append(model)
def get_model_from_name(model_name):
    """Return a freshly-constructed estimator registered under `model_name`.

    Raises KeyError for unknown names. Note that every estimator in the
    registry is instantiated on each call, not just the requested one.
    """
    classifiers = {
        'LogisticRegression': LogisticRegression(n_jobs=-2),
        'RandomForestClassifier': RandomForestClassifier(n_jobs=-2),
        'RidgeClassifier': RidgeClassifier(),
        'XGBClassifier': xgb.XGBClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'SGDClassifier': SGDClassifier(n_jobs=-1),
        'Perceptron': Perceptron(n_jobs=-1),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
    }
    regressors = {
        'LinearRegression': LinearRegression(n_jobs=-2),
        'RandomForestRegressor': RandomForestRegressor(n_jobs=-2),
        'Ridge': Ridge(),
        'XGBRegressor': xgb.XGBRegressor(),
        'ExtraTreesRegressor': ExtraTreesRegressor(n_jobs=-1),
        'AdaBoostRegressor': AdaBoostRegressor(n_estimators=5),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(presort=False),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),
        'SGDRegressor': SGDRegressor(shuffle=False),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(shuffle=False),
    }
    clusterers = {
        'MiniBatchKMeans': MiniBatchKMeans(n_clusters=8),
    }
    model_map = {}
    model_map.update(classifiers)
    model_map.update(regressors)
    model_map.update(clusterers)
    return model_map[model_name]
def runmodel_sklearn(chromosome, train, test, modelname, feature, label):
    """Train the named sklearn model on chromosome-selected rows and
    return the test NRMSE (RMSE divided by the RMS of the true labels).

    Returns the sentinel 1000000000 when the chromosome selects no rows.
    """
    model = {
        'GBRT': GradientBoostingRegressor(max_depth=7, loss='huber'),
        #'xgb': xgb.XGBRegressor(nthread = 10,objective='reg:linear', n_estimators = 10, max_depth = 3),
        'SVR': SVR(),
        'Lasso': Lasso(),
        'Linear': LinearRegression(),
        'DecisionTree': DecisionTreeRegressor(max_depth=6),
        'RandomForest': RandomForestRegressor(random_state=1, n_jobs=12),
        'Ridge': Ridge(),
        'AdaBoost': AdaBoostRegressor(),
        'BayesianRidge': BayesianRidge(compute_score=True),
        'KNN': KNeighborsRegressor(n_neighbors=12),
        'ExtraTrees': ExtraTreesRegressor(random_state=1, n_jobs=12),
        'SGD': SGDRegressor(loss='huber', penalty='elasticnet', random_state=1),
        'PassiveAggressive': PassiveAggressiveRegressor(),
        'ElasticNet': ElasticNet(),
        'Lars': Lars(),
        #'lgm': lgb.LGBMRegressor(objective='regression',num_leaves=40, learning_rate=0.1,n_estimators=20, num_threads = 10),
        #'xgb_parallel': xgb.XGBRegressor(objective='reg:linear', n_estimators = 10, max_depth = 3, nthread = 4)
    }
    newtrain = make_dataframe(chromosome, train)
    if len(newtrain) == 0:
        return 1000000000
    estimator = model[modelname]
    #return pearsonr(estimator.fit(newtrain[feature], newtrain[label]).predict(test[feature]), test[label])[0]
    estimator.fit(newtrain[feature], newtrain[label])
    diff = estimator.predict(test[feature]) - test[label]
    rmse = np.sqrt(np.power(diff, 2).mean())
    return rmse / np.sqrt(np.power(test[label], 2).mean())
def __init__(self, env, feature_transformer):
    """One PA regressor per available action, plus eligibility traces."""
    self.env = env
    self.feature_transformer = feature_transformer
    self.models = {
        action: PassiveAggressiveRegressor(C=1.0, fit_intercept=True,
                                           n_iter=10)
        for action in env.actions_available
    }
    self.eligibilities = np.zeros((env.n_actions,
                                   feature_transformer.dimensions))
def __init__(self, env, feature_transformer):
    """Keep one PA regressor per available action."""
    self.env = env
    self.feature_transformer = feature_transformer
    self.models = {
        action: PassiveAggressiveRegressor(C=1.0, fit_intercept=True,
                                           n_iter=10)
        for action in env.actions_available
    }
def cross_validate(params):
    """Return the negated mean 3-fold R^2 for a PA regressor with the
    given (C, epsilon) pair — lower is better for the outer optimizer."""
    global test_data_products, model_products
    _C, _epsilon = params
    data = test_data_products[1].dropna()
    feature_cols = ['amount_of_all_competitors', 'average_price_on_market',
                    'distance_to_cheapest_competitor', 'price_rank',
                    'quality_rank']
    X = data[feature_cols]
    y = data['sold'].copy()
    y[y > 1] = 1  # cap multi-unit sales at 1
    model = PassiveAggressiveRegressor(max_iter=1000, tol=0.0001)
    model.set_params(C=_C, epsilon=_epsilon)
    return -np.mean(cross_val_score(model, X, y, cv=3, scoring='r2'))
def test_regressor_partial_fit():
    """partial_fit must converge; averaging exposes extra attributes."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    averaged_attrs = ('average_coef_', 'average_intercept_',
                      'standard_intercept_', 'standard_coef_')
    for data in (X, X_csr):
        for average in (False, True):
            reg = PassiveAggressiveRegressor(random_state=0, average=average,
                                             max_iter=100)
            for _ in range(50):
                reg.partial_fit(data, y_bin)
            assert np.mean((reg.predict(data) - y_bin) ** 2) < 1.7
            if average:
                for attr in averaged_attrs:
                    assert hasattr(reg, attr)
def __init__(self, X, y, par_params, nfolds=3, n_jobs=1, scoring=None, random_grid=False, n_iter=10, verbose=True):
    """Predictive-model wrapper around sklearn's PassiveAggressiveRegressor.

    par_params: hyperparameter grid/distributions handed to the base class.
    random_grid: presumably switches constructRegressor to randomized
        search over par_params — confirm in AbstractRegressorPredictiveModel.
    """
    self._code = "par"
    if verbose:
        print("Constructed PassiveAggressiveRegressor: " + self._code)
    # Base-class init stores data/CV settings; it also appears to set
    # self._random_grid used below — verify in the base class.
    AbstractRegressorPredictiveModel.__init__(self, "regressor", X, y, par_params, nfolds, n_jobs, scoring, random_grid, n_iter, verbose)
    self._model = self.constructRegressor(PassiveAggressiveRegressor(), self._random_grid)
def go(self, all_data, totalCols, test_ID, colsP, RFEcv, XGBestCols):
    """Fit three grid-searched pipelines (XGBoost, BayesianRidge, PA),
    average them, and return a SalePrice submission DataFrame.

    Rows with SalePrice > 0 are treated as train, SalePrice == 0 as test.
    NOTE(review): y_train is passed through expm1 before fitting, so the
    incoming SalePrice is presumably log1p-transformed — confirm upstream.
    """
    train = all_data.loc[all_data.SalePrice > 0, list(totalCols)].reset_index(drop=True, inplace=False)
    y_train = all_data.SalePrice[all_data.SalePrice > 0].reset_index(drop=True, inplace=False)
    test = all_data.loc[all_data.SalePrice == 0, list(totalCols)].reset_index(drop=True, inplace=False)
    # PCA variance diagnostics on the robust-scaled training matrix
    # (printed only; the fitted pca object is not reused below).
    scale = RobustScaler()
    df = scale.fit_transform(train)
    pca = PCA().fit(df)  # whiten=True
    print('With only 120 features: {:6.4%}'.format(sum(pca.explained_variance_ratio_[:120])), "%\n")
    print('After PCA, {:3} features only not explained {:6.4%} of variance ratio from the original {:3}'.format(120, (sum(pca.explained_variance_ratio_[120:])), df.shape[1]))
    y_train = np.expm1(y_train)
    #Common parameters
    unionedColumns = list(set(RFEcv).union(set(colsP)))
    lengthOfUnionedColumns = len(unionedColumns)
    #XGBRegressor
    model = Pipeline([('pca', PCA(random_state=self.randomState)), ('model', XGBRegressor(random_state=self.randomState, silent=True))])
    gridSearch = self.createGridSearch(model, "XGB", lengthOfUnionedColumns)
    xgbRegressor = Pipeline([('sel', select_fetaures(select_cols=unionedColumns)), ('scl', RobustScaler()), ('gs', gridSearch)])
    xgbRegressor.fit(train, y_train)
    #bayesian ridge
    model = Pipeline([('pca', PCA(random_state=self.randomState)), ('model', BayesianRidge())])
    gridSearch = self.createGridSearch(model, "Bayesian", lengthOfUnionedColumns)
    bayesianRidge = Pipeline([('sel', select_fetaures(select_cols=unionedColumns)), ('scl', RobustScaler()), ('gs', gridSearch)])
    bayesianRidge.fit(train, y_train)
    #Passive Aggressive Regressor
    model = Pipeline([('pca', PCA(random_state=self.randomState)), ('model', PassiveAggressiveRegressor(random_state=self.randomState))])
    gridSearch = self.createGridSearch(model, "PassiveAggressive", lengthOfUnionedColumns)
    passiveAggressiveRegressor = Pipeline([('sel', select_fetaures(select_cols=unionedColumns)), ('scl', RobustScaler()), ('gs', gridSearch)])
    passiveAggressiveRegressor.fit(train, y_train)
    # Plain average of the three fitted pipelines.
    averagingModels = AveragingModels(models=(xgbRegressor, bayesianRidge, passiveAggressiveRegressor))
    averagingModels.fit(train, y_train)
    averagedModelTrainingDataPredictions = averagingModels.predict(train)
    averagedModelTestDataPredictions = (averagingModels.predict(test))
    meanSquaredError = (np.sqrt(mean_squared_error(y_train, averagedModelTrainingDataPredictions)))
    averageModelScore = averagingModels.score(train, y_train)
    print('RMSLE score on the train data: {:.4f}'.format(meanSquaredError))
    print('Accuracy score: {:.6%}'.format(averageModelScore))
    ensemble = averagedModelTestDataPredictions * 1
    submit = pd.DataFrame()
    submit['id'] = test_ID
    submit['SalePrice'] = ensemble
    return (submit)
def test_regressor_correctness():
    """Reference and sklearn PA regressors agree for both loss variants."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    for loss in ("epsilon_insensitive", "squared_epsilon_insensitive"):
        reference = MyPassiveAggressive(C=1.0, loss=loss, fit_intercept=True,
                                        n_iter=2)
        reference.fit(X, y_bin)
        candidate = PassiveAggressiveRegressor(C=1.0, loss=loss,
                                               fit_intercept=True, n_iter=2)
        candidate.fit(X, y_bin)
        assert_array_almost_equal(reference.w, candidate.coef_.ravel(),
                                  decimal=2)
def test_regressor_mse():
    """Fitted PA regressor reaches MSE < 1.7; averaging adds attributes."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    averaged_attrs = ('average_coef_', 'average_intercept_',
                      'standard_intercept_', 'standard_coef_')
    for data in (X, X_csr):
        for fit_intercept in (True, False):
            for average in (False, True):
                reg = PassiveAggressiveRegressor(
                    C=1.0, fit_intercept=fit_intercept, random_state=0,
                    average=average, max_iter=5)
                reg.fit(data, y_bin)
                assert_less(np.mean((reg.predict(data) - y_bin) ** 2), 1.7)
                if average:
                    for attr in averaged_attrs:
                        assert_true(hasattr(reg, attr))
def test_regressor_partial_fit():
    """50 partial_fit epochs reach MSE < 1.7; averaging adds attributes."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    for average in (False, True):
        for data in (X, X_csr):
            reg = PassiveAggressiveRegressor(
                C=1.0, fit_intercept=True, random_state=0,
                average=average, max_iter=100)
            for _ in range(50):
                reg.partial_fit(data, y_bin)
            assert_less(np.mean((reg.predict(data) - y_bin) ** 2), 1.7)
            if average:
                for attr in ('average_coef_', 'average_intercept_',
                             'standard_intercept_', 'standard_coef_'):
                    assert hasattr(reg, attr)
def test_regressor_mse():
    """Five-epoch fits stay under MSE 1.7; averaging adds attributes."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    averaged_attrs = ('average_coef_', 'average_intercept_',
                      'standard_intercept_', 'standard_coef_')
    for data in (X, X_csr):
        for fit_intercept in (True, False):
            for average in (False, True):
                reg = PassiveAggressiveRegressor(
                    C=1.0, fit_intercept=fit_intercept, random_state=0,
                    average=average, max_iter=5)
                reg.fit(data, y_bin)
                assert np.mean((reg.predict(data) - y_bin) ** 2) < 1.7
                if average:
                    assert all(hasattr(reg, attr) for attr in averaged_attrs)
def __init__(self, C=1.0, fit_intercept=True, max_iter=1000, tol=None,
             shuffle=True, verbose=0, loss="epsilon_insensitive",
             epsilon=DEFAULT_EPSILON, random_state=None, warm_start=False,
             average=False, n_iter=None):
    """Initialise the underlying sklearn PA regressor, then the wrapper.

    All arguments are forwarded unchanged to
    _PassiveAggressiveRegressor.__init__ (as keywords for clarity).
    """
    _PassiveAggressiveRegressor.__init__(
        self, C=C, fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
        shuffle=shuffle, verbose=verbose, loss=loss, epsilon=epsilon,
        random_state=random_state, warm_start=warm_start, average=average,
        n_iter=n_iter)
    BaseWrapperReg.__init__(self)
def __init__(self, env, feature_transformer):
    """One non-shuffling PA regressor per action, plus a state Bloom filter."""
    self.env = env
    self.feature_transformer = feature_transformer
    self.models = {
        action: PassiveAggressiveRegressor(C=1.0, fit_intercept=True,
                                           shuffle=False)
        for action in env.actions_available
    }
    self.bloom_states = BloomFilter(max_elements=256**2)
    self.nonseen_states = 0  # counter for states missing from the filter
def test_isclassifier(self):
    """ConfusionMatrix must reject non-classifier estimators."""
    message = ('This estimator is not a classifier; '
               'try a regression or clustering score visualizer instead!')
    regressor = PassiveAggressiveRegressor()
    with pytest.raises(yb.exceptions.YellowbrickError, match=message):
        ConfusionMatrix(regressor)
def __init__(self, env, feature_transformer):
    """Two identical PA regressors (regular + elite) per action."""
    self.env = env
    self.feature_transformer = feature_transformer
    self.models = {}
    self.models_elite = {}
    # Identical configuration for both model banks.
    pa_kwargs = dict(C=1.0, fit_intercept=True, shuffle=False,
                     loss='epsilon_insensitive', epsilon=0.1)
    for action in env.actions_available:
        self.models[action] = PassiveAggressiveRegressor(**pa_kwargs)
        self.models_elite[action] = PassiveAggressiveRegressor(**pa_kwargs)
    self.bloom_states = BloomFilter(max_elements=256**2)
def test_regressor_partial_fit():
    """50 partial_fit epochs reach MSE < 1.7; averaging adds attributes."""
    y_bin = y.copy()
    y_bin[y != 1] = -1
    for average in (False, True):
        for data in (X, X_csr):
            reg = PassiveAggressiveRegressor(C=1.0, fit_intercept=True,
                                             random_state=0, average=average)
            for _ in range(50):
                reg.partial_fit(data, y_bin)
            assert_less(np.mean((reg.predict(data) - y_bin) ** 2), 1.7)
            if average:
                for attr in ('average_coef_', 'average_intercept_',
                             'standard_intercept_', 'standard_coef_'):
                    assert_true(hasattr(reg, attr))
def test_isclassifier(self):
    """ConfusionMatrix must reject a regressor with a helpful error."""
    message = 'This estimator is not a classifier; try a regression or clustering score visualizer instead!'
    classes = ['zero', 'one', 'two', 'three', 'four',
               'five', 'six', 'seven', 'eight', 'nine']
    regressor = PassiveAggressiveRegressor()
    with self.assertRaisesRegexp(yellowbrick.exceptions.YellowbrickError,
                                 message):
        ConfusionMatrix(regressor, classes=classes)
def test_model_passive_aggressive_regressor(self):
    """PA regressor should convert to ONNX and round-trip predictions."""
    model, X = fit_regression_model(PassiveAggressiveRegressor())
    input_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "passive aggressive regressor",
                                 input_types)
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(X, model, model_onnx, verbose=False,
                        basename="SklearnPassiveAggressiveRegressor-Dec4")
def get_latent_matrix(_x, _y, _z):
    """Stack the predictions of six linear models (fit on _x, _y) for _z.

    Returns an (n_samples, 6) array, one column per model.

    Bug fix: `np.array(zip(...))` yields a useless 0-d object array on
    Python 3 because zip is a lazy iterator; np.column_stack builds the
    intended 2-D matrix on both Python 2 and Python 3.
    """
    latent_matrix = np.column_stack((
        LinearRegression().fit(_x, _y).predict(_z),
        BayesianRidge(compute_score=True).fit(_x, _y).predict(_z),
        ElasticNet().fit(_x, _y).predict(_z),
        PassiveAggressiveRegressor().fit(_x, _y).predict(_z),
        RANSACRegressor().fit(_x, _y).predict(_z),
        LogisticRegression().fit(_x, _y).predict(_z)))
    #SVR(kernel='linear', C=1e3).fit(_x,_y).predict(_z),
    #SVR(kernel='poly', C=1e3, degree=2).fit(_x,_y).predict(_z),
    #SVR(kernel='rbf', C=1e3, gamma=0.1).fit(_x,_y).predict(_z)))
    return latent_matrix
def select_regressor(X, y, scoring='neg_mean_squared_error', show=True):
    """Cross-validate a battery of sklearn regressors and pick the best.

    Returns (best_name, best_estimator, scores_frame) where Score is an
    RMSE-like value (sqrt of the negated CV score), sorted ascending.
    NOTE(review): `regressors[scores.iloc[0].name]` relies on the frame's
    integer index matching the original regressor order — verify if the
    frame construction ever changes.
    """
    regressors = [
        AdaBoostRegressor(),
        # ARDRegression(),
        BaggingRegressor(),
        DecisionTreeRegressor(),
        ElasticNet(),
        ExtraTreeRegressor(),
        ExtraTreesRegressor(),
        # GaussianProcessRegressor(),
        GradientBoostingRegressor(),
        HuberRegressor(),
        KNeighborsRegressor(),
        Lasso(),
        LinearRegression(),
        # LogisticRegression(),
        MLPRegressor(),
        PassiveAggressiveRegressor(),
        PLSRegression(),
        # RadiusNeighborsRegressor(),
        RandomForestRegressor(),
        RANSACRegressor(),
        Ridge(),
        SGDRegressor(),
        TheilSenRegressor(),
    ]
    names = [reg.__class__.__name__ for reg in regressors]
    # cv = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=random_state)
    scores = {}
    for i, (name, reg) in enumerate(zip(names, regressors)):
        print('Processing {}...'.format(name))
        ss = cross_val_score(reg, X, y, scoring=scoring, cv=10)
        scores[name] = ss
    # for train_index, test_index in cv.split(X, y):
    #     X_train, X_test = X[train_index], X[test_index]
    #     y_train, y_test = y[train_index], y[test_index]
    #     try:
    #         clf.fit(X_train, y_train)
    #         train_predictions = clf.predict(X_test)
    #         rmse = np.sqrt(mean_squared_error(y_test, train_predictions))
    #     except:
    #         rmse = 0
    #     s = scores.get(name, [])
    #     s.append(acc)
    #     scores[name] = s
    # sqrt(-score) converts neg-MSE scores into RMSE for ranking.
    scores = [[n, np.sqrt(-s).mean()] for n, s in scores.items()]
    scores = pd.DataFrame(scores, columns=['Regressor', 'Score']).sort_values(by='Score', ascending=True)
    if show:
        print(scores)
    return scores.iloc[0, 0], regressors[scores.iloc[0].name], scores
def get_hyperparameters_model():
    """Return the model-registry entry for the PA regressor.

    The parameter distribution is intentionally empty (no tuning).
    """
    return {
        'passive_aggressive_regressor': {
            'model': PassiveAggressiveRegressor(),
            'param_distributions': {},
        }
    }
def get_models(models=None):
    """Populate and return a dict of baseline linear regression models.

    models: optional existing dict to extend; a fresh one is created
        when omitted.

    Bug fix: the original default `models=dict()` is evaluated once at
    definition time, so repeated calls silently shared and mutated the
    same dict; a None sentinel restores the intended fresh-dict default.
    """
    if models is None:
        models = dict()
    # linear models
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    print('Defined %d models' % len(models))
    return models
def ensure_many_models(self, clip_min=None, clip_max=None):
    """Smoke-test the estimator across many propensity/outcome learners.

    For every combination of a classifier (propensity model, wrapped in
    IPW) and a regressor (outcome model, wrapped in Standardization),
    fit on an uninformative dataset and estimate individual outcomes,
    asserting only that nothing crashes.
    """
    from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
    from sklearn.neural_network import MLPRegressor
    from sklearn.linear_model import ElasticNet, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.svm import SVR, LinearSVR
    from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
    from sklearn.neural_network import MLPClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.exceptions import ConvergenceWarning
    # The deliberately tiny learners converge poorly; silence the noise.
    warnings.filterwarnings('ignore', category=ConvergenceWarning)
    data = self.create_uninformative_ox_dataset()
    for propensity_learner in [
            GradientBoostingClassifier(n_estimators=10),
            RandomForestClassifier(n_estimators=100),
            MLPClassifier(hidden_layer_sizes=(5, )),
            KNeighborsClassifier(n_neighbors=20)]:
        weight_model = IPW(propensity_learner, clip_min=clip_min, clip_max=clip_max)
        # Learner name = repr up to the first '(' (for subTest labels).
        propensity_learner_name = str(propensity_learner).split("(", maxsplit=1)[0]
        for outcome_learner in [
                GradientBoostingRegressor(n_estimators=10),
                RandomForestRegressor(n_estimators=10),
                MLPRegressor(hidden_layer_sizes=(5, )),
                ElasticNet(), RANSACRegressor(), HuberRegressor(),
                PassiveAggressiveRegressor(), KNeighborsRegressor(),
                SVR(), LinearSVR()]:
            outcome_learner_name = str(outcome_learner).split("(", maxsplit=1)[0]
            outcome_model = Standardization(outcome_learner)
            with self.subTest("Test fit & predict using {} & {}".format(propensity_learner_name, outcome_learner_name)):
                model = self.estimator.__class__(outcome_model, weight_model)
                model.fit(data["X"], data["a"], data["y"], refit_weight_model=False)
                model.estimate_individual_outcome(data["X"], data["a"])
                self.assertTrue(True)  # Fit did not crash
def build_linear_model():
    """Create the linear pipeline: feature transform, scaling, then a
    passive-aggressive regressor with C=0.1."""
    steps = [
        ('ft', FeatureTransformer()),
        ('scaler', StandardScaler()),
        ('reg', PassiveAggressiveRegressor(C=0.1)),
    ]
    return Pipeline(steps)
def refit_from_scratch(self):
    """Rebuild the model and tag encoder from all feedback in the DB.

    Collects (tags, numerical features, feedback) rows, count-vectorises
    the tag strings, appends the numerical columns, then trains a fresh
    PassiveAggressiveRegressor row by row.

    Bug fix: the per-row partial_fit previously used Y[0] — the first
    target — for every sample; each row is now trained against its own
    target Y[i].
    """
    temp_model = PassiveAggressiveRegressor()
    temp_enc = CountVectorizer()
    X = []  # whitespace-joined tag strings (vectorised below)
    Z = []  # additional numerical data
    Y = []  # target (to predict) values
    db_size = self.db.size()
    for data in self.db.yield_all():
        feedback = data["feedback"]
        tags = data["tags"]
        if feedback and tags:
            Y.append(feedback)
            X.append(" ".join(tags))
            Z.append(self.fmt_numerical(data))
    X = temp_enc.fit_transform(X)
    X = hstack((X, coo_matrix(Z)))
    self.allX = X
    for i in range(X.shape[0]):
        temp_model.partial_fit(X.getrow(i), [Y[i]])
    self.model = temp_model
    self.enc = temp_enc
# Filter by coeficient variation var_thres = VarianceThreshold(best_var).fit(X_train_pre) X_train_pre = var_thres.transform(X_train_pre) X_test_pre = var_thres.transform(X_test_pre) for gene in genes: # Assemble prediction variables X_train = X_train_pre y_train = train_ess.ix[:, gene] X_test = X_test_pre # Feature selection fs = SelectKBest(f_regression, k=best_k).fit(X_train, y_train) X_train = fs.transform(X_train) X_test = fs.transform(X_test) # Estimation clf = PassiveAggressiveRegressor(epsilon=best_epsilon, n_iter=best_n_iter).fit(X_train, y_train) y_pred = clf.predict(X_test) # Store results predictions.ix[gene] = y_pred print gene filename = save_gct_data(predictions, submission_filename_prefix) print '[DONE]: Saved to file ' + filename submit_solution(filename, filename.split('/')[1], ev_code_sc1) print '[SUBMITED]'
spearman = make_scorer(spearm_cor_func, greater_is_better=True) # Assemble prediction variables X_train = X_train_pre.loc[:, important_features_top_100] X_test = X_test_pre.loc[:, important_features_top_100] for gene in prioritized_genes: y_train = train_ess.ix[:, gene] y_preds_test = [] y_preds_scores = [] # Training cv = ShuffleSplit(len(y_train), n_iter=5) for train_i, test_i in cv: clf = PassiveAggressiveRegressor(epsilon=0.01, n_iter=7).fit(X_train.ix[train_i, :], y_train[train_i]) y_preds_scores.append(spearm_cor_func(clf.predict(X_train.ix[test_i, :]), y_train[test_i])) y_preds_test.append(clf.predict(X_test)) y_preds_scores = Series(y_preds_scores) y_preds_test = DataFrame(y_preds_test) # Predict y_pred = np.mean(y_preds_test[y_preds_scores.notnull()], axis=0).values print gene, X_train.shape # Store results predictions.ix[gene] = y_pred filename_gct = save_gct_data(predictions, submission_filename_prefix)
# Fit several regressors on (x, y) and score xt; each model keeps its
# own *_sts_scores prediction array. NOTE(review): Python 2 syntax
# (print statements). Commented [:, np.newaxis] variants suggest x was
# once 1-D — confirm the current shape upstream.
br.fit(x, y)
br_sts_scores = br.predict(xt)
# Elastic Net
print 'elastic net'
enr = ElasticNet()
#enr.fit(x[:, np.newaxis], y)
#enr_sts_scores = enr.predict(xt[:, np.newaxis])
enr.fit(x, y)
enr_sts_scores = enr.predict(xt)
# Passive Aggressive Regression
print 'passive aggressive'
par = PassiveAggressiveRegressor()
par.fit(x, y)
par_sts_scores = par.predict(xt)
#par.fit(x[:, np.newaxis], y)
#par_sts_scores = par.predict(xt[:, np.newaxis])
# RANSAC Regression
print 'ransac'
ransac = RANSACRegressor()
#ransac.fit(x[:, np.newaxis], y)
#ransac_sts_scores = ransac.predict(xt[:, np.newaxis])
ransac.fit(x, y)
ransac_sts_scores = ransac.predict(xt)
# Logistic Regression
def main():
    """Benchmark hand-rolled PA / kernel-PA regressors against sklearn's
    PassiveAggressiveRegressor on a synthetic regression problem,
    reporting mean absolute error for each.

    NOTE(review): Python 2 code (print statements, xrange).
    """
    X, y, coef = make_regression(1000, 200, 10, 1, noise=0.05, coef=True,
                                 random_state=42)
    # X = np.column_stack((X, np.ones(X.shape[0])))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        random_state=42)
    # sca = StandardScaler()
    # sca.fit(X_train)
    # X_train = sca.transform(X_train)
    # X_test = sca.transform(X_test)
    # print X.shape
    # print y.shape
    # print coef.shape
    # Hyperparameter grids kept for the commented-out GridSearchCV runs.
    param_grid = {
        "C": [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 0.2,
              0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 10, 100, 1000],
        "epsilon": [0.0001, 0.001, 0.01, 0.1]}
    param_grid_kern = {
        "C": [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 0.2,
              0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 10, 100, 1000],
        "epsilon": [0.0001, 0.001, 0.01, 0.1],
        "gamma": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100]}
    # "loss": ["pa", "pai", "paii"]}}
    # Hand-rolled linear PA regressor.
    my_pa = PARegressor(loss="paii", C=1, epsilon=0.001, n_iter=1,
                        fit_intercept=False)
    # # search = GridSearchCV(my_pa, param_grid,
    # scoring='mean_absolute_error', n_jobs=8, iid=True, refit=True, cv=5,
    # verbose=1)
    # search.fit(X_train, y_train)
    # print search.best_params_
    my_pa.fit(X_train, y_train)
    print my_pa.coef_
    # y_preds = search.predict(X_test)
    y_preds = my_pa.predict(X_test)
    mae_my_pa = mean_absolute_error(y_test, y_preds)
    print "My PA MAE = %2.4f" % mae_my_pa
    # Kernelised PA with a linear kernel (should match the linear model).
    my_kpa_linear = KernelPARegressor(kernel="linear", loss="paii", C=1,
                                      epsilon=0.001, n_iter=1,
                                      fit_intercept=False)
    my_kpa_linear.fit(X_train, y_train)
    print "alphas", len(my_kpa_linear.alphas_), my_kpa_linear.alphas_
    y_preds = my_kpa_linear.predict(X_test)
    mae_kpa_linear = mean_absolute_error(y_test, y_preds)
    print "My KPA linear MAE = %2.4f" % mae_kpa_linear
    # Kernelised PA with an RBF kernel.
    my_kpa_rbf = KernelPARegressor(kernel="rbf", loss="paii", gamma=0.001,
                                   C=1, epsilon=0.001, n_iter=1,
                                   fit_intercept=False)
    # search = GridSearchCV(my_kpa_rbf, param_grid_kern,
    # scoring='mean_absolute_error', n_jobs=8, iid=True, refit=True, cv=5,
    # verbose=1)
    # search.fit(X_train, y_train)
    my_kpa_rbf.fit(X_train, y_train)
    print "alphas", len(my_kpa_rbf.alphas_), my_kpa_rbf.alphas_
    print "support", len(my_kpa_rbf.support_)
    # print "alphas", len(search.best_estimator_.alphas_) # , my_kpa_rbf.alphas_
    # print "support", len(search.best_estimator_.support_)
    # print search.best_params_
    y_preds = my_kpa_rbf.predict(X_test)
    # y_preds = search.predict(X_test)
    mae_my_kpa = mean_absolute_error(y_test, y_preds)
    print "My Kernel PA MAE = %2.4f" % mae_my_kpa
    # print search.best_estimator_
    # print np.corrcoef(search.best_estimator_.coef_, coef)
    # param_grid = {
    # "C": [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 10,
    # 100, 1000, 10000],
    # "epsilon": [0.0001, 0.001, 0.01, 0.1],
    # # "loss": ["epsilon_insensitive", "squared_epsilon_insensitive"]}
    # "loss": ["squared_epsilon_insensitive"]}
    # search = GridSearchCV(PassiveAggressiveRegressor(fit_intercept=True),
    # param_grid, scoring='mean_absolute_error', n_jobs=8, iid=True,
    # refit=True, cv=5, verbose=1)
    # search.fit(X_train, y_train)
    # Baseline: sklearn PA trained online, one sample per partial_fit.
    sk_pa = PassiveAggressiveRegressor(loss="squared_epsilon_insensitive",
                                       C=1, epsilon=0.001, n_iter=1,
                                       fit_intercept=False, warm_start=True)
    for i in xrange(X_train.shape[0]):
        # for x_i, y_i in zip(X_train, y_train):
        x = np.array(X_train[i], ndmin=2)
        y = np.array(y_train[i], ndmin=1)
        # print x.shape
        # print y
        sk_pa.partial_fit(x, y)
    # sk_pa.fit(X_train, y_train)
    # y_preds = search.predict(X_test)
    y_preds = sk_pa.predict(X_test)
    mae_sk_pa = mean_absolute_error(y_preds, y_test)
    print "Sklearn PA MAE = %2.4f" % mae_sk_pa
# Build sparse train/test matrices: category + comment features with
# sentiment (polarity, subjectivity) columns appended.
Xtrain = sp.hstack((Xtrain, sp.csr_matrix(sent_df[['polarity', 'subjectivity']].values)))
Xtest = sp.hstack((sp.coo_matrix(test_category_df.values), comm_test))
Xtest = sp.hstack((Xtest, sp.csr_matrix(test_sent_df[['polarity', 'subjectivity']].values)))
Ytrain = np.ravel(quality_df['quality'])
#Ytest = np.ravel(test_quality_df['quality'])
# A hold-out split is created here, but the five models below are fit
# on the FULL training set — presumably the split is used further down.
Xtr, Xte, Ytr, Yte = train_test_split(Xtrain, Ytrain, test_size=.25, random_state=0)
ids = test_ids.id
print("Training Models")
# Five linear regressors trained on identical data for later ensembling.
m1 = Ridge(normalize=True, alpha=0.001, solver='auto')
m2 = Lasso(normalize=False, alpha=0.0001, selection='cyclic', positive=False)
m3 = ElasticNet(normalize=False, alpha=0.0001, positive=False, l1_ratio=0.2)
m4 = PassiveAggressiveRegressor(epsilon=0.001, C=100, shuffle=True)
m5 = LinearRegression()
m1.fit(Xtrain, Ytrain)
print("Model 1 Finished")
m2.fit(Xtrain, Ytrain)
print("Model 2 Finished")
m3.fit(Xtrain, Ytrain)
print("Model 3 Finished")
m4.fit(Xtrain, Ytrain)
print("Model 4 Finished")
m5.fit(Xtrain, Ytrain)
print("Model 5 Finished")
models = [m1, m2, m3, m4, m5]
# Fit scalers/vectorisers on the training data, then transform both the
# train features and their test-set counterparts (t-prefixed names).
tfscaler = preprocessing.StandardScaler().fit(topicsfollowers)
quesparse = quevectorizer.fit_transform(question)
topsparse = topvectorizer.fit_transform(topics)
cfscaled = cfscaler.transform(contextfollowers)
tfscaled = tfscaler.transform(topicsfollowers)
tquesparse = quevectorizer.transform(tquestion)
ttopsparse = topvectorizer.transform(ttopics)
tcfscaled = cfscaler.transform(tcontextfollowers)
ttfscaled = tfscaler.transform(ttopicsfollowers)
# PA regressor trained on topic features only; negative predictions are
# clamped to zero.
par = PassiveAggressiveRegressor()
par.fit(topsparse, y)
pred = par.predict(ttopsparse)
pred[pred < 0] = 0
# Diagnostic histograms of the raw and log10 target distribution.
temp = pl.figure("train y")
temp = pl.subplot(2, 1, 1)
temp = pl.hist(y, 1000)
temp = pl.subplot(2, 1, 2)
yy = y.copy()
yy[yy == 0] = 1  # avoid log10(0)
temp = pl.hist(np.log10(yy), 1000)
temp = pl.figure("test y")
temp = pl.subplot(4, 1, 1)