# Thin wrapper class that defers to scikit-learn's LinearSVR.
# Assumes an import such as: from sklearn.svm import LinearSVR as SKLModel
class LinearSVRImpl():

    def __init__(self, epsilon=0.0, tol=0.0001, C=1.0, loss='epsilon_insensitive', fit_intercept=True, intercept_scaling=1.0, dual=True, verbose=0, random_state=None, max_iter=1000):
        self._hyperparams = {
            'epsilon': epsilon,
            'tol': tol,
            'C': C,
            'loss': loss,
            'fit_intercept': fit_intercept,
            'intercept_scaling': intercept_scaling,
            'dual': dual,
            'verbose': verbose,
            'random_state': random_state,
            'max_iter': max_iter}

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)
    # Alternative constructor from another revision of the wrapper: it builds
    # the estimator eagerly. Assumes an import such as:
    # from sklearn.svm import LinearSVR as Op
    def __init__(self, epsilon=0.0, tol=0.0001, C=1.0, loss='epsilon_insensitive', fit_intercept=True, intercept_scaling=1.0, dual=True, verbose=0, random_state=None, max_iter=1000):
        self._hyperparams = {
            'epsilon': epsilon,
            'tol': tol,
            'C': C,
            'loss': loss,
            'fit_intercept': fit_intercept,
            'intercept_scaling': intercept_scaling,
            'dual': dual,
            'verbose': verbose,
            'random_state': random_state,
            'max_iter': max_iter}
        self._wrapped_model = Op(**self._hyperparams)
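# A minimal usage sketch for the LinearSVRImpl wrapper above, assuming the
# class and its SKLModel import are in scope; the data here is synthetic.
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=10, noise=0.1, random_state=0)
model = LinearSVRImpl(C=1.0, epsilon=0.1, max_iter=5000).fit(X, y)
print(model.predict(X[:5]))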
#### Saving the models to the system
# LinearSVR was found to be the best model for the process_step, problem_type
# and contributing_factor datasets.
# Assumes: import pandas; from sklearn.multioutput import MultiOutputRegressor;
# from sklearn.svm import LinearSVR; from joblib import dump

## Process Step
X_ps_train = pandas.read_csv('../out/train/X_PS_train.csv', delimiter=',', encoding='latin-1')
Y_ps_train = pandas.read_csv('../out/train/Y_PS_train.csv', delimiter=',', encoding='latin-1')
ps_model = MultiOutputRegressor(
    LinearSVR(C=0.2, dual=True, epsilon=0.4, fit_intercept=False,
              loss='squared_epsilon_insensitive', max_iter=1000, tol=0.01))
ps_model.fit(X_ps_train, Y_ps_train)
dump(ps_model, '../out/Process-step_Model')

## Problem type
X_pt_train = pandas.read_csv('../out/train/X_PT_train.csv', delimiter=',', encoding='latin-1')
Y_pt_train = pandas.read_csv('../out/train/Y_PT_train.csv', delimiter=',', encoding='latin-1')
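# Sketch of reloading a persisted model for inference, assuming joblib's dump()
# was used above; the path mirrors the one passed to dump.
from joblib import load

ps_model = load('../out/Process-step_Model')
Y_ps_pred = ps_model.predict(X_ps_train)  # reuse the training frame just to illustrate the call
print(Y_ps_pred.shape)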
    'LSHForest': LSHForest(),
    'LabelPropagation': LabelPropagation(),
    'LabelSpreading': LabelSpreading(),
    'Lars': Lars(),
    'LarsCV': LarsCV(),
    'Lasso': Lasso(),
    'LassoCV': LassoCV(),
    'LassoLars': LassoLars(),
    'LassoLarsCV': LassoLarsCV(),
    'LassoLarsIC': LassoLarsIC(),
    'LatentDirichletAllocation': LatentDirichletAllocation(),
    'LedoitWolf': LedoitWolf(),
    'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis(),
    'LinearRegression': LinearRegression(),
    'LinearSVC': LinearSVC(),
    'LinearSVR': LinearSVR(),
    'LocallyLinearEmbedding': LocallyLinearEmbedding(),
    'LogisticRegression': LogisticRegression(),
    'LogisticRegressionCV': LogisticRegressionCV(),
    'MDS': MDS(),
    'MLPClassifier': MLPClassifier(),
    'MLPRegressor': MLPRegressor(),
    'MaxAbsScaler': MaxAbsScaler(),
    'MeanShift': MeanShift(),
    'MinCovDet': MinCovDet(),
    'MinMaxScaler': MinMaxScaler(),
    'MiniBatchDictionaryLearning': MiniBatchDictionaryLearning(),
    'MiniBatchKMeans': MiniBatchKMeans(),
    'MiniBatchSparsePCA': MiniBatchSparsePCA(),
    'MultiTaskElasticNet': MultiTaskElasticNet(),
    'MultiTaskElasticNetCV': MultiTaskElasticNetCV(),
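# Hypothetical sketch of how a name -> estimator registry like the fragment
# above can be consumed: look a model up by name, clone it, and fit on data.
# The one-entry registry here is a stand-in for the full dictionary.
from sklearn.base import clone
from sklearn.datasets import make_regression
from sklearn.svm import LinearSVR

registry = {'LinearSVR': LinearSVR()}
X, y = make_regression(n_samples=100, n_features=5, random_state=0)
model = clone(registry['LinearSVR']).fit(X, y)
print(model.score(X, y))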
# Assumes an alias such as: from sklearn import linear_model as linearmodels
scalerNorm = Normalizer(norm='l2')
scalerStandard = StandardScaler().fit(features)
# scalerX.fit(features)
# features = scalerX.transform(features)
features = scalerStandard.transform(features)
print(features.shape)

Lars_cv = linearmodels.LarsCV(cv=6).fit(features, y)
Lasso_cv = linearmodels.LassoCV(cv=6).fit(features, y)
alphas = np.linspace(Lars_cv.alphas_[0], .1 * Lars_cv.alphas_[0], 6)
# RandomizedLasso was removed in scikit-learn 0.21; this line requires an older release.
Randomized_lasso = linearmodels.RandomizedLasso(alpha=alphas, random_state=42)
linear_regression = linearmodels.LinearRegression()
linear_SVR = LinearSVR(loss='squared_epsilon_insensitive')
featureselector_Lars = feature_selection.SelectFromModel(Lars_cv, prefit=True)
featureselector_Lasso = feature_selection.SelectFromModel(Lasso_cv, prefit=True)
featureselector_RLasso = Randomized_lasso.fit(features, y)
print(Lars_cv.coef_)
print(Lasso_cv.coef_)
print(Randomized_lasso.scores_)
scoreoffeature = pd.DataFrame(
    [Lars_cv.coef_, Lasso_cv.coef_, Randomized_lasso.scores_],
    columns=featurenames,
    index=['Lars', 'Lasso', 'Randomized_lasso'])
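# Sketch of applying one of the selectors above: SelectFromModel with
# prefit=True can transform immediately, keeping only the features whose
# fitted coefficients clear its threshold.
reduced = featureselector_Lars.transform(features)
print(features.shape, '->', reduced.shape)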
features = ['ope', 'con', 'ext', 'agr', 'neu']
featureSize = [40, 40, 40, 40, 40]
costs = [1, 1, 1, 1, 1]
ga = [0.0001, 0.0001, 0.00001, 0.0001, 'auto']
userDF.featureData.rename(columns={'userId': 'userid'}, inplace=True)
userFeature = pd.merge(userDF.featureData, userDF.userData, on='userid', how='right')
# enumerate advances i each iteration; the original set i = 0 and never incremented it
for i, feature in enumerate(features):
    '''selector = feature_selection.SelectKBest(score_func=feature_selection.f_regression, k=featureSize[i])'''
    clff = LinearSVR(loss='squared_epsilon_insensitive', C=costs[i])
    X = userFeature.loc[:, 'WC':'AllPct']  # .ix was removed from pandas; .loc keeps the label-slice behaviour
    X = scalerX.transform(X)
    # print(userDF.userData)
    y = userFeature.loc[:, feature]
    lars_cv = linear_model.LassoLarsCV(cv=6).fit(X, y)
    selector = feature_selection.SelectFromModel(lars_cv, prefit=True)
    X = selector.transform(X)
    selectors.append(selector)
    print(feature)
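    # Hypothetical continuation of the loop body above: fit the per-trait
    # regressor on the reduced matrix and report a cross-validated RMSE.
    # Assumes cross_val_score (sklearn.model_selection) and numpy as np are imported.
    scores = cross_val_score(clff, X, y, cv=10, scoring='neg_mean_squared_error')
    print(feature, np.sqrt(-scores.mean()))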
# X = treeSelector.transform(X)
X2 = treeSelector.transform(X)
X = treeScore.fit_transform(X, yf)
# print(lars_cv.coef_)
print(X2.shape)
print(X.shape)
# print(yf.shape)
sumAcc = 0
count = 0
MNBfeature = MultinomialNB()
linearsvrfeature = LinearSVR(loss='squared_epsilon_insensitive', C=testcost)
# LinearSVC only accepts loss='hinge' or 'squared_hinge';
# 'squared_epsilon_insensitive' is a LinearSVR loss and raises a ValueError here.
linearsvcfeature = LinearSVC(loss='squared_hinge', C=testcost)
lasso = linear_model.Lasso(alpha=alphas[0])
print(alphas)
# linearsvrfeature.fit(X, yf)
# print(cross_val_score(MNBfeature, X, ya, cv=10).sum() / 10)
# print(cross_val_score(linearsvcfeature, X, ya, cv=10).sum() / 10)
# print(cross_val_score(MNBfeature, X, yg, cv=10).sum() / 10)
# print(cross_val_score(linearsvcfeature, X, yg, cv=10).sum() / 10)
# print(cross_val_score(svcGender, X, yg, cv=5).sum() / 5)
print(testfeature)
print('SVM')
print('larsCV')
print(X2.shape)
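# Sketch of the commented-out evaluation above for the regression target,
# using cross_val_score's .mean() rather than sum()/k; X, yf and testcost
# are assumed to be defined as in the snippet.
from sklearn.model_selection import cross_val_score

svr_scores = cross_val_score(
    LinearSVR(loss='squared_epsilon_insensitive', C=testcost), X, yf, cv=10)
print(svr_scores.mean())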
        for correct_option in correct_options
    ]) + 1  # check how far down the dropdown list that index is, and return that value


def average_lowest_correct(list_of_trues, list_of_preds):
    length = len(list_of_trues)  # number of data points
    return np.mean([
        lowest_correct(list(list_of_trues.iloc[i]), list(list_of_preds[i]))
        for i in range(length)
    ])


# Top four models selected, each wrapped as a pipeline to be used for grid search
model_1 = Pipeline([('md1', MultiOutputRegressor(Ridge()))])
model_2 = Pipeline([('md2', MultiOutputRegressor(KernelRidge()))])
model_3 = Pipeline([('md3', MultiOutputRegressor(LinearSVR()))])
model_4 = Pipeline([('md4', MultiOutputRegressor(SGDRegressor()))])

# Dictionary of all the variable hyperparameters for all four models.
# Except for the SGD regressor, the hyperparameter list is complete.
model_params = {
    'Multi_Ridge': {
        'model': model_1,
        'params': {
            'md1__estimator__normalize': [True, False],
            'md1__estimator__fit_intercept': [True, False],
            'md1__estimator__solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'],
            'md1__estimator__alpha': [i for i in range(10, 110, 10)],
            'md1__estimator__max_iter': [1000, 2000, 3000]
        }
    },
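# Sketch of consuming a model_params dictionary like the one above (shown here
# truncated after its first entry) with GridSearchCV. X_train and Y_train are
# hypothetical training frames; the scorer choice is an assumption.
from sklearn.model_selection import GridSearchCV

results = {}
for name, spec in model_params.items():
    search = GridSearchCV(spec['model'], spec['params'], cv=5,
                          scoring='neg_mean_squared_error')
    search.fit(X_train, Y_train)
    results[name] = (search.best_score_, search.best_params_)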