Example #1
# SKLModel is assumed to alias scikit-learn's LinearSVR; the original snippet omits the import.
from sklearn.svm import LinearSVR as SKLModel


class LinearSVRImpl:
    def __init__(self,
                 epsilon=0.0,
                 tol=0.0001,
                 C=1.0,
                 loss='epsilon_insensitive',
                 fit_intercept=True,
                 intercept_scaling=1.0,
                 dual=True,
                 verbose=0,
                 random_state=None,
                 max_iter=1000):
        self._hyperparams = {
            'epsilon': epsilon,
            'tol': tol,
            'C': C,
            'loss': loss,
            'fit_intercept': fit_intercept,
            'intercept_scaling': intercept_scaling,
            'dual': dual,
            'verbose': verbose,
            'random_state': random_state,
            'max_iter': max_iter
        }

    def fit(self, X, y=None):
        # Build a fresh scikit-learn estimator on every fit so no stale state is reused.
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)
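
A minimal usage sketch of the wrapper on toy data (the data and values below are illustrative, not part of the original example):

# Hypothetical usage of LinearSVRImpl.
import numpy as np

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0.1, 1.1, 1.9, 3.2])

model = LinearSVRImpl(C=1.0, random_state=0).fit(X, y)
print(model.predict(np.array([[4.0]])))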
Example #2
 def fit(self, X, y=None):
     # Re-create the underlying scikit-learn model from the stored hyperparameters.
     self._sklearn_model = SKLModel(**self._hyperparams)
     if y is not None:
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Example #3
 def __init__(self,
              epsilon=0.0,
              tol=0.0001,
              C=1.0,
              loss='epsilon_insensitive',
              fit_intercept=True,
              intercept_scaling=1.0,
              dual=True,
              verbose=0,
              random_state=None,
              max_iter=1000):
     self._hyperparams = {
         'epsilon': epsilon,
         'tol': tol,
         'C': C,
         'loss': loss,
         'fit_intercept': fit_intercept,
         'intercept_scaling': intercept_scaling,
         'dual': dual,
         'verbose': verbose,
         'random_state': random_state,
         'max_iter': max_iter}
     # Op is the wrapped estimator class (in context, scikit-learn's LinearSVR).
     self._wrapped_model = Op(**self._hyperparams)
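
As a rough, self-contained illustration of the same wrapper pattern (Op is assumed to be bound to sklearn.svm.LinearSVR; the snippet does not show the import):

# Sketch only: Op is assumed to alias scikit-learn's LinearSVR.
from sklearn.svm import LinearSVR as Op

class WrappedLinearSVR:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y):
        self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)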
Example #4
#### Saving the models to the system
# LinearSVR was found to be the best model for the process_step, problem_type and contributing_factor datasets.
import pandas
from joblib import dump  # assumed persistence backend; the original snippet omits its imports
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import LinearSVR

## Process Step
X_ps_train = pandas.read_csv('../out/train/X_PS_train.csv',
                             delimiter=',',
                             encoding='latin-1')
Y_ps_train = pandas.read_csv('../out/train/Y_PS_train.csv',
                             delimiter=',',
                             encoding='latin-1')

ps_model = MultiOutputRegressor(
    LinearSVR(C=0.2,
              dual=True,
              epsilon=0.4,
              fit_intercept=False,
              loss='squared_epsilon_insensitive',
              max_iter=1000,
              tol=0.01))
ps_model.fit(X_ps_train, Y_ps_train)

dump(ps_model, '../out/Process-step_Model')
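
The saved model can later be restored the same way; a minimal load-back sketch, again assuming joblib persistence:

# Hypothetical check that the dumped model round-trips.
from joblib import load

restored = load('../out/Process-step_Model')
print(restored.predict(X_ps_train[:5]))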

## Problem type
X_pt_train = pandas.read_csv('../out/train/X_PT_train.csv',
                             delimiter=',',
                             encoding='latin-1')
Y_pt_train = pandas.read_csv('../out/train/Y_PT_train.csv',
                             delimiter=',',
                             encoding='latin-1')
Example #5
			'LSHForest':LSHForest(),
			'LabelPropagation':LabelPropagation(),
			'LabelSpreading':LabelSpreading(),
			'Lars':Lars(),
			'LarsCV':LarsCV(),
			'Lasso':Lasso(),
			'LassoCV':LassoCV(),
			'LassoLars':LassoLars(),
			'LassoLarsCV':LassoLarsCV(),
			'LassoLarsIC':LassoLarsIC(),
			'LatentDirichletAllocation':LatentDirichletAllocation(),
			'LedoitWolf':LedoitWolf(),
			'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(),
			'LinearRegression':LinearRegression(),
			'LinearSVC':LinearSVC(),
			'LinearSVR':LinearSVR(),
			'LocallyLinearEmbedding':LocallyLinearEmbedding(),
			'LogisticRegression':LogisticRegression(),
			'LogisticRegressionCV':LogisticRegressionCV(),
			'MDS':MDS(),
			'MLPClassifier':MLPClassifier(),
			'MLPRegressor':MLPRegressor(),
			'MaxAbsScaler':MaxAbsScaler(),
			'MeanShift':MeanShift(),
			'MinCovDet':MinCovDet(),
			'MinMaxScaler':MinMaxScaler(),
			'MiniBatchDictionaryLearning':MiniBatchDictionaryLearning(),
			'MiniBatchKMeans':MiniBatchKMeans(),
			'MiniBatchSparsePCA':MiniBatchSparsePCA(),
			'MultiTaskElasticNet':MultiTaskElasticNet(),
			'MultiTaskElasticNetCV':MultiTaskElasticNetCV(),
scalerNorm = Normalizer(norm='l2')  # defined but not used below
scalerStandard = StandardScaler().fit(features)
#scalerX.fit(features)
#features = scalerX.transform(features)
features = scalerStandard.transform(features)

print(features.shape)

Lars_cv = linearmodels.LarsCV(cv=6).fit(features, y)
Lasso_cv = linearmodels.LassoCV(cv=6).fit(features, y)
alphas = np.linspace(Lars_cv.alphas_[0], .1 * Lars_cv.alphas_[0], 6)
Randomized_lasso = linearmodels.RandomizedLasso(alpha=alphas, random_state=42)

linear_regression = linearmodels.LinearRegression()
linear_SVR = LinearSVR(loss='squared_epsilon_insensitive')

featureselector_Lars = feature_selection.SelectFromModel(Lars_cv, prefit=True)
featureselector_Lasso = feature_selection.SelectFromModel(Lasso_cv,
                                                          prefit=True)
featureselector_RLasso = Randomized_lasso.fit(features, y)

print(Lars_cv.coef_)
print(Lasso_cv.coef_)
print(Randomized_lasso.scores_)

scoreoffeature = pd.DataFrame(
    [Lars_cv.coef_, Lasso_cv.coef_, Randomized_lasso.scores_],
    columns=featurenames,
    index=['Lars', 'Lasso', 'Randomized_lasso'])
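
The Lars and Lasso selectors built above are never applied in this fragment; a short sketch of the follow-on step (variable names mirror the snippet, so this is an assumption about intent):

# Hypothetical continuation: apply the prefit selectors to the scaled features.
X_lars = featureselector_Lars.transform(features)
X_lasso = featureselector_Lasso.transform(features)
print(X_lars.shape, X_lasso.shape)  # each keeps only the selected columns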
    features = ['ope', 'con', 'ext', 'agr', 'neu']
    featureSize = [40, 40, 40, 40, 40]
    costs = [1, 1, 1, 1, 1]
    ga = [0.0001, 0.0001, 0.00001, 0.0001, 'auto']

    userDF.featureData.rename(columns={'userId': 'userid'}, inplace=True)
    userFeature = pd.merge(userDF.featureData, userDF.userData, on='userid', how='right')

    selectors = []  # assumed initialisation for the per-trait selectors collected below
    for i, feature in enumerate(features):  # enumerate so costs[i]/featureSize[i]/ga[i] follow the trait

        '''selector = feature_selection.SelectKBest(score_func=feature_selection.f_regression
                                                 ,k=featureSize[i])'''

        clff = LinearSVR(loss='squared_epsilon_insensitive', C=costs[i])

        X = userFeature.loc[:, 'WC':'AllPct']  # .ix was removed from pandas; .loc is the label-based equivalent
        X = scalerX.transform(X)  # scalerX: scaler assumed fitted earlier in the original script

        #print(userDF.userData)

        y = userFeature.loc[:, feature]

        lars_cv = linear_model.LassoLarsCV(cv=6).fit(X, y)
        selector = feature_selection.SelectFromModel(lars_cv, prefit=True)

        X = selector.transform(X)

        selectors.append(selector)
        print(feature)
#X=treeSelector.transform(X)

X2 = treeSelector.transform(X)
X = treeScore.fit_transform(X, yf)

#print(lars_cv.coef_)
print(X2.shape)
print(X.shape)
#print(yf.shape)

sumAcc = 0
count = 0

MNBfeature = MultinomialNB()
linearsvrfeature = LinearSVR(loss='squared_epsilon_insensitive', C=testcost)
linearsvcfeature = LinearSVC(loss='squared_hinge', C=testcost)  # LinearSVC only accepts 'hinge' or 'squared_hinge'
lasso = linear_model.Lasso(alpha=alphas[0])
print(alphas)
#linearsvrfeature.fit(X, yf)

#print(cross_val_score(MNBfeature,X,ya,cv=10).sum()/10)
#print(cross_val_score(linearsvcfeature,X,ya,cv=10).sum()/10)
#print(cross_val_score(MNBfeature,X,yg,cv=10).sum()/10)
#print(cross_val_score(linearsvcfeature,X,yg,cv=10).sum()/10)
#print(cross_val_score(svcGender,X,yg,cv=5).sum()/5)

print(testfeature)
print('SVM')
print('larsCV')
print(X2.shape)
        for correct_option in correct_options
    ]) + 1  # check how far down the dropdown list that index sits and return that 1-based position


def average_lowest_correct(list_of_trues, list_of_preds):
    length = len(list_of_trues)  # number of data points
    return np.mean([
        lowest_correct(list(list_of_trues.iloc[i]), list(list_of_preds[i]))
        for i in range(length)
    ])
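
For orientation, a toy call; lowest_correct is reconstructed here under the assumption that it returns the 1-based rank of the deepest correct option (its full definition is cut off above):

# Assumed reconstruction of lowest_correct, plus a toy call.
import numpy as np
import pandas as pd

def lowest_correct(correct_options, preds):
    # Deepest (max) 0-based index of any correct option, shifted to 1-based.
    return max(preds.index(c) for c in correct_options) + 1

trues_df = pd.DataFrame([['a'], ['b']])  # one correct option per data point
preds = [['a', 'b', 'c'], ['c', 'b', 'a']]
print(average_lowest_correct(trues_df, preds))  # (1 + 2) / 2 = 1.5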


# Top four models, each wrapped in a pipeline to be used for grid search (a tuning sketch follows below)
model_1 = Pipeline([('md1', MultiOutputRegressor(Ridge()))])
model_2 = Pipeline([('md2', MultiOutputRegressor(KernelRidge()))])
model_3 = Pipeline([('md3', MultiOutputRegressor(LinearSVR()))])
model_4 = Pipeline([('md4', MultiOutputRegressor(SGDRegressor()))])
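
To show how one of these pipelines is tuned, a minimal grid-search sketch over model_3 with an illustrative parameter grid (not the author's values):

# Hypothetical tuning run for the LinearSVR pipeline.
from sklearn.model_selection import GridSearchCV

search = GridSearchCV(model_3,
                      {'md3__estimator__C': [0.1, 1.0, 10.0],
                       'md3__estimator__epsilon': [0.0, 0.1]},
                      cv=3)
# search.fit(X_train, Y_train)  # training data assumed from earlier in the script
# print(search.best_params_)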

# Dictionary of all the variable hyperparameters for all four models. Except for the SGD regressor, the hyperparameter list is complete.
model_params = {
    'Multi_Ridge': {
        'model': model_1,
        'params': {
            'md1__estimator__normalize': [True, False],
            'md1__estimator__fit_intercept': [True, False],
            'md1__estimator__solver':
            ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'],
            'md1__estimator__alpha': [i for i in range(10, 110, 10)],
            'md1__estimator__max_iter': [1000, 2000, 3000]
        }
    },