예제 #1
0
    def fit(self, data, args):
        """Fit a fresh ``MaxAbsScaler`` on the training split, timing the fit.

        Args:
            data: Object exposing ``X_train`` and ``y_train`` attributes.
            args: Not used by this method.

        Returns:
            The ``interval`` attribute of the ``Timer`` that wrapped the
            ``fit`` call (presumably the elapsed time -- confirm against
            the ``Timer`` implementation).
        """
        scaler = MaxAbsScaler()
        self.model = scaler

        with Timer() as timer:
            scaler.fit(data.X_train, data.y_train)

        # The interval is read only once the ``with`` block has exited.
        return timer.interval
예제 #2
0
def test_maxabs_scaler_large_negative_value():
    """Check MaxAbsScaler on toy data with a large negative value."""
    # The third column contains -100.0, which the expected output maps
    # to -1.0; the fourth column's extreme value is -2.0.
    toy_data = [
        [0., 1., +0.5, -1.0],
        [0., 1., -0.3, -0.5],
        [0., 1., -100.0, 0.0],
        [0., 0., +0.0, -2.0],
    ]
    expected = [
        [0., 1., 0.005, -0.5],
        [0., 1., -0.003, -0.25],
        [0., 1., -1.0, 0.0],
        [0., 0., 0.0, -1.0],
    ]

    actual = MaxAbsScaler().fit_transform(toy_data)

    assert_array_almost_equal(actual, expected)
예제 #3
0
class MaxAbsScalerImpl:
    """Thin wrapper that delegates ``fit``/``transform`` to an ``SKLModel``."""

    def __init__(self, copy=True):
        """Store the hyperparameters and build the wrapped model from them.

        Args:
            copy: Forwarded to ``SKLModel`` as the ``copy`` keyword.
        """
        hyperparams = {'copy': copy}
        self._hyperparams = hyperparams
        self._wrapped_model = SKLModel(**hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded to the wrapped model only when it is not ``None``.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's ``transform`` of ``X``."""
        wrapped = self._wrapped_model
        return wrapped.transform(X)
예제 #4
0
def test_maxabs_scaler_large_negative_value():
    """Check MaxAbsScaler on toy data with a large negative value."""
    samples = [[0., 1., +0.5, -1.0], [0., 1., -0.3, -0.5],
               [0., 1., -100.0, 0.0], [0., 0., +0.0, -2.0]]

    scaled = MaxAbsScaler().fit_transform(samples)

    # The -100.0 entry is expected to become -1.0, and -2.0 to become -1.0.
    assert_array_almost_equal(
        scaled,
        [[0., 1., 0.005, -0.5], [0., 1., -0.003, -0.25],
         [0., 1., -1.0, 0.0], [0., 0., 0.0, -1.0]],
    )
예제 #5
0
 def fit(self, X, y=None):
     """Build a new ``SKLModel`` from the stored hyperparameters and fit it.

     The model is stored on ``self._sklearn_model`` before fitting. ``y``
     is forwarded only when it is not ``None``. Returns ``self``.
     """
     model = SKLModel(**self._hyperparams)
     self._sklearn_model = model
     fit_args = (X,) if y is None else (X, y)
     model.fit(*fit_args)
     return self
예제 #6
0
def test_maxabs_scaler_zero_variance_features():
    """Check MaxAbsScaler on toy data with zero variance features.

    Covers the dense round trip (transform + inverse), transformation of
    unseen data with the fitted scaler, and the same round trip on a CSR
    sparse matrix.
    """
    # First column is all zeros; the expected output leaves it at zero.
    X = [[0., 1., +0.5], [0., 1., -0.3], [0., 1., +1.5], [0., 0., +0.0]]

    scaler = MaxAbsScaler()
    X_trans = scaler.fit_transform(X)
    X_expected = [[0., 1., 1.0 / 3.0], [0., 1., -0.2], [0., 1., 1.0],
                  [0., 0., 0.0]]
    assert_array_almost_equal(X_trans, X_expected)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv)

    # make sure new data gets transformed correctly
    X_new = [[+0., 2., 0.5], [-1., 1., 0.0], [+0., 1., 1.5]]
    X_trans_new = scaler.transform(X_new)
    X_expected_new = [[+0., 2.0, 1.0 / 3.0], [-1., 1.0, 0.0], [+0., 1.0, 1.0]]

    assert_array_almost_equal(X_trans_new, X_expected_new, decimal=2)

    # sparse data: the expected output is the same as in the dense case
    X_csr = sparse.csr_matrix(X)
    X_trans = scaler.fit_transform(X_csr)
    # ``toarray()`` is used instead of the ``.A`` shorthand, which newer
    # SciPy releases no longer provide on sparse matrices.
    assert_array_almost_equal(X_trans.toarray(), X_expected)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv.toarray())
예제 #7
0
def test_warning_scaling_integers():
    """Check that scaling integer data emits a ``DataConversionWarning``."""
    int_data = np.array([[1, 2, 0], [0, 0, 0]], dtype=np.uint8)
    expected_msg = "Data with input dtype uint8 was converted to float64"

    clean_warning_registry()

    # The ``scale`` function first, then the ``fit`` method of each scaler.
    assert_warns_message(DataConversionWarning, expected_msg, scale, int_data)
    for scaler_cls in (StandardScaler, MinMaxScaler, MaxAbsScaler):
        assert_warns_message(DataConversionWarning, expected_msg,
                             scaler_cls().fit, int_data)
예제 #8
0
class CreateMaxAbsScaler(CreateModel):
    """Fits a ``MaxAbsScaler`` and times both fitting and transformation."""

    def fit(self, data, args):
        """Fit a fresh ``MaxAbsScaler`` on the training split.

        Args:
            data: Object exposing ``X_train`` and ``y_train`` attributes.
            args: Not used by this method.

        Returns:
            The ``interval`` attribute of the ``Timer`` wrapping the fit.
        """
        scaler = MaxAbsScaler()
        self.model = scaler

        with Timer() as timer:
            scaler.fit(data.X_train, data.y_train)

        return timer.interval

    def test(self, data):
        """Return ``data.X_test`` transformed by the fitted model."""
        assert self.model is not None

        transformed = self.model.transform(data.X_test)
        return transformed

    def predict(self, data):
        """Run ``test`` under a timer and store its result.

        The output is saved on ``self.predictions`` and
        ``data.learning_task`` is set to ``LearningTask.REGRESSION``.

        Returns:
            The ``interval`` attribute of the ``Timer`` wrapping ``test``.
        """
        with Timer() as timer:
            self.predictions = self.test(data)

        data.learning_task = LearningTask.REGRESSION
        return timer.interval
예제 #9
0
def make_models(X, y, y_bin):
    """Build a dictionary of small scikit-learn estimators keyed by short name.

    Every hyperparameter is passed by keyword: scikit-learn has made most
    constructor arguments keyword-only (e.g. ``Binarizer``'s ``threshold``),
    and the keyword spelling works on old and new releases alike.

    Args:
        X: Feature matrix used to fit every fitted estimator.
        y: Targets used for the regressors and multi-target classifiers.
        y_bin: Targets used for the ``*_bin`` / binary classifiers
            (presumably a binary version of ``y`` -- confirm with callers).

    Returns:
        dict mapping a short name to an estimator.  All entries are fitted
        except ``bin``, ``n1``, ``n2`` and ``n3``, which are returned
        unfitted.
    """
    # NOTE: entries are evaluated (and therefore fitted) in the order
    # written; keep the order stable because several estimators do not fix
    # ``random_state``.
    return dict(ols=LinearRegression().fit(X, y),
                lr_bin=LogisticRegression().fit(X, y_bin),
                lr_ovr=LogisticRegression(multi_class='ovr').fit(X, y),
                lr_mn=LogisticRegression(solver='lbfgs',
                                         multi_class='multinomial').fit(X, y),
                svc=SVC(kernel='linear').fit(X, y_bin),
                svr=SVR(kernel='linear').fit(X, y),
                dtc=DecisionTreeClassifier(max_depth=4).fit(X, y),
                dtr=DecisionTreeRegressor(max_depth=4).fit(X, y),
                rfc=RandomForestClassifier(n_estimators=3,
                                           max_depth=3,
                                           random_state=1).fit(X, y),
                rfr=RandomForestRegressor(n_estimators=3,
                                          max_depth=3,
                                          random_state=1).fit(X, y),
                gbc=GradientBoostingClassifier(n_estimators=3,
                                               max_depth=3,
                                               random_state=1).fit(X, y),
                gbr=GradientBoostingRegressor(n_estimators=3,
                                              max_depth=3,
                                              random_state=1).fit(X, y),
                abc=AdaBoostClassifier(algorithm='SAMME',
                                       n_estimators=3,
                                       random_state=1).fit(X, y),
                abc2=AdaBoostClassifier(algorithm='SAMME.R',
                                        n_estimators=3,
                                        random_state=1).fit(X, y),
                abc3=AdaBoostClassifier(algorithm='SAMME',
                                        n_estimators=3,
                                        random_state=1).fit(X, y_bin),
                abc4=AdaBoostClassifier(algorithm='SAMME.R',
                                        n_estimators=3,
                                        random_state=1).fit(X, y_bin),
                km=KMeans(n_clusters=1).fit(X),
                km2=KMeans(n_clusters=5).fit(X),
                pc1=PCA(n_components=1).fit(X),
                pc2=PCA(n_components=2).fit(X),
                pc3=PCA(n_components=2, whiten=True).fit(X),
                mlr1=MLPRegressor(hidden_layer_sizes=[2],
                                  activation='relu').fit(X, y),
                mlr2=MLPRegressor(hidden_layer_sizes=[2, 1],
                                  activation='tanh').fit(X, y),
                mlr3=MLPRegressor(hidden_layer_sizes=[2, 2, 2],
                                  activation='identity').fit(X, y),
                mlc=MLPClassifier(hidden_layer_sizes=[2, 2],
                                  activation='tanh').fit(X, y),
                mlc_bin=MLPClassifier(hidden_layer_sizes=[2, 2],
                                      activation='identity').fit(X, y_bin),
                bin=Binarizer(threshold=0.5),
                mms=MinMaxScaler().fit(X),
                mas=MaxAbsScaler().fit(X),
                ss1=StandardScaler().fit(X),
                ss2=StandardScaler(with_mean=False).fit(X),
                ss3=StandardScaler(with_std=False).fit(X),
                n1=Normalizer(norm='l1'),
                n2=Normalizer(norm='l2'),
                n3=Normalizer(norm='max'))
예제 #10
0
def test_maxabs_scaler_zero_variance_features():
    """Check MaxAbsScaler on toy data with zero variance features.

    Exercises the dense fit/transform/inverse round trip, transformation
    of new data with the already-fitted scaler, and the same round trip
    on CSR sparse input.
    """
    # The first column is constant zero and is expected to stay zero.
    X = [[0., 1., +0.5],
         [0., 1., -0.3],
         [0., 1., +1.5],
         [0., 0., +0.0]]

    scaler = MaxAbsScaler()
    X_trans = scaler.fit_transform(X)
    X_expected = [[0., 1., 1.0 / 3.0],
                  [0., 1., -0.2],
                  [0., 1., 1.0],
                  [0., 0., 0.0]]
    assert_array_almost_equal(X_trans, X_expected)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv)

    # make sure new data gets transformed correctly
    X_new = [[+0., 2., 0.5],
             [-1., 1., 0.0],
             [+0., 1., 1.5]]
    X_trans_new = scaler.transform(X_new)
    X_expected_new = [[+0., 2.0, 1.0 / 3.0],
                      [-1., 1.0, 0.0],
                      [+0., 1.0, 1.0]]

    assert_array_almost_equal(X_trans_new, X_expected_new, decimal=2)

    # sparse data
    X_csr = sparse.csr_matrix(X)
    X_trans = scaler.fit_transform(X_csr)
    X_expected = [[0., 1., 1.0 / 3.0],
                  [0., 1., -0.2],
                  [0., 1., 1.0],
                  [0., 0., 0.0]]
    # Densify with ``toarray()``: the ``.A`` shorthand is not available on
    # sparse matrices in newer SciPy releases.
    assert_array_almost_equal(X_trans.toarray(), X_expected)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv.toarray())
예제 #11
0
            ('w2v_title_content', Word2VecTitleContent()),
            ('sw', StopWordsCount()),
            ('sw_title', StopWordsTitle()),
            ('pmi', PMI()),
            # ('lda', LDAVectorContent()),
            ('CountingWords', CountingWords()),
            # ('readability', ReadabilityFeatures()),
            ('typos', TyposCount()),
            ('english', EnglishInTitle()),
            ('pos', POSFeatures())
            # ('fastext_sup', FastTextSupervised()),
            # ('fast_text', FastTextAverageContentVector()),
            # ('dicts', Dicts()),
            # ('wmd', WMDDistance()),
        ])),
    ('scaler', MaxAbsScaler()),
    ('clf', LinearSVC(random_state=42))
])

# grid_search = GridSearchCV(pipe, tuned_parameters, cv=5,
#                         scoring='accuracy', verbose=1, n_jobs=-1)
# grid_search.fit(train, train['click_bait_score'])

# print(grid_search.best_params_)

# Fit the whole pipeline, then report its score on the held-out data.
# ``fit`` is used instead of ``fit_transform``: the pipeline's last step
# ('clf') is a LinearSVC classifier, which is not a transformer, and the
# transformed output was never used here anyway.
print("training...")
pipe.fit(train, train['click_bait_score'])
print("testing...")
score = pipe.score(test, test['click_bait_score'])

print(score)
예제 #12
0
			'LassoLars':LassoLars(),
			'LassoLarsCV':LassoLarsCV(),
			'LassoLarsIC':LassoLarsIC(),
			'LatentDirichletAllocation':LatentDirichletAllocation(),
			'LedoitWolf':LedoitWolf(),
			'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(),
			'LinearRegression':LinearRegression(),
			'LinearSVC':LinearSVC(),
			'LinearSVR':LinearSVR(),
			'LocallyLinearEmbedding':LocallyLinearEmbedding(),
			'LogisticRegression':LogisticRegression(),
			'LogisticRegressionCV':LogisticRegressionCV(),
			'MDS':MDS(),
			'MLPClassifier':MLPClassifier(),
			'MLPRegressor':MLPRegressor(),
			'MaxAbsScaler':MaxAbsScaler(),
			'MeanShift':MeanShift(),
			'MinCovDet':MinCovDet(),
			'MinMaxScaler':MinMaxScaler(),
			'MiniBatchDictionaryLearning':MiniBatchDictionaryLearning(),
			'MiniBatchKMeans':MiniBatchKMeans(),
			'MiniBatchSparsePCA':MiniBatchSparsePCA(),
			'MultiTaskElasticNet':MultiTaskElasticNet(),
			'MultiTaskElasticNetCV':MultiTaskElasticNetCV(),
			'MultiTaskLasso':MultiTaskLasso(),
			'MultiTaskLassoCV':MultiTaskLassoCV(),
			'MultinomialNB':MultinomialNB(),
			'NMF':NMF(),
			'NearestCentroid':NearestCentroid(),
			'NearestNeighbors':NearestNeighbors(),
			'Normalizer':Normalizer(),
예제 #13
0
 def __init__(self, copy=True):
     """Record the hyperparameters and build the wrapped ``SKLModel``.

     Args:
         copy: Forwarded to ``SKLModel`` as the ``copy`` keyword.
     """
     hyperparams = {'copy': copy}
     self._hyperparams = hyperparams
     self._wrapped_model = SKLModel(**hyperparams)