コード例 #1
0
ファイル: FeatureSelectionkBest.py プロジェクト: BigDaMa/DFS
    def run(self):
        """Score a SelectKBest pipeline for every k from 1 to the raw feature count.

        After generating the candidates and the target, each iteration:

        1. wraps all raw features in an identity ``CandidateFeature``,
        2. stacks a ``SelectKBestTransformer`` with the current ``k`` on top,
        3. fits that pipeline on the ``'train'`` split against
           ``self.current_target`` and transforms the train and test splits,
        4. prints the time elapsed since the loop started,
        5. grid-searches ``self.classifier()`` on the transformed training
           data and prints its score on the transformed test split.

        Nothing is returned; results are only printed.
        """
        # generate all candidates
        self.generate()
        #starting_feature_matrix = self.create_starting_features()
        self.generate_target()

        # The clock starts after generation, so printed times are cumulative
        # over the iterations of the loop below.
        self.global_starting_time = time.time()

        for k in range(1, len(self.raw_features) + 1):
            # Pass-through candidate that exposes every raw feature.
            identity_step = IdentityTransformation(len(self.raw_features))
            passthrough = CandidateFeature(identity_step, self.raw_features)

            # Selection step parameterised with the current k.
            k_best_step = SelectKBestTransformer(len(self.raw_features), k)
            selector = CandidateFeature(k_best_step, [passthrough])

            splits = self.dataset.splitted_values
            selector.pipeline.fit(splits['train'], self.current_target)
            train_matrix = selector.transform(splits['train'])
            test_matrix = selector.transform(splits['test'])

            elapsed = time.time() - self.global_starting_time
            print(f"time: {elapsed!s}")

            # NOTE(review): `iid` only exists in older scikit-learn releases;
            # confirm the pinned version before upgrading.
            estimator = self.classifier()
            search = GridSearchCV(
                estimator,
                self.grid_search_parameters,
                cv=self.preprocessed_folds,
                scoring=self.score,
                iid=False,
                error_score='raise',
            )
            search.fit(train_matrix, self.current_target)

            test_score = search.score(test_matrix, self.test_target)
            print(f"test score: {test_score!s}")
            print("\n\n")
コード例 #2
0
ファイル: Sisso.py プロジェクト: BigDaMa/DFS
    def run(self):
        """Fit a SISSO feature pipeline once and report the test score.

        The clock starts first, so the printed time includes candidate and
        target generation. All raw features are wrapped in an identity
        ``CandidateFeature`` and fed to a ``SissoTransformer`` configured with
        the raw feature names and the operator strings ``"^2"``, ``"^3"`` and
        ``"1/"``. The pipeline is fitted on the ``'train'`` split against
        ``self.train_y_all_target``; the train and test splits are then
        transformed, ``self.classifier()`` is grid-searched on the transformed
        training data, and the transformed test matrix and the test score are
        printed.

        Nothing is returned; results are only printed.
        """
        self.global_starting_time = time.time()

        # generate all candidates
        self.generate()
        #starting_feature_matrix = self.create_starting_features()
        self.generate_target()

        # Pass-through candidate that exposes every raw feature.
        identity_step = IdentityTransformation(len(self.raw_features))
        passthrough = CandidateFeature(identity_step, self.raw_features)

        raw_names = list(map(str, self.raw_features))

        sisso_step = SissoTransformer(
            len(self.raw_features), raw_names, ["^2", "^3", "1/"])
        sisso_feature = CandidateFeature(sisso_step, [passthrough])

        splits = self.dataset.splitted_values
        sisso_feature.pipeline.fit(splits['train'], self.train_y_all_target)
        train_matrix = sisso_feature.transform(splits['train'])
        test_matrix = sisso_feature.transform(splits['test'])

        elapsed = time.time() - self.global_starting_time
        print(f"time: {elapsed!s}")

        # NOTE(review): `iid` only exists in older scikit-learn releases;
        # confirm the pinned version before upgrading.
        estimator = self.classifier()
        search = GridSearchCV(
            estimator,
            self.grid_search_parameters,
            cv=self.preprocessed_folds,
            scoring=self.score,
            iid=False,
            error_score='raise',
        )
        search.fit(train_matrix, self.train_y_all_target)

        # Dump the transformed test matrix before scoring on it.
        print(test_matrix)

        test_score = search.score(test_matrix, self.test_target)
        print(f"test score: {test_score!s}")
        print("\n\n")
コード例 #3
0
ファイル: test_group.py プロジェクト: BigDaMa/DFS
import numpy as np
from fastsklearnfeature.reader.Reader import Reader
from fastsklearnfeature.splitting.Splitter import Splitter
from fastsklearnfeature.configuration.Config import Config
from fastsklearnfeature.transformations.GroupByThenTransformation import GroupByThenTransformation
from fastsklearnfeature.candidates.CandidateFeature import CandidateFeature
from fastsklearnfeature.candidates.RawFeature import RawFeature

# Manual smoke script: builds a GroupByThenTransformation candidate over two
# raw features, fits it on a tiny array and prints the transformed output.

# Two raw features; the arguments are presumably (name, column index,
# properties) -- TODO confirm against RawFeature.
f0 = RawFeature('col0', 0, {})
f1 = RawFeature('col1', 1, {})

# 4 rows x 2 columns of toy data.
training = np.array([
    [6, 1],
    [5, 1],
    [4, 2],
    [3, 2],
])

# Sanity output: one element and the array shape.
print(training[0, 1])

print(training.shape)

# NOTE(review): column 1 (1, 1, 2, 2) looks like the grouping key and column 0
# the values aggregated with np.sum -- verify against GroupByThenTransformation.
group_sum = GroupByThenTransformation(np.sum, 2)
c = CandidateFeature(group_sum, [f0, f1])

c.fit(training)

transformed = c.transform(training)
print(transformed)
# The string literal below is disabled code kept as a no-op expression; it
# refers to `raw_features`, which is not defined in the visible script.
'''
raw_features[1].fit(training)
print(raw_features[1].transform(training))

raw_features[0].fit(training)
print(raw_features[0].transform(training))
'''