Code example #1
    def generate(self, seed=42):
        if self.reader is None:
            s = None
            # pick a splitter based on whether the wrapped estimator is a classifier or a regressor
            if isinstance(self.classifier(), ClassifierMixin):
                s = Splitter(train_fraction=[0.6, 10000000], valid_fraction=0.0, test_fraction=0.4, seed=seed)
            elif isinstance(self.classifier(), RegressorMixin):
                s = RandomSplitter(train_fraction=[0.6, 10000000], valid_fraction=0.0, test_fraction=0.4, seed=seed)
            else:
                pass  # neither classifier nor regressor: leave the splitter unset

            self.dataset = Reader(self.dataset_config[0], self.dataset_config[1], s)
        else:
            self.dataset = self.reader
        self.raw_features = self.dataset.read()

        print("training:" + str(len(self.dataset.splitted_target['train'])))
        print("test:" + str(len(self.dataset.splitted_target['test'])))

        if Config.get_default('instance.selection', 'False') == 'True':
            self.train_X_all = copy.deepcopy(self.dataset.splitted_values['train'])
            self.train_y_all = copy.deepcopy(self.dataset.splitted_target['train'])

            self.dataset.splitted_values['train'], self.dataset.splitted_target['train'] = sample_data_by_cnn(self.dataset.splitted_values['train'], self.dataset.splitted_target['train'])
            print("training:" + str(len(self.dataset.splitted_target['train'])))
        else:
            self.train_X_all = self.dataset.splitted_values['train']
            self.train_y_all = self.dataset.splitted_target['train']
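A minimal, self-contained sketch of the "keep a full copy, then subsample the training set" pattern used in example #1. The random_subsample helper is hypothetical and only stands in for sample_data_by_cnn, whose actual instance-selection logic is not shown here:

import copy
import numpy as np

def random_subsample(X, y, fraction=0.5, seed=42):
    # hypothetical stand-in for sample_data_by_cnn: keep a random fraction of the rows
    rng = np.random.RandomState(seed)
    idx = rng.choice(len(y), size=int(len(y) * fraction), replace=False)
    return X[idx], y[idx]

X = np.arange(20).reshape(10, 2)
y = np.arange(10)
X_all, y_all = copy.deepcopy(X), copy.deepcopy(y)   # untouched copy of the full training data
X_train, y_train = random_subsample(X, y)           # reduced training set used afterwards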
Code example #2
    def generate(self):

        s = Splitter(train_fraction=[0.6, 10000000], seed=42)
        #s = Splitter(train_fraction=[0.1, 10000000], seed=42)

        self.dataset = Reader(self.dataset_config[0], self.dataset_config[1], s)
        raw_features = self.dataset.read()
Code example #3
File: Feature_Runtime.py  Project: BigDaMa/DFS
    def generate(self):

        s = Splitter(train_fraction=[0.6, 10000000], seed=42)
        #s = Splitter(train_fraction=[0.1, 10000000], seed=42)

        self.dataset = Reader(self.dataset_config[0], self.dataset_config[1],
                              s)
        raw_features = self.dataset.read()

        g = Generator(raw_features)
        self.candidates = g.generate_all_candidates()
        print("Number candidates: " + str(len(self.candidates)))
Code example #4
with open(file, "rb") as f:
    all_data = pickle.load(f)

with open('/home/felix/phd/feature_predictions/all_data_predictions.p', "rb") as f:
    feature_predictions = pickle.load(f)

# map the string form of each candidate feature to its prediction record
name2result_predictions = {str(result['candidate']): result
                           for result in feature_predictions}

dataset_config = (Config.get('statlog_heart.csv'),
                  int(Config.get('statlog_heart.target')))

s = Splitter(train_fraction=[0.6, 10000000], seed=42)

dataset = Reader(dataset_config[0], dataset_config[1], s)
raw_features = dataset.read()
X = dataset.splitted_values['train']

#delta mean -> avg, min, max gain


def calculate_MSE(candidate: CandidateFeature, X):
    ys = []
    for p in candidate.parents:
        p.fit(X)
        y = p.transform(X)

        ys.append(y)

    #correlation
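Example #4 breaks off at the #correlation comment. A hedged sketch, not the project's own implementation, of how the pairwise correlation between the parents' transformed outputs could be computed, assuming each entry of ys can be flattened to a 1-D numeric vector of equal length:

import numpy as np

def pairwise_correlation(ys):
    # stack each flattened parent output as one row, then take Pearson correlations
    stacked = np.vstack([np.ravel(y) for y in ys])
    return np.corrcoef(stacked)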
Code example #5
        print(len(all_representations))

        return all_representations


if __name__ == '__main__':
    from fastsklearnfeature.splitting.Splitter import Splitter
    import time

    s = Splitter(train_fraction=[0.6, 10000000])

    dataset = (Config.get('statlog_heart.csv'),
               int(Config.get('statlog_heart.target')))
    #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_27_colic_horse.csv", 22)
    #dataset = ("/home/felix/datasets/ExploreKit/csv/phpAmSP4g_cancer.csv", 30)
    #dataset = ("/home/felix/datasets/ExploreKit/csv/phpOJxGL9_indianliver.csv", 10)
    #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_29_credit-a_credit.csv", 15)
    #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_37_diabetes_diabetes.csv", 8)
    #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_31_credit-g_german_credit.csv", 20)
    #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_23_cmc_contraceptive.csv", 9)
    #dataset = ("/home/felix/datasets/ExploreKit/csv/phpn1jVwe_mammography.csv", 6)

    r = Reader(dataset[0], dataset[1], s)
    raw_features = r.read()

    g = TreeGenerator(raw_features)

    start_time = time.time()

    g.generate_candidates()
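Example #5 ends right after the timed call to generate_candidates(). A minimal sketch of the same start_time / elapsed-seconds pattern; the timed helper is hypothetical and not part of fastsklearnfeature:

import time

def timed(fn, *args, **kwargs):
    # run fn, print the elapsed wall-clock time, and pass its result through
    start_time = time.time()
    result = fn(*args, **kwargs)
    print("runtime: " + str(time.time() - start_time) + " s")
    return result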