def generate(self, seed=42):
    """Load the dataset and store the training data on the instance.

    When ``self.reader`` is ``None`` a ``Reader`` is created from
    ``self.dataset_config[0]`` and ``self.dataset_config[1]``, together with
    a splitter picked from the estimator type returned by
    ``self.classifier()``:

    * ``ClassifierMixin`` -> ``Splitter``
    * ``RegressorMixin``  -> ``RandomSplitter``
    * anything else       -> no splitter (``None`` is handed to ``Reader``)

    Otherwise the reader supplied in ``self.reader`` is used as the dataset.

    After reading, ``self.train_X_all`` / ``self.train_y_all`` hold the full
    training split. If the config key ``instance.selection`` equals the
    string ``'True'``, the training split stored on the dataset is replaced
    by the output of ``sample_data_by_cnn``.

    Args:
        seed: Seed forwarded to the splitter (only used when the reader is
            built here).
    """
    if self.reader is None:
        # Build the estimator once; it is only needed to decide which
        # splitter fits the task.
        estimator = self.classifier()

        splitter = None
        if isinstance(estimator, ClassifierMixin):
            splitter_cls = Splitter
        elif isinstance(estimator, RegressorMixin):
            splitter_cls = RandomSplitter
        else:
            # Unknown estimator kind: Reader receives None as its splitter.
            splitter_cls = None

        if splitter_cls is not None:
            # NOTE(review): the second train_fraction entry is presumably an
            # upper bound on the number of training rows - confirm against
            # the splitter implementation.
            splitter = splitter_cls(
                train_fraction=[0.6, 10000000],
                valid_fraction=0.0,
                test_fraction=0.4,
                seed=seed,
            )

        self.dataset = Reader(
            self.dataset_config[0], self.dataset_config[1], splitter
        )
    else:
        self.dataset = self.reader

    self.raw_features = self.dataset.read()

    print(f"training:{len(self.dataset.splitted_target['train'])}")
    print(f"test:{len(self.dataset.splitted_target['test'])}")

    if Config.get_default('instance.selection', 'False') == 'True':
        # Keep independent copies of the complete training split before the
        # entries on the dataset are replaced by the sampled subset.
        self.train_X_all = copy.deepcopy(self.dataset.splitted_values['train'])
        self.train_y_all = copy.deepcopy(self.dataset.splitted_target['train'])

        sampled_X, sampled_y = sample_data_by_cnn(
            self.dataset.splitted_values['train'],
            self.dataset.splitted_target['train'],
        )
        self.dataset.splitted_values['train'] = sampled_X
        self.dataset.splitted_target['train'] = sampled_y

        print(f"training:{len(self.dataset.splitted_target['train'])}")
    else:
        self.train_X_all = self.dataset.splitted_values['train']
        self.train_y_all = self.dataset.splitted_target['train']
def generate(self):
    """Create the dataset reader with a fixed-seed splitter and read it.

    The reader is stored in ``self.dataset``; it is built from the first two
    entries of ``self.dataset_config`` and a ``Splitter`` seeded with 42.
    """
    splitter = Splitter(train_fraction=[0.6, 10000000], seed=42)

    config = self.dataset_config
    self.dataset = Reader(config[0], config[1], splitter)

    raw_features = self.dataset.read()
def generate(self):
    """Read the dataset and generate every candidate feature from it.

    The reader is stored in ``self.dataset`` and the candidates produced by
    ``Generator.generate_all_candidates()`` in ``self.candidates``; the
    number of candidates is printed.
    """
    splitter = Splitter(train_fraction=[0.6, 10000000], seed=42)

    config = self.dataset_config
    self.dataset = Reader(config[0], config[1], splitter)

    features = self.dataset.read()
    self.candidates = Generator(features).generate_all_candidates()

    print(f"Number candidates: {len(self.candidates)}")
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# both files below must come from a trusted source.
# Context managers make sure the file handles are closed after loading.
with open(file, "rb") as all_data_file:
    all_data = pickle.load(all_data_file)

with open('/home/felix/phd/feature_predictions/all_data_predictions.p', "rb") as predictions_file:
    feature_predictions = pickle.load(predictions_file)

# Index the prediction records by the string form of their candidate.
name2result_predictions = {
    str(result['candidate']): result for result in feature_predictions
}

dataset_config = (Config.get('statlog_heart.csv'), int(Config.get('statlog_heart.target')))

s = Splitter(train_fraction=[0.6, 10000000], seed=42)

dataset = Reader(dataset_config[0], dataset_config[1], s)
raw_features = dataset.read()
X = dataset.splitted_values['train']


# delta mean -> avg, min, max gain
def calculate_MSE(candidate: CandidateFeature, X):
    """Fit every parent of ``candidate`` on ``X`` and collect its output.

    NOTE(review): as written the function only fills the local list ``ys``
    and implicitly returns ``None``; the trailing "correlation" marker
    suggests the computation is unfinished - confirm before relying on it.

    Args:
        candidate: Candidate feature whose ``parents`` are fitted.
        X: Data passed to each parent's ``fit`` and ``transform``.
    """
    ys = []
    for p in candidate.parents:
        p.fit(X)
        y = p.transform(X)
        ys.append(y)
    # correlation
# Fragment overview (the code on the following line is left untouched):
#   * The leading `print(len(all_representations))` / `return all_representations`
#     are the tail of a function whose beginning is outside this chunk; it
#     prints the number of collected representations and returns them.
#   * The `__main__` section is a manual driver: it builds a Splitter with
#     train_fraction=[0.6, 10000000], reads the dataset named by the
#     'statlog_heart.csv' / 'statlog_heart.target' config keys through Reader,
#     feeds the raw features to TreeGenerator and calls generate_candidates().
#     `start_time` is captured right before that call - presumably for timing;
#     the code that would use it is not visible here.
#   * The `#dataset = (...)` entries are alternative datasets kept as
#     commented-out assignments.
# NOTE(review): the original line breaks appear to have been lost; as stored,
# everything after the first '#' on this line is a comment - restore the
# formatting before running.
print(len(all_representations)) return all_representations if __name__ == '__main__': from fastsklearnfeature.splitting.Splitter import Splitter import time s = Splitter(train_fraction=[0.6, 10000000]) dataset = (Config.get('statlog_heart.csv'), int(Config.get('statlog_heart.target'))) #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_27_colic_horse.csv", 22) #dataset = ("/home/felix/datasets/ExploreKit/csv/phpAmSP4g_cancer.csv", 30) #dataset = ("/home/felix/datasets/ExploreKit/csv/phpOJxGL9_indianliver.csv", 10) #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_29_credit-a_credit.csv", 15) #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_37_diabetes_diabetes.csv", 8) #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_31_credit-g_german_credit.csv", 20) #dataset = ("/home/felix/datasets/ExploreKit/csv/dataset_23_cmc_contraceptive.csv", 9) #dataset = ("/home/felix/datasets/ExploreKit/csv/phpn1jVwe_mammography.csv", 6) r = Reader(dataset[0], dataset[1], s) raw_features = r.read() g = TreeGenerator(raw_features) start_time = time.time() g.generate_candidates()