def load_data(self):
    """Score the classifier on a stratified hold-out cut from the training data.

    The output of ``TrainingDataTask`` is split once with
    ``StratifiedShuffleSplit`` (``test_size=task_core.cv_ratio``,
    ``random_state=task_core.n_seed``).  The hold-out part is then narrowed
    by ``divide_by_has_sessions`` and handed, together with the remaining
    training rows, to ``run_model``; the resulting probabilities are printed
    and passed to ``score``.

    Returns:
        dict: ``{'Score': s}`` where ``s`` is the value returned by
        ``score`` for the hold-out predictions.
    """
    # Class count comes from `le_`, which is defined outside this method.
    n_classes = len(le_.classes_)

    # The real test set (TestDataTask) is deliberately not loaded here:
    # a hold-out carved out of the training data plays its role instead.
    x_all, y_all = TrainingDataTask(self.task_core).run()

    # Single stratified shuffle split; only its first (and only) fold is used.
    splitter = StratifiedShuffleSplit(
        y_all,
        1,
        test_size=self.task_core.cv_ratio,
        random_state=self.task_core.n_seed,
    )
    fit_idxs, holdout_idxs = list(splitter)[0]

    x_holdout = x_all.filter_rows_by_idxs(holdout_idxs)
    y_holdout = y_all[holdout_idxs]
    x_fit = x_all.filter_rows_by_idxs(fit_idxs)
    y_fit = y_all[fit_idxs]

    # Original note: "2014 only for test".  Only the first pair returned by
    # divide_by_has_sessions is kept for evaluation -- presumably the rows
    # that have session data; confirm against that helper.
    x_holdout, y_holdout, _, _ = divide_by_has_sessions(x_holdout, y_holdout)

    print('running prediction model')
    core = self.task_core
    holdout_probs = run_model(
        x_fit,
        y_fit,
        x_holdout,
        n_classes,
        self.classifier,
        core.n_threads,
        core.n_seed,
        core.cache_dir,
    )
    print_probabilities(holdout_probs)

    return {'Score': score(holdout_probs, y_holdout)}
def load_data(self):
    """Predict on the real test set and write the submission file.

    Loads the test data (``TestDataTask``) and the training data
    (``TrainingDataTask``), passes both through ``sync_columns_2``, calls
    ``run_model`` on them, prints the resulting probabilities and hands
    them with ``x_test.ids_`` to ``save_submission``, targeting
    ``task_core.submission_file``.

    Returns:
        None.  The visible effects are the printed probabilities and the
        ``save_submission`` call.
    """
    core = self.task_core

    # Class count comes from `le_`, which is defined outside this method.
    n_classes = len(le_.classes_)

    # Test data is loaded before training data, as in the original order.
    test_rows = TestDataTask(core).run()
    train_rows, train_labels = TrainingDataTask(core).run()

    # A random row permutation of the training data used to be applied
    # here; it is disabled.

    # presumably aligns the feature columns of both sets -- confirm
    # against sync_columns_2's definition.
    test_rows, train_rows = sync_columns_2(test_rows, train_rows)

    test_probs = run_model(
        train_rows,
        train_labels,
        test_rows,
        n_classes,
        self.classifier,
        core.n_threads,
        core.n_seed,
        core.cache_dir,
    )
    print_probabilities(test_probs)

    save_submission(test_rows.ids_, test_probs, core.submission_file)