def load_test_data(self, sessions_df):
    """Return the test feature set, reusing an on-disk cache when present.

    The test rows are read with ``read_from_csv`` from
    ``self.task_core.test_data_file`` (seeded with ``self.task_core.n_seed``).
    The cache file lives in ``self.task_core.cache_dir`` and its name embeds
    the number of rows read, so a different row count maps to a different
    cache file.

    Args:
        sessions_df: Passed through unchanged to ``ds_from_df`` when the
            features have to be built.

    Returns:
        The object returned by ``DataSet.load_from_file`` (cache hit) or by
        ``ds_from_df`` (cache miss).
    """
    core = self.task_core
    # NOTE: a ``max_rows=50000`` cap on this read was disabled in the original.
    test_df = read_from_csv(core.test_data_file, core.n_seed)

    cache_dir = core.cache_dir
    cache_name = 'features_test_' + str(len(test_df.index)) + '.p'
    cache_path = os.path.join(cache_dir, cache_name)

    if not os.path.isfile(cache_path):
        # Cache miss: build the features, then persist them for the next run.
        # The literal True is presumably an "is test data" flag -- confirm
        # against ds_from_df.
        features = ds_from_df(test_df, sessions_df, True)
        print('saving test features to file')
        DataSet.save_to_file(features, cache_path)
        return features

    print('Loading test features from file')
    return DataSet.load_from_file(cache_path)
def load_train_data(self, sessions_df):
    """Return ``(x, y)``: training features and encoded destination labels.

    The training rows are read with ``read_from_csv`` from
    ``self.task_core.data_file`` (seeded with ``self.task_core.n_seed``).
    Features are loaded from a cache file in ``self.task_core.cache_dir``
    whose name embeds the number of rows read; when no such file exists they
    are built with ``ds_from_df`` and written to that path.

    Args:
        sessions_df: Passed through unchanged to ``ds_from_df`` when the
            features have to be built.

    Returns:
        A 2-tuple ``(x, y)`` where ``x`` is the feature object and ``y`` is
        ``le_.transform`` applied to the ``'country_destination'`` column
        values. ``le_`` is defined outside this function (presumably an
        already-fitted label encoder -- confirm).
    """
    core = self.task_core
    # NOTE: a ``max_rows=50000`` cap on this read was disabled in the original.
    train_df = read_from_csv(core.data_file, core.n_seed)

    cache_dir = core.cache_dir
    cache_name = 'features_train_' + str(len(train_df.index)) + '.p'
    cache_path = os.path.join(cache_dir, cache_name)

    if not os.path.isfile(cache_path):
        # Cache miss: build the features, then persist them for the next run.
        # The literal False is presumably an "is test data" flag -- confirm
        # against ds_from_df.
        features = ds_from_df(train_df, sessions_df, False)
        print('saving train features to file')
        DataSet.save_to_file(features, cache_path)
    else:
        print('Loading train features from file')
        features = DataSet.load_from_file(cache_path)

    # Labels are always taken from the freshly read frame; only the features
    # are cached.
    destinations = train_df['country_destination'].values
    return features, le_.transform(destinations)