def main():
    """CLI entry point: load and preprocess a TSV dataset, then run 10-fold
    cross-validation of a NeuralNetwork built from a configuration file.

    Command line:
      -d/--dataset  name (no extension) of the dataset under DATA_PATH (required)
      -n/--network  filename of the network configuration
    """
    parser = argparse.ArgumentParser(
        description='Multilayer neural network parser')
    parser.add_argument('-d', '--dataset',
                        help='The name (without extension) of the dataset',
                        required=True)
    parser.add_argument('-n', '--network',
                        help='The filename of the network configuration',
                        required=False)
    args = parser.parse_args()

    # Optional sidecar JSON with per-column dtypes; fall back to pandas'
    # automatic inference when it is missing or malformed.
    try:
        with open(DATA_PATH + args.dataset + '.json', 'r') as filetypes:
            types = json.load(filetypes)
    except (OSError, ValueError):
        # OSError: file missing/unreadable; ValueError covers
        # json.JSONDecodeError. (Was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.)
        print('Dataset types not found, automatic types will be used.')
        types = {}

    df = preprocess(
        pd.read_csv(DATA_PATH + args.dataset + '.tsv', sep='\t', dtype=types),
        types)

    # Keep the config file open only while the network consumes it; the
    # original leaked the handle by never closing it.
    # NOTE(review): --network is optional and defaults to None, which would
    # make open() raise TypeError -- confirm intended usage with callers.
    with open(args.network, 'r') as network_file:
        cv = CrossValidator(
            NeuralNetwork(network_file=network_file,
                          target_attribute='target',
                          data_instance=df.iloc[0]))
        cv.cross_validate(df, 10, 1)
def setUpClass(self):
    """One-time fixture setup: build the shared collaborators, then the
    verbosity flags used across the test run."""
    # Collaborators exercised by the tests.
    self.data_api_impl = DataApi('../../../data/')
    self.cross_validator_impl = CrossValidator()
    self.preprocessor_impl = Preprocessor()
    # Verbosity switches (off by default).
    self.DEBUG = False
    self.METRICS = False
def __init__(self):
    """Wire up every collaborator the demo driver needs."""
    self.logger = Logger('DEMO')             # logging front-end; 'DEMO' selects the log level
    self.datalayer = DataApi('../../data/')  # reads CSV files into raw data frames
    self.preprocessor = Preprocessor()       # all data-frame preprocessing
    self.cross_validator = CrossValidator()  # builds cross-validation partitions
    self.utils = Utils()                     # miscellaneous helpers
def main():
    """CLI entry point: either run the built-in Random Forest benchmarks or
    cross-validate a random forest over a user-supplied dataset.

    Command line:
      --opt               'test-benchmark' or 'test-dataset' (required)
      --dataset           dataset filename under DATA_PATH ('.tsv' -> tab-separated)
      --target_attribute  column to predict
      --n_trees           number of trees (default 5)
      --n_attributes      attributes sampled per split; -1 -> sqrt(total)
      --k_folds           cross-validation folds (default 5)
      --r                 repetitions of repeated cross validation (default 1)
    """
    parser = argparse.ArgumentParser(description='Random Forest parser')
    parser.add_argument('--opt', help='test-benchmark or test-dataset.',
                        required=True)
    parser.add_argument('--dataset', help='The dataset filename.',
                        default='', required=False)
    parser.add_argument('--target_attribute',
                        help='Target attribute to be predicted.',
                        default='', required=False)
    parser.add_argument('--n_trees',
                        help='The number of trees. The default is 5.',
                        default=5, type=int, required=False)
    parser.add_argument('--n_attributes',
                        # typo fixed in user-facing help ("squared root of otal")
                        help='The number of attributes. The default is the square root of total attributes.',
                        default=-1, type=int, required=False)
    parser.add_argument('--k_folds',
                        help='The number of folds for cross validation. The default is 5',
                        default=5, type=int, required=False)
    parser.add_argument('--r',
                        help='The number of repetitions for repeated cross validation. The default is 1',
                        default=1, type=int, required=False)
    args = parser.parse_args()

    if args.opt == 'test-benchmark':
        test_benchmark_categorical()
        test_benchmark_numerical()
    if args.opt == 'test-dataset':
        if args.dataset == '' or not os.path.isfile(DATA_PATH + args.dataset):
            print('Dataset not found.')
            return
        # Optional sidecar JSON ('name.tsv' -> 'name.json') with per-column
        # dtypes; fall back to pandas inference when missing or malformed.
        try:
            with open(DATA_PATH + args.dataset[:-3] + 'json', 'r') as filetypes:
                types = json.load(filetypes)
        except (OSError, ValueError):
            # Was a bare `except:`; narrowed to file and JSON-decode errors.
            print('Dataset types not found, automatic types will be used.')
            types = {}
        data = pd.read_csv(
            DATA_PATH + args.dataset,
            delimiter='\t' if args.dataset[-3:] == 'tsv' else ',',
            dtype=types
        )
        if args.target_attribute not in data.columns:
            print("Target attribute doesn't exist on dataset.")
            return
        n_trees = args.n_trees
        n_random_attributes = args.n_attributes
        if n_random_attributes == -1:
            # BUG FIX: `(n - 1) ** 1/2` parses as ((n-1)**1)/2 -- half the
            # attribute count, not its square root. Use ** 0.5 as the help
            # text promises.
            n_random_attributes = int((len(data.columns) - 1) ** 0.5)
        cv = CrossValidator(
            RandomForest(n_trees, args.target_attribute, n_random_attributes)
        )
        cv.cross_validate(data, args.k_folds, args.r)
        print('\nGlobal accuracy: %.3f (%.3f)' % (cv.accuracy, cv.accuracy_std))
def __init__(self):
    """Instantiate every collaborator required to run an experiment."""
    self.DEBUG = False

    # Infrastructure: data access, preprocessing, validation, tuning.
    self.data_api_impl = DataApi('../../data/')
    self.preprocessor_impl = Preprocessor()
    self.cross_validator_impl = CrossValidator()
    self.parameter_tuner_impl = ParameterTuner()

    # The algorithm implementations under comparison.
    self.knn_impl = KNN()
    self.enn_impl = EditedKNN()
    self.cnn_impl = CondensedKNN()
    self.kmeans_knn_impl = KMeansClustering()
    self.k_medoids_clustering_impl = KMedoidsClustering()

    # Results aggregation.
    self.results_processor_impl = Results()

    # Experiment-type flags; flipped later based on the dataset.
    self.CLASSIFICATION = False
    self.REGRESSION = False
def test():
    """End-to-end smoke test of the wavelet-denoise + SAE/LSTM pipeline on
    the Ford time series; prints the total absolute prediction error in
    dollars."""
    # Experiment configuration.
    decomposition_level = 1
    wavelet_name = 'db4'  # (alternative: Haar)
    source_csv = 'ford_ts.csv'
    history_days = 1200
    window_size = 60
    window_bias = 1

    loader = DataLoader(source_csv, history_days, debug=True)
    denoised = denoise(loader.as_matrix(), decomposition_level, wavelet_name)

    features_per_day, _ = np.shape(denoised)
    windowed = loader.prepare_dataset_sae(denoised, window_size, window_bias)

    model_runner = Runner(features_per_day,
                          lstm_layers=1,
                          gamma=0.005,
                          delay=4,
                          sae_lr=0.01,
                          beta=0,
                          hidden_nodes_activation_rate=0.9,
                          hidden_layers_sizes=[8],
                          debug=True)

    validator = CrossValidator()
    predictions = validator.run_validation(model_runner, windowed,
                                           sae_epoch=1, lstm_epoch=1)

    # Convert each normalized (prediction, target) pair back to dollars and
    # accumulate the total absolute error.
    dollars_loss = sum(
        abs(loader.to_dolar(pred) - loader.to_dolar(actual))
        for pred, actual in predictions)
    print("[RUNNER] Dollars lost={}".format(dollars_loss))
# (tail of an enclosing method that starts before this chunk -- appears to
#  close out the ENN editing loop; the full body is not visible here)
        print(number_of_edits_previous)
        loopcounter += 1
        print("Number of While Loops: ")
        return edited_train_set.reset_index(drop=True)


# EXECUTE SCRIPT
if __name__ == '__main__':
    print('running edited knn...')
    edited_knn = EditedKNN()
    data_api_impl = DataApi('../../data/')
    cross_validator_impl = CrossValidator()
    preprocessor_impl = Preprocessor()

    # NOTE(review): despite the 'wine_data' names, this loads and
    # preprocesses the 'segmentation' dataset.
    wine_data = data_api_impl.get_raw_data_frame('segmentation')
    prep_wine_data = preprocessor_impl.preprocess_raw_data_frame(
        wine_data, 'segmentation')
    wine_data_train_set = cross_validator_impl.get_training_set(
        prep_wine_data, test_set_number=3)
    print('wine_data_train_set.shape: ' + str(wine_data_train_set.shape))
    # NOTE(review): 'test_set_number', 'indexes_list' and 'k' are not defined
    # anywhere in this chunk -- as written these calls raise NameError.
    # Confirm against the full file before running.
    wine_data_test_set = cross_validator_impl.get_test_set(
        prep_wine_data, test_set_number, indexes_list)
    edited_knn.enn(wine_data_train_set, wine_data_test_set, prep_wine_data, k)
class CrossValidatorTests(unittest.TestCase):
    """Unit tests for CrossValidator: index-list generation (commented out
    below) and leak-free cross-validation partitioning."""

    # SETUP

    @classmethod
    def setUpClass(self):
        # NOTE(review): the first parameter of a classmethod is conventionally
        # named 'cls'; 'self' works here but is misleading.
        self.DEBUG = False
        self.METRICS = False
        self.data_api_impl = DataApi('../../../data/')
        self.cross_validator_impl = CrossValidator()
        self.preprocessor_impl = Preprocessor()

    @classmethod
    def tearDownClass(self):
        # Nothing to clean up; the fixtures hold no external resources.
        pass

    # TESTS

    # The triple-quoted block below is commented-out test code kept for
    # reference; it asserts exact row counts per dataset and per fold.
    '''
    # test get indexes list for abalone data
    def test_get_indexes_list_abalone_data(self):
        abalone_data = self.data_api_impl.get_raw_data_frame('abalone')
        self.assertTrue(abalone_data is not None)
        abalone_indexes = self.cross_validator_impl.get_indexes_list(abalone_data)
        self.assertTrue(len(abalone_indexes) == 4177) # 4177 rows in abalone data frame
        for i in range(1, 10):
            self.assertTrue(abalone_indexes.count(i) == 417) # each subset has 417 rows
        self.assertTrue(abalone_indexes.count(10) == 424) # last subset has 417 + remaining...

    # test get indexes list for car data
    def test_get_indexes_list_car_data(self):
        car_data = self.data_api_impl.get_raw_data_frame('car')
        self.assertTrue(car_data is not None)
        car_indexes = self.cross_validator_impl.get_indexes_list(car_data)
        self.assertTrue(len(car_indexes) == 1728) # 1728 rows in car data frame
        for i in range(1, 10):
            self.assertTrue(car_indexes.count(i) == 172) # each subset has 172 rows
        self.assertTrue(car_indexes.count(10) == 180) # last subset has 172 + remaining...

    # test get indexes list for forest fires data
    def test_get_indexes_list_ff_data(self):
        ff_data = self.data_api_impl.get_raw_data_frame('forestfires')
        self.assertTrue(ff_data is not None)
        ff_indexes = self.cross_validator_impl.get_indexes_list(ff_data)
        self.assertTrue(len(ff_indexes) == 518) # 518 rows in forest fires data frame
        for i in range(1, 10):
            self.assertTrue(ff_indexes.count(i) == 51) # each subset has 51 rows
        self.assertTrue(ff_indexes.count(10) == 59) # last subset has 51 + remaining...

    # test get indexes list for machine data
    def test_get_indexes_list_machine_data(self):
        machine_data = self.data_api_impl.get_raw_data_frame('machine')
        self.assertTrue(machine_data is not None)
        machine_indexes = self.cross_validator_impl.get_indexes_list(machine_data)
        self.assertTrue(len(machine_indexes) == 209) # 209 rows in machine data frame
        for i in range(1, 10):
            self.assertTrue(machine_indexes.count(i) == 20) # each subset has 20 rows
        self.assertTrue(machine_indexes.count(10) == 29) # last subset has 20 + remaining...

    # test get indexes list for segmentation data
    def test_get_indexes_list_segmentation_data(self):
        segmentation_data = self.data_api_impl.get_raw_data_frame('segmentation')
        self.assertTrue(segmentation_data is not None)
        segmentation_indexes = self.cross_validator_impl.get_indexes_list(segmentation_data)
        self.assertTrue(len(segmentation_indexes) == 213) # 213 rows in segmentation data frame
        for i in range(1, 10):
            self.assertTrue(segmentation_indexes.count(i) == 21) # each subset has 21 rows
        self.assertTrue(segmentation_indexes.count(10) == 24) # last subset has 21 + remaining...

    # test get indexes list for wine data
    def test_get_indexes_list_wine_data(self):
        wine_data = self.data_api_impl.get_raw_data_frame('wine')
        self.assertTrue(wine_data is not None)
        wine_indexes = self.cross_validator_impl.get_indexes_list(wine_data)
        self.assertTrue(len(wine_indexes) == 6497) # 6497 rows in wine data frame
        for i in range(1, 10):
            self.assertTrue(wine_indexes.count(i) == 649) # each subset has 649 rows
        self.assertTrue(wine_indexes.count(10) == 656) # last subset has 649 + remaining...

    # TRAINING SET

    # test get training set 2 with wine data
    def test_get_training_set(self):
        wine_data = self.data_api_impl.get_raw_data_frame('wine')
        wine_data_training_set = self.cross_validator_impl.get_training_set(wine_data, 2)
        self.assertTrue(wine_data_training_set.shape[0] == 5848) # 6497 - 649 rows in test set 2 means 5484 rows in training set
        self.assertTrue(wine_data_training_set.shape[1] == 12) # number of columns does not change

    # TEST SET

    # test get test set (-2) with wine data
    def test_get_test_set(self):
        wine_data = self.data_api_impl.get_raw_data_frame('wine')
        wine_data_test_set = self.cross_validator_impl.get_test_set(wine_data, 2)
        self.assertTrue(wine_data_test_set.shape[0] == 649) # 649 rows in test set 2
        self.assertTrue(wine_data_test_set.shape[1] == 12) # number of columns does not change
    '''

    def test_cv_partitions(self):
        # Partition preprocessed abalone data and verify that no test-set row
        # index leaks into the corresponding training set.
        abalone_data = self.data_api_impl.get_raw_data_frame('abalone')
        prep_abalone_data = self.preprocessor_impl.preprocess_raw_data_frame(abalone_data, 'abalone')
        cv_partitions = self.cross_validator_impl.get_cv_partitions(prep_abalone_data)
        self.assertTrue(cv_partitions is not None)
        for partition in cv_partitions:
            # partition value is a (train_frame, test_frame) pair.
            train_data_indexes = list(cv_partitions[partition][0].index.values)
            test_data_indexes = list(cv_partitions[partition][1].index.values)
            for test_index in test_data_indexes:
                self.assertTrue(test_index not in train_data_indexes)
class ExperimentRunner:
    """Drives a neural-network experiment: fetch + preprocess data, build
    cross-validation partitions, train via gradient descent, and log the
    averaged accuracy/error of the resulting model."""

    '''
    CONSTRUCTOR
    '''
    def __init__(self):
        # logger instance - VERBOSE level is highest (most verbose) level for logging
        self.logger = Logger('DEMO') # configure log level here
        # datalayer instance - read csv data files and convert into raw data frames
        self.datalayer = DataApi('../../data/')
        # preprocessor instance - everything for prerocessing data frames
        self.preprocessor = Preprocessor()
        # cross_validator instance - setup cross validation partitions
        self.cross_validator = CrossValidator()
        # utils instance - random things
        self.utils = Utils()

    # get average result given cross validation results dictionary
    def get_avg_result(self, cv_results):
        # cv_results maps test-partition key -> scalar accuracy/error value.
        result_vals = []
        # for each cross validation partition, append result value to corresponding list
        for test_data_key in cv_results:
            test_result = cv_results[test_data_key]
            result_vals.append(test_result)
        # should always equal the value of the 'folds' variable in cross validator
        test_data_count = len(cv_results)
        # calculate average values
        avg_result = sum(result_vals) / test_data_count
        # return average result
        return avg_result

    '''
    get preprocessed data ready for consumption by experiment running logic

    INPUT:
        - data_set_name: name of data set to fetch data for

    OUTPUT:
        - preprocessed data frame - fully ready for experiment consumption
    '''
    def get_experiment_data(self, data_set_name):
        data = self.datalayer.get_raw_data_frame(data_set_name)
        self.logger.log('DEMO', 'data_set_name: \t%s\n' % str(data_set_name))
        self.logger.log(
            'DEMO',
            'raw data: \n\n%s, shape: %s\n' % (str(data), str(data.shape)))
        self.logger.log('DEMO', '----------------------------------------------------' \
            + '-----------------------------------------------\n')
        data = self.preprocessor.preprocess_raw_data_frame(data, data_set_name)
        self.logger.log(
            'DEMO',
            'preprocessed data: \n\n%s, shape: %s\n' % (str(data), str(data.shape)))
        self.logger.log('DEMO', '----------------------------------------------------' \
            + '-----------------------------------------------\n')
        return data

    '''
    run experiment

    INPUT:
        - data_set_name: name of data set to run experiment on
        - neural_network: instance of neural network to train/test with data
        - hyperparams: hyperparameters and corresponding values to use in experiment

    OUTPUT:
        - <void> - logs all the important stuff at DEMO level
    '''
    def run_experiment(self, data_set_name, neural_network, hyperparams):
        # LAYER ACTIVATION FUNCTION SPECIFICATION
        self.logger.log(
            'DEMO',
            'layer_activation_funcs: %s\n' % str(hyperparams["layer_activation_funcs"]))

        # DATA RETRIEVAL AND PREPROCESSING
        data = self.get_experiment_data(data_set_name)
        self.logger.log('DEMO', 'data_set_name: %s\n' % str(data_set_name))

        # CROSS VALIDATION PARTITIONING
        # get cross validation partitions for data
        cv_partitions = self.cross_validator.get_cv_partitions(data)
        # dictionary for storing accuracy results
        cv_results = {}
        # list of sizes of test sets used for getting average test set size
        test_data_sizes = []

        # NEURAL NETWORK TRAINING AND TESTING
        for partition in cv_partitions:
            # initialize key and corresponding nested dictionary in results dictionary
            test_data_key = 'test_data_' + str(partition)
            cv_results[test_data_key] = {}
            # get training set and test set for given cross validation partition
            train_data, test_data = cv_partitions[partition]
            test_data_sizes.append(
                test_data.shape[0]
            ) # add number of rows in test set to test_set_sizes list

            # HANDLE RBF NETWORK P2 RESULTS
            if neural_network.network_name == 'RBF':
                # configure RBF network shape based on training data
                neural_network.configure_rbf_network(train_data, data, data_set_name,
                                                     hyperparams["k"])

            # GRADIENT DESCENT
            # run gradient descent for given neural network instance
            test_result_vals = neural_network.train_gradient_descent(
                train_data, hyperparams, partition, test_data)
            self.logger.log('DEMO', ('accuracy_vals' if neural_network.CLASSIFICATION else 'error_vals') \
                + ' for partition %s: %s\n' % (str(partition+1), str(test_result_vals)), True)
            # append accuracy/error result of final gradient descent iteration to results dictionary
            cv_results[test_data_key] = test_result_vals[-1]

        # FINAL RESULTS (THE MODEL)
        self.logger.log('DEMO', '------------------------------------------------------------' \
            + ' TRAINING DONE ------------------------------------------------------------')
        self.logger.log('DEMO', 'trained network: weights --> \n\n%s, shapes: %s\n' \
            % (str(neural_network.weights), str(self.utils.get_shapes(neural_network.weights))), True)
        self.logger.log('DEMO', 'trained network: biases --> \n\n%s, shapes: %s\n' \
            % (str(neural_network.biases), str(self.utils.get_shapes(neural_network.biases))), True)
        self.logger.log('DEMO', 'data_set_name: %s\n' % str(data_set_name), True)
        self.logger.log('DEMO', 'trained network: AVERAGE ' \
            + ('ACCURACY' if neural_network.CLASSIFICATION else 'ERROR') + ' --> %s\n' \
            % str(self.get_avg_result(cv_results)), True)
# (tail of an enclosing method that starts before this chunk -- the k-means
#  convergence loop; the full body is not visible here)
    print('K MEANS CLUSTERING CONVERGED. iterations: ' + str(iteration_count))
    return centroids_data


# EXECUTE SCRIPT
if __name__ == '__main__':
    print('k means clustering...')
    k_means_clustering_impl = KMeansClustering()
    data_api_impl = DataApi('../../data/')
    preprocessor_impl = Preprocessor()
    cross_validator_impl = CrossValidator()

    # Commented-out alternative: run against the wine dataset instead.
    '''
    wine_data = data_api_impl.get_raw_data_frame('wine')
    prep_wine_data = preprocessor_impl.preprocess_raw_data_frame(wine_data, 'wine')
    '''

    abalone_data = data_api_impl.get_raw_data_frame('abalone')
    prep_abalone_data = preprocessor_impl.preprocess_raw_data_frame(abalone_data, 'abalone')
    print('\npossible classes: ' + str(list(set(abalone_data.loc[:, 'CLASS'].values))) + '\n')

    # Use the first cross-validation partition as the (train, test) split.
    training_set, test_set = cross_validator_impl.get_cv_partitions(prep_abalone_data)[0]
    # get training set (full data frame - rows in test_set_index bucket)
    #training_set = cross_validator_impl.get_training_set(prep_abalone_data, test_set_number=3)
def get_data_set(self): return self.data_set # set algorithm name for context def set_algorithm_name(self, algorithm_name): self.algorithm_name = algorithm_name # EXECUTE SCRIPT if __name__ == '__main__': print('\nk nearest neighbor...\n') data_api_impl = DataApi('../../data/') cross_validator_impl = CrossValidator() preprocessor_impl = Preprocessor() knn_impl = KNN() segmentation_data = data_api_impl.get_raw_data_frame('segmentation') segmentation_data_preproc = preprocessor_impl.preprocess_raw_data_frame( segmentation_data, "segmentation") distance_matrix = knn_impl.get_distance_matrix(segmentation_data_preproc) print("Segmentation Data Preprocessed: ") print(segmentation_data_preproc) print( "--------------------------------------------------------------------------------------" ) knn = knn_impl.knn(10, segmentation_data_preproc, distance_matrix, 5)
class ExperimentRunner():
    """Runs a full experiment: preprocess a dataset, sweep the tuner's
    parameter values, evaluate the chosen algorithm under cross validation,
    and return loss metrics per parameter value."""

    def __init__(self):
        self.DEBUG = False
        # get instances of all the classes needed to run an experiment
        self.data_api_impl = DataApi('../../data/')
        self.preprocessor_impl = Preprocessor()
        self.cross_validator_impl = CrossValidator()
        self.parameter_tuner_impl = ParameterTuner()
        # algorithm implementations
        self.knn_impl = KNN()
        self.enn_impl = EditedKNN()
        self.cnn_impl = CondensedKNN()
        self.kmeans_knn_impl = KMeansClustering()
        self.k_medoids_clustering_impl = KMedoidsClustering()
        self.results_processor_impl = Results()
        # experiment-type flags, set per dataset by set_experiment_type()
        self.CLASSIFICATION = False
        self.REGRESSION = False

    # run algorithm on data set with various parameters
    def run_experiment(self, data_frame_name, algorithm):
        """Sweep tuner parameters for `algorithm` over `data_frame_name`.

        Returns a dict mapping str(parameter) -> (avg_accuracy,
        avg_mean_squared_error) averaged over the cross-validation folds.
        """
        self.set_experiment_type(data_frame_name)

        # get raw data frame to run experiment against
        raw_data_frame = self.data_api_impl.get_raw_data_frame(data_frame_name)
        print(raw_data_frame)

        # preprocess data
        preprocessed_data_frame = self.preprocessor_impl.preprocess_raw_data_frame(raw_data_frame, data_frame_name)
        print(preprocessed_data_frame)

        # get indexes list for data frame cross validation - a list of row numbers used to partition the data
        data_frame_indexes_list = self.cross_validator_impl.get_indexes_list(preprocessed_data_frame)

        if self.DEBUG:
            print('\ndata_frame_name --> ' + data_frame_name)
            print('\nraw_data_frame:\n')
            print(raw_data_frame)
            print('\npreprocessed_data_frame:\n')
            print(preprocessed_data_frame)
            print('\ndata_frame_indexes_list for cross validation:\n')
            print(data_frame_indexes_list)

        # nested dictionary to hold algorithm performance results for each combination of training/test sets
        # key pattern --> key = test_set_1 , where the number at the end of the key is the test set index
        # each value is another dictionary with keys = { 'zero_one_loss', 'mean_squared_error' }
        # the nested dictionary values are the corresponding loss function metrics for predictions using the test set
        cross_validation_results = {}
        # list of sizes of test sets used for getting average test set size
        test_set_sizes = []

        algorithm_parameters = self.parameter_tuner_impl.get_params(data_frame_name, algorithm)
        # dictionary where key is parameter and value is tuple of average loss function results
        results_by_parameter = {}

        # get all cross validation partitions for given data frame
        cv_partitions = self.cross_validator_impl.get_cv_partitions(preprocessed_data_frame)

        # for each parameter value in the list of algorithm parameter values (see ParameterTuner)
        for parameter in algorithm_parameters:
            if self.DEBUG:
                print('\n' + str(self.parameter_tuner_impl.get_parameter_key(algorithm)) + ': ' + str(parameter) + '\n')

            # for each test set used in cross validation (number of folds)
            for partition in cv_partitions:
                # initialize key and corresponding nested dictionary in results dictionary
                test_set_key = 'test_set_' + str(partition)
                cross_validation_results[test_set_key] = {}

                # get training set and test set for given cross validation partition
                training_set, test_set = cv_partitions[partition]
                test_set_sizes.append(test_set.shape[0]) # add number of rows in test set to test_set_sizes list

                if self.DEBUG:
                    print('preprocessed dataframe before running algorithm:')
                    print(preprocessed_data_frame)

                # run algorithms on training set / test set combination
                # returns dictionary where key is the row index (as string) and value is the predicted class for that row
                prediction_results = self.run_algorithm(data_frame_name, algorithm, training_set, test_set, \
                    preprocessed_data_frame, parameter)

                # calculate loss function results given prediction results - measure prediction accuracy
                accuracy, mean_squared_error = self.results_processor_impl.loss_function_analysis(test_set, prediction_results)
                cross_validation_results[test_set_key]['accuracy'] = accuracy
                cross_validation_results[test_set_key]['mean_squared_error'] = mean_squared_error

            # calculate average loss function results over all cross validation folds
            avg_accuracy, avg_mean_squared_error = self.results_processor_impl.get_avg_loss_vals(cross_validation_results)
            avg_test_set_size = sum(test_set_sizes) / len(test_set_sizes) # get average test set size for reference
            results_by_parameter[str(parameter)] = (avg_accuracy, avg_mean_squared_error)
            print('\n\nRESULTS: average test set size: ' + str(avg_test_set_size) + \
                ((' --> accuracy: ' + str(avg_accuracy)) if self.CLASSIFICATION \
                else (' --> mean_squared_error: ' + str(avg_mean_squared_error))))
            print('\n---------------------------------------------------------------------------------------------------------------------')

        # return dictionary of results by parameter
        return results_by_parameter

    def set_experiment_type(self, data_frame_name):
        """Flip CLASSIFICATION/REGRESSION flags based on the dataset name.

        Raises Exception for an unrecognized dataset name.
        """
        if data_frame_name in ['abalone', 'car', 'segmentation']:
            self.CLASSIFICATION = True
            self.REGRESSION = False
        elif data_frame_name in ['machine', 'forestfires', 'wine']:
            self.REGRESSION = True
            self.CLASSIFICATION = False
        else:
            raise Exception('ERROR: unknown data_set_name --> ' + str(data_frame_name))

    '''
    run algorithm execution handler given algorithm name

    INPUT:
        - algorithm_name: name of algorithm to run handler for

    OUTPUT:
        - prediction results dictionary, maps instance index to tuple: (prediction, actual)
    '''
    def run_algorithm(self, data_set_name, algorithm_name, training_set, \
            test_set, preprocessed_data_frame, parameter):
        # BUG FIX: the dispatch previously tested the undefined name
        # 'algorithm' (the parameter is 'algorithm_name'), raising NameError
        # on every call. Dispatch on the parameter instead.
        if algorithm_name == 'knn':
            self.knn_impl.set_data_set(data_set_name)
            self.knn_impl.set_algorithm_name(algorithm_name)
            return self.knn_impl.do_knn(training_set, test_set, preprocessed_data_frame, parameter)
        elif algorithm_name == 'enn':
            self.enn_impl.set_data_set(data_set_name)
            self.enn_impl.set_algorithm_name(algorithm_name)
            return self.enn_impl.do_enn(training_set, test_set, preprocessed_data_frame, parameter)
        elif algorithm_name == 'cnn':
            self.cnn_impl.set_data_set(data_set_name)
            self.cnn_impl.set_algorithm_name(algorithm_name)
            return self.cnn_impl.do_cnn(training_set, test_set, preprocessed_data_frame, parameter)
        elif algorithm_name == 'kmeans_knn':
            self.kmeans_knn_impl.set_data_set(data_set_name)
            self.kmeans_knn_impl.set_algorithm_name(algorithm_name)
            return self.kmeans_knn_impl.cluster_do_knn(training_set, test_set, preprocessed_data_frame, data_set_name, parameter)
        elif algorithm_name == 'kmedoids_knn':
            self.k_medoids_clustering_impl.set_data_set(data_set_name)
            self.k_medoids_clustering_impl.set_algorithm_name(algorithm_name)
            return self.k_medoids_clustering_impl.cluster(training_set, test_set, preprocessed_data_frame, data_set_name, parameter)
# create_plot(tree) # Prune the training set. pruned_tree = TreePruner(tree).prune() create_plot(pruned_tree) print('Tree depth: ', get_tree_depth(tree)) # Classify other results c = Classifier(pruned_tree, short_labels) print('\nClassify the training set: ') dsc = DataSetClassifier(c, enricher) results = dsc.classify_data_set(original_data_set) print('Invalid classified entries:', dsc.invalid_entries, '\nTotal entries:', len(results), '\nError:', str(round(dsc.error_rate, 2)) + '%') print('\nClassify the test set: ') testing_data_set = DataSetLoader('dataset/test.data').load() results = dsc.classify_data_set(testing_data_set) print('Invalid classified entries:', dsc.invalid_entries, '\nTotal entries:', len(results), '\nError:', str(round(dsc.error_rate, 2)) + '%\n') print('Limiting depth:') CrossValidator([ 'dataset/cvs_splits/training00.data', 'dataset/cvs_splits/training01.data', 'dataset/cvs_splits/training02.data', 'dataset/cvs_splits/training03.data' ]).run()
# (continuation of a script; 'train_data_path', 'label_data_path',
#  '__objective' and 'lgbexe' are defined above this chunk)
train_data_player = DataFramePlayer.load_csv(train_data_path)
label_data_player = DataFramePlayer.load_csv(label_data_path)

# Processing via the player:
# set a cassette on the player and call play() to apply the transformation.
# The transformed result is held in the player's internal data frame.
label_data_player.add(CleanLabelCassette).play()

# A cassette can also be used on its own.
train_data_mean = MeanCassette.extract(train_data_player.df)

spilt = 5  # NOTE(review): presumably a typo for 'split'; used consistently, so behavior is unaffected

# Cross-validation setup.
validator = CrossValidator(objective=__objective, spilt=spilt,
                           train_data=train_data_player.df,
                           label_data=label_data_player.df)
feature_columns = train_data_player.df.columns
sub_predicts = pd.DataFrame()

# Iterating the cross-validator yields, fold by fold, only the result of the
# configured objective computation.
for folds, clf in validator:
    # Average fold probabilities by dividing each by the fold count.
    predicts = clf.predict_proba(
        train_data_player.df, num_iteration=clf.best_iteration_)[:, 1] / spilt
    fold_importance_df = lgbexe.analyze_lightgbm(clf, feature_columns)

# The player can also save its internal data frame as CSV.
# NOTE(review): 'sub_predicts' is never filled inside the loop -- confirm
# whether per-fold predictions were meant to be accumulated into it.
DataFramePlayer(sub_predicts).save_csv('result', '.', is_attend_date=True)