def experiment2(datafilepath, columns_info_file_path):
    """Run the experiment on growing prefixes of a fixed training pool.

    The loaded dataset is split once via
    ``datasetfilters.split_dataset_by_sizes`` into a training pool, a testing
    set (size 75) and a validation set (size 30).  The experiment iteration
    is then run on the first 30%, 35%, ..., 95% of the training pool
    (indices ``0 .. size - 1``), always against the same testing and
    validation sets, and the collected results are printed with
    ``print_iterations_results_table2``.

    Args:
        datafilepath: Path of the data file, forwarded to ``load_dataset``.
        columns_info_file_path: Path of the columns-info file, forwarded to
            ``load_dataset``.
    """
    dataset = DataSet(*load_dataset(datafilepath, columns_info_file_path))

    testing_size = 75
    validation_size = 30
    # Everything not reserved for testing/validation forms the training pool.
    remaining_data_size = dataset.size - testing_size - validation_size
    split_sizes = {
        'testing_dataset_size': testing_size,
        'validation_dataset_size': validation_size,
        'training_dataset_size': remaining_data_size,
    }
    complete_training_dataset, testing_dataset, validation_dataset = (
        datasetfilters.split_dataset_by_sizes(dataset, split_sizes))

    iterations_results = []
    # Step the ratio as an integer percentage.  Accumulating 0.05 in a float
    # drifts (e.g. to 0.49999999999999994), which can truncate the training
    # size to one less than intended and makes the loop's stopping point
    # depend on rounding error.
    for iteration, percent in enumerate(range(30, 100, 5), start=1):
        print("Iteration no:", iteration)
        training_size = remaining_data_size * percent // 100
        training_dataset = complete_training_dataset.subset(
            list(range(training_size)))
        results = experiment_iteration(training_dataset, testing_dataset,
                                       validation_dataset)
        # Record the size used so the results table can report it.
        results['iteration_results']['training_dataset_size'] = training_size
        iterations_results.append(results)

    print_iterations_results_table2(iterations_results)
def experiment1(datafilepath, columns_info_file_path, no_of_iterations):
    """Repeat the split-and-evaluate experiment and report every run.

    For each of ``no_of_iterations`` runs the dataset is split with
    ``datasetfilters.split_dataset`` using the ratios ``[0.6, 0.35, 0.05]``
    (presumably training / testing / validation, matching the order in which
    the result is unpacked -- confirm against ``split_dataset``), the
    experiment iteration is executed, and its result is tagged with the
    1-based run number.  All results are then plotted and printed.

    Args:
        datafilepath: Path of the data file, forwarded to ``load_dataset``.
        columns_info_file_path: Path of the columns-info file, forwarded to
            ``load_dataset``.
        no_of_iterations: Number of runs to perform.
    """
    loaded = load_dataset(datafilepath, columns_info_file_path)
    dataset = DataSet(*loaded)
    split_ratios = [0.6, 0.35, 0.05]

    collected_results = []
    for zero_based in range(no_of_iterations):
        run_number = zero_based + 1
        print("Iteration no:", run_number)

        train_part, test_part, validation_part = datasetfilters.split_dataset(
            dataset, split_ratios)
        outcome = experiment_iteration(train_part, test_part, validation_part)
        outcome['iteration_index'] = run_number
        collected_results.append(outcome)

    plot_decison_tree_iterations_results(collected_results)
    print_iterations_results_table(collected_results)
) #sys.exit(0) elif 2 not in indexes: print( 'data info file (.info) path is not present in arguments list... ending program... bye.. bye... ' ) #sys.exit(0) else: datafilepath = sys.argv[1] columns_info_file_path = sys.argv[2] print('Input data set is ', datasetname) mode = 'run' if (True): dataset = DataSet(*load_dataset(datafilepath, columns_info_file_path)) dataset_size = dataset.size testing_size = 50 training_size = dataset_size - testing_size ratios = [training_size / dataset_size, testing_size / dataset_size, 0] print('{:-<200}'.format('-')) print('Dataset loaded successfully, datafile:', datafilepath, ' datainfofilepath:', columns_info_file_path) training_stop_criteria = {'type': 'weights_change', 'value': 0.00005} training_stop_criteria['max_iterations'] = 1000 dataset_description = {'datasetname': datasetname} m = 10 if (mode == 'run'): #nayes_bayes_experiment(dataset_description, dataset, ratios, m) multi_classifier_roc_experiment(dataset_description, dataset,