def experiment2(datafilepath, columns_info_file_path):
    """Run the experiment on progressively larger prefixes of the training data.

    The dataset is split once by explicit sizes: 75 samples for testing, 30
    for validation, and everything that is left as the complete training
    pool. ``experiment_iteration`` is then run once per training prefix
    covering 30%, 35%, ..., 95% of that pool, always with the same testing
    and validation parts. Each result records the training size that was
    used, and the collected results are handed to
    ``print_iterations_results_table2``.

    Args:
        datafilepath: Path of the data file, forwarded to ``load_dataset``.
        columns_info_file_path: Path of the columns info file, forwarded to
            ``load_dataset``.
    """
    dataset = DataSet(*load_dataset(datafilepath, columns_info_file_path))

    # Key order is kept as testing, validation, training in case the
    # splitter depends on it.
    split_sizes = {
        'testing_dataset_size': 75,
        'validation_dataset_size': 30,
    }
    remaining_data_size = (dataset.size
                           - split_sizes['testing_dataset_size']
                           - split_sizes['validation_dataset_size'])
    split_sizes['training_dataset_size'] = remaining_data_size

    complete_training_dataset, testing_dataset, validation_dataset = (
        datasetfilters.split_dataset_by_sizes(dataset, split_sizes))

    iterations_results = []
    # Step through whole percentages instead of repeatedly adding 0.05 to a
    # float: the accumulated ratio drifts (0.3 + 0.05 + 0.05 evaluates to
    # 0.39999999999999997), which could truncate the training size to one
    # sample too few and made the loop bound depend on rounding error.
    for iteration, percent in enumerate(range(30, 100, 5), start=1):
        print("Iteration no:", iteration)
        training_dataset_size = int(remaining_data_size * percent // 100)
        # The training subset is always the first N samples of the pool, so
        # each iteration's training data contains the previous iteration's.
        training_dataset = complete_training_dataset.subset(
            list(range(training_dataset_size)))
        results = experiment_iteration(training_dataset, testing_dataset,
                                       validation_dataset)
        results['iteration_results']['training_dataset_size'] = (
            training_dataset_size)
        iterations_results.append(results)

    print_iterations_results_table2(iterations_results)
def experiment1(datafilepath, columns_info_file_path, no_of_iterations):
    """Repeat the split-and-evaluate experiment a fixed number of times.

    On every run the loaded dataset is split again with the ratios
    ``[0.6, 0.35, 0.05]`` (unpacked as training, testing and validation
    parts) and passed to ``experiment_iteration``. Each result is tagged
    with its 1-based run number under ``'iteration_index'``. Once all runs
    are done the results are plotted and then printed as a table.

    Args:
        datafilepath: Path of the data file, forwarded to ``load_dataset``.
        columns_info_file_path: Path of the columns info file, forwarded to
            ``load_dataset``.
        no_of_iterations: How many times to split and evaluate.
    """
    loaded = load_dataset(datafilepath, columns_info_file_path)
    dataset = DataSet(*loaded)
    split_ratios = [0.6, 0.35, 0.05]

    collected = []
    for offset in range(no_of_iterations):
        run_number = offset + 1
        print("Iteration no:", run_number)

        # A new split is requested on every run.
        train_part, test_part, validation_part = datasetfilters.split_dataset(
            dataset, split_ratios)

        outcome = experiment_iteration(train_part, test_part, validation_part)
        outcome['iteration_index'] = run_number
        collected.append(outcome)

    plot_decison_tree_iterations_results(collected)
    print_iterations_results_table(collected)
# Example no. 3
# 0
        )
        #sys.exit(0)
    elif 2 not in indexes:
        print(
            'data info file (.info) path is not present in arguments list... ending program... bye.. bye... '
        )
        #sys.exit(0)
    else:
        datafilepath = sys.argv[1]
        columns_info_file_path = sys.argv[2]

    print('Input data set is ', datasetname)

    mode = 'run'
    if (True):
        dataset = DataSet(*load_dataset(datafilepath, columns_info_file_path))
        dataset_size = dataset.size
        testing_size = 50
        training_size = dataset_size - testing_size
        ratios = [training_size / dataset_size, testing_size / dataset_size, 0]

        print('{:-<200}'.format('-'))
        print('Dataset loaded successfully, datafile:', datafilepath,
              ' datainfofilepath:', columns_info_file_path)
        training_stop_criteria = {'type': 'weights_change', 'value': 0.00005}
        training_stop_criteria['max_iterations'] = 1000
        dataset_description = {'datasetname': datasetname}
        m = 10
        if (mode == 'run'):
            #nayes_bayes_experiment(dataset_description, dataset, ratios, m)
            multi_classifier_roc_experiment(dataset_description, dataset,