Example #1
def main(P, mate, mutate):
    """Run this experiment"""
    training_ints = initialize_instances('m_trg.csv')
    testing_ints = initialize_instances('m_test.csv')
    validation_ints = initialize_instances('m_val.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(training_ints)
    relu = RELU()
    rule = RPROPUpdateRule()  # not used by the GA run below
    oa_name = "GA_{}_{}_{}".format(P, mate, mutate)
    with open(OUTFILE.replace('XXX', oa_name), 'w') as f:
        f.write('{},{},{},{},{},{},{},{}\n'.format('iteration', 'MSE_trg',
                                                   'MSE_val', 'MSE_tst',
                                                   'acc_trg', 'acc_val',
                                                   'acc_tst', 'elapsed'))
    classification_network = factory.createClassificationNetwork([
        INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, HIDDEN_LAYER3, OUTPUT_LAYER
    ], relu)
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    oa = StandardGeneticAlgorithm(P, mate, mutate, nnop)
    train(oa, classification_network, oa_name, training_ints, validation_ints,
          testing_ints, measure)
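
These scripts appear to be ABAGAIL experiments driven from Jython, and they all rely on an initialize_instances helper that is not shown (most pass a file name; Example #7's variant hard-codes its data file instead). A minimal sketch of that helper is given below, assuming ABAGAIL.jar is on the CLASSPATH and a headerless CSV whose last column is a 0/1 label; the actual data layout may differ.

# Sketch of the assumed helper (not part of the original example).
from shared import Instance


def initialize_instances(infile):
    """Read a headerless CSV file into a list of ABAGAIL Instance objects."""
    instances = []
    with open(infile, 'r') as f:
        for line in f:
            row = line.strip().split(',')
            if not row or row == ['']:
                continue
            # All columns but the last are features; the last column is the label.
            instance = Instance([float(value) for value in row[:-1]])
            instance.setLabel(Instance(float(row[-1])))
            instances.append(instance)
    return instances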
Example #2
def main(CE):
    """Run this experiment"""
    training_ints = initialize_instances(TRAIN_DATA_FILE)
    testing_ints = initialize_instances(TEST_DATA_FILE)
    validation_ints = initialize_instances(VALIDATE_DATA_FILE)
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(training_ints)
    relu = RELU()
    # 50 and 0.000001 are the defaults from RPROPUpdateRule.java
    rule = RPROPUpdateRule(0.064, 50, 0.000001)  # not used by the SA run below
    oa_name = "SA_{}".format(CE)
    with open(OUTFILE.format(oa_name), 'w') as f:
        f.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            'iteration', 'MSE_trg', 'MSE_val', 'MSE_tst', 'acc_trg', 'acc_val',
            'acc_tst', 'f1_trg', 'f1_val', 'f1_tst', 'elapsed'))
    classification_network = factory.createClassificationNetwork([
        INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, HIDDEN_LAYER3, OUTPUT_LAYER
    ], relu)
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    oa = SimulatedAnnealing(1E10, CE, nnop)
    train(oa, classification_network, oa_name, training_ints, validation_ints,
          testing_ints, measure, TRAINING_ITERATIONS, OUTFILE.format(oa_name))
Example #3
def main(ds_name, P, mate, mutate):
    """Run this experiment"""
    nn_config, train_file, val_file, test_file = get_problemset(ds_name)
    training_ints = initialize_instances(train_file)
    testing_ints = initialize_instances(test_file)
    validation_ints = initialize_instances(val_file)
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(training_ints)
    relu = RELU()
    # 50 and 0.000001 are the defaults from RPROPUpdateRule.java
    rule = RPROPUpdateRule(0.064, 50, 0.000001)  # not used by the GA run below
    oa_name = "GA_{}_{}_{}_{}".format(ds_name, P, mate, mutate)
    with open(OUTFILE.format(oa_name), 'w') as f:
        f.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            'iteration', 'MSE_trg', 'MSE_val', 'MSE_tst', 'acc_trg', 'acc_val',
            'acc_tst', 'f1_trg', 'f1_val', 'f1_tst', 'elapsed'))
    classification_network = factory.createClassificationNetwork(
        nn_config, relu)
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    oa = StandardGeneticAlgorithm(P, mate, mutate, nnop)
    train(oa, classification_network, oa_name, training_ints, validation_ints,
          testing_ints, measure, TRAINING_ITERATIONS, OUTFILE.format(oa_name))
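
Example #3 looks up its network shape and data files through a get_problemset helper that is not shown. A stand-in along the following lines would satisfy the call; every dataset name, layer size, and file name here is a placeholder borrowed loosely from the other examples, not a value from the original experiment.

# Hypothetical configuration lookup; all entries are placeholders.
PROBLEMSETS = {
    'madelon': ([500, 100, 100, 100, 1],
                'm_trg.csv', 'm_val.csv', 'm_test.csv'),
    'wine': ([11, 22, 1],
             'wine_train.csv', 'wine_validate.csv', 'wine_test.csv'),
}


def get_problemset(ds_name):
    """Return (nn_config, train_file, val_file, test_file) for a dataset name."""
    return PROBLEMSETS[ds_name]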
Example #4
def run_all():
    dataSource = 'wine'
    INPUT_LAYER = 13
    HIDDEN_LAYER = 100
    OUTPUT_LAYER = 1

    # dataSource = 'wage'
    # INPUT_LAYER = 106
    # HIDDEN_LAYER = 1000
    # OUTPUT_LAYER = 1

    train_data = initialize_instances('data/balanced_' + dataSource +
                                      '_cleaned_train.csv')
    test_data = initialize_instances('data/balanced_' + dataSource +
                                     '_cleaned_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_data)
    update_rule = RPROPUpdateRule()

    alg = 'backprop'
    classification_network = factory.createClassificationNetwork(
        [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
    oa = BatchBackPropagationTrainer(data_set, classification_network, measure,
                                     update_rule)
    fit = oa  # the backprop trainer is itself a Trainer, so no FixedIterationTrainer is needed
    run(alg, oa, fit, classification_network, measure, train_data, test_data,
        dataSource)

    alg = 'RHC'
    classification_network = factory.createClassificationNetwork(
        [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    oa = RandomizedHillClimbing(nnop)
    iters = 1
    fit = FixedIterationTrainer(oa, iters)
    run(alg, oa, fit, classification_network, measure, train_data, test_data,
        dataSource)

    alg = 'SA'
    classification_network = factory.createClassificationNetwork(
        [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    startTemp = 1E10
    coolingFactor = .8
    oa = SimulatedAnnealing(startTemp, coolingFactor, nnop)
    iters = 1
    fit = FixedIterationTrainer(oa, iters)
    run(alg, oa, fit, classification_network, measure, train_data, test_data,
        dataSource)

    alg = 'GA'
    classification_network = factory.createClassificationNetwork(
        [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    population = 200
    mates = 50
    mutations = 10
    oa = StandardGeneticAlgorithm(population, mates, mutations, nnop)
    iters = 1
    fit = FixedIterationTrainer(oa, iters)
    run(alg, oa, fit, classification_network, measure, train_data, test_data,
        dataSource)
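
Example #4 defers all training and evaluation to a run helper that is not shown. Given that iters = 1 and a FixedIterationTrainer is built for every randomized optimizer, one plausible shape for run is the loop below, which calls fit.train() once per epoch and scores the current weights in between. This is only an illustration; the function body, epoch count, and output file name are assumptions.

# Hypothetical sketch of the run() helper assumed above (not the original code).
def run(alg, oa, fit, network, measure, train_data, test_data, data_source,
        epochs=1000):
    # `measure` is accepted for parity with the calls above but unused in this sketch.
    out_path = 'results_%s_%s.csv' % (data_source, alg)  # assumed file name
    with open(out_path, 'w') as f:
        f.write('epoch,acc_train,acc_test\n')
        for epoch in range(epochs):
            fit.train()  # one epoch of backprop, or `iters` steps of the optimizer
            if alg != 'backprop':
                # The randomized optimizers keep their best weights in getOptimal()
                network.setWeights(oa.getOptimal().getData())
            accuracies = []
            for instances in (train_data, test_data):
                correct = 0
                for instance in instances:
                    network.setInputValues(instance.getData())
                    network.run()
                    actual = instance.getLabel().getContinuous()
                    predicted = network.getOutputValues().get(0)
                    if abs(predicted - actual) < 0.5:
                        correct += 1
                accuracies.append(float(correct) / len(instances) * 100.0)
            f.write('%d,%0.3f,%0.3f\n' % (epoch, accuracies[0], accuracies[1]))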
Example #5
def main():
    train_instances = initialize_instances('wine_train.csv')
    validate_instances = initialize_instances('wine_validate.csv')
    test_instances = initialize_instances('wine_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_instances)
    iteration_list = [10, 100, 500, 1000, 2500]
    cooling_list = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]

    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["SA"]
    results = ""
    error = 0
    low_quality_correct = 0
    low_quality_incorrect = 0
    high_quality_correct = 0
    high_quality_incorrect = 0
    predicted_array = []
    actual_array = []

    for name in oa_names:
        classification_network = factory.createClassificationNetwork(
            [11, 22, 1], RELU())
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set, classification_network,
                                             measure))

    with open("Results/NN/SA_Train.csv", 'w') as f:
        f.write(
            'iterations,cooling,fitness,accuracy,train_time,test_time,mse,low_correct,low_incorrect,high_correct,high_incorrect\n'
        )

    with open("Results/NN/SA_Validate.csv", 'w') as f:
        f.write('iterations,cooling,fitness,accuracy,train_time,test_time\n')

    with open("Results/NN/SA_Test.csv", 'w') as f:
        f.write(
            'iterations,cooling,fitness,accuracy,train_time,test_time,mse,low_correct,low_incorrect,high_correct,high_incorrect\n'
        )

    for cooling in cooling_list:
        for iteration in iteration_list:
            start = time.time()
            correct = 0
            incorrect = 0
            # Reset the per-run accumulators so results do not carry over between runs
            error = 0
            low_quality_correct = 0
            low_quality_incorrect = 0
            high_quality_correct = 0
            high_quality_incorrect = 0
            predicted_array = []
            actual_array = []
            sim = SimulatedAnnealing(1E11, cooling, nnop[0])

            train(sim, networks[0], oa_names[0], train_instances, measure,
                  iteration)
            end = time.time()
            training_time = end - start

            optimal_instance = sim.getOptimal()
            networks[0].setWeights(optimal_instance.getData())

            start = time.time()
            for instance in train_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()

                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)

                predicted_array.append(round(predicted))
                actual_array.append(max(min(actual, 1), 0))

                if abs(predicted - actual) < 0.5:
                    correct += 1
                    if actual == 0:
                        low_quality_correct += 1
                    else:
                        high_quality_correct += 1
                else:
                    incorrect += 1
                    if actual == 0:
                        low_quality_incorrect += 1
                    else:
                        high_quality_incorrect += 1
                result = instance.getLabel()
                network_vals = networks[0].getOutputValues()
                example = Instance(network_vals, Instance(network_vals.get(0)))
                error += measure.value(result, example)

            end = time.time()
            testing_time = end - start

            training_mse = error / len(train_instances)
            print("Low quality correct: " + str(low_quality_correct))
            print("Low quality incorrect: " + str(low_quality_incorrect))
            print("High quality correct: " + str(high_quality_correct))
            print("High quality incorrect: " + str(high_quality_incorrect))
            print("Training MSE: " + str(training_mse))

            results += "\nResults for Training %s: \nCorrectly classified %d instances." % (
                'SA', correct)
            results += "\nIncorrectly classified Training %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )

            data = '{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0, training_time,
                testing_time, training_mse, low_quality_correct,
                low_quality_incorrect, high_quality_correct,
                high_quality_incorrect)
            print(data)
            with open("Results/NN/SA_Train.csv", 'a') as f:
                f.write(data)

            correct = 0
            incorrect = 0

            for instance in validate_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()

                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)

                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1

            results += "\nResults for Cross Validation %s: \nCorrectly classified %d instances." % (
                'SA', correct)
            results += "\nIncorrectly classified Cross Validation %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )

            data = '{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0, training_time,
                testing_time)
            print(data)
            with open("Results/NN/SA_Validate.csv", 'a') as f:
                f.write(data)

            correct = 0
            incorrect = 0
            error = 0
            low_quality_correct = 0
            low_quality_incorrect = 0
            high_quality_correct = 0
            high_quality_incorrect = 0
            predicted_array = []
            actual_array = []

            for instance in test_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()

                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)

                predicted_array.append(round(predicted))
                actual_array.append(max(min(actual, 1), 0))

                if abs(predicted - actual) < 0.5:
                    correct += 1
                    if actual == 0:
                        low_quality_correct += 1
                    else:
                        high_quality_correct += 1
                else:
                    incorrect += 1
                    if actual == 0:
                        low_quality_incorrect += 1
                    else:
                        high_quality_incorrect += 1
                result = instance.getLabel()
                network_vals = networks[0].getOutputValues()
                example = Instance(network_vals, Instance(network_vals.get(0)))
                error += measure.value(result, example)

            testing_mse = error / len(test_instances)
            print("Low quality correct: " + str(low_quality_correct))
            print("Low quality incorrect: " + str(low_quality_incorrect))
            print("High quality correct: " + str(high_quality_correct))
            print("High quality incorrect: " + str(high_quality_incorrect))
            print("Testing MSE: " + str(testing_mse))

            results += "\nResults for Testing %s: \nCorrectly classified %d instances." % (
                "SA", correct)
            results += "\nIncorrectly classified Testing %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )

            data = '{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0, training_time,
                testing_time, testing_mse, low_quality_correct,
                low_quality_incorrect, high_quality_correct,
                high_quality_incorrect)
            print(data)
            with open("Results/NN/SA_Test.csv", 'a') as f:
                f.write(data)

    print(results)
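
Note that Example #5 fills predicted_array and actual_array but never reads them. If the intent was to report F1 alongside accuracy (scikit-learn is not available under Jython), a small pure-Python helper such as the following sketch could consume them; it is a suggested addition, not part of the original script.

def binary_f1(actual, predicted):
    """F1 score for 0/1 labels, treating 1 (high quality) as the positive class."""
    tp = sum(1 for a, p in zip(actual, predicted) if a == 1 and p == 1)
    fp = sum(1 for a, p in zip(actual, predicted) if a == 0 and p == 1)
    fn = sum(1 for a, p in zip(actual, predicted) if a == 1 and p == 0)
    precision = float(tp) / (tp + fp) if (tp + fp) else 0.0
    recall = float(tp) / (tp + fn) if (tp + fn) else 0.0
    if precision + recall == 0:
        return 0.0
    return 2.0 * precision * recall / (precision + recall)


# Example usage after one of the evaluation loops:
#     f1 = binary_f1(actual_array, predicted_array)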
Example #6
def main():
    train_instances = initialize_instances('wine_train.csv')
    validate_instances = initialize_instances('wine_validate.csv')
    test_instances = initialize_instances('wine_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_instances)
    iteration_list = [10, 100, 500, 1000, 2500, 5000]

    with open("Results/NN/RHC_Train.csv", 'w') as f:
        f.write('iterations,fitness,accuracy,train_time,test_time\n')

    with open("Results/NN/RHC_Validate.csv", 'w') as f:
        f.write('iterations,fitness,accuracy,train_time,test_time\n')

    with open("Results/NN/RHC_Test.csv", 'w') as f:
        f.write('iterations,fitness,accuracy,train_time,test_time\n')

    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC"]
    results = ""

    for name in oa_names:
        classification_network = factory.createClassificationNetwork([11, 22, 1], RELU())
        networks.append(classification_network)
        nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure))

    oa.append(RandomizedHillClimbing(nnop[0]))

    for i in range(len(iteration_list)):
        iteration = iteration_list[i]
        start = time.time()
        correct = 0
        incorrect = 0

        train(oa[0], networks[0], oa_names[0], train_instances, measure, iteration)
        end = time.time()
        training_time = end - start

        optimal_instance = oa[0].getOptimal()
        networks[0].setWeights(optimal_instance.getData())

        start = time.time()
        for instance in train_instances:
            networks[0].setInputValues(instance.getData())
            networks[0].run()

            actual = instance.getLabel().getContinuous()
            predicted = networks[0].getOutputValues().get(0)

            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1

        end = time.time()
        testing_time = end - start

        results += "\nResults for Training %s: \nCorrectly classified %d instances." % ('RHC', correct)
        results += "\nIncorrectly classified Training %d instances.\nPercent correctly classified: %0.03f%%" % (incorrect, float(correct)/(correct+incorrect)*100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time,)
        results += "\nTesting time: %0.03f seconds\n" % (testing_time,)

        data = '{},{},{},{},{}\n'.format(
            iteration, correct, float(correct) / (correct + incorrect) * 100.0,
            training_time, testing_time)
        print(data)
        with open("Results/NN/RHC_Train.csv", 'a') as f:
            f.write(data)

        correct = 0
        incorrect = 0

        for instance in validate_instances:
            networks[0].setInputValues(instance.getData())
            networks[0].run()

            actual = instance.getLabel().getContinuous()
            predicted = networks[0].getOutputValues().get(0)

            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1

        results += "\nResults for Cross Validation %s: \nCorrectly classified %d instances." % ('RHC', correct)
        results += "\nIncorrectly classified Cross Validation %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time,)
        results += "\nTesting time: %0.03f seconds\n" % (testing_time,)

        data = '{},{},{},{},{}\n'.format(
            iteration, correct, float(correct) / (correct + incorrect) * 100.0,
            training_time, testing_time)
        print(data)
        with open("Results/NN/RHC_Validate.csv", 'a') as f:
            f.write(data)

        correct = 0
        incorrect = 0

        for instance in test_instances:
            networks[0].setInputValues(instance.getData())
            networks[0].run()

            actual = instance.getLabel().getContinuous()
            predicted = networks[0].getOutputValues().get(0)

            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1

        results += "\nResults for Testing %s: \nCorrectly classified %d instances." % ("RHC", correct)
        results += "\nIncorrectly classified Testing %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time,)
        results += "\nTesting time: %0.03f seconds\n" % (testing_time,)

        data = '{},{},{},{},{}\n'.format(
            iteration, correct, float(correct) / (correct + incorrect) * 100.0,
            training_time, testing_time)
        print(data)
        with open("Results/NN/RHC_Test.csv", 'a') as f:
            f.write(data)

    print(results)
Example #7
def main():
    """Run algorithms on the abalone dataset."""
    instances = initialize_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(instances)

    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC", "SA", "GA"]
    results = ""

    for name in oa_names:
        classification_network = factory.createClassificationNetwork(
            [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set, classification_network,
                                             measure))

    oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
    oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))

    for i, name in enumerate(oa_names):
        start = time.time()
        correct = 0
        incorrect = 0

        train(oa[i], networks[i], oa_names[i], instances, measure)
        end = time.time()
        training_time = end - start

        optimal_instance = oa[i].getOptimal()
        networks[i].setWeights(optimal_instance.getData())

        start = time.time()
        for instance in instances:
            networks[i].setInputValues(instance.getData())
            networks[i].run()

            actual = instance.getLabel().getContinuous()
            predicted = networks[i].getOutputValues().get(0)

            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1

        end = time.time()
        testing_time = end - start

        results += "\nResults for %s: \nCorrectly classified %d instances." % (
            name, correct)
        results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time, )
        results += "\nTesting time: %0.03f seconds\n" % (testing_time, )

    print(results)
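
Example #7 closely follows ABAGAIL's stock Jython neural-network script, so its train helper is most likely the standard per-iteration loop sketched below. The iteration count is an assumption; the stock script uses a module-level TRAINING_ITERATIONS constant.

# Sketch of the conventional ABAGAIL train() helper assumed by this example.
def train(oa, network, oa_name, instances, measure, training_iterations=1000):
    """Run one optimization algorithm, printing the summed error each iteration."""
    print("\nError results for %s\n---------------------------" % (oa_name,))
    for iteration in range(training_iterations):
        oa.train()
        error = 0.00
        for instance in instances:
            network.setInputValues(instance.getData())
            network.run()
            output = instance.getLabel()
            output_values = network.getOutputValues()
            example = Instance(output_values, Instance(output_values.get(0)))
            error += measure.value(output, example)
        print("%0.03f" % error)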
Example #8
def main():
    """
    Run algorithms on the gamma dataset.
    Essentially run twice for 2-fold cross-validation.
    Metrics are evaluated outside of this file.
    """
    train_data = initialize_instances(TRAIN_FILE)
    test_data = initialize_instances(TEST_FILE)                 # Get data
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_data)

    networks = []  # BackPropagationNetwork
    nnop = []      # NeuralNetworkOptimizationProblem
    oa = []        # OptimizationAlgorithm
    oa_names = ["RHC", "SA", "GA"]
    results = ""

    # Create each network architecture and an optimization instance
    for name in oa_names:
        activation = RELU()
        # Change network size
        classification_network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], activation)
        networks.append(classification_network)
        nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure))

    # Randomized optimization algorithms
    oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
    oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))

    # Go through each optimization problem and do 2-fold CV
    for i, name in enumerate(oa_names):
        start = time.time()
        metrics = train(oa[i], networks[i], oa_names[i], train_data, test_data, measure)
        end = time.time()
        training_time = end - start
        results += "\nFold 1 train time: %0.03f seconds" % (training_time,)

        # Write data to CSV file
        with open("metrics/" + oa_names[i] + '_f1.csv', 'w') as f:
            writer = csv.writer(f)
            for metric in metrics:
                writer.writerow(metric)

    print(results)

    # 2nd fold: swap the train and test files
    train_data = initialize_instances(TEST_FILE)
    test_data = initialize_instances(TRAIN_FILE)                 # Get data
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_data)

    networks = []  # BackPropagationNetwork
    nnop = []      # NeuralNetworkOptimizationProblem
    oa = []        # OptimizationAlgorithm
    oa_names = ["RHC", "SA", "GA"]
    results = ""

    # Create each network architecture and an optimization instance
    for name in oa_names:
        activation = RELU()
        # Change network size
        classification_network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], activation)
        networks.append(classification_network)
        nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure))

    # Randomized optimization algorithms
    oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
    oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))

    # Go through each optimization problem and do 2-fold CV
    for i, name in enumerate(oa_names):
        start = time.time()
        metrics = train(oa[i], networks[i], oa_names[i], train_data, test_data, measure)
        end = time.time()
        training_time = end - start
        results += "\nFold 2 train time: %0.03f seconds" % (training_time,)

        # Write data to CSV file
        with open("metrics/" + oa_names[i] + '_f2.csv', 'w') as f:
            writer = csv.writer(f)
            for metric in metrics:
                writer.writerow(metric)

    print(results)
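
The train helper assumed by Example #8 has a different shape from the one sketched after Example #7: it receives both folds and returns an iterable of metric rows for csv.writer. A hedged sketch of that contract follows; the column choice and iteration count are assumptions.

# Hypothetical sketch of the metrics-returning train() helper assumed by Example #8.
def train(oa, network, oa_name, train_data, test_data, measure, iterations=1000):
    """Train and collect one (iteration, train_mse, test_mse) row per iteration."""
    metrics = []
    for it in range(iterations):
        oa.train()
        row = [it]
        for instances in (train_data, test_data):
            error = 0.0
            for instance in instances:
                network.setInputValues(instance.getData())
                network.run()
                output = instance.getLabel()
                output_values = network.getOutputValues()
                example = Instance(output_values, Instance(output_values.get(0)))
                error += measure.value(output, example)
            row.append(error / len(instances))
        metrics.append(row)
    return metrics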