def main(): """Run this experiment""" training_data = initialize_instances('../data/Pima-train.csv') testing_data = initialize_instances('../data/Pima-test.csv') print(len(training_data)) #testing_ints = initialize_instances('m_test.csv') #validation_ints = initialize_instances('m_val.csv') factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(training_data) relu = RELU() rule = RPROPUpdateRule() oa_names = ["RHC"] classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], relu) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = RandomizedHillClimbing(nnop) train(oa, classification_network, 'RHC', training_data, testing_data, measure)
def main(P, mate, mutate): """Run this experiment""" training_ints = initialize_instances(TRAIN_DATA_FILE) testing_ints = initialize_instances(TEST_DATA_FILE) validation_ints = initialize_instances(VALIDATE_DATA_FILE) factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(training_ints) relu = RELU() # 50 and 0.000001 are the defaults from RPROPUpdateRule.java rule = RPROPUpdateRule(0.064, 50, 0.000001) oa_name = "GA_{}_{}_{}".format(P, mate, mutate) with open(OUTFILE.format(oa_name), 'w') as f: f.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format('iteration', 'MSE_trg', 'MSE_val', 'MSE_tst', 'acc_trg', 'acc_val', 'acc_tst', 'f1_trg', 'f1_val', 'f1_tst', 'elapsed')) classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, HIDDEN_LAYER3, OUTPUT_LAYER], relu) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = StandardGeneticAlgorithm(P, mate, mutate, nnop) train(oa, classification_network, oa_name, training_ints, validation_ints, testing_ints, measure, TRAINING_ITERATIONS, OUTFILE.format(oa_name))
def Genetic_algorithm(out_path, train_inst, test_inst, P, mate, mutate, training_iterations): """Run this experiment""" factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_inst) # acti = LogisticSigmoid() acti = HyperbolicTangentSigmoid() rule = RPROPUpdateRule() oa_name = "GA_P{}_mate{}_mut{}".format(P, mate, mutate) with open(out_path.replace('GA_', oa_name), 'w') as f: f.write('{},{},{},{},{},{}\n'.format('iteration', 'MSE_trg', 'MSE_tst', 'acc_trg', 'acc_tst', 'elapsed')) classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], acti) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = StandardGeneticAlgorithm(P, mate, mutate, nnop) train(oa, classification_network, oa_name, train_inst, test_inst, measure, training_iterations, out_path.replace('GA_', oa_name))
def Simulated_annealing(out_path, train_inst, test_inst, T, CE, training_iterations): """Run this experiment""" factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_inst) # acti = LogisticSigmoid() acti = HyperbolicTangentSigmoid() rule = RPROPUpdateRule() oa_name = "SA_T{}_CE{}".format(int(T), str(CE).split('.')[-1]) with open(out_path.replace('SA_', oa_name), 'w') as f: f.write('{},{},{},{},{},{}\n'.format('iteration', 'MSE_trg', 'MSE_tst', 'acc_trg', 'acc_tst', 'elapsed')) classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], acti) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = SimulatedAnnealing(T, CE, nnop) train(oa, classification_network, oa_name, train_inst, test_inst, measure, training_iterations, out_path.replace('SA_', oa_name))
def main(ds_name, CE): """Run this experiment""" nn_config, train_file, val_file, test_file = get_problemset(ds_name) training_ints = initialize_instances(train_file) testing_ints = initialize_instances(test_file) validation_ints = initialize_instances(val_file) factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(training_ints) relu = RELU() # 50 and 0.000001 are the defaults from RPROPUpdateRule.java rule = RPROPUpdateRule(0.064, 50, 0.000001) oa_name = "SA_{}_{}".format(ds_name, CE) with open(OUTFILE.format(oa_name), 'w') as f: f.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format('iteration', 'MSE_trg', 'MSE_val', 'MSE_tst', 'acc_trg', 'acc_val', 'acc_tst', 'f1_trg', 'f1_val', 'f1_tst', 'elapsed')) classification_network = factory.createClassificationNetwork(nn_config, relu) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = SimulatedAnnealing(1E10, CE, nnop) train(oa, classification_network, oa_name, training_ints, validation_ints, testing_ints, measure, TRAINING_ITERATIONS, OUTFILE.format(oa_name))
def main(): #training_ints = initialize_instances('bCancer_trg.csv') #testing_ints = initialize_instances('bCancer_test.csv') #validation_ints = initialize_instances('bCancer_val.csv') training_ints = initialize_instances('winequality_trg.csv') testing_ints = initialize_instances('winequality_test.csv') validation_ints = initialize_instances('winequality_val.csv') factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(training_ints) hts = HyperbolicTangentSigmoid() rule = RPROPUpdateRule() oa_names = ["RHC"] classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, OUTPUT_LAYER], hts) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = RandomizedHillClimbing(nnop) train(oa, classification_network, 'RHC', training_ints, validation_ints, testing_ints, measure)
def main(): """Run this experiment""" pdb.set_trace() training_ints = initialize_instances( '/Users/lijiang/Desktop/yichuan_HW/Archive/train.csv') testing_ints = initialize_instances( '/Users/lijiang/Desktop/yichuan_HW/Archive/test.csv') validation_ints = initialize_instances( '/Users/lijiang/Desktop/yichuan_HW/Archive/validation.csv') factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(training_ints) relu = RELU() rule = RPROPUpdateRule() oa_names = ["RHC"] classification_network = factory.createClassificationNetwork([ INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, HIDDEN_LAYER3, OUTPUT_LAYER ], relu) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = RandomizedHillClimbing(nnop) train(oa, classification_network, 'RHC', training_ints, validation_ints, testing_ints, measure)
def main(P, mate, mutate): """Run this experiment""" training_ints = initialize_instances('../datasets/p_train.csv') testing_ints = initialize_instances('../datasets/p_test.csv') validation_ints = initialize_instances('../datasets/p_valid.csv') factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(training_ints) relu = RELU() rule = RPROPUpdateRule() oa_name = "GA_{}_{}_{}".format(P, mate, mutate) with open(OUTFILE.replace('XXX', oa_name), 'w') as f: f.write('{},{},{},{},{},{},{},{}\n'.format('iteration', 'MSE_trg', 'MSE_val', 'MSE_tst', 'acc_trg', 'acc_val', 'acc_tst', 'elapsed')) classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], relu) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = StandardGeneticAlgorithm(P, mate, mutate, nnop) train(oa, classification_network, oa_name, training_ints, validation_ints, testing_ints, measure)
def Random_hill_climb(out_path, train_inst, test_inst, repeats, training_iterations): """Run this experiment""" for i in range(repeats): out_path_ = out_path.replace("RHC_", 'RHC_{}'.format(str(i).zfill(3))) with open(out_path_, 'w') as f: f.write('{},{},{},{},{},{}\n'.format('iteration', 'MSE_trg', 'MSE_tst', 'acc_trg', 'acc_tst', 'elapsed')) factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_inst) # acti = LogisticSigmoid() acti = HyperbolicTangentSigmoid() rule = RPROPUpdateRule() classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], acti) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = RandomizedHillClimbing(nnop) train(oa, classification_network, 'RHC', train_inst, test_inst, measure, training_iterations, out_path_)
def main(CE): """Run this experiment""" training_ints = initialize_instances('m_trg.csv') testing_ints = initialize_instances('m_test.csv') validation_ints = initialize_instances('m_val.csv') factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(training_ints) relu = RELU() rule = RPROPUpdateRule() oa_name = "SA{}".format(CE) with open(OUTFILE.replace('XXX', oa_name), 'w') as f: f.write('{},{},{},{},{},{},{},{}\n'.format('iteration', 'MSE_trg', 'MSE_val', 'MSE_tst', 'acc_trg', 'acc_val', 'acc_tst', 'elapsed')) classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER_1, HIDDEN_LAYER_2, OUTPUT_LAYER], relu) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = SimulatedAnnealing(1E10, CE, nnop) train(oa, classification_network, oa_name, training_ints, validation_ints, testing_ints, measure)
def main(): """Run this experiment""" training_ints = initialize_instances( '/Users/Sean/School/GeorgiaTech/CS7641/Assignment2/s_trg.csv') testing_ints = initialize_instances( '/Users/Sean/School/GeorgiaTech/CS7641/Assignment2/s_test.csv') validation_ints = initialize_instances( '/Users/Sean/School/GeorgiaTech/CS7641/Assignment2/s_val.csv') factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(training_ints) sig = LogisticSigmoid() rule = RPROPUpdateRule() oa_names = ["RHC"] classification_network = factory.createClassificationNetwork([ INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, HIDDEN_LAYER3, OUTPUT_LAYER ], sig) for trial in xrange(TRIALS): oa = RandomizedHillClimbing( NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) train(oa, classification_network, 'RHC', training_ints, validation_ints, testing_ints, measure)
def rhc_network(name, network, measure, train_set, test_set, acc_func, iter_time, iters_total, iters_step, n_trials): for i_trial in range(n_trials): network_optimizer = NeuralNetworkOptimizationProblem(train_set, network, measure) rhc_instance = RandomizedHillClimbing(network_optimizer) rhc_trainer = FixedIterationTrainer(rhc_instance, iters_step) nn_state = {'network': network, 'trainer': rhc_trainer} wrapper_rhc = AlgoWrapper(nn_state, lambda state: state['trainer'].train(), lambda state: acc_func(train_set, state['network'], measure), lambda state: acc_func(test_set, state['network'], measure) ) # create name and invalidate if super empty decorated_name = "" if name is not None and name != "": decorated_name = name timed_trainer = TimedTrainer(decorated_name, wrapper_rhc, iter_time, iters_total, iters_step, _param_dict={'name':name} ) timed_trainer.run()
def main(): """Run algorithms on the cancer dataset.""" instances = initialize_instances() factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(instances) max_iterations = TRAINING_ITERATIONS hidden_layer_size = HIDDEN_LAYER # for _hidden_layer in xrange(HIDDEN_LAYER): # hidden_layer_size = _hidden_layer + 1 network = None # BackPropagationNetwork nnop = None # NeuralNetworkOptimizationProblem oa = None # OptimizationAlgorithm results = "" for population_size in [100, 200, 400]: RandomOrderFilter().filter(data_set) train_test_split = TestTrainSplitFilter(TRAIN_TEST_SPLIT) train_test_split.filter(data_set) train_set = train_test_split.getTrainingSet() test_set = train_test_split.getTestingSet() network = factory.createClassificationNetwork( [INPUT_LAYER, hidden_layer_size, OUTPUT_LAYER]) nnop = NeuralNetworkOptimizationProblem(train_set, network, measure) oa = StandardGeneticAlgorithm(population_size, GA_MATE_EACH_GEN, GA_MUTATE_EACH_GEN, nnop) start = time.time() correct = 0 incorrect = 0 train(oa, network, "GA", train_set, test_set, measure, population_size) end = time.time() training_time = end - start optimal_instance = oa.getOptimal() network.setWeights(optimal_instance.getData()) start = time.time() for instance in test_set.getInstances(): network.setInputValues(instance.getData()) network.run() predicted = instance.getLabel().getContinuous() actual = network.getOutputValues().get(0) if abs(predicted - actual) < 0.5: correct += 1 else: incorrect += 1 end = time.time() testing_time = end - start _results = "" _results += "\n[GA] population=%0.02f" % (population_size) _results += "\nResults for GA: \nCorrectly classified %d instances." 
% ( correct) _results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % ( incorrect, float(correct) / (correct + incorrect) * 100.0) _results += "\nTraining time: %0.03f seconds" % (training_time, ) _results += "\nTesting time: %0.03f seconds\n" % (testing_time, ) with open('out/ga/population-%d.log' % (population_size), 'w') as f: f.write(_results) results += _results print results
def main(): """Run algorithms on the abalone dataset.""" train_instances = initialize_instances() test_instances = initialize_instances(test=True) factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_instances) networks = [] # BackPropagationNetwork oa = [] # OptimizationAlgorithm oa_names = [] if do_rhc: oa_names.append("RHC") if do_sa: oa_names.append("SA") if do_ga: oa_names.append("GA") if do_bp: oa_names.append("BP") results = "" # For each algo, need to see if we are doing sweeps # No need to sweep rhc as there are no parameters if do_rhc and sweep == False: training_iter = TRAINING_ITERATIONS if do_fmnist: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) if do_chess: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER]) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = RandomizedHillClimbing(nnop) name = "RHC" train(oa, classification_network, name, train_instances, measure, training_iter, test_instances, True) if do_sa: training_iter = TRAINING_ITERATIONS count = 0 for temp, cooling in product(sa_temp, sa_cooling): if do_fmnist: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) if do_chess: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER]) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = SimulatedAnnealing(temp, cooling, nnop) name = "SA_sweep" if count == 0: print_head = True else: print_head = False train(oa, classification_network, name, train_instances, measure, training_iter, test_instances, print_head, temp, cooling) count += 1 if do_ga: training_iter = GA_TRAINING_ITERATIONS count = 0 for pop, prop_mate, prop_mutate in product(ga_pop, ga_prop_mate, ga_prop_mutate): if do_fmnist: 
classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) if do_chess: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER]) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) mate = int(math.floor(pop * prop_mate)) mutate = int(math.floor(pop * prop_mutate)) oa = StandardGeneticAlgorithm(pop, mate, mutate, nnop) name = "GA_sweep" if count == 0: print_head = True else: print_head = False train(oa, classification_network, name, train_instances, measure, training_iter, test_instances, print_head, pop, prop_mate, prop_mutate) count += 1 if do_bp and sweep == False: training_iter = TRAINING_ITERATIONS if do_fmnist: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) if do_chess: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER]) oa = BatchBackPropagationTrainer(data_set, classification_network, measure, RPROPUpdateRule()) name = "BP" train(oa, classification_network, name, train_instances, measure, training_iter, test_instances, True)
def run_all():
    """Run backprop, RHC, SA and GA in sequence on the selected dataset.

    Dataset and topology are chosen by the constants at the top of the
    function (wine active; wage commented out). Each algorithm gets its own
    fresh network; results are reported via run().
    """
    dataSource = 'wine'
    INPUT_LAYER = 13
    HIDDEN_LAYER = 100
    OUTPUT_LAYER = 1
    # dataSource = 'wage'
    # INPUT_LAYER = 106
    # HIDDEN_LAYER = 1000
    # OUTPUT_LAYER = 1
    train_data = initialize_instances('data/balanced_' + dataSource +
                                      '_cleaned_train.csv')
    test_data = initialize_instances('data/balanced_' + dataSource +
                                     '_cleaned_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_data)
    update_rule = RPROPUpdateRule()
    # --- Backprop baseline (the trainer doubles as its own "fit" object) ---
    alg = 'backprop'
    classification_network = factory.createClassificationNetwork(
        [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
    oa = BatchBackPropagationTrainer(data_set, classification_network,
                                     measure, update_rule)
    fit = oa
    run(alg, oa, fit, classification_network, measure, train_data, test_data,
        dataSource)
    # --- Randomized hill climbing ---
    alg = 'RHC'
    classification_network = factory.createClassificationNetwork(
        [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    oa = RandomizedHillClimbing(nnop)
    iters = 1  # run() presumably steps the FixedIterationTrainer itself — TODO confirm
    fit = FixedIterationTrainer(oa, iters)
    run(alg, oa, fit, classification_network, measure, train_data, test_data,
        dataSource)
    # --- Simulated annealing ---
    alg = 'SA'
    classification_network = factory.createClassificationNetwork(
        [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    startTemp = 1E10
    coolingFactor = .8
    oa = SimulatedAnnealing(startTemp, coolingFactor, nnop)
    iters = 1
    fit = FixedIterationTrainer(oa, iters)
    run(alg, oa, fit, classification_network, measure, train_data, test_data,
        dataSource)
    # --- Genetic algorithm ---
    alg = 'GA'
    classification_network = factory.createClassificationNetwork(
        [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], RELU())
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    population = 200
    mates = 50
    mutations = 10
    oa = StandardGeneticAlgorithm(population, mates, mutations, nnop)
    iters = 1
    fit = FixedIterationTrainer(oa, iters)
    run(alg, oa, fit, classification_network, measure, train_data, test_data,
        dataSource)
def main(): """Run algorithms on the abalone dataset.""" ## for optimizers with default setting # for n in range(REPEAT): # instances = initialize_instances(INPUT_FILE)[:5000] # # factory = BackPropagationNetworkFactory() # measure = SumOfSquaresError() # data_set = DataSet(instances) # # networks = [] # BackPropagationNetwork # nnop = [] # NeuralNetworkOptimizationProblem # oa = [] # OptimizationAlgorithm # oa_names = ["RHC", "SA", "GA"] # results = "" # # for name in oa_names: # classification_network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) # networks.append(classification_network) # nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) # # oa = [RandomizedHillClimbing(nnop[0]), # SimulatedAnnealing(1E11, .95, nnop[1]), # StandardGeneticAlgorithm(200, 100, 10, nnop[2])] # # # for i, name in enumerate(oa_names): # round_start = time.time() # if name == "GA" and n >= int(REPEAT/2): # continue # # iterdata = train(oa[i], networks[i], oa_names[i], instances,measure) # output_name = name + "_ANN_{}.csv".format(n) # round_end = time.time() # with open(output_name,'wb') as resultFile: # wr = csv.writer(resultFile, dialect='excel') # wr.writerows(iterdata) # print output_name, " : ",round_end - round_start,"seconds" for n in range(REPEAT): instances = initialize_instances(INPUT_FILE)[:5000] factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(instances) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm # oa_names = ["1e10","1e12", "1e13", "1e15"] # oa_names = ["cf0.1","cf0.25","cf0.5", "cf0.75"] oa_names = ["toMutate20", "toMutate50", "toMutate100", "toMutate180"] results = "" for name in oa_names: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) networks.append(classification_network) nnop.append( 
NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) # oa = [SimulatedAnnealing(1E10, .95, nnop[0]), # SimulatedAnnealing(1E12, .95, nnop[1]), # SimulatedAnnealing(1E13, .95, nnop[2]), # SimulatedAnnealing(1E15, .95, nnop[3])] # oa = [SimulatedAnnealing(1E11, .1, nnop[0]), # SimulatedAnnealing(1E11, .25, nnop[1]), # SimulatedAnnealing(1E11, .5, nnop[2]), # SimulatedAnnealing(1E11, .75, nnop[3])] oa = [ StandardGeneticAlgorithm(200, 100, 20, nnop[0]), StandardGeneticAlgorithm(200, 100, 50, nnop[1]), StandardGeneticAlgorithm(200, 100, 100, nnop[2]), StandardGeneticAlgorithm(200, 100, 180, nnop[3]) ] for i, name in enumerate(oa_names): round_start = time.time() # if name == "GA" and n >= int(REPEAT/2): # continue # iterdata = train(oa[i], networks[i], oa_names[i], instances, measure) output_name = name + "_ANN_{}.csv".format(n) round_end = time.time() with open(output_name, 'wb') as resultFile: wr = csv.writer(resultFile, dialect='excel') wr.writerows(iterdata) print output_name, " : ", round_end - round_start, "seconds"
def main(): accuracies = [[] for i in range(3)] training_times = [[] for i in range(3)] """Run algorithms on the dataset.""" instances = initialize_instances() factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(instances) for TRAINING_ITERATIONS in TRAINING_ITERATIONS_pool: print("Calculating with %d iterations" % TRAINING_ITERATIONS) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm oa_names = ["RHC", "SA", "GA"] results = "" for name in oa_names: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) networks.append(classification_network) nnop.append( NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) oa.append(RandomizedHillClimbing(nnop[0])) oa.append(SimulatedAnnealing(1E11, .95, nnop[1])) oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2])) for i, name in enumerate(oa_names): start = time.time() correct = 0 incorrect = 0 train(oa[i], networks[i], oa_names[i], instances, measure, TRAINING_ITERATIONS) end = time.time() training_time = end - start optimal_instance = oa[i].getOptimal() networks[i].setWeights(optimal_instance.getData()) start = time.time() for instance in instances: networks[i].setInputValues(instance.getData()) networks[i].run() predicted = instance.getLabel().getContinuous() actual = networks[i].getOutputValues().get(0) if abs(predicted - actual) < 0.5: correct += 1 else: incorrect += 1 end = time.time() testing_time = end - start results += "\nResults for %s: \nCorrectly classified %d instances." 
% ( name, correct) accuracy1 = float(correct) / (correct + incorrect) * 100.0 results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % ( incorrect, accuracy1) results += "\nTraining time: %0.03f seconds" % (training_time, ) results += "\nTesting time: %0.03f seconds\n" % (testing_time, ) accuracies[i].append(accuracy1) training_times[i].append(training_time) print results with open(OUTPUT_FILE, "w") as outFile: for i in range(1): outFile.write(','.join([ "iterations", "rhc_accuracy", "rhc_training_time", "sa_accuracy", "sa_training_time", "ga_accuracy", "ga_training_time" ]) + '\n') for i in range(len(TRAINING_ITERATIONS_pool)): outFile.write(','.join([ str(TRAINING_ITERATIONS_pool[i]), str(accuracies[0][i]), str(training_times[0][i]), str(accuracies[1][i]), str(training_times[1][i]), str(accuracies[2][i]), str(training_times[2][i]) ]) + '\n') print("the end of the program")
def main(trainfile, testfile, validfile, oa_name, i, params): print("== [{}] ==".format(oa_name)) res = {} #for i in range(25): res[i] = {} if i == 9: print("Invalid i %d" % (i)) sys.exit(1) print("LABEL: {}".format(i)) traininstances = initialize_instances(trainfile, i) testinstances = initialize_instances(testfile, i) validinstances = initialize_instances(validfile, i) factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(traininstances) rule = RPROPUpdateRule() # was networks[] classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = None # was oa = [] suffix = "" if oa_name == "BP": oa = BatchBackPropagationTrainer(data_set, classification_network, measure, rule) if oa_name == "RHC": oa = RandomizedHillClimbing(nnop) if oa_name == "SA": suffix = '-' + '-'.join(params) oa = SimulatedAnnealing(float(params[0]), float(params[1]), nnop) if oa_name == "GA": suffix = '-' + '-'.join(params) oa = StandardGeneticAlgorithm(int(params[0]), int(params[1]), int(params[2]), nnop) ttvinstances = { 'train': traininstances, 'test': testinstances, 'valid': validinstances } train_start = timeit.default_timer() train(oa, classification_network, oa_name, ttvinstances, measure, i, suffix) train_end = timeit.default_timer() print 'train time: %d secs' % (int(train_end - train_start)) if oa_name != "BP": optimal_instance = oa.getOptimal() classification_network.setWeights(optimal_instance.getData()) ttvinstances = { 'train': traininstances, 'valid': validinstances, 'test': testinstances } for key, instances in zip(ttvinstances.keys(), ttvinstances.values()): query_start = timeit.default_timer() tp = 0. fp = 0. fn = 0. tn = 0. precision = 0. recall = 0. f1 = 0. print "scoring %s..." 
% (key) for instance in instances: classification_network.setInputValues(instance.getData()) classification_network.run() actual = instance.getLabel().getContinuous() predicted = classification_network.getOutputValues().get(0) #print ('actual = %.3f, predicted = %.3f' % (actual, predicted)) if actual == 1.: if predicted >= 0.5: tp += 1. else: fn += 1. else: if predicted >= 0.5: fp += 1. else: tn += 1. query_end = timeit.default_timer() if tp + fp > 0.: precision = tp / (tp + fp) if fn + tp > 0.: recall = tp / (fn + tp) if precision + recall > 0.: f1 = 2. * precision * recall / (precision + recall) correct = tp + tn total = correct + fp + fn print "%s f1 = %0.10f" % (key, f1) print "%s accuracy = %0.10f" % (key, correct / total) print "%s query time: %d secs" % (key, int(query_end - query_start))
def main(): """ Run algorithms on the gamma dataset. Essentially ran twice for 2-fold cross validation Metrics are evaluated outside of this file """ train_data = initialize_instances(TRAIN_FILE) test_data = initialize_instances(TEST_FILE) # Get data factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_data) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm oa_names = ["RHC", "SA", "GA"] results = "" # Create each network architecture and an optimization instance for name in oa_names: activation = RELU() # Change network size classification_network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], activation) networks.append(classification_network) nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) # Randomized Optimzation Algos oa.append(RandomizedHillClimbing(nnop[0])) oa.append(SimulatedAnnealing(1E11, .95, nnop[1])) oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2])) # Go through each optimization problem and do 2-fold CV for i, name in enumerate(oa_names): start = time.time() metrics = train(oa[i], networks[i], oa_names[i], train_data, test_data, measure) end = time.time() training_time = end - start results += "\nFold 1 train time: %0.03f seconds" % (training_time,) # Write data to CSV file with open("metrics/" + oa_names[i] + '_f1.csv', 'w') as f: writer = csv.writer(f) for metric in metrics: writer.writerow(metric) print results # 2nd fold; train_data = initialize_instances(TEST_FILE) test_data = initialize_instances(TRAIN_FILE) # Get data factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_data) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm oa_names = ["RHC", "SA", "GA"] results = "" # Create each network architecture and an optimization 
instance for name in oa_names: activation = RELU() # Change network size classification_network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], activation) networks.append(classification_network) nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) # Randomized Optimzation Algos oa.append(RandomizedHillClimbing(nnop[0])) oa.append(SimulatedAnnealing(1E11, .95, nnop[1])) oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2])) # Go through each optimization problem and do 2-fold CV for i, name in enumerate(oa_names): start = time.time() metrics = train(oa[i], networks[i], oa_names[i], train_data, test_data, measure) end = time.time() training_time = end - start results += "\nFold 1 train time: %0.03f seconds" % (training_time,) # Write data to CSV file with open("metrics/" + oa_names[i] + '_f2.csv', 'w') as f: writer = csv.writer(f) for metric in metrics: writer.writerow(metric) print results
def main():
    """Sweep simulated-annealing cooling factors over several iteration budgets.

    For each (optimizer, parameter set) pair a CSV is written recording
    train/test accuracy and timing at each iteration count. The dispatch
    tables (OA/params/identifier) make it easy to add more optimizers.
    """
    optalgs = ['SA']
    # Optimizer-name -> constructor; parameters below are splatted in.
    OA = {
        'SA': SimulatedAnnealing
    }
    # [start_temperature, cooling_factor] combinations to sweep.
    params = {
        'SA': [[1e2, 0.15], [1e2, 0.25], [1e2, 0.35], [1e2, 0.45],
               [1e2, 0.55], [1e2, 0.65], [1e2, 0.75], [1e2, 0.85],
               [1e2, 0.95]]
    }
    # Builds the filename-safe tag for a parameter set (e.g. 0.95 -> '0_95').
    identifier = {
        'SA': lambda p: str(p[1]).replace('.', '_')
    }
    iterations = [10, 50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000]
    train_instances, test_instances = initialize_instances()
    data_set = DataSet(train_instances)
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    for optalg in optalgs:
        for param in params[optalg]:
            output_filename = '%s-%s.csv' % (optalg,
                                             identifier[optalg](param))
            csv_file = open(output_filename, 'w')
            fields = ['num_iterations', 'train_accuracy', 'test_accuracy',
                      'train_time', 'test_time']
            writer = csv.DictWriter(csv_file, fieldnames=fields)
            writer.writeheader()
            for num_iterations in iterations:
                # Fresh network per iteration budget so runs are independent.
                network = factory.createClassificationNetwork(
                    [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER],
                    LogisticSigmoid())
                nnop = NeuralNetworkOptimizationProblem(data_set, network,
                                                        measure)
                # Instantiate the optimizer with its params plus the problem.
                oa = OA[optalg](*(param + [nnop]))
                start = time.time()
                train(oa, network, optalg, train_instances, measure,
                      num_iterations)
                end = time.time()
                train_time = end - start
                # Load the best weights found before scoring.
                optimal_instance = oa.getOptimal()
                network.setWeights(optimal_instance.getData())
                train_accuracy = test(network, train_instances)
                start = time.time()
                test_accuracy = test(network, test_instances)
                end = time.time()
                test_time = end - start
                results = {
                    'num_iterations': num_iterations,
                    'train_accuracy': train_accuracy,
                    'test_accuracy': test_accuracy,
                    'train_time': train_time,
                    'test_time': test_time
                }
                print optalg, param, results
                writer.writerow(results)
            csv_file.close()
        print '------'
    print '***** ***** ***** ***** *****'
def main():
    """Train RHC and five SA variants (different cooling rates) on the spam
    dataset, then write per-algorithm train/test accuracy and timings to
    nn_spam_results_RHC_SA.csv and print them.
    """
    trainingInstances, testingInstances = initialize_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(trainingInstances)
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC", "SA_15", "SA_35", "SA_55", "SA_75", "SA_95"]
    #oa_names=["GA_100_50_5", "GA_200_50_5", "GA_100_50_10", "GA_200_50_10", "GA_100_100_5", "GA_200_100_5", "GA_100_100_10", "GA_200_100_10"]
    #oa_names=["GA_200_100_5", "GA_100_100_10", "GA_200_100_10"]
    for name in oa_names:
        #use RELU activation function
        classification_network = factory.createClassificationNetwork(
            [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], ReLU())
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set, classification_network,
                                             measure))
    # One optimizer per entry in oa_names: oa[k] pairs with networks[k].
    oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(1E11, .15, nnop[1]))
    oa.append(SimulatedAnnealing(1E11, .35, nnop[2]))
    oa.append(SimulatedAnnealing(1E11, .55, nnop[3]))
    oa.append(SimulatedAnnealing(1E11, .75, nnop[4]))
    oa.append(SimulatedAnnealing(1E11, .95, nnop[5]))
    # oa.append(StandardGeneticAlgorithm(100, 50, 5, nnop[0]))
    # oa.append(StandardGeneticAlgorithm(200, 50, 5, nnop[1]))
    # oa.append(StandardGeneticAlgorithm(100, 50, 10, nnop[2]))
    # oa.append(StandardGeneticAlgorithm(200, 50, 10, nnop[3]))
    # oa.append(StandardGeneticAlgorithm(100, 100, 5, nnop[4]))
    #oa.append(StandardGeneticAlgorithm(200, 100, 5, nnop[0]))
    #oa.append(StandardGeneticAlgorithm(100, 100, 10, nnop[1]))
    #oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))
    with open('nn_spam_results_RHC_SA.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        for i, name in enumerate(oa_names):
            results = ''
            start = time.time()
            traincorrect = 0
            trainincorrect = 0
            testcorrect = 0
            testincorrect = 0
            train(oa[i], networks[i], oa_names[i], trainingInstances,
                  testingInstances, measure)
            end = time.time()
            training_time = end - start
            # Load the best weights found by the optimizer into the network.
            optimal_instance = oa[i].getOptimal()
            networks[i].setWeights(optimal_instance.getData())
            start = time.time()
            for instance in trainingInstances:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                # NOTE(review): names look swapped -- the label is the actual
                # value and the network output is the prediction; the
                # |difference| < 0.5 test is symmetric so counts are unaffected.
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    traincorrect += 1
                else:
                    trainincorrect += 1
            for instance in testingInstances:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    testcorrect += 1
                else:
                    testincorrect += 1
            end = time.time()
            testing_time = end - start
            results += "\nResults for %s: \nCorrectly classified %d training instances." % (
                name, traincorrect)
            results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
                trainincorrect,
                float(traincorrect) / (traincorrect + trainincorrect) * 100.0)
            results += "\nResults for %s: \nCorrectly classified %d testing instances." % (
                name, testcorrect)
            results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
                testincorrect,
                float(testcorrect) / (testcorrect + testincorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            print results
            # Whole human-readable report is stored as a single CSV cell.
            writer.writerow([results])
            writer.writerow('')
def main(): """Run algorithms on the gamma dataset.""" instances = initialize_instances() factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(instances) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm oa_names = ["SA"] results = "" graph_x = "" graph_y = "" classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) networks.append(classification_network) nnop.append( NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) for j in [0.15, 0.35, 0.55, 0.75, 0.95]: oa.append(SimulatedAnnealing(1E11, j, nnop[0])) for i, name in enumerate(oa_names): start = time.time() correct = 0 incorrect = 0 train(oa[i], networks[i], oa_names[i], instances, measure) end = time.time() training_time = end - start optimal_instance = oa[i].getOptimal() networks[i].setWeights(optimal_instance.getData()) start = time.time() for instance in instances: networks[i].setInputValues(instance.getData()) networks[i].run() predicted = instance.getLabel().getContinuous() actual = networks[i].getOutputValues().get(0) if abs(predicted - actual) < 0.5: correct += 1 else: incorrect += 1 end = time.time() testing_time = end - start results += "\nResults for %s, %s: \nCorrectly classified %d instances." % ( name, j, correct) results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % ( incorrect, float(correct) / (correct + incorrect) * 100.0) results += "\nTraining time: %0.03f seconds" % (training_time, ) results += "\nTesting time: %0.03f seconds\n" % (testing_time, ) graph_x += ",'%.0E'" % (j) graph_y += ",%0.2f" % ((float(correct) / (correct + incorrect) * 100.0)) print results print graph_x print graph_y
def main():
    """Run algorithms on the cancer dataset.

    Shuffles and splits the data, trains RHC, SA and GA on the training
    split, scores each on the test split, and logs per-algorithm results
    to out/log/<name>.log as well as stdout.
    """
    instances = initialize_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(instances)
    max_iterations = TRAINING_ITERATIONS
    hidden_layer_size = HIDDEN_LAYER
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC", "SA", "GA"]
    results = ""
    # Shuffle in place, then carve out train/test subsets.
    RandomOrderFilter().filter(data_set)
    train_test_split = TestTrainSplitFilter(TRAIN_TEST_SPLIT)
    train_test_split.filter(data_set)
    train_set = train_test_split.getTrainingSet()
    test_set = train_test_split.getTestingSet()
    for name in oa_names:
        classification_network = factory.createClassificationNetwork(
            [INPUT_LAYER, hidden_layer_size, OUTPUT_LAYER])
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(train_set, classification_network,
                                             measure))
    # One optimizer per name: oa[k] pairs with oa_names[k]/networks[k].
    oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(SA_START_TEMPERATURE, SA_COOLING, nnop[1]))
    oa.append(
        StandardGeneticAlgorithm(GA_POPULATION_SIZE, GA_MATE_EACH_GEN,
                                 GA_MUTATE_EACH_GEN, nnop[2]))
    for i, name in enumerate(oa_names):
        start = time.time()
        correct = 0
        incorrect = 0
        train(oa[i], networks[i], oa_names[i], train_set, test_set, measure,
              max_iterations=max_iterations)
        end = time.time()
        training_time = end - start
        # Load the best weights found before scoring on the held-out set.
        optimal_instance = oa[i].getOptimal()
        networks[i].setWeights(optimal_instance.getData())
        start = time.time()
        for instance in test_set.getInstances():
            networks[i].setInputValues(instance.getData())
            networks[i].run()
            # NOTE(review): names look swapped (label vs. network output);
            # the |difference| < 0.5 test is symmetric so counts are correct.
            predicted = instance.getLabel().getContinuous()
            actual = networks[i].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        _results = ""
        _results += "\n[%s] hidden_layer=%d, iterations=%d" % (
            name, hidden_layer_size, max_iterations)
        _results += "\nResults for %s: \nCorrectly classified %d instances." % (
            name, correct)
        _results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        _results += "\nTraining time: %0.03f seconds" % (training_time, )
        _results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
        # Per-algorithm log file (out/log/ must exist).
        with open('out/log/%s.log' % (oa_names[i]), 'w') as f:
            f.write(_results)
        results += _results
    print results
def main(): optalgs = ['GA-new'] OA = { 'GA': StandardGeneticAlgorithm, } # params = { # 'GA': [ # [10, 5, 5], [20, 10, 10], [30, 15, 15], [40, 30, 20], [80, 50, 30], # [150, 100, 30], [300, 120, 40], [500, 300, 50] # ], # } # varying population params = { 'GA': [ ], } identifier = { 'GA': lambda p: '_'.join([str(v) for v in p]), } iterations = [1000, 2000] train_instances, test_instances = initialize_instances() data_set = DataSet(train_instances) factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() for optalg in optalgs: for param in params[optalg]: output_filename = '%s-%s.csv' % (optalg, identifier[optalg](param)) csv_file = open(output_filename, 'w') fields = ['num_iterations', 'train_accuracy', 'test_accuracy', 'train_time', 'test_time'] writer = csv.DictWriter(csv_file, fieldnames=fields) writer.writeheader() for num_iterations in iterations: network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], \ LogisticSigmoid()) nnop = NeuralNetworkOptimizationProblem(data_set, network, measure) oa = OA[optalg](*(param + [nnop])) start = time.time() train(oa, network, optalg, train_instances, measure, num_iterations) end = time.time() train_time = end - start optimal_instance = oa.getOptimal() network.setWeights(optimal_instance.getData()) train_accuracy = test(network, train_instances) start = time.time() test_accuracy = test(network, test_instances) end = time.time() test_time = end - start results = { 'num_iterations': num_iterations, 'train_accuracy': train_accuracy, 'test_accuracy': test_accuracy, 'train_time': train_time, 'test_time': test_time } print optalg, param, results writer.writerow(results) csv_file.close() print '------' print '***** ***** ***** ***** *****'
def main():
    """Grid-search SimulatedAnnealing on the wine dataset over cooling
    rates and iteration budgets, logging train/validation/test metrics to
    CSVs under Results/NN/ and printing a running summary.
    """
    train_instances = initialize_instances('wine_train.csv')
    validate_instances = initialize_instances('wine_validate.csv')
    test_instances = initialize_instances('wine_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_instances)
    iteration_list = [10, 100, 500, 1000, 2500]
    cooling_list = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["SA"]
    results = ""
    # NOTE(review): these accumulators are initialised once and only reset
    # before the TEST pass below, so the training-pass counters and `error`
    # carry over between grid combinations (a combination's training MSE
    # includes the previous combination's test error) -- confirm intent.
    error = 0
    low_quality_correct = 0
    low_quality_incorrect = 0
    high_quality_correct = 0
    high_quality_incorrect = 0
    predicted_array = []
    actual_array = []
    for name in oa_names:
        # 11 inputs, 22 hidden units, 1 output; RELU activation.
        classification_network = factory.createClassificationNetwork(
            [11, 22, 1], RELU())
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set, classification_network,
                                             measure))
    # Re-create the CSVs with headers; rows are appended per combination.
    with open("Results/NN/SA_Train.csv", 'w') as f:
        f.write(
            'iterations,cooling,fitness,accuracy,train_time,test_time,mse,low_correct,low_incorrect,high_correct,high_incorrect\n'
        )
    with open("Results/NN/SA_Validate.csv", 'w') as f:
        f.write('iterations,cooling,fitness,accuracy,train_time,test_time\n')
    with open("Results/NN/SA_Test.csv", 'w') as f:
        f.write(
            'iterations,cooling,fitness,accuracy,train_time,test_time,mse,low_correct,low_incorrect,high_correct,high_incorrect\n'
        )
    for p in range(len(cooling_list)):
        for i in range(len(iteration_list)):
            cooling = cooling_list[p]
            iteration = iteration_list[i]
            start = time.time()
            correct = 0
            incorrect = 0
            # Fresh SA per combination; NOTE(review): it reuses the shared
            # networks[0]/nnop[0], so weights appear to carry over between
            # combinations -- confirm intent.
            sim = SimulatedAnnealing(1E11, cooling, nnop[0])
            train(sim, networks[0], oa_names[0], train_instances, measure,
                  iteration)
            end = time.time()
            training_time = end - start
            # Score with the best weights found.
            optimal_instance = sim.getOptimal()
            networks[0].setWeights(optimal_instance.getData())
            start = time.time()
            # ---- training-set evaluation ----
            for instance in train_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()
                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)  # clamp to [0, 1]
                predicted_array.append(round(predicted))
                actual_array.append(max(min(actual, 1), 0))
                if abs(predicted - actual) < 0.5:
                    correct += 1
                    if actual == 0:
                        low_quality_correct += 1
                    else:
                        high_quality_correct += 1
                else:
                    incorrect += 1
                    if actual == 0:
                        low_quality_incorrect += 1
                    else:
                        high_quality_incorrect += 1
                # Accumulate sum-of-squares error between label and output.
                result = instance.getLabel()
                network_vals = networks[0].getOutputValues()
                example = Instance(network_vals, Instance(network_vals.get(0)))
                error += measure.value(result, example)
            end = time.time()
            testing_time = end - start
            training_mse = error / len(train_instances)
            print("Low quality correct: " + str(low_quality_correct))
            print("Low quality incorrect: " + str(low_quality_incorrect))
            print("High quality correct: " + str(high_quality_correct))
            print("High quality incorrect: " + str(high_quality_incorrect))
            print("Training MSE: " + str(training_mse))
            results += "\nResults for Training %s: \nCorrectly classified %d instances." % (
                'SA', correct)
            results += "\nIncorrectly classified Training %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            # The 'fitness' column receives the raw correct-count.
            data = '{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0, training_time,
                testing_time, training_mse, low_quality_correct,
                low_quality_incorrect, high_quality_correct,
                high_quality_incorrect)
            print(data)
            with open("Results/NN/SA_Train.csv", 'a') as f:
                f.write(data)
            # ---- validation-set evaluation ----
            correct = 0
            incorrect = 0
            for instance in validate_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()
                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            results += "\nResults for Cross Validation %s: \nCorrectly classified %d instances." % (
                'SA', correct)
            results += "\nIncorrectly classified Cross Validation %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            data = '{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0, training_time,
                testing_time)
            print(data)
            with open("Results/NN/SA_Validate.csv", 'a') as f:
                f.write(data)
            # ---- test-set evaluation (counters are reset only here) ----
            correct = 0
            incorrect = 0
            error = 0
            low_quality_correct = 0
            low_quality_incorrect = 0
            high_quality_correct = 0
            high_quality_incorrect = 0
            predicted_array = []
            actual_array = []
            for instance in test_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()
                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)
                predicted_array.append(round(predicted))
                actual_array.append(max(min(actual, 1), 0))
                if abs(predicted - actual) < 0.5:
                    correct += 1
                    if actual == 0:
                        low_quality_correct += 1
                    else:
                        high_quality_correct += 1
                else:
                    incorrect += 1
                    if actual == 0:
                        low_quality_incorrect += 1
                    else:
                        high_quality_incorrect += 1
                result = instance.getLabel()
                network_vals = networks[0].getOutputValues()
                example = Instance(network_vals, Instance(network_vals.get(0)))
                error += measure.value(result, example)
            testing_mse = error / len(test_instances)
            print("Low quality correct: " + str(low_quality_correct))
            print("Low quality incorrect: " + str(low_quality_incorrect))
            print("High quality correct: " + str(high_quality_correct))
            print("High quality incorrect: " + str(high_quality_incorrect))
            print("Testing MSE: " + str(testing_mse))
            results += "\nResults for Testing %s: \nCorrectly classified %d instances." % (
                "SA", correct)
            results += "\nIncorrectly classified Testing %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            data = '{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0, training_time,
                testing_time, testing_mse, low_quality_correct,
                low_quality_incorrect, high_quality_correct,
                high_quality_incorrect)
            print(data)
            with open("Results/NN/SA_Test.csv", 'a') as f:
                f.write(data)
    print results
def main():
    """Sweep RHC training iteration budgets on the wine dataset, recording
    accuracy and timings for the train, validation and test splits to CSVs
    under Results/NN/.
    """
    train_instances = initialize_instances('wine_train.csv')
    validate_instances = initialize_instances('wine_validate.csv')
    test_instances = initialize_instances('wine_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_instances)
    iteration_list = [10, 100, 500, 1000, 2500, 5000]
    # Re-create the CSVs with headers; rows are appended per budget.
    with open("Results/NN/RHC_Train.csv", 'w') as f:
        f.write('iterations,fitness,accuracy,train_time,test_time\n')
    with open("Results/NN/RHC_Validate.csv", 'w') as f:
        f.write('iterations,fitness,accuracy,train_time,test_time\n')
    with open("Results/NN/RHC_Test.csv", 'w') as f:
        f.write('iterations,fitness,accuracy,train_time,test_time\n')
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC"]
    results = ""
    for name in oa_names:
        # 11 inputs, 22 hidden units, 1 output; RELU activation.
        classification_network = factory.createClassificationNetwork([11, 22, 1], RELU())
        networks.append(classification_network)
        nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure))
    oa.append(RandomizedHillClimbing(nnop[0]))
    # NOTE(review): the single RHC instance/network is reused across all
    # budgets, so each pass appears to continue from the previous weights
    # rather than restarting -- iteration counts would then be cumulative;
    # confirm intent.
    for i in range(len(iteration_list)):
        iteration = iteration_list[i]
        start = time.time()
        correct = 0
        incorrect = 0
        train(oa[0], networks[0], oa_names[0], train_instances, measure,iteration)
        end = time.time()
        training_time = end - start
        # Score with the best weights found.
        optimal_instance = oa[0].getOptimal()
        networks[0].setWeights(optimal_instance.getData())
        start = time.time()
        # ---- training-set evaluation ----
        for instance in train_instances:
            networks[0].setInputValues(instance.getData())
            networks[0].run()
            # NOTE(review): names look swapped (label vs. network output);
            # the |difference| < 0.5 test is symmetric so counts are correct.
            predicted = instance.getLabel().getContinuous()
            actual = networks[0].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        results += "\nResults for Training %s: \nCorrectly classified %d instances." % ('RHC', correct)
        results += "\nIncorrectly classified Training %d instances.\nPercent correctly classified: %0.03f%%" % (incorrect, float(correct)/(correct+incorrect)*100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time,)
        results += "\nTesting time: %0.03f seconds\n" % (testing_time,)
        # The 'fitness' column receives the raw correct-count.
        data = '{},{},{},{},{}\n'.format(iteration, correct, float(correct)/(correct+incorrect)*100.0, training_time,testing_time)
        print(data)
        with open("Results/NN/RHC_Train.csv", 'a') as f:
            f.write(data)
        # ---- validation-set evaluation ----
        correct = 0
        incorrect = 0
        for instance in validate_instances:
            networks[0].setInputValues(instance.getData())
            networks[0].run()
            predicted = instance.getLabel().getContinuous()
            actual = networks[0].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        results += "\nResults for Cross Validation %s: \nCorrectly classified %d instances." % ('RHC', correct)
        results += "\nIncorrectly classified Cross Validation %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time,)
        results += "\nTesting time: %0.03f seconds\n" % (testing_time,)
        data = '{},{},{},{},{}\n'.format(iteration, correct, float(correct) / (correct + incorrect) * 100.0, training_time, testing_time)
        print(data)
        with open("Results/NN/RHC_Validate.csv", 'a') as f:
            f.write(data)
        # ---- test-set evaluation ----
        correct = 0
        incorrect = 0
        for instance in test_instances:
            networks[0].setInputValues(instance.getData())
            networks[0].run()
            predicted = instance.getLabel().getContinuous()
            actual = networks[0].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        results += "\nResults for Testing %s: \nCorrectly classified %d instances." % ("RHC", correct)
        results += "\nIncorrectly classified Testing %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time,)
        results += "\nTesting time: %0.03f seconds\n" % (testing_time,)
        data = '{},{},{},{},{}\n'.format(iteration, correct, float(correct) / (correct + incorrect) * 100.0, training_time, testing_time)
        print(data)
        with open("Results/NN/RHC_Test.csv", 'a') as f:
            f.write(data)
    print results
def main(): """Run algorithms on the abalone dataset.""" instances = initialize_instances() factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(instances) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm # oa_names = ["RHC", "SA", "GA"] oa_names = ["SA"] results = "" for name in oa_names: classification_network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER_1, HIDDEN_LAYER_2, OUTPUT_LAYER], LogisticSigmoid()) networks.append(classification_network) nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) # oa.append(RandomizedHillClimbing(nnop[0])) oa.append(SimulatedAnnealing(1E11, .8, nnop[0])) # oa.append(StandardGeneticAlgorithm(300, 150, 15, nnop[2])) for i, name in enumerate(oa_names): start = time.time() correct = 0 incorrect = 0 err_hist = train(oa[i], networks[i], oa_names[i], instances, measure) end = time.time() training_time = end - start # output error history EH_FILE = name+'_3000_0.8.csv' with open(EH_FILE, 'w') as f: writer = csv.writer(f) writer.writerows(err_hist) optimal_instance = oa[i].getOptimal() networks[i].setWeights(optimal_instance.getData()) start = time.time() for instance in instances: networks[i].setInputValues(instance.getData()) networks[i].run() y_true = instance.getLabel().getContinuous() y_prob = networks[i].getOutputValues().get(0) if abs(y_true - y_prob) < 0.5: correct += 1 else: incorrect += 1 end = time.time() testing_time = end - start results += "\nResults for %s: \nCorrectly classified %d instances." % (name, correct) results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (incorrect, float(correct)/(correct+incorrect)*100.0) results += "\nTraining time: %0.03f seconds" % (training_time,) results += "\nTesting time: %0.03f seconds\n" % (testing_time,) print results
def main(): """Run algorithms on the abalone dataset.""" train_instances = initialize_instances(TRAIN_FILE) test_instances = initialize_instances(TEST_FILE) factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_instances) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm oa_names = ["RHC", "SA", "GA"] results = "" for name in oa_names: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) networks.append(classification_network) nnop.append( NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) oa.append(RandomizedHillClimbing(nnop[0])) oa.append(SimulatedAnnealing(1E11, .95, nnop[1])) oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2])) for i, name in enumerate(oa_names): start = time.time() correct = 0 incorrect = 0 train(oa[i], networks[i], oa_names[i], train_instances, measure) end = time.time() training_time = end - start optimal_instance = oa[i].getOptimal() networks[i].setWeights(optimal_instance.getData()) start = time.time() for instance in test_instances: networks[i].setInputValues(instance.getData()) networks[i].run() predicted = instance.getLabel().getContinuous() actual = networks[i].getOutputValues().get(0) if abs(predicted - actual) < 0.5: correct += 1 else: incorrect += 1 end = time.time() testing_time = end - start results += "\nResults for %s: \nCorrectly classified %d instances." % ( name, correct) results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % ( incorrect, float(correct) / (correct + incorrect) * 100.0) results += "\nTraining time: %0.03f seconds" % (training_time, ) results += "\nTesting time: %0.03f seconds\n" % (testing_time, ) print results
def main():
    """Run algorithms on the diabetes dataset.

    For each of RHC / SA / GA: train once on the full training set and
    print train & test accuracy, then build a learning curve (accuracy vs.
    training-set size) and an iteration curve (accuracy vs. iteration
    budget).  Both curve dictionaries and their x-axes are pickled at the
    end.
    """
    learningCurve_data = {}
    # NOTE(review): the two *_file names below are never used -- the pickle
    # dumps at the bottom use different hard-coded filenames; confirm which
    # names are intended.
    learning_curve_file = "NN_learning.pickle"
    numIterations_data = {}
    num_iterations_file = "NN_iterations.pickle"
    trainingInstances, testingInstances = initialize_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(trainingInstances)
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC", "SA", "GA"]
    for name in oa_names:
        classification_network = factory.createClassificationNetwork(
            [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER])
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set, classification_network,
                                             measure))
    # One optimizer per name: oa[k] pairs with oa_names[k]/networks[k].
    oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
    oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))
    # Training-set sizes for the learning curve; iteration budgets for the
    # iteration curve.
    learningCurve_size = xrange(20, len(trainingInstances), 80)
    numIterations_iters = xrange(100, 5000, 150)
    for i, name in enumerate(oa_names):
        start = time.time()
        correct = 0
        incorrect = 0
        train(oa[i], networks[i], oa_names[i], trainingInstances, measure)
        end = time.time()
        training_time = end - start
        print "\nTraining time: %0.03f seconds" % (training_time, )
        # Score with the best weights found.
        optimal_instance = oa[i].getOptimal()
        networks[i].setWeights(optimal_instance.getData())
        start = time.time()
        for instance in trainingInstances:
            networks[i].setInputValues(instance.getData())
            networks[i].run()
            # NOTE(review): names look swapped (label vs. network output);
            # the |difference| < 0.5 test is symmetric so counts are correct.
            predicted = instance.getLabel().getContinuous()
            # print networks[i].getOutputValues()
            actual = networks[i].getOutputValues().get(0)
            # print predicted
            # print actual
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        print "\nTRAINING: Results for %s: \nCorrectly classified %d instances." % (
            name, correct)
        print "\nTRAINING: Incorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        print "\nTRAINING: Testing time: %0.03f seconds\n" % (testing_time, )
        correct = 0
        incorrect = 0
        start = time.time()
        for instance in testingInstances:
            networks[i].setInputValues(instance.getData())
            networks[i].run()
            predicted = instance.getLabel().getContinuous()
            actual = networks[i].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        print "\nTESTING: Results for %s: \nCorrectly classified %d instances." % (
            name, correct)
        print "\nTESTING: Incorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        print "\nTESTING: Testing time: %0.03f seconds\n" % (testing_time, )
        # ---- learning curve: vary training-set size ----
        trainAccuracy = []
        testAccuracy = []
        for num in learningCurve_size:
            data_set = DataSet(trainingInstances[:num])
            # Rebuild all three networks/problems for this subset size.
            networks = []  # BackPropagationNetwork
            nnop = []  # NeuralNetworkOptimizationProblem
            oa = []  # OptimizationAlgorithm
            # NOTE(review): this inner loop rebinds `name`, shadowing the
            # outer enumerate variable.
            for name in oa_names:
                classification_network = factory.createClassificationNetwork(
                    [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER])
                networks.append(classification_network)
                nnop.append(
                    NeuralNetworkOptimizationProblem(data_set,
                                                     classification_network,
                                                     measure))
            oa.append(RandomizedHillClimbing(nnop[0]))
            oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
            oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))
            # NOTE(review): trains with the full instance list even though
            # data_set holds only the first `num` instances -- confirm which
            # collection `train` actually iterates.
            train(oa[i], networks[i], oa_names[i], trainingInstances, measure)
            optimal_instance = oa[i].getOptimal()
            networks[i].setWeights(optimal_instance.getData())
            correct = 0
            incorrect = 0
            for instance in trainingInstances[:num]:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            trainAccuracy.append(
                float(correct) / (correct + incorrect) * 100.0)
            correct = 0
            incorrect = 0
            for instance in testingInstances:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            testAccuracy.append(float(correct) / (correct + incorrect) * 100.0)
        learningCurve_data[oa_names[i]] = [trainAccuracy, testAccuracy]
        # ---- iteration curve: vary training iteration budget ----
        trainAccuracy = []
        testAccuracy = []
        for num in numIterations_iters:
            data_set = DataSet(trainingInstances)
            networks = []  # BackPropagationNetwork
            nnop = []  # NeuralNetworkOptimizationProblem
            oa = []  # OptimizationAlgorithm
            for name in oa_names:
                classification_network = factory.createClassificationNetwork(
                    [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER])
                networks.append(classification_network)
                nnop.append(
                    NeuralNetworkOptimizationProblem(data_set,
                                                     classification_network,
                                                     measure))
            oa.append(RandomizedHillClimbing(nnop[0]))
            oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
            oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))
            # NOTE(review): passes the budget as keyword TRAINING_ITERATIONS;
            # verify that `train` declares a parameter with exactly this name.
            train(oa[i], networks[i], oa_names[i], trainingInstances, measure,
                  TRAINING_ITERATIONS=num)
            optimal_instance = oa[i].getOptimal()
            networks[i].setWeights(optimal_instance.getData())
            correct = 0
            incorrect = 0
            for instance in trainingInstances:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            trainAccuracy.append(
                float(correct) / (correct + incorrect) * 100.0)
            correct = 0
            incorrect = 0
            for instance in testingInstances:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            testAccuracy.append(float(correct) /
                                (correct + incorrect) * 100.0)
        numIterations_data[oa_names[i]] = [trainAccuracy, testAccuracy]
        print "------------------------------------------------------------"
    # Persist both curve dictionaries plus the x-axes used to build them.
    import pickle
    with open("NN_learningCurveAccuracy.pickle", 'wb') as file:
        pickle.dump(learningCurve_data, file, pickle.HIGHEST_PROTOCOL)
    with open("NN_numIterationsAccuracy.pickle", 'wb') as file:
        pickle.dump(numIterations_data, file, pickle.HIGHEST_PROTOCOL)
    with open("NN_learningCurveSize.pickle", 'wb') as file:
        pickle.dump(learningCurve_size, file, pickle.HIGHEST_PROTOCOL)
    with open("NN_numIters.pickle", 'wb') as file:
        pickle.dump(numIterations_iters, file, pickle.HIGHEST_PROTOCOL)