def main():
    """Grid-sweep Simulated Annealing cooling rates x iteration budgets on a
    wine-quality dataset, logging train/validate/test metrics to CSV files
    under Results/NN/ and printing a human-readable summary at the end.

    NOTE(review): this is Python 2 / Jython code driving the ABAGAIL Java
    library (train(), initialize_instances(), network classes are defined
    elsewhere in the file/project).
    """
    train_instances = initialize_instances('wine_train.csv')
    validate_instances = initialize_instances('wine_validate.csv')
    test_instances = initialize_instances('wine_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_instances)
    # Hyperparameter grid for the sweep below.
    iteration_list = [10, 100, 500, 1000, 2500]
    cooling_list = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["SA"]
    results = ""
    # NOTE(review): these accumulators are only reset before the *test* loop
    # further below, never before the per-configuration training-evaluation
    # loop — so training_mse and the low/high-quality counts accumulate
    # across sweep configurations after the first one. Likely a bug; confirm
    # intent before relying on the Train CSV columns.
    error = 0
    low_quality_correct = 0
    low_quality_incorrect = 0
    high_quality_correct = 0
    high_quality_incorrect = 0
    predicted_array = []
    actual_array = []
    # One 11-input / 22-hidden / 1-output ReLU network shared by all sweeps.
    for name in oa_names:
        classification_network = factory.createClassificationNetwork(
            [11, 22, 1], RELU())
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set, classification_network,
                                             measure))
    # Truncate the three output CSVs and write their headers.
    with open("Results/NN/SA_Train.csv", 'w') as f:
        f.write(
            'iterations,cooling,fitness,accuracy,train_time,test_time,mse,low_correct,low_incorrect,high_correct,high_incorrect\n'
        )
    with open("Results/NN/SA_Validate.csv", 'w') as f:
        f.write('iterations,cooling,fitness,accuracy,train_time,test_time\n')
    with open("Results/NN/SA_Test.csv", 'w') as f:
        f.write(
            'iterations,cooling,fitness,accuracy,train_time,test_time,mse,low_correct,low_incorrect,high_correct,high_incorrect\n'
        )
    for p in range(len(cooling_list)):
        for i in range(len(iteration_list)):
            cooling = cooling_list[p]
            iteration = iteration_list[i]
            start = time.time()
            correct = 0
            incorrect = 0
            # Fresh SA instance per configuration; the wrapped network keeps
            # the weights from the previous configuration until setWeights
            # below — presumably intentional warm-starting; verify.
            sim = SimulatedAnnealing(1E11, cooling, nnop[0])
            train(sim, networks[0], oa_names[0], train_instances, measure,
                  iteration)
            end = time.time()
            training_time = end - start
            optimal_instance = sim.getOptimal()
            networks[0].setWeights(optimal_instance.getData())
            # --- evaluate on the training set ---
            start = time.time()
            for instance in train_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()
                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                # Clamp the network output into [0, 1] before thresholding.
                predicted = max(min(predicted, 1), 0)
                predicted_array.append(round(predicted))
                actual_array.append(max(min(actual, 1), 0))
                if abs(predicted - actual) < 0.5:
                    correct += 1
                    if actual == 0:
                        low_quality_correct += 1
                    else:
                        high_quality_correct += 1
                else:
                    incorrect += 1
                    if actual == 0:
                        low_quality_incorrect += 1
                    else:
                        high_quality_incorrect += 1
                # Accumulate sum-of-squares error for the MSE column.
                result = instance.getLabel()
                network_vals = networks[0].getOutputValues()
                example = Instance(network_vals, Instance(network_vals.get(0)))
                error += measure.value(result, example)
            end = time.time()
            testing_time = end - start
            training_mse = error / len(train_instances)
            print("Low quality correct: " + str(low_quality_correct))
            print("Low quality incorrect: " + str(low_quality_incorrect))
            print("High quality correct: " + str(high_quality_correct))
            print("High quality incorrect: " + str(high_quality_incorrect))
            print("Training MSE: " + str(training_mse))
            results += "\nResults for Training %s: \nCorrectly classified %d instances." % (
                'SA', correct)
            results += "\nIncorrectly classified Training %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            data = '{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0, training_time,
                testing_time, training_mse, low_quality_correct,
                low_quality_incorrect, high_quality_correct,
                high_quality_incorrect)
            print(data)
            with open("Results/NN/SA_Train.csv", 'a') as f:
                f.write(data)
            # --- evaluate on the validation set ---
            correct = 0
            incorrect = 0
            for instance in validate_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()
                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            results += "\nResults for Cross Validation %s: \nCorrectly classified %d instances." % (
                'SA', correct)
            results += "\nIncorrectly classified Cross Validation %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            data = '{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0, training_time,
                testing_time)
            print(data)
            with open("Results/NN/SA_Validate.csv", 'a') as f:
                f.write(data)
            # --- evaluate on the test set (counters/error reset here only) ---
            correct = 0
            incorrect = 0
            error = 0
            low_quality_correct = 0
            low_quality_incorrect = 0
            high_quality_correct = 0
            high_quality_incorrect = 0
            predicted_array = []
            actual_array = []
            for instance in test_instances:
                networks[0].setInputValues(instance.getData())
                networks[0].run()
                actual = instance.getLabel().getContinuous()
                predicted = networks[0].getOutputValues().get(0)
                predicted = max(min(predicted, 1), 0)
                predicted_array.append(round(predicted))
                actual_array.append(max(min(actual, 1), 0))
                if abs(predicted - actual) < 0.5:
                    correct += 1
                    if actual == 0:
                        low_quality_correct += 1
                    else:
                        high_quality_correct += 1
                else:
                    incorrect += 1
                    if actual == 0:
                        low_quality_incorrect += 1
                    else:
                        high_quality_incorrect += 1
                result = instance.getLabel()
                network_vals = networks[0].getOutputValues()
                example = Instance(network_vals, Instance(network_vals.get(0)))
                error += measure.value(result, example)
            testing_mse = error / len(test_instances)
            print("Low quality correct: " + str(low_quality_correct))
            print("Low quality incorrect: " + str(low_quality_incorrect))
            print("High quality correct: " + str(high_quality_correct))
            print("High quality incorrect: " + str(high_quality_incorrect))
            print("Testing MSE: " + str(testing_mse))
            results += "\nResults for Testing %s: \nCorrectly classified %d instances." % (
                "SA", correct)
            results += "\nIncorrectly classified Testing %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
            # NOTE(review): testing_time here is still the *training-set*
            # evaluation time — the test loop itself is not timed.
            data = '{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                iteration, cooling, correct,
                float(correct) / (correct + incorrect) * 100.0, training_time,
                testing_time, testing_mse, low_quality_correct,
                low_quality_incorrect, high_quality_correct,
                high_quality_incorrect)
            print(data)
            with open("Results/NN/SA_Test.csv", 'a') as f:
                f.write(data)
    print results
def main():
    """Run algorithms on the dataset.

    Trains RHC, SA, and GA networks for each of three hidden-layer sizes and
    prints per-algorithm accuracy on the training instances.
    """
    instances = initialize_train_instances()
    test_instances = initialize_test_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(instances)
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC", "SA", "GA"]
    # One outer pass per hidden-layer size in HIDDEN_LAYER.
    for i in range(3):
        results = ""
        print "--------- ", str(HIDDEN_LAYER[i]), " HIDDEN LAYERS ---------"
        for name in oa_names:
            classification_network = factory.createClassificationNetwork(
                [INPUT_LAYER, HIDDEN_LAYER[i], OUTPUT_LAYER])
            networks.append(classification_network)
            nnop.append(
                NeuralNetworkOptimizationProblem(data_set,
                                                 classification_network,
                                                 measure))
        # NOTE(review): networks/nnop/oa are never cleared between outer
        # iterations, so nnop[0..2] (and networks[0..2] / oa[0..2] used
        # below) always refer to the FIRST hidden-layer size's objects —
        # later outer iterations do not actually test larger networks.
        oa.append(RandomizedHillClimbing(nnop[0]))
        oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
        oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))
        # NOTE(review): this inner `i` shadows the outer `for i in range(3)`
        # loop variable, compounding the issue above.
        for i, name in enumerate(oa_names):
            start = time.time()
            correct = 0
            incorrect = 0
            train(oa[i], networks[i], oa_names[i], instances, measure,
                  test_instances)
            end = time.time()
            training_time = end - start
            optimal_instance = oa[i].getOptimal()
            networks[i].setWeights(optimal_instance.getData())
            # Score on the training instances (not test_instances).
            start = time.time()
            for instance in instances:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                # NOTE(review): naming is inverted — `predicted` holds the
                # true label and `actual` the network output; the accuracy
                # arithmetic is unaffected.
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            end = time.time()
            testing_time = end - start
            results += "\nResults for %s: \nCorrectly classified %d instances." % (
                name, correct)
            results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
                incorrect, float(correct) / (correct + incorrect) * 100.0)
            results += "\nTraining time: %0.03f seconds" % (training_time, )
            results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
        print results
def main():
    """Run algorithms on the cancer dataset.

    Shuffles and splits the data, trains RHC, SA, and GA networks, scores
    each on the held-out test split, and writes per-algorithm logs to
    out/log/<name>.log.
    """
    instances = initialize_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(instances)
    max_iterations = TRAINING_ITERATIONS
    hidden_layer_size = HIDDEN_LAYER
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC", "SA", "GA"]
    results = ""
    # Shuffle, then split into train/test according to TRAIN_TEST_SPLIT.
    RandomOrderFilter().filter(data_set)
    train_test_split = TestTrainSplitFilter(TRAIN_TEST_SPLIT)
    train_test_split.filter(data_set)
    train_set = train_test_split.getTrainingSet()
    test_set = train_test_split.getTestingSet()
    # One network + optimization problem per algorithm.
    for name in oa_names:
        classification_network = factory.createClassificationNetwork(
            [INPUT_LAYER, hidden_layer_size, OUTPUT_LAYER])
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(train_set, classification_network,
                                             measure))
    oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(SA_START_TEMPERATURE, SA_COOLING, nnop[1]))
    oa.append(
        StandardGeneticAlgorithm(GA_POPULATION_SIZE, GA_MATE_EACH_GEN,
                                 GA_MUTATE_EACH_GEN, nnop[2]))
    for i, name in enumerate(oa_names):
        start = time.time()
        correct = 0
        incorrect = 0
        train(oa[i], networks[i], oa_names[i], train_set, test_set, measure,
              max_iterations=max_iterations)
        end = time.time()
        training_time = end - start
        # Load the best weights found, then score the test split.
        optimal_instance = oa[i].getOptimal()
        networks[i].setWeights(optimal_instance.getData())
        start = time.time()
        for instance in test_set.getInstances():
            networks[i].setInputValues(instance.getData())
            networks[i].run()
            # NOTE(review): names inverted (predicted=label, actual=output);
            # the |diff| < 0.5 accuracy test is symmetric so results hold.
            predicted = instance.getLabel().getContinuous()
            actual = networks[i].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        _results = ""
        _results += "\n[%s] hidden_layer=%d, iterations=%d" % (
            name, hidden_layer_size, max_iterations)
        _results += "\nResults for %s: \nCorrectly classified %d instances." % (
            name, correct)
        _results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        _results += "\nTraining time: %0.03f seconds" % (training_time, )
        _results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
        # Per-algorithm log file (overwritten each run).
        with open('out/log/%s.log' % (oa_names[i]), 'w') as f:
            f.write(_results)
        results += _results
    print results
def main():
    """Run algorithms on the cancer dataset.

    Sweeps the GA mate-per-generation parameter over [25, 50, 100]; each
    sweep reshuffles and resplits the data, trains a fresh GA-optimized
    network, scores the test split, and logs to out/ga/mating-<n>.log.
    """
    instances = initialize_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(instances)
    max_iterations = TRAINING_ITERATIONS
    hidden_layer_size = HIDDEN_LAYER
    # for _hidden_layer in xrange(HIDDEN_LAYER):
    # hidden_layer_size = _hidden_layer + 1
    network = None  # BackPropagationNetwork
    nnop = None  # NeuralNetworkOptimizationProblem
    oa = None  # OptimizationAlgorithm
    results = ""
    for mate_each_gen in [25, 50, 100]:
        # Fresh shuffle + split for each mating-rate setting, so splits
        # differ between settings (adds variance to the comparison).
        RandomOrderFilter().filter(data_set)
        train_test_split = TestTrainSplitFilter(TRAIN_TEST_SPLIT)
        train_test_split.filter(data_set)
        train_set = train_test_split.getTrainingSet()
        test_set = train_test_split.getTestingSet()
        network = factory.createClassificationNetwork(
            [INPUT_LAYER, hidden_layer_size, OUTPUT_LAYER])
        nnop = NeuralNetworkOptimizationProblem(train_set, network, measure)
        oa = StandardGeneticAlgorithm(GA_POPULATION_SIZE, mate_each_gen,
                                      GA_MUTATE_EACH_GEN, nnop)
        start = time.time()
        correct = 0
        incorrect = 0
        # NOTE(review): mate_each_gen is passed as train()'s last positional
        # argument — confirm that slot is the intended parameter (the sibling
        # cancer driver passes max_iterations there by keyword).
        train(oa, network, "GA", train_set, test_set, measure, mate_each_gen)
        end = time.time()
        training_time = end - start
        optimal_instance = oa.getOptimal()
        network.setWeights(optimal_instance.getData())
        start = time.time()
        for instance in test_set.getInstances():
            network.setInputValues(instance.getData())
            network.run()
            # Names inverted (predicted=label, actual=output); harmless here.
            predicted = instance.getLabel().getContinuous()
            actual = network.getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        _results = ""
        _results += "\n[GA] mating=%0.02f" % (mate_each_gen)
        _results += "\nResults for GA: \nCorrectly classified %d instances." % (
            correct)
        _results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        _results += "\nTraining time: %0.03f seconds" % (training_time, )
        _results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
        with open('out/ga/mating-%d.log' % (mate_each_gen), 'w') as f:
            f.write(_results)
        results += _results
    print results
def main():
    """Run algorithms on the diabetes dataset.

    For each of RHC/SA/GA: trains once and prints train/test accuracy, then
    builds a learning curve (varying training-set size) and an iteration
    curve (varying training iterations), finally pickling the collected
    curves and their x-axes to disk.
    """
    learningCurve_data = {}
    learning_curve_file = "NN_learning.pickle"
    numIterations_data = {}
    num_iterations_file = "NN_iterations.pickle"
    trainingInstances, testingInstances = initialize_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(trainingInstances)
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC", "SA", "GA"]
    for name in oa_names:
        classification_network = factory.createClassificationNetwork(
            [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER])
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set, classification_network,
                                             measure))
    oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
    oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))
    # x-axes for the learning and iteration curves.
    learningCurve_size = xrange(20, len(trainingInstances), 80)
    numIterations_iters = xrange(100, 5000, 150)
    for i, name in enumerate(oa_names):
        start = time.time()
        correct = 0
        incorrect = 0
        train(oa[i], networks[i], oa_names[i], trainingInstances, measure)
        end = time.time()
        training_time = end - start
        print "\nTraining time: %0.03f seconds" % (training_time, )
        optimal_instance = oa[i].getOptimal()
        networks[i].setWeights(optimal_instance.getData())
        # --- score on the training set ---
        start = time.time()
        for instance in trainingInstances:
            networks[i].setInputValues(instance.getData())
            networks[i].run()
            # Names inverted (predicted=label, actual=output); harmless.
            predicted = instance.getLabel().getContinuous()
            # print networks[i].getOutputValues()
            actual = networks[i].getOutputValues().get(0)
            # print predicted
            # print actual
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        print "\nTRAINING: Results for %s: \nCorrectly classified %d instances." % (
            name, correct)
        print "\nTRAINING: Incorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        print "\nTRAINING: Testing time: %0.03f seconds\n" % (testing_time, )
        # --- score on the test set ---
        correct = 0
        incorrect = 0
        start = time.time()
        for instance in testingInstances:
            networks[i].setInputValues(instance.getData())
            networks[i].run()
            predicted = instance.getLabel().getContinuous()
            actual = networks[i].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        print "\nTESTING: Results for %s: \nCorrectly classified %d instances." % (
            name, correct)
        print "\nTESTING: Incorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        print "\nTESTING: Testing time: %0.03f seconds\n" % (testing_time, )
        # --- learning curve: vary training-set size ---
        trainAccuracy = []
        testAccuracy = []
        for num in learningCurve_size:
            data_set = DataSet(trainingInstances[:num])
            networks = []  # BackPropagationNetwork
            nnop = []  # NeuralNetworkOptimizationProblem
            oa = []  # OptimizationAlgorithm
            # NOTE(review): this inner `name` shadows the enumerate loop's
            # `name`; only `i` is used afterwards, so it happens to be safe.
            for name in oa_names:
                classification_network = factory.createClassificationNetwork(
                    [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER])
                networks.append(classification_network)
                nnop.append(
                    NeuralNetworkOptimizationProblem(data_set,
                                                     classification_network,
                                                     measure))
            oa.append(RandomizedHillClimbing(nnop[0]))
            oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
            oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))
            # NOTE(review): trains on the FULL trainingInstances even though
            # the optimization problem's DataSet is the [:num] slice —
            # confirm which set train() actually iterates; the learning
            # curve may not be measuring what it intends.
            train(oa[i], networks[i], oa_names[i], trainingInstances, measure)
            optimal_instance = oa[i].getOptimal()
            networks[i].setWeights(optimal_instance.getData())
            correct = 0
            incorrect = 0
            for instance in trainingInstances[:num]:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            trainAccuracy.append(
                float(correct) / (correct + incorrect) * 100.0)
            correct = 0
            incorrect = 0
            for instance in testingInstances:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            testAccuracy.append(float(correct) / (correct + incorrect) * 100.0)
        learningCurve_data[oa_names[i]] = [trainAccuracy, testAccuracy]
        # --- iteration curve: vary training iterations ---
        trainAccuracy = []
        testAccuracy = []
        for num in numIterations_iters:
            data_set = DataSet(trainingInstances)
            networks = []  # BackPropagationNetwork
            nnop = []  # NeuralNetworkOptimizationProblem
            oa = []  # OptimizationAlgorithm
            for name in oa_names:
                classification_network = factory.createClassificationNetwork(
                    [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER])
                networks.append(classification_network)
                nnop.append(
                    NeuralNetworkOptimizationProblem(data_set,
                                                     classification_network,
                                                     measure))
            oa.append(RandomizedHillClimbing(nnop[0]))
            oa.append(SimulatedAnnealing(1E11, .95, nnop[1]))
            oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2]))
            # Iteration budget passed as a keyword; the uppercase kwarg name
            # must match train()'s signature elsewhere in this file.
            train(oa[i], networks[i], oa_names[i], trainingInstances, measure,
                  TRAINING_ITERATIONS=num)
            optimal_instance = oa[i].getOptimal()
            networks[i].setWeights(optimal_instance.getData())
            correct = 0
            incorrect = 0
            for instance in trainingInstances:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            trainAccuracy.append(
                float(correct) / (correct + incorrect) * 100.0)
            correct = 0
            incorrect = 0
            for instance in testingInstances:
                networks[i].setInputValues(instance.getData())
                networks[i].run()
                predicted = instance.getLabel().getContinuous()
                actual = networks[i].getOutputValues().get(0)
                if abs(predicted - actual) < 0.5:
                    correct += 1
                else:
                    incorrect += 1
            testAccuracy.append(float(correct) /
                                (correct + incorrect) * 100.0)
        numIterations_data[oa_names[i]] = [trainAccuracy, testAccuracy]
        print "------------------------------------------------------------"
    # Persist the curves; note `file` shadows the builtin (Python 2 style).
    import pickle
    with open("NN_learningCurveAccuracy.pickle", 'wb') as file:
        pickle.dump(learningCurve_data, file, pickle.HIGHEST_PROTOCOL)
    with open("NN_numIterationsAccuracy.pickle", 'wb') as file:
        pickle.dump(numIterations_data, file, pickle.HIGHEST_PROTOCOL)
    with open("NN_learningCurveSize.pickle", 'wb') as file:
        pickle.dump(learningCurve_size, file, pickle.HIGHEST_PROTOCOL)
    with open("NN_numIters.pickle", 'wb') as file:
        pickle.dump(numIterations_iters, file, pickle.HIGHEST_PROTOCOL)
def main(trainfile, testfile, validfile, oa_name, i, params):
    """Train one optimizer (BP/RHC/SA/GA) on label column `i` of the given
    CSVs and print precision/recall/F1/accuracy for train, valid and test.

    Parameters:
        trainfile/testfile/validfile -- CSV paths passed to
            initialize_instances (defined elsewhere).
        oa_name -- one of "BP", "RHC", "SA", "GA"; selects the trainer.
        i -- label column index; value 9 is rejected.
        params -- algorithm hyperparameters as strings: SA expects
            (start_temp, cooling); GA expects (population, mate, mutate).
    """
    print("== [{}] ==".format(oa_name))
    # NOTE(review): `res` is populated with an empty dict and never read —
    # leftover from the commented-out multi-label loop below.
    res = {}
    #for i in range(25):
    res[i] = {}
    if i == 9:
        print("Invalid i %d" % (i))
        sys.exit(1)
    print("LABEL: {}".format(i))
    traininstances = initialize_instances(trainfile, i)
    testinstances = initialize_instances(testfile, i)
    validinstances = initialize_instances(validfile, i)
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(traininstances)
    rule = RPROPUpdateRule()  # only used by the BP trainer below
    # was networks[]
    classification_network = factory.createClassificationNetwork(
        [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER])
    nnop = NeuralNetworkOptimizationProblem(data_set, classification_network,
                                            measure)
    oa = None  # was oa = []
    suffix = ""  # appended to output names; encodes hyperparameters
    if oa_name == "BP":
        oa = BatchBackPropagationTrainer(data_set, classification_network,
                                         measure, rule)
    if oa_name == "RHC":
        oa = RandomizedHillClimbing(nnop)
    if oa_name == "SA":
        suffix = '-' + '-'.join(params)
        oa = SimulatedAnnealing(float(params[0]), float(params[1]), nnop)
    if oa_name == "GA":
        suffix = '-' + '-'.join(params)
        oa = StandardGeneticAlgorithm(int(params[0]), int(params[1]),
                                      int(params[2]), nnop)
    ttvinstances = {
        'train': traininstances,
        'test': testinstances,
        'valid': validinstances
    }
    train_start = timeit.default_timer()
    train(oa, classification_network, oa_name, ttvinstances, measure, i,
          suffix)
    train_end = timeit.default_timer()
    print 'train time: %d secs' % (int(train_end - train_start))
    # BP trains the network in place; the randomized optimizers need the
    # best-found weights copied back explicitly.
    if oa_name != "BP":
        optimal_instance = oa.getOptimal()
        classification_network.setWeights(optimal_instance.getData())
    ttvinstances = {
        'train': traininstances,
        'valid': validinstances,
        'test': testinstances
    }
    # NOTE(review): zip(d.keys(), d.values()) is just d.items().
    for key, instances in zip(ttvinstances.keys(), ttvinstances.values()):
        query_start = timeit.default_timer()
        tp = 0.
        fp = 0.
        fn = 0.
        tn = 0.
        precision = 0.
        recall = 0.
        f1 = 0.
        print "scoring %s..." % (key)
        for instance in instances:
            classification_network.setInputValues(instance.getData())
            classification_network.run()
            actual = instance.getLabel().getContinuous()
            predicted = classification_network.getOutputValues().get(0)
            #print ('actual = %.3f, predicted = %.3f' % (actual, predicted))
            # Threshold at 0.5; labels are assumed to be exactly 0.0 / 1.0.
            if actual == 1.:
                if predicted >= 0.5:
                    tp += 1.
                else:
                    fn += 1.
            else:
                if predicted >= 0.5:
                    fp += 1.
                else:
                    tn += 1.
        query_end = timeit.default_timer()
        # Guard each ratio so an empty class yields 0 rather than ZeroDivision.
        if tp + fp > 0.:
            precision = tp / (tp + fp)
        if fn + tp > 0.:
            recall = tp / (fn + tp)
        if precision + recall > 0.:
            f1 = 2. * precision * recall / (precision + recall)
        correct = tp + tn
        total = correct + fp + fn
        print "%s f1 = %0.10f" % (key, f1)
        print "%s accuracy = %0.10f" % (key, correct / total)
        print "%s query time: %d secs" % (key, int(query_end - query_start))
def main():
    """Run algorithms on the abalone dataset.

    Currently only SA (cooling 0.8) is enabled; RHC and GA are commented
    out. Dumps the per-iteration error history returned by train() to a CSV
    and prints training-set accuracy.
    """
    instances = initialize_instances()
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(instances)
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    # oa_names = ["RHC", "SA", "GA"]
    oa_names = ["SA"]
    results = ""
    for name in oa_names:
        # Two hidden layers with logistic-sigmoid activation.
        classification_network = factory.createClassificationNetwork(
            [INPUT_LAYER, HIDDEN_LAYER_1, HIDDEN_LAYER_2, OUTPUT_LAYER],
            LogisticSigmoid())
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set, classification_network,
                                             measure))
    # oa.append(RandomizedHillClimbing(nnop[0]))
    oa.append(SimulatedAnnealing(1E11, .8, nnop[0]))
    # oa.append(StandardGeneticAlgorithm(300, 150, 15, nnop[2]))
    for i, name in enumerate(oa_names):
        start = time.time()
        correct = 0
        incorrect = 0
        # train() here returns the error history rows for the CSV below.
        err_hist = train(oa[i], networks[i], oa_names[i], instances, measure)
        end = time.time()
        training_time = end - start
        # output error history; filename encodes iterations and cooling rate.
        EH_FILE = name + '_3000_0.8.csv'
        with open(EH_FILE, 'w') as f:
            writer = csv.writer(f)
            writer.writerows(err_hist)
        optimal_instance = oa[i].getOptimal()
        networks[i].setWeights(optimal_instance.getData())
        # Score on the training instances (no held-out split in this driver).
        start = time.time()
        for instance in instances:
            networks[i].setInputValues(instance.getData())
            networks[i].run()
            y_true = instance.getLabel().getContinuous()
            y_prob = networks[i].getOutputValues().get(0)
            if abs(y_true - y_prob) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        results += "\nResults for %s: \nCorrectly classified %d instances." % (
            name, correct)
        results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time, )
        results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
    print results
def main():
    """Sweep Randomized Hill Climbing iteration budgets on a wine-quality
    dataset, logging train/validate/test metrics to CSVs under Results/NN/
    and printing a summary. Companion to the SA sweep driver in this file.
    """
    train_instances = initialize_instances('wine_train.csv')
    validate_instances = initialize_instances('wine_validate.csv')
    test_instances = initialize_instances('wine_test.csv')
    factory = BackPropagationNetworkFactory()
    measure = SumOfSquaresError()
    data_set = DataSet(train_instances)
    iteration_list = [10, 100, 500, 1000, 2500, 5000]
    # Truncate output CSVs and write headers.
    with open("Results/NN/RHC_Train.csv", 'w') as f:
        f.write('iterations,fitness,accuracy,train_time,test_time\n')
    with open("Results/NN/RHC_Validate.csv", 'w') as f:
        f.write('iterations,fitness,accuracy,train_time,test_time\n')
    with open("Results/NN/RHC_Test.csv", 'w') as f:
        f.write('iterations,fitness,accuracy,train_time,test_time\n')
    networks = []  # BackPropagationNetwork
    nnop = []  # NeuralNetworkOptimizationProblem
    oa = []  # OptimizationAlgorithm
    oa_names = ["RHC"]
    results = ""
    for name in oa_names:
        # Same 11/22/1 ReLU topology as the SA sweep driver.
        classification_network = factory.createClassificationNetwork(
            [11, 22, 1], RELU())
        networks.append(classification_network)
        nnop.append(
            NeuralNetworkOptimizationProblem(data_set, classification_network,
                                             measure))
    oa.append(RandomizedHillClimbing(nnop[0]))
    for i in range(len(iteration_list)):
        iteration = iteration_list[i]
        start = time.time()
        correct = 0
        incorrect = 0
        # NOTE(review): the single RHC instance is reused, so each budget
        # continues hill-climbing from the previous sweep's state —
        # confirm this warm-start is intended.
        train(oa[0], networks[0], oa_names[0], train_instances, measure,
              iteration)
        end = time.time()
        training_time = end - start
        optimal_instance = oa[0].getOptimal()
        networks[0].setWeights(optimal_instance.getData())
        # --- evaluate on the training set ---
        start = time.time()
        for instance in train_instances:
            networks[0].setInputValues(instance.getData())
            networks[0].run()
            # NOTE(review): naming swapped vs. the SA sibling (here
            # predicted=label, actual=output); accuracy math unaffected.
            predicted = instance.getLabel().getContinuous()
            actual = networks[0].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        end = time.time()
        testing_time = end - start
        results += "\nResults for Training %s: \nCorrectly classified %d instances." % (
            'RHC', correct)
        results += "\nIncorrectly classified Training %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time, )
        results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
        data = '{},{},{},{},{}\n'.format(
            iteration, correct,
            float(correct) / (correct + incorrect) * 100.0, training_time,
            testing_time)
        print(data)
        with open("Results/NN/RHC_Train.csv", 'a') as f:
            f.write(data)
        # --- evaluate on the validation set ---
        correct = 0
        incorrect = 0
        for instance in validate_instances:
            networks[0].setInputValues(instance.getData())
            networks[0].run()
            predicted = instance.getLabel().getContinuous()
            actual = networks[0].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        results += "\nResults for Cross Validation %s: \nCorrectly classified %d instances." % (
            'RHC', correct)
        results += "\nIncorrectly classified Cross Validation %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time, )
        results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
        data = '{},{},{},{},{}\n'.format(
            iteration, correct,
            float(correct) / (correct + incorrect) * 100.0, training_time,
            testing_time)
        print(data)
        with open("Results/NN/RHC_Validate.csv", 'a') as f:
            f.write(data)
        # --- evaluate on the test set ---
        correct = 0
        incorrect = 0
        for instance in test_instances:
            networks[0].setInputValues(instance.getData())
            networks[0].run()
            predicted = instance.getLabel().getContinuous()
            actual = networks[0].getOutputValues().get(0)
            if abs(predicted - actual) < 0.5:
                correct += 1
            else:
                incorrect += 1
        results += "\nResults for Testing %s: \nCorrectly classified %d instances." % (
            "RHC", correct)
        results += "\nIncorrectly classified Testing %d instances.\nPercent correctly classified: %0.03f%%" % (
            incorrect, float(correct) / (correct + incorrect) * 100.0)
        results += "\nTraining time: %0.03f seconds" % (training_time, )
        results += "\nTesting time: %0.03f seconds\n" % (testing_time, )
        data = '{},{},{},{},{}\n'.format(
            iteration, correct,
            float(correct) / (correct + incorrect) * 100.0, training_time,
            testing_time)
        print(data)
        with open("Results/NN/RHC_Test.csv", 'a') as f:
            f.write(data)
    print results
def main():
    """Run algorithms on the abalone dataset.

    Active configuration: GA mutate-per-generation sweep (20/50/100/180),
    repeated REPEAT times on the first 5000 instances; each run dumps the
    per-iteration training data returned by train() to <name>_ANN_<n>.csv.
    Earlier experiments (default RHC/SA/GA, SA temperature sweep, SA cooling
    sweep) are preserved below as commented-out code.
    """
    ## for optimizers with default setting
    # for n in range(REPEAT):
    # instances = initialize_instances(INPUT_FILE)[:5000]
    #
    # factory = BackPropagationNetworkFactory()
    # measure = SumOfSquaresError()
    # data_set = DataSet(instances)
    #
    # networks = []  # BackPropagationNetwork
    # nnop = []  # NeuralNetworkOptimizationProblem
    # oa = []  # OptimizationAlgorithm
    # oa_names = ["RHC", "SA", "GA"]
    # results = ""
    #
    # for name in oa_names:
    # classification_network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER])
    # networks.append(classification_network)
    # nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure))
    #
    # oa = [RandomizedHillClimbing(nnop[0]),
    # SimulatedAnnealing(1E11, .95, nnop[1]),
    # StandardGeneticAlgorithm(200, 100, 10, nnop[2])]
    #
    #
    # for i, name in enumerate(oa_names):
    # round_start = time.time()
    # if name == "GA" and n >= int(REPEAT/2):
    # continue
    #
    # iterdata = train(oa[i], networks[i], oa_names[i], instances,measure)
    # output_name = name + "_ANN_{}.csv".format(n)
    # round_end = time.time()
    # with open(output_name,'wb') as resultFile:
    # wr = csv.writer(resultFile, dialect='excel')
    # wr.writerows(iterdata)
    # print output_name, " : ",round_end - round_start,"seconds"
    for n in range(REPEAT):
        # Rebuild everything from scratch each repeat for independent runs.
        instances = initialize_instances(INPUT_FILE)[:5000]
        factory = BackPropagationNetworkFactory()
        measure = SumOfSquaresError()
        data_set = DataSet(instances)
        networks = []  # BackPropagationNetwork
        nnop = []  # NeuralNetworkOptimizationProblem
        oa = []  # OptimizationAlgorithm
        # oa_names = ["1e10","1e12", "1e13", "1e15"]
        # oa_names = ["cf0.1","cf0.25","cf0.5", "cf0.75"]
        oa_names = ["toMutate20", "toMutate50", "toMutate100", "toMutate180"]
        results = ""
        for name in oa_names:
            classification_network = factory.createClassificationNetwork(
                [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER])
            networks.append(classification_network)
            nnop.append(
                NeuralNetworkOptimizationProblem(data_set,
                                                 classification_network,
                                                 measure))
        # oa = [SimulatedAnnealing(1E10, .95, nnop[0]),
        # SimulatedAnnealing(1E12, .95, nnop[1]),
        # SimulatedAnnealing(1E13, .95, nnop[2]),
        # SimulatedAnnealing(1E15, .95, nnop[3])]
        # oa = [SimulatedAnnealing(1E11, .1, nnop[0]),
        # SimulatedAnnealing(1E11, .25, nnop[1]),
        # SimulatedAnnealing(1E11, .5, nnop[2]),
        # SimulatedAnnealing(1E11, .75, nnop[3])]
        # Only the mutate-count parameter varies across the four GAs.
        oa = [
            StandardGeneticAlgorithm(200, 100, 20, nnop[0]),
            StandardGeneticAlgorithm(200, 100, 50, nnop[1]),
            StandardGeneticAlgorithm(200, 100, 100, nnop[2]),
            StandardGeneticAlgorithm(200, 100, 180, nnop[3])
        ]
        for i, name in enumerate(oa_names):
            round_start = time.time()
            # if name == "GA" and n >= int(REPEAT/2):
            # continue
            #
            iterdata = train(oa[i], networks[i], oa_names[i], instances,
                             measure)
            output_name = name + "_ANN_{}.csv".format(n)
            round_end = time.time()
            # 'wb' is the Python 2 binary mode expected by csv.writer here.
            with open(output_name, 'wb') as resultFile:
                wr = csv.writer(resultFile, dialect='excel')
                wr.writerows(iterdata)
            print output_name, " : ", round_end - round_start, "seconds"
def main(): trainingInstances, testingInstances = initialize_instances() factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(trainingInstances) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm oa_names = ["RHC", "SA_15", "SA_35", "SA_55", "SA_75", "SA_95"] #oa_names=["GA_100_50_5", "GA_200_50_5", "GA_100_50_10", "GA_200_50_10", "GA_100_100_5", "GA_200_100_5", "GA_100_100_10", "GA_200_100_10"] #oa_names=["GA_200_100_5", "GA_100_100_10", "GA_200_100_10"] for name in oa_names: #use RELU activation function classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER], ReLU()) networks.append(classification_network) nnop.append( NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) oa.append(RandomizedHillClimbing(nnop[0])) oa.append(SimulatedAnnealing(1E11, .15, nnop[1])) oa.append(SimulatedAnnealing(1E11, .35, nnop[2])) oa.append(SimulatedAnnealing(1E11, .55, nnop[3])) oa.append(SimulatedAnnealing(1E11, .75, nnop[4])) oa.append(SimulatedAnnealing(1E11, .95, nnop[5])) # oa.append(StandardGeneticAlgorithm(100, 50, 5, nnop[0])) # oa.append(StandardGeneticAlgorithm(200, 50, 5, nnop[1])) # oa.append(StandardGeneticAlgorithm(100, 50, 10, nnop[2])) # oa.append(StandardGeneticAlgorithm(200, 50, 10, nnop[3])) # oa.append(StandardGeneticAlgorithm(100, 100, 5, nnop[4])) #oa.append(StandardGeneticAlgorithm(200, 100, 5, nnop[0])) #oa.append(StandardGeneticAlgorithm(100, 100, 10, nnop[1])) #oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2])) with open('nn_spam_results_RHC_SA.csv', 'w') as csvfile: writer = csv.writer(csvfile) for i, name in enumerate(oa_names): results = '' start = time.time() traincorrect = 0 trainincorrect = 0 testcorrect = 0 testincorrect = 0 train(oa[i], networks[i], oa_names[i], trainingInstances, testingInstances, measure) end = time.time() training_time = end - start 
optimal_instance = oa[i].getOptimal() networks[i].setWeights(optimal_instance.getData()) start = time.time() for instance in trainingInstances: networks[i].setInputValues(instance.getData()) networks[i].run() predicted = instance.getLabel().getContinuous() actual = networks[i].getOutputValues().get(0) if abs(predicted - actual) < 0.5: traincorrect += 1 else: trainincorrect += 1 for instance in testingInstances: networks[i].setInputValues(instance.getData()) networks[i].run() predicted = instance.getLabel().getContinuous() actual = networks[i].getOutputValues().get(0) if abs(predicted - actual) < 0.5: testcorrect += 1 else: testincorrect += 1 end = time.time() testing_time = end - start results += "\nResults for %s: \nCorrectly classified %d training instances." % ( name, traincorrect) results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % ( trainincorrect, float(traincorrect) / (traincorrect + trainincorrect) * 100.0) results += "\nResults for %s: \nCorrectly classified %d testing instances." % ( name, testcorrect) results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % ( testincorrect, float(testcorrect) / (testcorrect + testincorrect) * 100.0) results += "\nTraining time: %0.03f seconds" % (training_time, ) results += "\nTesting time: %0.03f seconds\n" % (testing_time, ) print results writer.writerow([results]) writer.writerow('')
def main(): """Run algorithms on the abalone dataset.""" train_instances = initialize_instances() test_instances = initialize_instances(test=True) factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_instances) networks = [] # BackPropagationNetwork oa = [] # OptimizationAlgorithm oa_names = [] if do_rhc: oa_names.append("RHC") if do_sa: oa_names.append("SA") if do_ga: oa_names.append("GA") if do_bp: oa_names.append("BP") results = "" # For each algo, need to see if we are doing sweeps # No need to sweep rhc as there are no parameters if do_rhc and sweep == False: training_iter = TRAINING_ITERATIONS if do_fmnist: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) if do_chess: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER]) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = RandomizedHillClimbing(nnop) name = "RHC" train(oa, classification_network, name, train_instances, measure, training_iter, test_instances, True) if do_sa: training_iter = TRAINING_ITERATIONS count = 0 for temp, cooling in product(sa_temp, sa_cooling): if do_fmnist: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) if do_chess: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER]) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) oa = SimulatedAnnealing(temp, cooling, nnop) name = "SA_sweep" if count == 0: print_head = True else: print_head = False train(oa, classification_network, name, train_instances, measure, training_iter, test_instances, print_head, temp, cooling) count += 1 if do_ga: training_iter = GA_TRAINING_ITERATIONS count = 0 for pop, prop_mate, prop_mutate in product(ga_pop, ga_prop_mate, ga_prop_mutate): if do_fmnist: 
classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) if do_chess: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER]) nnop = NeuralNetworkOptimizationProblem(data_set, classification_network, measure) mate = int(math.floor(pop * prop_mate)) mutate = int(math.floor(pop * prop_mutate)) oa = StandardGeneticAlgorithm(pop, mate, mutate, nnop) name = "GA_sweep" if count == 0: print_head = True else: print_head = False train(oa, classification_network, name, train_instances, measure, training_iter, test_instances, print_head, pop, prop_mate, prop_mutate) count += 1 if do_bp and sweep == False: training_iter = TRAINING_ITERATIONS if do_fmnist: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) if do_chess: classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER]) oa = BatchBackPropagationTrainer(data_set, classification_network, measure, RPROPUpdateRule()) name = "BP" train(oa, classification_network, name, train_instances, measure, training_iter, test_instances, True)
def main(): """Run algorithms on the gamma dataset.""" instances = initialize_instances() factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(instances) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm oa_names = ["GA"] results = "" graph_x = "" graph_y = "" classification_network = factory.createClassificationNetwork( [INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]) networks.append(classification_network) nnop.append( NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) population = [100, 50, 10] mate = [50, 25, 5] mutate = [25, 10, 2] for k in range(3): oa.append( StandardGeneticAlgorithm(population[k], mate[k], mutate[k], nnop[0])) for i, name in enumerate(oa_names): start = time.time() correct = 0 incorrect = 0 train(oa[i], networks[i], oa_names[i], instances, measure) end = time.time() training_time = end - start print(population[k], mate[k], mutate[k]) optimal_instance = oa[i].getOptimal() networks[i].setWeights(optimal_instance.getData()) start = time.time() for instance in instances: networks[i].setInputValues(instance.getData()) networks[i].run() predicted = instance.getLabel().getContinuous() actual = networks[i].getOutputValues().get(0) if abs(predicted - actual) < 0.5: correct += 1 else: incorrect += 1 end = time.time() testing_time = end - start results += "\nResults for %s, %s, %s, %s: \nCorrectly classified %d instances." 
% ( name, population[k], mate[k], mutate[k], correct) results += "\nIncorrectly classified %d instances.\nPercent correctly classified: %0.03f%%" % ( incorrect, float(correct) / (correct + incorrect) * 100.0) results += "\nTraining time: %0.03f seconds" % (training_time, ) results += "\nTesting time: %0.03f seconds\n" % (testing_time, ) graph_x += "\npopulation:'%s', mate:'%s', mutate:'%s'" % ( population[k], mate[k], mutate[k]) graph_y += ",%0.2f" % ((float(correct) / (correct + incorrect) * 100.0)) #print graph_x print graph_y
def main(): """ Run algorithms on the gamma dataset. Essentially ran twice for 2-fold cross validation Metrics are evaluated outside of this file """ train_data = initialize_instances(TRAIN_FILE) test_data = initialize_instances(TEST_FILE) # Get data factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_data) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm oa_names = ["RHC", "SA", "GA"] results = "" # Create each network architecture and an optimization instance for name in oa_names: activation = RELU() # Change network size classification_network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], activation) networks.append(classification_network) nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) # Randomized Optimzation Algos oa.append(RandomizedHillClimbing(nnop[0])) oa.append(SimulatedAnnealing(1E11, .95, nnop[1])) oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2])) # Go through each optimization problem and do 2-fold CV for i, name in enumerate(oa_names): start = time.time() metrics = train(oa[i], networks[i], oa_names[i], train_data, test_data, measure) end = time.time() training_time = end - start results += "\nFold 1 train time: %0.03f seconds" % (training_time,) # Write data to CSV file with open("metrics/" + oa_names[i] + '_f1.csv', 'w') as f: writer = csv.writer(f) for metric in metrics: writer.writerow(metric) print results # 2nd fold; train_data = initialize_instances(TEST_FILE) test_data = initialize_instances(TRAIN_FILE) # Get data factory = BackPropagationNetworkFactory() measure = SumOfSquaresError() data_set = DataSet(train_data) networks = [] # BackPropagationNetwork nnop = [] # NeuralNetworkOptimizationProblem oa = [] # OptimizationAlgorithm oa_names = ["RHC", "SA", "GA"] results = "" # Create each network architecture and an optimization 
instance for name in oa_names: activation = RELU() # Change network size classification_network = factory.createClassificationNetwork([INPUT_LAYER, HIDDEN_LAYER1, HIDDEN_LAYER2, OUTPUT_LAYER], activation) networks.append(classification_network) nnop.append(NeuralNetworkOptimizationProblem(data_set, classification_network, measure)) # Randomized Optimzation Algos oa.append(RandomizedHillClimbing(nnop[0])) oa.append(SimulatedAnnealing(1E11, .95, nnop[1])) oa.append(StandardGeneticAlgorithm(200, 100, 10, nnop[2])) # Go through each optimization problem and do 2-fold CV for i, name in enumerate(oa_names): start = time.time() metrics = train(oa[i], networks[i], oa_names[i], train_data, test_data, measure) end = time.time() training_time = end - start results += "\nFold 1 train time: %0.03f seconds" % (training_time,) # Write data to CSV file with open("metrics/" + oa_names[i] + '_f2.csv', 'w') as f: writer = csv.writer(f) for metric in metrics: writer.writerow(metric) print results