def main(c="decision_tree", option="IG", dataset="iris", ratio=0.8):
    """Train one classifier on one dataset and print a short report banner.

    Args:
        c: Classifier type, forwarded to ``Classifier`` as ``classifier_type``.
            The types this driver was written for are "decision_tree",
            "naive_bayes" and "neural_net"; the value is not validated here.
        option: Variant forwarded to ``Classifier`` as ``option``. The
            variants this driver was written for are "IG"/"IGR" (decision
            tree), "normal" (naive Bayes) and "shallow" (3 layers) /
            "medium" (5) for the neural net; not validated here either.
        dataset: "monks", "congress" or "iris". Any other value prints an
            error message and returns without training.
        ratio: Converted with ``float`` and handed to the selected
            ``load_data`` loader.

    Returns:
        None. Results are reported through ``print`` and whatever
        ``Classifier.test`` itself outputs.

    Raises:
        ValueError, TypeError: If ``ratio`` cannot be converted to ``float``.
    """
    ratio = float(ratio)

    # NOTE(review): the monks loader receives the same float as the other
    # loaders -- confirm that load_monks really expects a ratio here.
    if dataset == "monks":
        (training, test) = load_data.load_monks(ratio)
    elif dataset == "congress":
        (training, test) = load_data.load_congress_data(ratio)
    elif dataset == "iris":
        (training, test) = load_data.load_iris(ratio)
    else:
        print("Error: Cannot find dataset name.")
        return

    print("Training... Please hold.")

    # Every print below passes one pre-formatted string so that the output is
    # byte-identical under the Python 2 print statement and the Python 3
    # print function (the two spaces after "=" match the old comma form).
    banner = "================================================================="
    print(" ")
    print(banner)
    print("Dataset =  %s" % (dataset,))
    print("Classifier =  %s" % (c,))
    print("Option =  %s" % (option,))

    classifier = Classifier(classifier_type=c, weights=[], option=option)
    classifier.train(training)
    classifier.test(test)

    print(banner)
    print(" ")
    return
def trainNtest(args):
    """Build a classifier from four command-line style arguments, then train and test it.

    Args:
        args: Sequence of exactly four strings.
            args[0]: dataset selector; the first three characters are
                skipped and the rest must be "congress", "monk" or "iris".
            args[1]: for congress/iris, the first three characters are
                skipped and the rest is an integer percentage (divided by
                100.0 before being passed to the loader); for monk the
                whole string is parsed as an integer.
            args[2]: method selector; the character at index 3 is the digit
                0 (decision_tree), 1 (naive_bayes) or 2 (neural_network).
                For 0 and 2 the characters at index 5 and 7 are forwarded
                to ``Classifier`` as ``one`` and ``two``.
            args[3]: "-test" evaluates on ``data[1]``; any other value
                evaluates on ``data[0]`` (the data that was trained on).

    Returns:
        None. Prints an error message and returns early when the argument
        count or the dataset name is invalid.

    Raises:
        ValueError, IndexError: If the numeric fields cannot be parsed or
            the method string is shorter than expected.
    """
    if len(args) != 4:
        print("ERROR: NEED 4 PARAMETERS")
        return

    classifierType = ["decision_tree", "naive_bayes", "neural_network"]

    dataset_name = args[0][3:]
    if dataset_name == "congress":
        data = ld.load_congress_data(int(args[1][3:]) / 100.0)
        num_input = 16
        num_output = 2
    elif dataset_name == "monk":
        # NOTE(review): unlike the other branches this does not skip a
        # three-character prefix on args[1] -- confirm the expected format.
        data = ld.load_monks(int(args[1]))
        num_input = 6
        num_output = 2
    elif dataset_name == "iris":
        data = ld.load_iris(int(args[1][3:]) / 100.0)
        num_input = 4
        num_output = 3
    else:
        print("INVALID DATA NAME")
        return

    method_num = int(args[2][3])
    if method_num == 0 or method_num == 2:
        # Decision tree and neural network take two extra single-character
        # options plus the input/output sizes of the chosen dataset.
        classifier = Classifier(classifierType[method_num],
                                one=args[2][5],
                                two=args[2][7],
                                num_input=num_input,
                                num_output=num_output)
    else:
        classifier = Classifier(classifierType[method_num])

    classifier.train(data[0])
    if args[3] == "-test":
        classifier.test(data[1])
    else:
        classifier.test(data[0])
import numpy as np
import sys
import load_data
import classifier as c

"""
This is the main python method that will be run.
You should determine what sort of command line arguments
you want to use. But in this module you will need to
1) initialize your classifier and its params
2) load training/test data
3) train the algorithm
4) test it and output the desired statistics.
"""

# Load the congress data once; the classifiers below are trained on the
# first element of the returned pair and evaluated on the second.
(training, test) = load_data.load_congress_data()

# `c` is the imported module, so it cannot be called directly.
# NOTE(review): the class is assumed to be named `Classifier` inside the
# `classifier` module -- confirm against classifier.py.
c1 = c.Classifier("naive_bayes")
c2 = c.Classifier("neural")

# Train on the loaded data rather than on an empty placeholder list.
c2.train(training)
c1.train(training)
c1.test(test)
from decision_tree import Decision_Tree
from naive_bayes import Naive_Bayes
import load_data as ld
import pdb

# Scratch driver: fit a naive Bayes model on the congress data.
#
# Experiments that are currently switched off:
#   * Decision_Tree("decision_tree", pruning=False, info_gain_ratio=True)
#     as the model instead of Naive_Bayes
#   * ld.load_monks(1), ld.load_monks(2), ld.load_monks(3) and
#     ld.load_iris(.75) as the data source
#   * a manual hit/total loop calling nb.predict(person) over monks3[1]
#   * pdb.set_trace() and nb.test(monks3[1])
nb = Naive_Bayes("naive_bayes")

# Despite its name, `monks3` currently holds the congress data.
monks3 = ld.load_congress_data(0.75)

# Element 0 is presumably the training portion of the split -- TODO confirm.
train_rows = monks3[0]
nb.train(train_rows)
from nn import NN
from nb import NB
from nb import GNB
from tree import DT
import random
import math
import numpy.matlib


def _load_split(loader, arg):
    """Call ``loader(arg)`` once and return its first two parts as matrices."""
    parts = loader(arg)
    return np.matrix(parts[0]), np.matrix(parts[1])


# Number of classes in each data set.
ncIris = 3
ncCongress = 2
ncMonks = 2

# Dataset reading. Each loader is invoked exactly once per dataset so that
# the train and test matrices always come from the same call (and therefore
# the same split, should the loader shuffle), and nothing is read twice.
trainSetIris, testSetIris = _load_split(load_iris, 0.7)
trainSetCongress, testSetCongress = _load_split(load_congress_data, 0.7)
trainM1, testM1 = _load_split(load_monks, 1)
trainM2, testM2 = _load_split(load_monks, 2)
trainM3, testM3 = _load_split(load_monks, 3)

# Decision Tree on Congress using IG
print("1. DT, Congress, IG--------------------------------")
model0 = DT()
model0.train(trainSetCongress, ncCongress, 1)
model0.test(testSetCongress, ncCongress, 1)
# Prune
model0.prune(trainSetCongress, ncCongress)
4) test it and output the desired statistics. """ classifier_type = sys.argv[1] if classifier_type == "decision_tree": data = sys.argv[3] else: data = sys.argv[2] params = dict() (train_data, test_data) = (None, None) if data=="congress": (train_data, test_data) = load_data.load_congress_data(0.7) elif data=="iris": (train_data, test_data) = load_data.load_iris(0.7) elif data=="monk": i = 3 if classifier_type == "decision_tree": i = 4 numb = sys.argv[i] (train_data, test_data) = load_data.load_monks(int(numb)) params = dict() if classifier_type == "neural_network": i=3 if data=="monk": i=4
import numpy as np


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of ``x``."""
    return 1.0/(1.0 + np.exp(-x))


def sigmoid_derivative(x):
    """Return sigmoid(x) * (1 - sigmoid(x)), evaluating the sigmoid once."""
    s = sigmoid(x)
    return s*(1.0 - s)


class NeuralNetwork:
    """Container for sigmoid activations and randomly initialised weights."""

    def __init__(self, layers):
        """Create one weight matrix per pair of consecutive layers.

        Args:
            layers: Sequence of layer sizes, input first and output last.
                At least two entries are required.

        Raises:
            ValueError: If fewer than two layer sizes are given.
        """
        if len(layers) < 2:
            raise ValueError("layers must contain at least an input and an output size")

        self.activation = sigmoid
        self.activation_prime = sigmoid_derivative
        self.weights = []

        # Diagnostic output of the layer layout.
        print(layers)
        print(range(1,len(layers)-1))

        # Input and hidden layers: both sides get one extra unit (the +1),
        # values are drawn uniformly from [-1, 1).
        for i in range(1, len(layers) - 1):
            r = 2*np.random.random((layers[i-1] + 1, layers[i] + 1)) -1
            self.weights.append(r)

        # Output layer: only the incoming side gets the extra unit. The last
        # two sizes are indexed explicitly instead of through the loop
        # variable, which is unbound when there is no hidden layer.
        r = 2*np.random.random((layers[-2] + 1, layers[-1])) - 1
        self.weights.append(r)
        # print(self.weights)

    def train(self, X, learning_rate=0.2, epochs=80000):
        """Split ``X`` into its first column and the remaining columns.

        Args:
            X: 2-D array; column 0 is separated from the other columns.
            learning_rate: Not used by the statements below.
            epochs: Not used by the statements below.

        NOTE(review): no weight update happens here; the method only
        performs the column split and returns None.
        """
        y = X[:,0]
        X = np.delete(X,[0],axis=1)


if __name__ == '__main__':
    X,Y = load_data.load_congress_data(0.7)
    (rows,cols) = X.shape
    nn = NeuralNetwork([cols-1,(cols-1),1])
from neural_network import Neural_Network
import load_data as ld
import pdb

# Scratch driver: train a Neural_Network on the congress data and tally its
# predictions over the second element of the loaded pair.
#
# Alternatives that are currently switched off:
#   * Classifier(weights = [], num_input=30, num_hidden=10, num_output=3)
#   * ld.load_iris(.75) and ld.load_monks(3) as the data source
#   * nb.train(iris[0]), pdb.set_trace() and nb.test(congress[1])
nb = Neural_Network(
    "neural_network",
    weights=[],
    num_input=16,
    num_hidden=1000,
    num_output=2,
)

data = ld.load_congress_data(.85)
classify = nb.train(data[0])

# Running counters: examples seen, correct predictions, and how often each
# label value was predicted.
tot = 0
hit = 0
ones = 0
zeros = 0
twos = 0

for person in data[1]:
    predict = nb.predict(person)
    tot += 1
    # The first field of each example is compared against the prediction.
    if predict == person[0]:
        hit += 1
    if predict == 1:
        ones += 1
    elif predict == 0:
        zeros += 1
from naive_bayes import Naive_Bayes
import load_data as ld
import pdb

# Scratch driver: train naive Bayes on the congress data and print
# "hits total accuracy" for the second element of the loaded pair.
#
# Switched off: ld.load_iris(.75) / nb.train(iris[0]), pdb.set_trace()
# and nb.test(congress[1]).
nb = Naive_Bayes("naive_bayes")
congress = ld.load_congress_data(0.75)
classify = nb.train(congress[0])

tot = 0
hit = 0
for person in congress[1]:
    predict = nb.predict(person)
    tot += 1
    # The first field of each example is compared against the prediction.
    if predict == person[0]:
        hit += 1

# NOTE(review): the summary is emitted once, after the loop -- confirm this
# matches the intended layout. A single pre-formatted string keeps the output
# identical to the space-separated Python 2 print statement.
print("%s %s %s" % (hit, tot, hit / float(tot)))