def train(self, method=None, bins=None):
    """Train a naive Bayes classifier with 10-fold cross validation.

    Parameters
    ----------
    method : str
        Likelihood model: 'gaussian', 'bernoulli', or 'histogram'.
    bins : int, optional
        Number of histogram bins (4 or 9); required when
        method == 'histogram'.

    Raises
    ------
    ValueError
        If method (or bins, for the histogram model) is unsupported.
        The original code printed a warning and then crashed on an
        undefined 'histograms' (or silently left self.P inconsistent
        with self.X_test for an unknown method); fail fast instead.
    """
    k = 10
    kfolder = KFolder(self.D, k, normalize=True, shuffle=False)
    self.X_train, self.Y_train = [], []
    self.X_test, self.Y_test, self.P = [], [], []
    for i in range(k):
        # Training and testing splits for fold i.
        X, Y = kfolder.training(i)
        Xi, Yi = kfolder.testing(i)
        self.X_train.append(X)
        self.Y_train.append(Y)
        self.X_test.append(Xi)
        self.Y_test.append(Yi)
        # Class prior: P(y=0) from label frequency, P(y=1) = 1 - P(y=0).
        p = [float(len(Y[Y == 0.0])) / float(len(Y))]
        p.append(1.0 - p[0])
        # Per-fold parameters for the chosen likelihood model.
        if method == 'gaussian':
            mean, var = self.gaussian(X, Y)
            self.P.append((mean, var, p))
        elif method == 'bernoulli':
            mean, e0, e1 = self.bernoulli(X, Y)
            self.P.append((mean, e0, e1, p))
        elif method == 'histogram':
            if bins == 4:
                histograms = self.hist4(X, Y)
            elif bins == 9:
                histograms = self.hist9(X, Y)
            else:
                raise ValueError("Unsupported number of bins: %r" % (bins,))
            self.P.append((histograms, p))
        else:
            raise ValueError("Unsupported method: %r" % (method,))
def train(self, l, method='normal'):
    """Train regression weights with 10-fold cross validation.

    Parameters
    ----------
    l : float
        Regularization strength, forwarded to RegressionSolver.solve
        when method == 'normal'.
    method : str
        'normal' (RegressionSolver closed-form solve), 'descent'
        (GradientDescent.linreg_stoch2), or 'logistic'
        (GradientDescent.logreg_stoch).

    Raises
    ------
    ValueError
        If method is not one of the supported solvers.  The original
        code fell through and crashed on an undefined Wi.
    """
    k = 10
    kfolder = KFolder(self.D, k, normalize=True)
    self.X_train, self.Y_train = [], []
    self.X_test, self.Y_test, self.W = [], [], []
    for i in range(k):
        X, Y = kfolder.training(i)
        # Solve for the weight vector W on this fold.
        if method == 'normal':
            Wi = RegressionSolver(X, Y).solve(l)
        elif method == 'descent':
            Wi = GradientDescent(X, Y).linreg_stoch2()
        elif method == 'logistic':
            Wi = GradientDescent(X, Y).logreg_stoch()
        else:
            raise ValueError("Unsupported method: %r" % (method,))
        Xi, Yi = kfolder.testing(i)
        self.X_train.append(X)
        self.Y_train.append(Y)
        self.X_test.append(Xi)
        self.Y_test.append(Yi)
        self.W.append(Wi)
def train(self, random=False):
    """Run one boosting round on fold 0 and return its result.

    Only the first of the 10 folds is used (the original looped over
    range(1) and returned inside the loop); the return value is
    whatever self.boost produces for that fold.
    """
    k = 10
    kfolder = KFolder(self.D, k, normalize=True, shuffle=False)
    self.X_train, self.Y_train = [], []
    self.X_test, self.Y_test, self.P = [], [], []
    # Single fold: train/test split at index 0.
    fold = 0
    X, Y = kfolder.training(fold)
    Xt, Yt = kfolder.testing(fold)
    return self.boost(X, Y, Xt, Yt, self.thresh(X), random=random)
def train(self, shared=True): k = 10 kfolder = KFolder(self.D, k, normalize=False, shuffle=False) self.X_train, self.Y_train = [], [] self.X_test, self.Y_test, self.P = [], [], [] test_acc = np.zeros(k) for i in range(k): # Get data and labels at fold k X,Y = kfolder.training(i) # Get the testing data Xi,Yi = kfolder.testing(i) # Solve for the vector of linear factors, W test_acc[i] = self.boost(X, Y, Xi, Yi, self.thresh(X)) print "Average test_acc:",test_acc.mean()
def train(self, random=False): k = 10 kfolder = KFolder(self.D, k, normalize=True, shuffle=False) self.X_train, self.Y_train = [], [] self.X_test, self.Y_test, self.P = [], [], [] # set up top15 tot_top15 = np.zeros(57) for i in range(k): print "Fold:", i+1 # Get data and labels at fold k X,Y = kfolder.training(i) # Get the testing data Xi,Yi = kfolder.testing(i) # Solve for the vector of linear factors, W round_top15 = self.boost(X, Y, Xi, Yi, self.thresh(X), random=random) for f in round_top15: tot_top15[f] += 1 return np.argsort(tot_top15)[-15:]
def train(self, shared=True):
    """Estimate per-fold (mean, covariance, prior) parameters over 10 folds.

    Stores the training/testing splits and the tuple returned by
    self.estimate for each fold in the parallel lists self.X_train,
    self.Y_train, self.X_test, self.Y_test, and self.P.
    """
    n_folds = 10
    folds = KFolder(self.D, n_folds, normalize=True, shuffle=True)
    self.X_train, self.Y_train = [], []
    self.X_test, self.Y_test, self.P = [], [], []
    for fold in range(n_folds):
        X, Y = folds.training(fold)
        # Fit the model parameters on this fold's training split.
        u, cov, p = self.estimate(X, Y, shared)
        Xt, Yt = folds.testing(fold)
        self.X_train.append(X)
        self.Y_train.append(Y)
        self.X_test.append(Xt)
        self.Y_test.append(Yt)
        self.P.append((u, cov, p))
def train(self, depth, bags): k = 10 kfolder = KFolder(self.D, k, normalize=False) self.T = [] # Get data and labels at fold k self.X,self.Y = kfolder.training(0) C = np.append(self.X,self.Y,axis=1) n = len(C) for i in range(bags): print "Bag #", i+1 # Sample a bag idx = np.random.randint(n, size=n) Xi, Yi = C.T[:-1].T[idx], C.T[-1:].T[idx].ravel() # Build the decision tree dt = DecisionTree(Xi, Yi) Ti = dt.build(depth) self.T.append(Ti) # Get the testing data self.Xt,self.Yt = kfolder.testing(0)
def train(self):
    """Fit regression weights on each of 10 cross-validation folds.

    Populates the parallel per-fold lists self.X_train, self.Y_train,
    self.X_test, self.Y_test, and self.W.
    """
    n_folds = 10
    folds = KFolder(self.D, n_folds, normalize=True)
    self.X_train, self.Y_train = [], []
    self.X_test, self.Y_test, self.W = [], [], []
    for fold in range(n_folds):
        X, Y = folds.training(fold)
        # Weight vector for this fold.
        W = RegressionSolver(X, Y).solve()
        Xt, Yt = folds.testing(fold)
        self.X_train.append(X)
        self.Y_train.append(Y)
        self.X_test.append(Xt)
        self.Y_test.append(Yt)
        self.W.append(W)
def train(self):
    """Build a depth-6 decision tree on the first cross-validation fold.

    Only fold 0 of the 10 is used (the original looped over range(1)).
    The fitted tree and the fold's splits are appended to the parallel
    lists self.X_train, self.Y_train, self.X_test, self.Y_test, self.T.
    """
    n_folds = 10
    folds = KFolder(self.D, n_folds, normalize=False)
    self.X_train, self.Y_train = [], []
    self.X_test, self.Y_test, self.T = [], [], []
    for fold in range(1):  # deliberately just the first fold, not all n_folds
        X, Y = folds.training(fold)
        tree = DecisionTree(X, Y).build(6)
        Xt, Yt = folds.testing(fold)
        self.X_train.append(X)
        self.Y_train.append(Y)
        self.X_test.append(Xt)
        self.Y_test.append(Yt)
        self.T.append(tree)
f = open(data_file, "r") for line in f: x = line.split(',') x = [float(e) for e in x] dmat.append(x) data = np.array(dmat) # k-folds folds = 10 kfolder = KFolder(data, folds, standard=True, shuffle=False) for i in range(1): print "Fold:", i+1 # Get data and labels at fold k X,Y = kfolder.training(i) # Get the testing data Xi,Yi = kfolder.testing(i) # Run knn H = wnn(X, Xi, Y, d=euclidean_distance, r=4.6) c = np.sum(Yi.ravel()==H) print "r=%f:" % 4.6, float(c)/float(len(Yi)) elif args.d == "d": # Load data using specialized script train_dataset = load_mnist(path="../data/mnist/", dataset="training") test_dataset = load_mnist(path="../data/mnist/", dataset="testing") # Take a fraction of the data to speed computation train_images, train_labels = sample(train_dataset, 10000) test_images, test_labels = sample(test_dataset, 1000) # Get the bounds of the haar rectangles
# SVM experiment on the spambase dataset (Python 2 script fragment).
# Loads the CSV data, then runs a train/test loop over k-fold splits.
data_file = "../data/spambase/spambase.data"
dmat = []
# NOTE(review): file handle is never closed; a `with open(...)` would be safer.
f = open(data_file, "r")
for line in f:
    x = line.split(',')
    x = [float(e) for e in x]
    dmat.append(x)
data = np.array(dmat)
# k-folds xvalidation
k = 10
kfolder = KFolder(data, k, standard=True, shuffle=True)
for i in range(k-1):
    print "Fold:", i+1
    # Get data and labels at fold k
    # NOTE(review): fold i+1's *testing* split is used as the training set
    # here (one small fold to train, another to test) — confirm this is the
    # intended small-sample setup rather than kfolder.training(i).
    X,Y = kfolder.testing(i+1)
    # Get the testing data
    Xi,Yi = kfolder.testing(i)
    Yi[Yi==0] = -1.0
    # Train: remap labels from {0,1} to {-1,+1}, build the Gram matrices,
    # and fit the SVM dual variables (a) and bias (b).
    Y[Y==0] = -1.0
    G, Gi = gram(X), tgram(X, Xi)
    a, b = train(X, Y.ravel(), G, C=1e-4, tol=1e-4, eps=1e-3)
    # Test
    print "Training accuracy:", test(Y, Y, G, a, b)
    print "Testing accuracy:", test(Y, Yi, Gi, a, b)