Esempio n. 1
0
    def train(self, method=None, bins=None):
        """Run 10-fold training for the naive Bayes variants.

        Populates self.X_train/self.Y_train with per-fold training splits
        and self.X_test/self.Y_test/self.P with per-fold test splits and
        estimated parameters.

        method -- 'gaussian', 'bernoulli' or 'histogram'
        bins   -- number of histogram bins (4 or 9); only for 'histogram'
        Raises ValueError on an unsupported method or bin count.
        """
        k = 10
        kfolder = KFolder(self.D, k, normalize=True, shuffle=False)
        self.X_train, self.Y_train = [], []
        self.X_test, self.Y_test, self.P = [], [], []
        for i in range(k):
            # Get data and labels at fold i
            X, Y = kfolder.training(i)

            # Get the testing data
            Xi, Yi = kfolder.testing(i)

            # Store the training split
            self.X_train.append(X)
            self.Y_train.append(Y)

            # Class prior: P(y=0) from label frequencies, P(y=1) = 1 - P(y=0)
            p = [float(len(Y[Y == 0.0])) / float(len(Y))]
            p.append(1.0 - p[0])

            # Estimate the parameters for the requested model
            if method == 'gaussian':
                mean, var = self.gaussian(X, Y)
                params = (mean, var, p)
            elif method == 'bernoulli':
                mean, e0, e1 = self.bernoulli(X, Y)
                params = (mean, e0, e1, p)
            elif method == 'histogram':
                if bins == 4:
                    histograms = self.hist4(X, Y)
                elif bins == 9:
                    histograms = self.hist9(X, Y)
                else:
                    # Previously printed a warning, then crashed with a
                    # NameError on the undefined 'histograms'.
                    raise ValueError("Unsupported number of bins: %r" % (bins,))
                params = (histograms, p)
            else:
                # Previously an unknown method silently skipped these appends,
                # leaving X_test/Y_test/P out of sync with X_train/Y_train.
                raise ValueError("Unsupported method: %r" % (method,))
            self.X_test.append(Xi)
            self.Y_test.append(Yi)
            self.P.append(params)
Esempio n. 2
0
    def train(self, l, method='normal'):
        """Fit one weight vector per fold using the chosen solver.

        l      -- regularization strength passed to the normal-equation solver
        method -- 'normal' (closed form), 'descent' (stochastic linear
                  regression) or 'logistic' (stochastic logistic regression)
        Raises ValueError on an unknown method.

        Fills X_train/Y_train, X_test/Y_test and W with per-fold results.
        """
        k = 10
        kfolder = KFolder(self.D, k, normalize=True)
        self.X_train, self.Y_train = [], []
        self.X_test, self.Y_test, self.W = [], [], []
        for i in range(k):
            # Get data and labels at fold i
            X, Y = kfolder.training(i)

            # Solve for the vector of linear factors, W
            if method == 'normal':
                Wi = RegressionSolver(X, Y).solve(l)
            elif method == 'descent':
                Wi = GradientDescent(X, Y).linreg_stoch2()
            elif method == 'logistic':
                Wi = GradientDescent(X, Y).logreg_stoch()
            else:
                # Previously fell through with Wi undefined -> NameError below.
                raise ValueError("Unknown method: %r" % (method,))

            # Get the testing data
            Xi, Yi = kfolder.testing(i)

            # Store the results
            self.X_train.append(X)
            self.Y_train.append(Y)
            self.X_test.append(Xi)
            self.Y_test.append(Yi)
            self.W.append(Wi)
Esempio n. 3
0
 def train(self, random=False):
     """Boost on the first fold only and return the result of self.boost.

     The train/test bookkeeping lists are initialized for interface
     compatibility but are never filled before returning.
     """
     folds = 10
     kfolder = KFolder(self.D, folds, normalize=True, shuffle=False)
     self.X_train, self.Y_train = [], []
     self.X_test, self.Y_test, self.P = [], [], []
     fold = 0
     # Training data and labels for the first fold
     X, Y = kfolder.training(fold)
     # Held-out data for the same fold
     Xi, Yi = kfolder.testing(fold)
     # Boost using per-feature thresholds derived from the training data
     return self.boost(X, Y, Xi, Yi, self.thresh(X), random=random)
Esempio n. 4
0
 def train(self, shared=True):
     k = 10
     kfolder = KFolder(self.D, k, normalize=False, shuffle=False)
     self.X_train, self.Y_train = [], []
     self.X_test, self.Y_test, self.P = [], [], []
     test_acc = np.zeros(k)
     for i in range(k):
         # Get data and labels at fold k
         X,Y = kfolder.training(i)
         # Get the testing data
         Xi,Yi = kfolder.testing(i)
         # Solve for the vector of linear factors, W
         test_acc[i] = self.boost(X, Y, Xi, Yi, self.thresh(X)) 
     print "Average test_acc:",test_acc.mean()
Esempio n. 5
0
 def train(self, random=False):
     k = 10 
     kfolder = KFolder(self.D, k, normalize=True, shuffle=False)
     self.X_train, self.Y_train = [], []
     self.X_test, self.Y_test, self.P = [], [], []
     # set up top15
     tot_top15 = np.zeros(57)
     for i in range(k):
         print "Fold:", i+1
         # Get data and labels at fold k
         X,Y = kfolder.training(i)
         # Get the testing data
         Xi,Yi = kfolder.testing(i)
         # Solve for the vector of linear factors, W
         round_top15 = self.boost(X, Y, Xi, Yi, self.thresh(X), random=random) 
         for f in round_top15:
             tot_top15[f] += 1
     return np.argsort(tot_top15)[-15:]
Esempio n. 6
0
    def train(self, shared=True):
        """Estimate per-fold Gaussian parameters over 10 shuffled folds.

        Stores training splits in X_train/Y_train, held-out splits in
        X_test/Y_test, and the (mean, covariance, prior) estimates in P.
        """
        folds = 10
        kfolder = KFolder(self.D, folds, normalize=True, shuffle=True)
        self.X_train, self.Y_train = [], []
        self.X_test, self.Y_test, self.P = [], [], []
        for fold in range(folds):
            # Training split for this fold
            X, Y = kfolder.training(fold)
            # Per-class mean, covariance (optionally shared) and prior
            u, cov, p = self.estimate(X, Y, shared)
            # Held-out split for this fold
            Xi, Yi = kfolder.testing(fold)
            # Record everything for later evaluation
            self.X_train.append(X)
            self.Y_train.append(Y)
            self.X_test.append(Xi)
            self.Y_test.append(Yi)
            self.P.append((u, cov, p))
Esempio n. 7
0
 def train(self, depth, bags):
     k = 10
     kfolder = KFolder(self.D, k, normalize=False)
     self.T = []
     # Get data and labels at fold k
     self.X,self.Y = kfolder.training(0)
     C = np.append(self.X,self.Y,axis=1)
     n = len(C)
     for i in range(bags):
         print "Bag #", i+1
         # Sample a bag
         idx = np.random.randint(n, size=n)
         Xi, Yi = C.T[:-1].T[idx], C.T[-1:].T[idx].ravel()
         # Build the decision tree
         dt = DecisionTree(Xi, Yi)
         Ti = dt.build(depth)
         self.T.append(Ti)
     # Get the testing data
     self.Xt,self.Yt = kfolder.testing(0)
Esempio n. 8
0
    def train(self):
        """Solve the least-squares fit on each of 10 folds.

        Fills X_train/Y_train with training splits, X_test/Y_test with
        held-out splits, and W with the fitted weight vector per fold.
        """
        folds = 10
        kfolder = KFolder(self.D, folds, normalize=True)
        self.X_train, self.Y_train = [], []
        self.X_test, self.Y_test, self.W = [], [], []
        for fold in range(folds):
            # Training split for this fold
            X, Y = kfolder.training(fold)
            # Closed-form linear-regression solution for this fold
            Wi = RegressionSolver(X, Y).solve()
            # Held-out split for this fold
            Xi, Yi = kfolder.testing(fold)
            # Record everything for later evaluation
            self.X_train.append(X)
            self.Y_train.append(Y)
            self.X_test.append(Xi)
            self.Y_test.append(Yi)
            self.W.append(Wi)
Esempio n. 9
0
    def train(self, depth=6):
        """Build one decision tree on the first fold.

        depth -- maximum tree depth (default 6, the previously hard-coded
                 value, so existing callers are unchanged)

        Only fold 0 is processed (the loop bound mirrors the original
        'range(1)  # k'); splits and the built tree are stored in the
        X_train/Y_train, X_test/Y_test and T lists.
        """
        k = 10
        kfolder = KFolder(self.D, k, normalize=False)
        self.X_train, self.Y_train = [], []
        self.X_test, self.Y_test, self.T = [], [], []
        for i in range(1):  # k
            # Get data and labels at fold i
            X, Y = kfolder.training(i)

            # Build the depth-limited decision tree
            dt = DecisionTree(X, Y)
            Ti = dt.build(depth)

            # Get the testing data
            Xi, Yi = kfolder.testing(i)

            # Store the results
            self.X_train.append(X)
            self.Y_train.append(Y)
            self.X_test.append(Xi)
            self.Y_test.append(Yi)
            self.T.append(Ti)
Esempio n. 10
0
    # Load a comma-separated numeric data file into a matrix.
    # NOTE(review): this is a fragment — 'data_file', 'dmat' and the
    # enclosing if/for are defined before this view; 'f' is never closed.
    f = open(data_file, "r")
    for line in f:
        x = line.split(',')
        x = [float(e) for e in x]
        dmat.append(x)
    data = np.array(dmat)

    # k-fold cross-validation setup (only fold 0 is actually evaluated below)
    folds = 10 
    kfolder = KFolder(data, folds, standard=True, shuffle=False)
    for i in range(1):
        print "Fold:", i+1
        # Get data and labels at fold i
        X,Y = kfolder.training(i)
        # Get the testing data
        Xi,Yi = kfolder.testing(i)

        # Weighted nearest-neighbor prediction with radius r=4.6,
        # then print the fraction of correct test predictions
        H = wnn(X, Xi, Y, d=euclidean_distance, r=4.6)
        c = np.sum(Yi.ravel()==H)
        print "r=%f:" % 4.6, float(c)/float(len(Yi))
elif args.d == "d":
    # Load data using specialized script
    train_dataset = load_mnist(path="../data/mnist/", dataset="training")
    test_dataset = load_mnist(path="../data/mnist/", dataset="testing")

    # Take a fraction of the data to speed computation
    train_images, train_labels = sample(train_dataset, 10000)
    test_images, test_labels = sample(test_dataset, 1000)

    # Get the bounds of the haar rectangles
Esempio n. 11
0
# Load the spambase data set (comma-separated numeric rows) into a matrix.
# NOTE(review): the file handle 'f' is never closed.
data_file = "../data/spambase/spambase.data"
dmat = []
f = open(data_file, "r")
for line in f:
    x = line.split(',')
    x = [float(e) for e in x]
    dmat.append(x)
data = np.array(dmat)

# k-fold cross-validation over shuffled, standardized folds
k = 10 
kfolder = KFolder(data, k, standard=True, shuffle=True)
for i in range(k-1):
    print "Fold:", i+1
    
    # NOTE(review): training data is taken from kfolder.testing(i+1), not
    # kfolder.training(i) as in the sibling examples — possibly a deliberate
    # subsample to keep the SVM solve cheap; confirm before changing.
    X,Y = kfolder.testing(i+1)
    
    # Get the testing data; remap labels from {0, 1} to {-1, 1} for the SVM
    Xi,Yi = kfolder.testing(i)
    Yi[Yi==0] = -1.0
    
    # Train: build the Gram matrices and solve for the dual coefficients
    # 'a' and bias 'b' (C, tol, eps are the solver's hyperparameters)
    Y[Y==0] = -1.0
    G, Gi = gram(X), tgram(X, Xi)
    a, b = train(X, Y.ravel(), G, C=1e-4, tol=1e-4, eps=1e-3)

    # Test: report accuracy on the training fold and on the held-out fold
    print "Training accuracy:", test(Y, Y, G, a, b)
    print "Testing accuracy:", test(Y, Yi, Gi, a, b)