Ejemplo n.º 1
0
 def check_accuracy(self):
     '''Fit softmax thetas on the training set, save them, and print the
     accuracy obtained on the evaluation set.

     Starting from small random thetas of shape (self.LABS, self.N), the
     result of soft.optimizeThetas on (self.xt, self.gt) is reshaped to
     self.LABS rows and written to ./data/kaggle/optimized_thetas.csv.
     The predicted label of each evaluation row is the argmax over the
     columns of soft.h(thetas, self.xe); it is compared against self.ye.

     Returns nothing; the accuracy is printed.
     '''
     tinit = 0.005 * np.random.rand(self.LABS, self.N)
     thetas = soft.optimizeThetas(tinit, self.xt, self.gt, self.LABS, self.L)
     thetas = thetas.reshape(self.LABS, -1)
     np.savetxt('./data/kaggle/optimized_thetas.csv', thetas, delimiter=',')

     predictions = soft.h(thetas, self.xe).argmax(axis=1)

     # Flatten the labels before subtracting: the difference then stays
     # element-wise whether self.ye is stored as a column, a row or a flat
     # vector, instead of silently broadcasting to an (m, m) matrix.
     labels = np.asarray(self.ye).ravel()
     # A zero difference means the prediction matched the true label.
     misses = float(np.count_nonzero(labels - predictions))
     acc = 1 - misses / len(predictions)

     # Single-argument print(...) is valid on both Python 2 and Python 3 and
     # emits the same text as the former print statement.
     print('accuracy: %s' % acc)
    def check_accuracy(self):
        '''Print the evaluation-set accuracy of the two-step classifier.

        Step 1: softmax thetas are read from
        ./data/kaggle/optimized_thetas.csv and soft.h scores every row of
        self.xe. The two highest-scoring labels of each row are kept.

        Step 2: when the best score is below 0.99 and the runner-up is above
        0.01, the pair is re-decided by a pairwise model whose thetas come
        from self.optimize_logit_for(key) and are cached per label pair.
        A logit.h output above 0.5 selects the lower-numbered label of the
        pair, otherwise the higher-numbered one (presumably logit.h is the
        probability of the lower label -- confirm against
        optimize_logit_for). Otherwise the softmax winner is kept.

        Returns nothing; the miss count, the number of pairwise models that
        were fitted and the accuracy are printed.
        '''
        logit_thetas = {}  # pair key -> thetas returned by optimize_logit_for

        soft_thetas = np.array(pd.read_csv('./data/kaggle/optimized_thetas.csv', header=None))
        soft_thetas = soft_thetas.reshape(self.LABS, -1)

        h = soft.h(soft_thetas, self.xe)
        m = h.shape[0]
        # Counters stay floats so the printed summary keeps its "12.0" form.
        misses = 0.0
        count = 0.0
        for i in range(m):
            true_label = self.ye[i, 0]
            scores = h[i, :]
            # Indices of the best and second-best softmax scores.
            ml_1, ml_2 = scores.argsort()[-2:][::-1]
            p1, p2 = scores[ml_1], scores[ml_2]

            prediction = ml_1
            if p1 < 0.99 and p2 > 0.01:
                # The pairwise model is keyed by the two labels in ascending
                # order. '%d%d' yields the same text as the former backtick
                # repr of the two integers, but also parses on Python 3.
                # NOTE(review): concatenated decimal labels become ambiguous
                # once label indices reach 10 ('1'+'12' vs '11'+'2'); kept
                # as-is because the key format optimize_logit_for expects is
                # not visible here.
                lo, hi = min(ml_1, ml_2), max(ml_1, ml_2)
                s = '%d%d' % (lo, hi)

                if s not in logit_thetas:
                    count += 1
                    logit_thetas[s] = self.optimize_logit_for(s)

                # Constant 1 prepended to the feature row (presumably the
                # intercept term expected by logit.h -- confirm).
                logix = np.hstack([1, self.xe[i, :]])
                p = logit.h(logit_thetas[s], logix)
                prediction = lo if p > 0.5 else hi

            if prediction != true_label:
                misses += 1.0

        # Single-argument print(...) is valid on both Python 2 and Python 3
        # and emits the same text as the former print statements.
        print('misses %s' % misses)
        print('logit thetas searched %s' % count)
        acc = 1 - misses / m
        print('accuracy: %s' % acc)
Ejemplo n.º 3
0
 def test_model_submit(self):
     '''Fit softmax thetas on training plus evaluation data and write the
     test-set predictions for submission.

     The thetas returned by soft.optimizeThetas on the stacked
     (self.xt, self.xe) features and (self.gt, self.ge) targets are saved
     to ./data/kaggle/submit_optimized_thetas.csv. Each row of
     self.x_test is then labelled with the argmax of
     soft.h(thetas, self.x_test) and the pairs (1-based row number, label)
     are written to ./data/kaggle/predictions.csv without a header line.

     Returns nothing; a reminder about the missing header and the first
     ten prediction rows are printed.
     '''
     # Compute thetas on the whole labelled set (training + evaluation).
     tinit = 0.005 * np.random.rand(self.LABS, self.N)
     x = np.vstack([self.xt, self.xe])
     g = np.vstack([self.gt, self.ge])

     # Find thetas and save them.
     thetas = soft.optimizeThetas(tinit, x, g, self.LABS, self.L)
     thetas = thetas.reshape(self.LABS, -1)
     np.savetxt('./data/kaggle/submit_optimized_thetas.csv', thetas, delimiter=',')

     # Compute predictions: column 0 is the 1-based image id, column 1 the
     # most probable label. The float array is kept so that the printed
     # preview looks the same as before; fmt='%i,%i' writes integers.
     m = self.x_test.shape[0]
     h = np.asarray(soft.h(thetas, self.x_test))
     predictions = np.zeros((m, 2))
     predictions[:, 0] = np.arange(1, m + 1)
     predictions[:, 1] = h.argmax(axis=1)

     # Single-argument print(...) is valid on both Python 2 and Python 3 and
     # emits the same text as the former print statements.
     print('To submitt add header: ImageId,Label')
     print(predictions[0:10, :])
     np.savetxt('./data/kaggle/predictions.csv', predictions, fmt='%i,%i')
    def test_model_submit(self):
        '''Write two-step (softmax, then pairwise logistic) test predictions.

        Step 1: softmax thetas are read from
        ./data/kaggle/submit_optimized_thetas.csv and soft.h scores every
        row of self.x_test. The two highest-scoring labels of each row are
        kept.

        Step 2: when the best score is below 0.99 and the runner-up is above
        0.01, the pair is re-decided by a pairwise model whose thetas come
        from self.optimize_logit_for(key) and are cached per label pair.
        A logit.h output above 0.5 selects the lower-numbered label of the
        pair, otherwise the higher-numbered one (presumably logit.h is the
        probability of the lower label -- confirm against
        optimize_logit_for). Otherwise the softmax winner is kept.

        The pairs (1-based row number, label) are written to
        ./data/kaggle/predictions_2steps.csv without a header line.
        Returns nothing; a reminder about the missing header and the first
        ten prediction rows are printed.
        '''
        logit_thetas = {}  # pair key -> thetas returned by optimize_logit_for

        soft_thetas = np.array(pd.read_csv('./data/kaggle/submit_optimized_thetas.csv', header=None))
        soft_thetas = soft_thetas.reshape(self.LABS, -1)

        m = self.x_test.shape[0]
        h = soft.h(soft_thetas, self.x_test)
        predictions = np.zeros((m, 2))
        for i in range(m):
            scores = h[i, :]
            # Indices of the best and second-best softmax scores.
            ml_1, ml_2 = scores.argsort()[-2:][::-1]
            p1, p2 = scores[ml_1], scores[ml_2]

            label = ml_1
            if p1 < 0.99 and p2 > 0.01:
                # The pairwise model is keyed by the two labels in ascending
                # order. '%d%d' yields the same text as the former backtick
                # repr of the two integers, but also parses on Python 3.
                # NOTE(review): concatenated decimal labels become ambiguous
                # once label indices reach 10 ('1'+'12' vs '11'+'2'); kept
                # as-is because the key format optimize_logit_for expects is
                # not visible here.
                lo, hi = min(ml_1, ml_2), max(ml_1, ml_2)
                s = '%d%d' % (lo, hi)

                if s not in logit_thetas:
                    logit_thetas[s] = self.optimize_logit_for(s)

                # Constant 1 prepended to the feature row (presumably the
                # intercept term expected by logit.h -- confirm).
                logix = np.hstack([1, self.x_test[i, :]])
                p = logit.h(logit_thetas[s], logix)
                label = lo if p > 0.5 else hi

            # Column 0 is the 1-based image id, column 1 the chosen label.
            predictions[i, :] = [i + 1, label]

        # Single-argument print(...) is valid on both Python 2 and Python 3
        # and emits the same text as the former print statements.
        print('To submitt add header: ImageId,Label')
        print(predictions[0:10, :])
        np.savetxt('./data/kaggle/predictions_2steps.csv', predictions, fmt='%i,%i')