def check_accuracy(self):
    """Print the miss count and accuracy of the two-step classifier.

    Step 1 scores every row of ``self.xe`` with ``soft.h`` using the thetas
    stored in ``./data/kaggle/optimized_thetas.csv`` (reshaped to
    ``self.LABS`` rows).  The column indices of the two highest scores are
    the first and second label choices for that row.

    Step 2 runs only when the first choice is not near-certain
    (score < 0.99) and the second is not negligible (score > 0.01): a
    pairwise model obtained from ``self.optimize_logit_for(key)`` decides
    between the two candidates.  ``key`` is the two label indices
    concatenated as text, smaller index first; fitted thetas are cached per
    key for the duration of the call.  A ``logit.h`` value above 0.5 selects
    the smaller label index, anything else the larger one.

    The prediction is compared with ``self.ye[i, 0]``.  The number of
    misses, the number of pairwise models fitted, and the accuracy are
    printed; nothing is returned.

    Raises:
        ZeroDivisionError: if ``soft.h`` returns no rows (accuracy is
            undefined for an empty evaluation set).
    """
    # presumably soft.h yields per-class softmax probabilities, one row per
    # sample -- confirm against the `soft` module.
    logit_thetas = {}  # pair key -> thetas from optimize_logit_for
    soft_thetas = np.array(
        pd.read_csv('./data/kaggle/optimized_thetas.csv', header=None))
    soft_thetas = soft_thetas.reshape(self.LABS, -1)
    h = soft.h(soft_thetas, self.xe)
    m = h.shape[0]
    misses = 0.00
    count = 0.0  # kept as float so the printed value keeps its "N.0" form
    for i in range(m):
        true_label = self.ye[i, 0]
        scores = h[i, :]
        # 1st and 2nd model choices (indices of the two largest scores)
        ml_1, ml_2 = scores.argsort()[-2:][::-1]
        p1, p2 = scores[ml_1], scores[ml_2]
        if p1 < 0.99 and p2 > 0.01:
            # argsort yields distinct indices, so one is strictly smaller.
            if ml_1 < ml_2:
                low, high = ml_1, ml_2
            else:
                low, high = ml_2, ml_1
            # str(int(...)) gives the plain digits on every NumPy version;
            # repr() of a NumPy integer is 'np.int64(3)' on NumPy >= 2.
            # NOTE(review): plain concatenation is only unambiguous while
            # label indices are single digits.
            s = str(int(low)) + str(int(high))
            if s not in logit_thetas:
                count += 1
                logit_thetas[s] = self.optimize_logit_for(s)
            l_t = logit_thetas[s]
            # leading 1 is presumably the intercept feature -- confirm
            # against logit.h.
            logix = np.hstack([1, self.xe[i, :]])
            p = logit.h(l_t, logix)
            prediction = low if p > 0.5 else high
        else:
            prediction = ml_1
        if prediction != true_label:
            misses += 1.0
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('misses %s' % misses)
    print('logit thetas searched %s' % count)
    acc = 1 - misses / m
    print('accuracy: %s' % acc)
def test_model_submit(self):
    """Predict labels for ``self.x_test`` and write a submission CSV.

    Step 1 scores every row of ``self.x_test`` with ``soft.h`` using the
    thetas stored in ``./data/kaggle/submit_optimized_thetas.csv``
    (reshaped to ``self.LABS`` rows).  The column indices of the two
    highest scores are the first and second label choices for that row.

    Step 2 runs only when the first choice is not near-certain
    (score < 0.99) and the second is not negligible (score > 0.01): a
    pairwise model obtained from ``self.optimize_logit_for(key)`` decides
    between the two candidates.  ``key`` is the two label indices
    concatenated as text, smaller index first; fitted thetas are cached per
    key for the duration of the call.  A ``logit.h`` value above 0.5 selects
    the smaller label index, anything else the larger one.

    Each output row is ``(i + 1, label)`` for the 0-based row ``i``.  The
    first ten rows are printed and the full table is written, without a
    header line, to ``./data/kaggle/predictions_2steps.csv``.  Nothing is
    returned.
    """
    # presumably soft.h yields per-class softmax probabilities, one row per
    # sample -- confirm against the `soft` module.
    logit_thetas = {}  # pair key -> thetas from optimize_logit_for
    soft_thetas = np.array(
        pd.read_csv('./data/kaggle/submit_optimized_thetas.csv', header=None))
    soft_thetas = soft_thetas.reshape(self.LABS, -1)
    m = self.x_test.shape[0]
    h = soft.h(soft_thetas, self.x_test)
    predictions = np.zeros((m, 2))
    for i in range(m):
        scores = h[i, :]
        # 1st and 2nd model choices (indices of the two largest scores)
        ml_1, ml_2 = scores.argsort()[-2:][::-1]
        p1, p2 = scores[ml_1], scores[ml_2]
        if p1 < 0.99 and p2 > 0.01:
            # argsort yields distinct indices, so one is strictly smaller.
            if ml_1 < ml_2:
                low, high = ml_1, ml_2
            else:
                low, high = ml_2, ml_1
            # str(int(...)) gives the plain digits on every NumPy version;
            # repr() of a NumPy integer is 'np.int64(3)' on NumPy >= 2.
            # NOTE(review): plain concatenation is only unambiguous while
            # label indices are single digits.
            s = str(int(low)) + str(int(high))
            if s not in logit_thetas:
                logit_thetas[s] = self.optimize_logit_for(s)
            l_t = logit_thetas[s]
            # leading 1 is presumably the intercept feature -- confirm
            # against logit.h.
            logix = np.hstack([1, self.x_test[i, :]])
            p = logit.h(l_t, logix)
            label = low if p > 0.5 else high
        else:
            label = ml_1
        # Row ids in the output are 1-based.
        predictions[i, :] = [i + 1, label]
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('To submitt add header: ImageId,Label')
    print(predictions[0:10, :])
    np.savetxt('./data/kaggle/predictions_2steps.csv', predictions, fmt='%i,%i')