def optimize_logit_for(self, pair):
    """Fit logistic-regression parameters that separate two labels.

    Reads ``./data/kaggle/train.csv``, keeps only the rows whose first
    column equals one of the two labels in ``pair``, and treats the
    remaining columns as features.  Rows carrying the first label become
    the positive class (1); all other kept rows become 0.  A leading
    column of ones is prepended to the features before the data is handed
    to ``logit.optimizeThetas``.

    Args:
        pair: Indexable holding two labels; each is converted with
            ``int()``.  ``pair[0]`` is the positive class.

    Returns:
        Whatever ``logit.optimizeThetas`` returns for the selected data.
    """
    label_a, label_b = int(pair[0]), int(pair[1])

    # Extract the rows for the two requested labels (column 0 is the label).
    data = np.array(
        pd.read_csv('./data/kaggle/train.csv', header=0)
    ).astype('float64')
    rows_a = data[data[:, 0] == label_a]
    rows_b = data[data[:, 0] == label_b]
    data_ab = np.vstack([rows_a, rows_b])

    xt = data_ab[:, 1:]
    # Single-argument form prints the same text under Python 2 and 3.
    print('xt shape %s' % (xt.shape,))

    # Prepend the intercept column.
    xt2 = np.column_stack([np.ones((xt.shape[0], 1)), xt])

    # Binary targets: 1 where the (int8-cast) label equals the first label,
    # 0 otherwise.  Vectorized replacement for the per-element loop.
    labels = data_ab[:, 0].astype('int8')
    yt2 = (labels == label_a).astype('int8')

    # Small random starting point, one row per column of xt2.
    ini_thetas = 0.005 * np.random.rand(xt2.shape[1], 1)
    # NOTE(review): fourth positional argument of optimizeThetas; presumably
    # the regularization strength -- confirm against logit.optimizeThetas.
    L = 1e+5
    opt_thetas = logit.optimizeThetas(ini_thetas, xt2, yt2, L, visual=False)
    return opt_thetas
def test_optimize_thetas_reg():
    """Check optimizeThetas against a fixed reference parameter vector.

    Builds a ``Logysterical`` from the module-level ``tr`` data, starts the
    optimisation from an all-zero column vector sized to ``tr["X"]``'s
    column count, and requires every resulting parameter to lie within
    0.01 (absolute) of the reference values below.
    """
    model = Logysterical(tr, None)
    ini_thetas = np.zeros((model.tr["X"].shape[1], 1))
    pp = model.optimizeThetas(ini_thetas)
    print(pp)

    expected = np.array(
        [
            1.273005,
            0.624876,
            1.177376,
            -2.020142,
            -0.912616,
            -1.429907,
            0.125668,
            -0.368551,
            -0.360033,
            -0.171068,
            -1.460894,
            -0.052499,
            -0.618889,
            -0.273745,
            -1.192301,
            -0.240993,
            -0.207934,
            -0.047224,
            -0.278327,
            -0.296602,
            -0.453957,
            -1.045511,
            0.026463,
            -0.294330,
            0.014381,
            -0.328703,
            -0.143796,
            -0.924883,
        ]
    )

    # Flatten first so an (n, 1) column result is compared element-wise
    # instead of broadcasting against the (n,) reference into (n, n).
    actual = np.ravel(pp)
    assert actual.shape == expected.shape

    # Compare magnitudes: a signed check would let any large negative
    # error slip through unnoticed.
    for err in np.abs(actual - expected):
        assert err < 0.01