def run_method(self):
    """Fit the crowd+veracity model on the combined train/test data and
    store the resulting stance/veracity predictions on self.

    hc, hr, hs: hyper-params; n_train = # train stances;
    n_train_vera = # train claims.
    """
    combined = pd.concat([self.train_data, self.test_data], ignore_index=True)
    features = scipy.sparse.vstack((self.X_train, self.X_test))
    model = models.model_cv(combined, features, self.cds, self.cdv)
    model.init_model()
    model.em(3)
    self.ps, self.pt, self.pp_s, self.pp_t = model.get_res(
        n_train=self.n_train, n_train_vera=self.n_train_vera)
    self.cal_scores(model.clf_vera)
def snopes_cmv():
    """Run the crowd+veracity model on the Snopes data, relabel article
    stances with its stance classifier, then fit the transfer model and
    report held-out accuracy.

    Reads the module-level data_all, X, cds, cdv, a_train, a_test,
    f_train, f_test.
    """
    cmv = models.model_cv(data_all, X, cds, cdv)
    cmv.init_model()
    cmv.em(5)
    # Overwrite article stances with the learned classifier's predictions.
    a_train['articleHeadlineStance'] = cmv.clf_stance.predict(f_train)
    a_test['articleHeadlineStance'] = cmv.clf_stance.predict(f_test)
    articles = pd.concat([a_train, a_test], ignore_index=True)
    feats = scipy.sparse.vstack((f_train, f_test))
    transfer = models.model_transfer(articles, feats, [], [],
                                     n_train=len(a_train), vera_range=[0, 1])
    transfer.init_model(cmv.clf_stance)
    transfer.em(5)
    util.get_acc(a_test, transfer.res_s[transfer.n_train:],
                 transfer.res_v[transfer.train_m:],
                 transfer.alpha[transfer.train_m:], bin_brier=True)
['Baseline', 'Variational'], time=range(0, 2001, 400), xlab='Stance labels by journalists', ylab='Stance accuracy') active.plot_curves([ active.take_res(eres1[(1, 0, 0, 0)], 2), active.take_res(eres1[(2, 0, 0, 0)], 2) ], ['Baseline', 'Variational'], time=range(0, 500, 50), xlab='Test-set stance labels by crowds', ylab='') import models dic_slug = models.get_claim_slug() cmv = models.model_cv(data_all, X, cds, cdv) cmv.init_model() cmv.em(5) (cs, probs, vera) = models.get_source_prob(cmv, 269, dic_slug) (cs, probs, vera) = models.take_subset(cs, probs, vera, [0, 4, 6, 7, 14, 16]) models.plot_source(cs, probs, vera, save_name='ex2.pdf') (probs, reps, ss, vera) = cmv.get_prob() models.plot_probs(probs, reps, ss, vera, save_name='example.pdf') ############################################ eres1 = pickle.load(open('save_e3.pkl')) eres1.update(pickle.load(open('save_e4.pkl'))) eres1.update(pickle.load(open('active_gibbs3.pkl'))) eres1.update(pickle.load(open('active_cmv3.pkl')))
def offline_experiment(): #arg = int(sys.argv[1]) # number of expert labels num = [1, 400, 800, 1200, 1600, 2000] res_bl = [[], [], [], [], [], []] res_cm = [[], [], [], [], [], []] res_cmv = [[], [], [], [], [], []] save_cm = [] #for seed in [arg]: for seed in range(10, 20, 1): print "seed", seed rs = np.random.RandomState(seed=seed) for i in range(6): e = num[i] erange = rs.permutation(2071)[:e] #erange = range(2071) (data_all, X, cds, cdv, cds_test) = models.prepare_cm_data(train_val_data_pp, \ X_train_val, test_data_pp, X_test, data, expert_range = erange, \ train_range = 2071, test_range = 2595) cmv = models.model_cv(data_all, X, cds, cdv) cmv.init_model() cm = models.crowd_model(data_all, X, cds, cdv) cm.init_model() #(ps, pt) = util.baseline_crowd(train_val_data_pp, X_train_val,\ # test_data_pp, X_test, cds, cdv) (ps, pt, pp_s, pp_t, clf_st, clf_vera) = util.baseline_crowd(\ train_val_data_pp, X_train_val, test_data_pp, X_test, cds, cdv, return_proba=True) print e print util.get_acc(test_data_pp, ps, pt, pp_t) res_bl[i].append(util.get_acc(test_data_pp, ps, pt, pp_t)) cm.em(3) cmv.em(3) save_cm.append(cm) res_cm[i].append(util.get_acc(test_data_pp, cm.res_s[2071:], \ cm.res_v[240:], cm.pos_v[240:])) res_cmv[i].append(util.get_acc(test_data_pp, cmv.res_s[2071:], \ cmv.res_v[240:], cmv.alpha[240:])) #print util.get_acc(test_data_pp, cm.res_s[2071:], cm.res_v[240:]) print util.get_acc(test_data_pp, cmv.res_s[2071:], \ cmv.res_v[240:], cmv.alpha[240:]) print '------------------' res_bl = np.asarray(res_bl) print np.mean(res_bl, 1) res_cmv = np.asarray(res_cmv) print np.mean(res_cmv, 1) res_cm = np.asarray(res_cm) print np.mean(res_cm, 1) pickle.dump([res_bl, res_cmv, res_cm], open('res_all.pkl', 'w'))