Example #1
0
    def run_method(self):
        """
        Fit ``models.model_cv`` on the stacked train + test data and score it.

        The train and test frames are concatenated (index reset) and the two
        feature matrices are stacked row-wise, in that order.  After
        ``init_model()`` and ``em(3)``, the tuple returned by ``get_res`` is
        stored on ``self.ps``, ``self.pt``, ``self.pp_s`` and ``self.pp_t``,
        and ``cmv.clf_vera`` is handed to ``self.cal_scores``.

        n_train      = # train stances
        n_train_vera = # train claims

        NOTE(review): earlier notes here listed hyper-params hc, hr, hs; they
        are not referenced in this method -- confirm where they are consumed.
        """
        frames = [self.train_data, self.test_data]
        data_all = pd.concat(frames, ignore_index=True)

        features = (self.X_train, self.X_test)
        X = scipy.sparse.vstack(features)

        cmv = models.model_cv(data_all, X, self.cds, self.cdv)
        cmv.init_model()
        cmv.em(3)

        res = cmv.get_res(n_train=self.n_train,
                          n_train_vera=self.n_train_vera)
        self.ps, self.pt, self.pp_s, self.pp_t = res

        self.cal_scores(cmv.clf_vera)
Example #2
0
def snopes_cmv():
    """
    Fit model_cv, relabel article stances with its stance classifier, then
    fit a transfer model initialised from that classifier and score it.

    Works on module-level names only (data_all, X, cds, cdv, a_train, a_test,
    f_train, f_test).  The 'articleHeadlineStance' column of a_train and
    a_test is overwritten in place with the classifier's predictions.  The
    value of util.get_acc is not returned; this function returns None.
    """
    joint = models.model_cv(data_all, X, cds, cdv)
    joint.init_model()
    joint.em(5)

    # Replace the stance column of both article tables with predictions.
    a_train['articleHeadlineStance'] = joint.clf_stance.predict(f_train)
    a_test['articleHeadlineStance'] = joint.clf_stance.predict(f_test)

    # Train rows first, then test rows -- the slices below rely on this order.
    articles = pd.concat([a_train, a_test], ignore_index=True)
    features = scipy.sparse.vstack((f_train, f_test))

    transfer = models.model_transfer(articles, features, [], [],
                                     n_train=len(a_train),
                                     vera_range=[0, 1])
    transfer.init_model(joint.clf_stance)
    transfer.em(5)

    # Only the entries past the training offsets are scored against a_test.
    test_stance = transfer.res_s[transfer.n_train:]
    test_vera = transfer.res_v[transfer.train_m:]
    test_alpha = transfer.alpha[transfer.train_m:]
    util.get_acc(a_test, test_stance, test_vera, test_alpha, bin_brier=True)
Example #3
0
                   ['Baseline', 'Variational'],
                   time=range(0, 2001, 400),
                   xlab='Stance labels by journalists',
                   ylab='Stance accuracy')

# Second plot: the two result sets stored in eres1 under keys (1, 0, 0, 0)
# and (2, 0, 0, 0), listed in the same order as the legend labels.
# NOTE(review): eres1 is only assigned further down in this file, so this
# statement presumably ran after that load in an interactive session.
baseline_curve = active.take_res(eres1[(1, 0, 0, 0)], 2)
variational_curve = active.take_res(eres1[(2, 0, 0, 0)], 2)
active.plot_curves([baseline_curve, variational_curve],
                   ['Baseline', 'Variational'],
                   time=range(0, 500, 50),
                   xlab='Test-set stance labels by crowds',
                   ylab='')

import models
# Fit model_cv on data_all / X / cds / cdv, which must already exist at module
# level (they are not defined in this part of the file).
# em(5) presumably runs 5 EM iterations -- confirm against models.model_cv.
dic_slug = models.get_claim_slug()
cmv = models.model_cv(data_all, X, cds, cdv)
cmv.init_model()
cmv.em(5)
# Per-source output for item 269 (presumably a claim index -- confirm), then
# narrowed with take_subset using the list [0, 4, 6, 7, 14, 16] (presumably
# positions to keep) before being written to ex2.pdf.
(cs, probs, vera) = models.get_source_prob(cmv, 269, dic_slug)
(cs, probs, vera) = models.take_subset(cs, probs, vera, [0, 4, 6, 7, 14, 16])
models.plot_source(cs, probs, vera, save_name='ex2.pdf')

# NOTE: this rebinds probs and vera from the step above with the values
# returned by cmv.get_prob(); the plot is saved as example.pdf.
(probs, reps, ss, vera) = cmv.get_prob()
models.plot_probs(probs, reps, ss, vera, save_name='example.pdf')

############################################
# Load the saved experiment results into eres1.  The object stored in
# save_e3.pkl becomes eres1 itself; the remaining files are merged into it
# with update(), in this order, so later files win on duplicate keys.
# Files are opened in binary mode (required by pickle) and closed promptly.
with open('save_e3.pkl', 'rb') as pkl_file:
    eres1 = pickle.load(pkl_file)

for pkl_name in ('save_e4.pkl', 'active_gibbs3.pkl', 'active_cmv3.pkl'):
    with open(pkl_name, 'rb') as pkl_file:
        eres1.update(pickle.load(pkl_file))
Example #4
0
def offline_experiment():
    """
    Compare the baseline, crowd_model and model_cv over several label budgets.

    For every seed in 10..19 and every budget in ``num``, a random subset of
    the first 2071 indices (drawn from a per-seed ``RandomState``) is passed
    as ``expert_range`` to ``models.prepare_cm_data``.  On the resulting data
    a baseline (``util.baseline_crowd``), a ``models.crowd_model`` and a
    ``models.model_cv`` are built, and each is scored against
    ``test_data_pp`` with ``util.get_acc``.

    Reads the module-level names ``train_val_data_pp``, ``X_train_val``,
    ``test_data_pp``, ``X_test`` and ``data``.

    Side effects: prints progress and, per budget, the mean score over seeds
    for baseline, model_cv and crowd_model (in that order); pickles
    ``[res_bl, res_cmv, res_cm]`` to ``res_all.pkl``.  Returns None.
    """
    # Number of expert labels made available in each setting.
    num = [1, 400, 800, 1200, 1600, 2000]

    # Passed as train_range; res_s[n_train:] is what gets scored on the test set.
    n_train = 2071
    # Passed as test_range to prepare_cm_data.
    test_range = 2595
    # Offset from which res_v / pos_v / alpha are scored on the test set
    # (presumably the number of training claims -- TODO confirm).
    n_train_vera = 240

    # One list of per-seed scores for each entry of num.
    res_bl = [[] for _ in num]
    res_cm = [[] for _ in num]
    res_cmv = [[] for _ in num]

    # Single-argument print(...) produces the same output on Python 2 and 3.
    for seed in range(10, 20):
        print("seed %s" % seed)
        rs = np.random.RandomState(seed=seed)
        for i, e in enumerate(num):
            # Random subset of e training indices that receive expert labels.
            erange = rs.permutation(n_train)[:e]
            (data_all, X, cds, cdv, cds_test) = models.prepare_cm_data(
                train_val_data_pp, X_train_val, test_data_pp, X_test, data,
                expert_range=erange, train_range=n_train,
                test_range=test_range)

            cmv = models.model_cv(data_all, X, cds, cdv)
            cmv.init_model()

            cm = models.crowd_model(data_all, X, cds, cdv)
            cm.init_model()

            (ps, pt, pp_s, pp_t, clf_st, clf_vera) = util.baseline_crowd(
                train_val_data_pp, X_train_val, test_data_pp, X_test,
                cds, cdv, return_proba=True)

            print(e)
            print(util.get_acc(test_data_pp, ps, pt, pp_t))
            res_bl[i].append(util.get_acc(test_data_pp, ps, pt, pp_t))

            # Both models are initialised before either one is fitted.
            cm.em(3)
            cmv.em(3)

            res_cm[i].append(util.get_acc(
                test_data_pp, cm.res_s[n_train:],
                cm.res_v[n_train_vera:], cm.pos_v[n_train_vera:]))

            res_cmv[i].append(util.get_acc(
                test_data_pp, cmv.res_s[n_train:],
                cmv.res_v[n_train_vera:], cmv.alpha[n_train_vera:]))

            print(util.get_acc(
                test_data_pp, cmv.res_s[n_train:],
                cmv.res_v[n_train_vera:], cmv.alpha[n_train_vera:]))
            print('------------------')

    # Axis 1 indexes the seeds, so each mean is taken per label budget.
    res_bl = np.asarray(res_bl)
    print(np.mean(res_bl, 1))

    res_cmv = np.asarray(res_cmv)
    print(np.mean(res_cmv, 1))

    res_cm = np.asarray(res_cm)
    print(np.mean(res_cm, 1))

    # pickle needs a binary file object; the with-block also closes it.
    with open('res_all.pkl', 'wb') as out_file:
        pickle.dump([res_bl, res_cmv, res_cm], out_file)