# Compare a plain Multinomial Naive Bayes classifier against its EM-refined
# counterpart on up to N_LABELED_SETS labeled subsets.
# NOTE(review): this excerpt arrived with its line breaks stripped; the
# nesting below is reconstructed from the statement order -- confirm.
# presumably kfold_by_cat splits the labeled documents into subsets sized
# by labeled_size_experiment -- verify against its definition.
labeled_sets = kfold_by_cat(labeled_docdict, labeled_size_experiment)
i = 0
for labeled_set in labeled_sets[:N_LABELED_SETS]:
    # 1-based index of the current labeled subset; stored in the output row.
    i += 1
    labeled_x, labeled_y = convert_docdict_to_array(labeled_set, vec, enc)
    # Output row: number of labeled rows, subset index, then the two scores.
    rowout = [labeled_x.shape[0], i]

    # Baseline: classifier fitted on the labeled subset only.
    nb = MultinomialNB(alpha=ALPHA)
    nb.fit(labeled_x, labeled_y)
    noem_score = nb.score(test_x, test_y)
    experiment_scores_noem.append(noem_score)
    rowout.append(noem_score)

    # EM run over the unlabeled data; the fitted baseline is appended to
    # em.models before runEM() is called.
    em = NaiveBayesEM(unlabeled_x, ncats, alpha=ALPHA, labeled_x=labeled_x, labeled_y=labeled_y)
    em.models.append(nb)
    em.runEM()
    # The last entry of em.models is the one scored as the EM result.
    nb_out = em.models[-1]
    em_score = nb_out.score(test_x, test_y)
    rowout.append(em_score)

    # Test-set score of every model held in em.models, in list order.
    this_em_iter_scores = []
    for model in em.models:
        this_em_iter_scores.append(model.score(test_x, test_y))
    print "size = %s, score = %s, em_score = %s" % (labeled_size_experiment, noem_score, em_score)
    # NOTE(review): the body of this with-statement lies beyond the visible
    # excerpt.
    with open(FNAME_OUT, "a") as fout:
# Variant of the NB-vs-EM comparison in which the smoothing value `alpha`
# is a variable and is recorded in each output row.
# NOTE(review): this excerpt arrived with its line breaks stripped; the
# nesting below is reconstructed from the statement order -- confirm.
# `i` and `alpha` are assigned outside the visible excerpt.
for labeled_set in labeled_sets[:N_LABELED_SETS]:
    # 1-based index of the current labeled subset; stored in the output row.
    i += 1
    labeled_x, labeled_y = convert_docdict_to_array(
        labeled_set, vec, enc)
    # Output row: number of labeled rows, alpha, subset index, then scores.
    rowout = [labeled_x.shape[0], alpha, i]

    # Baseline: classifier fitted on the labeled subset only.
    nb = MultinomialNB(alpha=alpha)
    nb.fit(labeled_x, labeled_y)
    noem_score = nb.score(test_x, test_y)
    experiment_scores_noem.append(noem_score)
    rowout.append(noem_score)

    # EM run over the unlabeled data; the fitted baseline is appended to
    # em.models before runEM() is called.
    em = NaiveBayesEM(unlabeled_x, ncats, alpha=alpha,
                      labeled_x=labeled_x, labeled_y=labeled_y)
    em.models.append(nb)
    em.runEM()
    # The last entry of em.models is the one scored as the EM result.
    nb_out = em.models[-1]
    em_score = nb_out.score(test_x, test_y)
    rowout.append(em_score)

    # Test-set score of every model held in em.models, in list order.
    this_em_iter_scores = []
    for model in em.models:
        this_em_iter_scores.append(model.score(test_x, test_y))
    # NOTE(review): the argument tuple of this print continues beyond the
    # visible excerpt.
    print "size = %s, alpha = %s, score = %s, em_score = %s" % (
i = 0 total = len(labeled_sets) for labeled_set in labeled_sets[:MAX_N_UNLABELED_SETS]: print "testing size =", labeled_size_experiment, ", n =", i i += 1 labeled_x, labeled_y = convert_docdict_to_array(labeled_set, vec, enc) rowout = [labeled_x.shape[0], i] nb = MultinomialNB(alpha=ALPHA) nb.fit(labeled_x, labeled_y) noem_score = nb.score(test_x, test_y) experiment_scores_noem.append(noem_score) rowout.append(noem_score) em = NaiveBayesEM(unlabeled_x, ncats, alpha=ALPHA) em.model = nb em.runEM() nb_out = em.model em_score = nb_out.score(test_x, test_y) experiment_scores_em.append(em_score) rowout.append(em_score) print rowout with open(FNAME_OUT,'a') as fout: fwriter = csv.writer(fout) fwriter.writerow(rowout)