labeled_sets = kfold_by_cat(labeled_docdict, labeled_size_experiment)

        i = 0
        for labeled_set in labeled_sets[:N_LABELED_SETS]:
            i += 1
            labeled_x, labeled_y = convert_docdict_to_array(labeled_set, vec, enc)

            rowout = [labeled_x.shape[0], i]

            nb = MultinomialNB(alpha=ALPHA)
            nb.fit(labeled_x, labeled_y)
            noem_score = nb.score(test_x, test_y)
            experiment_scores_noem.append(noem_score)
            rowout.append(noem_score)

            em = NaiveBayesEM(unlabeled_x, ncats, alpha=ALPHA, labeled_x=labeled_x, labeled_y=labeled_y)
            em.models.append(nb)
            em.runEM()

            nb_out = em.models[-1]
            em_score = nb_out.score(test_x, test_y)

            rowout.append(em_score)

            this_em_iter_scores = []
            for model in em.models:
                this_em_iter_scores.append(model.score(test_x, test_y))

            print "size = %s, score = %s, em_score = %s" % (labeled_size_experiment, noem_score, em_score)

            with open(FNAME_OUT, "a") as fout:
Exemple #2
0
                # For the current `alpha`, score a MultinomialNB fitted on each
                # labeled set alone and the last model held by NaiveBayesEM
                # after runEM(), both on (test_x, test_y).
                for labeled_set in labeled_sets[:N_LABELED_SETS]:
                    # `i` is a running counter initialised outside this excerpt.
                    i += 1
                    labeled_x, labeled_y = convert_docdict_to_array(labeled_set, vec, enc)
                    n_labeled_rows = labeled_x.shape[0]

                    # Baseline: fitted on the labeled data only.
                    nb = MultinomialNB(alpha=alpha)
                    nb.fit(labeled_x, labeled_y)
                    noem_score = nb.score(test_x, test_y)
                    experiment_scores_noem.append(noem_score)

                    # The fitted baseline is appended to em.models before
                    # runEM(); the EM procedure itself lives in NaiveBayesEM,
                    # outside this excerpt.
                    em = NaiveBayesEM(
                        unlabeled_x, ncats,
                        alpha=alpha, labeled_x=labeled_x, labeled_y=labeled_y)
                    em.models.append(nb)
                    em.runEM()

                    # The last entry of em.models is taken as the EM result.
                    nb_out = em.models[-1]
                    em_score = nb_out.score(test_x, test_y)

                    # Row layout: labeled row count, alpha, counter value,
                    # baseline score, EM score.
                    rowout = [n_labeled_rows, alpha, i, noem_score, em_score]

                    # Test score of every model stored in em.models, in order.
                    this_em_iter_scores = [
                        model.score(test_x, test_y) for model in em.models
                    ]

                    print "size = %s, alpha = %s, score = %s, em_score = %s" % (
Exemple #3
0
        i = 0
        total = len(labeled_sets)
        for labeled_set in labeled_sets[:MAX_N_UNLABELED_SETS]:
            print "testing size =", labeled_size_experiment, ", n =", i
            i += 1
            labeled_x, labeled_y = convert_docdict_to_array(labeled_set, vec, enc)

            rowout = [labeled_x.shape[0], i]

            nb = MultinomialNB(alpha=ALPHA)
            nb.fit(labeled_x, labeled_y)
            noem_score = nb.score(test_x, test_y)
            experiment_scores_noem.append(noem_score)
            rowout.append(noem_score)

            em = NaiveBayesEM(unlabeled_x, ncats, alpha=ALPHA)
            em.model = nb
            em.runEM()

            nb_out = em.model
            em_score = nb_out.score(test_x, test_y)

            experiment_scores_em.append(em_score)
            rowout.append(em_score)
            print rowout

            with open(FNAME_OUT,'a') as fout:
                fwriter = csv.writer(fout)
                fwriter.writerow(rowout)