Ejemplo n.º 1
0
    def update(self):
        """Train a multinomial naive Bayes model and store it, pickled.

        Validates the parameter metadata and the model data, reads the
        ``observation_vectors`` and ``truth_vectors`` metadata from the
        model data, fits a ``MultinomialNB`` configured from this object's
        ``alpha``, ``fit_prior`` and (when set) ``class_prior`` metadata,
        stores the pickled estimator as the ``mnb_model`` binary parameter
        with content type ``application/pickle``, and finally calls
        ``self.finalize()``.
        """
        # Handle to storage for model parameters
        params = self._parameters

        # Make sure all my meta data is ready to go
        params.validateMeta()

        # Make sure my model data is ready to go
        self._model_data.validate()
        self._model_data.validateViews(self.getMetaData("db_views"))

        # Training inputs are read from the model data's metadata.
        observation_vectors = self._model_data.getMetaData(
            "observation_vectors")
        truth_vectors = self._model_data.getMetaData("truth_vectors")

        # Reset "db_views" on the parameter store. This must stay after the
        # validateViews() call above, which reads "db_views" first.
        params.setMetaData("db_views", [])

        # Houston we are go
        mnb = MultinomialNB()
        mnb.alpha = self.getMetaData("alpha")
        mnb.fit_prior = self.getMetaData("fit_prior")

        # Identity check against None: an equality test (`!= None`) on an
        # array-valued prior would be evaluated element-wise and cannot be
        # used as a condition.
        class_prior = self.getMetaData("class_prior")
        if class_prior is not None:
            mnb.class_prior = class_prior

        mnb.fit(observation_vectors, truth_vectors)
        params.setBinaryData("mnb_model", "application/pickle",
                             pickle.dumps(mnb))

        self.finalize()
Ejemplo n.º 2
0
    test_set = [(features(words), labelize(category in categories)) for (words, categories) in test_corpus]

    # train classifier
    # print "Training classifier for '%s'" % category
    # classifier = MaxentClassifier.train(train_set, max_iter= 3)
    # classifier = NaiveBayesClassifier.train(train_set)
    model = MultinomialNB()
    classifier = SklearnClassifier(model)

    # set priors
    classifier._encoder.fit([category, "no"])
    # [category, "no"] unless this is true then ["no", category]
    flip = classifier.labels()[0] == "no"
    categorized_proportion = len([words for (words, categories) in corpus if category in categories]) * 1.0 / len(corpus)
    if flip:
        model.class_prior = [1-categorized_proportion, categorized_proportion]
    else:
        model.class_prior = [categorized_proportion, 1-categorized_proportion]

    classifier.train(train_set)

    # test classifier
    test_results = classifier.classify_many([feat for (feat, label) in test_set])
    pos_test_set = set(i for i, result in enumerate(test_results) if result == category)
    reference_values = [label for (feat, label) in test_set]
    pos_ref_set = set(i for i, (feat, label) in enumerate(test_set) if label == category)
    accuracy = scores.accuracy(reference_values, test_results)
    accuracies.append(accuracy)
    precision = scores.precision(pos_ref_set, pos_test_set)
    recall = scores.recall(pos_ref_set, pos_test_set)
    f1 = scores.f_measure(pos_ref_set, pos_test_set)