コード例 #1
0
ファイル: classifier.py プロジェクト: kedorlaomer/ta-final
def train(spamDir, hamDir):
    """Train a Naive Bayes classifier from two directories and persist it.

    Every text produced by ``getDirContent(spamDir)`` is labelled ``SPAM``
    and every text produced by ``getDirContent(hamDir)`` is labelled
    ``NOSPAM``.  The labelled feature sets are shuffled, used to train a
    ``NaiveBayesClassifier``, and the result is stored at
    ``CLASSIFIER_PATH`` via ``saveClassifier``.

    Args:
        spamDir: Location handed to ``getDirContent`` for the spam samples.
        hamDir: Location handed to ``getDirContent`` for the ham samples.
    """
    spam_samples = [
        (featuresForText(message), SPAM)
        for message in getDirContent(spamDir)
    ]
    ham_samples = [
        (featuresForText(message), NOSPAM)
        for message in getDirContent(hamDir)
    ]

    # Spam first, then ham -- same order as they are read -- before mixing.
    labelled = spam_samples + ham_samples
    shuffle(labelled)

    saveClassifier(NaiveBayesClassifier.train(labelled), CLASSIFIER_PATH)

    # Single-argument parenthesised print emits the same text on Python 2
    # and remains valid syntax on Python 3.
    print("Done with learning.")
コード例 #2
0
ファイル: classifier.py プロジェクト: kedorlaomer/ta-final
def getBayesAccuracy(splitRatio=0.9):
    """Train on one part of the labelled corpus and score on the rest.

    Texts from ``getSpamContent()`` are labelled ``SPAM`` and texts from
    ``getHamContent()`` are labelled ``NOSPAM``.  The labelled feature sets
    are shuffled and divided by ``splitByRatio``; the first part trains a
    ``NaiveBayesClassifier`` and the second part is used for evaluation.
    The ten most informative features are reported as a side effect.

    Args:
        splitRatio: Ratio forwarded to ``splitByRatio`` (default 0.9).
            Presumably the fraction used for training -- confirm against
            ``splitByRatio``.

    Returns:
        The value of ``nltk_classify.accuracy(classifier, devset)``.
    """
    featureset = [
        (featuresForText(text), SPAM) for text in getSpamContent()
    ]
    featureset.extend(
        (featuresForText(text), NOSPAM) for text in getHamContent()
    )

    shuffle(featureset)
    trainset, devset = splitByRatio(featureset, splitRatio)

    classifier = NaiveBayesClassifier.train(trainset)
    # NOTE(review): assuming this is NLTK's NaiveBayesClassifier, whose
    # show_most_informative_features() prints the table itself and returns
    # None.  Printing its return value therefore emitted a stray "None"
    # line after the table, so the method is now called directly.
    classifier.show_most_informative_features(10)
    return nltk_classify.accuracy(classifier, devset)
コード例 #3
0
ファイル: classifier.py プロジェクト: kedorlaomer/ta-final
def classify(evalDir, resultFilename):
    """Classify every text under ``evalDir`` and write the verdicts to a file.

    The classifier is loaded from ``CLASSIFIER_PATH``.  For each
    ``(text, filepath)`` pair produced by
    ``iterDirContent(evalDir, yieldFilepath=True)`` one line of the form
    ``<filepath>\\t<label>\\n`` is written to ``resultFilename``, where the
    label is ``S_SPAM`` or ``S_NOSPAM``.

    Args:
        evalDir: Location handed to ``iterDirContent``.
        resultFilename: Path of the output file; it is opened in ``"w"``
            mode, so existing content is overwritten.

    Raises:
        Exception: If ``loadClassifier`` returns a falsy value.
    """
    model = loadClassifier(CLASSIFIER_PATH)
    if not model:
        raise Exception("Classifier was not loaded.")
    print("loaded")

    with open(resultFilename, "w") as out:
        for body, filepath in iterDirContent(evalDir, yieldFilepath=True):
            predicted = model.classify(featuresForText(body))
            # NOTE(review): the choice relies on the truthiness of the
            # predicted label -- this presumes the SPAM label is truthy and
            # the NOSPAM label is falsy; confirm against their definitions.
            if predicted:
                verdict = S_SPAM
            else:
                verdict = S_NOSPAM
            out.write("%s\t%s\n" % (filepath, verdict))

    print("Classified output was saved to file '%s'." % resultFilename)
    print("Done with classifying.")