Beispiel #1
0
def load_classifier():
    """Return the classifier stored in ``classifier.db``.

    When the file does not exist yet, a classifier is first built with
    ``train_all(None, corpus)`` and saved to ``classifier.db``; the result
    returned to the caller is always the one read back from disk.
    """
    db_path = 'classifier.db'
    if os.path.exists(db_path):
        return Classifier.load(db_path)

    # No saved model on disk: train one and persist it before loading.
    trained = train_all(None, corpus)
    trained.save(db_path)
    return Classifier.load(db_path)
Beispiel #2
0
def load_classifier():
    """Return the classifier stored in ``classifier.db``.

    If the file is missing, a message is written to stderr and the process
    exits with status 1 (``SystemExit`` is raised).
    """
    db_path = 'classifier.db'
    if os.path.exists(db_path):
        return Classifier.load(db_path)

    # Nothing to load: report the problem and terminate.
    sys.stderr.write('Unable to load classifier.db -- please run this script first')
    sys.exit(1)
Beispiel #3
0
def load_classifier():
    """Load the classifier from ``classifier.db``; exit with status 1 if the file is absent."""
    if not os.path.exists('classifier.db'):
        # The message is written to stderr without a trailing newline.
        sys.stderr.write('Unable to load classifier.db -- please run this script first')
        sys.exit(1)
    return Classifier.load('classifier.db')
    # NOTE(review): everything below is unreachable as written -- it follows the
    # unconditional return above and uses `text`, `lines`, `min_len` and
    # `max_len`, none of which are defined in this function. Presumably this is
    # the tail of a separate helper (possibly the `enron_email_extract` called
    # by test_enron_files) whose `def` line and setup were lost -- TODO confirm
    # against the original file.
    subj = ""
    for line in text.splitlines():
        # A line starting with "subject:" supplies the subject with that
        # 8-character prefix dropped; a later match overwrites an earlier one.
        if line.startswith("subject:"):
            is_subj = True  # not read again anywhere in the visible code
            subj = line[8:]
        else:
            # Every other line is collected as body text.
            lines.append(line)
    # Body lines are joined with single spaces before being passed on.
    return email_extract(subj, " ".join(lines), min_len, max_len)


def test_enron_files(classifier, path, label, selector=None):
    """Classify every file found under *path* and print the hit rate for *label*.

    Args:
        classifier: object exposing ``classify(features)``. The value at
            ``result[0][0]`` is taken as the predicted label (presumably the
            top-ranked one -- confirm against the classifier implementation).
        path: directory handed to ``get_file_list`` and joined with each
            returned filename to open the file.
        label: the label every file under *path* is expected to receive.
        selector: forwarded unchanged to ``get_file_list``.

    Returns:
        None. One summary line is printed to stdout. When no files are
        found, the accuracy is reported as 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    files = get_file_list(path, selector)
    correct = total = 0
    for filename in files:
        with open(os.path.join(path, filename)) as fh:
            contents = fh.read()
        features = enron_email_extract(contents)
        res = classifier.classify(features)
        best = res[0][0]
        if best == label:
            correct += 1
        total += 1

    # Guard the division: an empty directory (or a selector that matches
    # nothing) leaves total at 0.
    pct = 100 * (float(correct) / total) if total else 0.0

    # Single-argument parenthesised form prints identically under Python 2's
    # print statement and Python 3's print function.
    print('Accuracy of "%s": %s%% based on %s documents' % (label, pct, total))


if __name__ == "__main__":
    # Load the saved classifier once, then report accuracy for each
    # (directory, expected label) pair yielded by get_dir_and_labels().
    classifier = Classifier.load("classifier.bin")
    for directory, expected_label in get_dir_and_labels():
        test_enron_files(classifier, directory, expected_label)