Esempio n. 1
0
def main():
    """Train a Naive Bayes opinion classifier and print its precision.

    Counts the lines of the pro and con corpus files, picks a random set of
    line indexes to hold out for testing, builds a training and a test
    corpus from the same files, trains ``NaiveBayes`` on the training
    corpus and prints the precision measured on the test corpus.
    """
    input_files = {
        "pro": PRO_CORPUS_PATH,
        "con": CON_CORPUS_PATH,
    }

    # Line count. Each file is opened in a `with` block so its handle is
    # closed as soon as it has been counted instead of being leaked.
    N = 0
    for filename in input_files.values():
        with open(filename) as corpus_file:
            N += sum(1 for _ in corpus_file)
    print("Corpus has {} examples".format(N))

    # Choose test set, either 10% or 1000 examples, whichever is less.
    # `//` keeps M an integer, which random.sample requires.
    M = min(N // 10, 1000)
    testindexes = set(random.sample(xrange(N), M))

    # Both corpora read the same files; presumably the predicate decides
    # which example indexes each one keeps (verify against ProConsCorpus).
    corpus = ProConsCorpus(input_files, lambda i: i not in testindexes)
    test = ProConsCorpus(input_files, lambda i: i in testindexes)
    print("Corpuses created")

    problem = OpinionProblem(corpus)
    classifier = NaiveBayes(corpus, problem)
    print("Classifier created")

    p = precision(classifier, test)
    print("Precision = {}".format(p))
Esempio n. 2
0
def main():
    """Train a Naive Bayes opinion classifier and print its precision.

    Counts the lines of the pro and con corpus files, picks a random set of
    line indexes to hold out for testing, builds a training and a test
    corpus from the same files, trains ``NaiveBayes`` on the training
    corpus and prints the precision measured on the test corpus.
    """
    input_files = {
        "pro": PRO_CORPUS_PATH,
        "con": CON_CORPUS_PATH,
    }

    # Line count. Each file is opened in a `with` block so its handle is
    # closed as soon as it has been counted instead of being leaked.
    N = 0
    for filename in input_files.values():
        with open(filename) as corpus_file:
            N += sum(1 for _ in corpus_file)
    print("Corpus has {} examples".format(N))

    # Choose test set, either 10% or 1000 examples, whichever is less.
    # `//` keeps M an integer, which random.sample requires.
    M = min(N // 10, 1000)
    testindexes = set(random.sample(xrange(N), M))

    # Both corpora read the same files; presumably the predicate decides
    # which example indexes each one keeps (verify against ProConsCorpus).
    corpus = ProConsCorpus(input_files, lambda i: i not in testindexes)
    test = ProConsCorpus(input_files, lambda i: i in testindexes)
    print("Corpuses created")

    problem = OpinionProblem(corpus)
    classifier = NaiveBayes(corpus, problem)
    print("Classifier created")

    p = precision(classifier, test)
    print("Precision = {}".format(p))
Esempio n. 3
0
def main():
    """Evaluate the configured classifiers on the Iris data and print precision.

    Counts the lines of ``IRIS_PATH``, holds out a random tenth of the line
    indexes as the test set, trains every classifier listed in
    ``classifiers`` on the remaining examples and prints the precision of
    each one on the held-out examples.
    """
    # Line count; the `with` block closes the file once it has been counted.
    with open(IRIS_PATH) as iris_file:
        N = sum(1 for _ in iris_file)
    # `//` keeps the sample size an integer, which random.sample requires.
    testindexes = set(random.sample(xrange(N), N // 10))

    dataset = IrisDataset(IRIS_PATH, lambda i: i not in testindexes)
    testset = IrisDataset(IRIS_PATH, lambda i: i in testindexes)
    problem = VectorDataClassificationProblem(dataset, dataset.target_index)
    # Distance without target: the last component of each vector is dropped
    # before measuring (assumes the target is stored last - TODO confirm).
    problem.distance = lambda x, y: euclidean_vector_distance(x[:-1], y[:-1])

    classifiers = {
        "K-Nearest Neighbours": KNearestNeighbors,
    }

    print("Precision:\n")
    for name, method in classifiers.items():
        classifier = method(dataset, problem)
        p = precision(classifier, testset)
        print("{:>20} = {:.2}".format(name, p))
Esempio n. 4
0
def main():
    """Compare several classifiers on the Iris data and print their precision.

    Counts the lines of ``IRIS_PATH``, holds out a random tenth of the line
    indexes as the test set, trains every classifier listed in
    ``classifiers`` on the remaining examples and prints the precision of
    each one on the held-out examples.
    """
    # Line count; the `with` block closes the file once it has been counted.
    with open(IRIS_PATH) as iris_file:
        N = sum(1 for _ in iris_file)
    # `//` keeps the sample size an integer, which random.sample requires.
    testindexes = set(random.sample(xrange(N), N // 10))

    dataset = IrisDataset(IRIS_PATH, lambda i: i not in testindexes)
    testset = IrisDataset(IRIS_PATH, lambda i: i in testindexes)
    problem = VectorDataClassificationProblem(dataset, dataset.target_index)
    # Distance without target: the last component of each vector is dropped
    # before measuring (assumes the target is stored last - TODO confirm).
    problem.distance = lambda x, y: euclidean_vector_distance(x[:-1], y[:-1])

    classifiers = {
        "K-Nearest Neighbours": KNearestNeighbors,
        "Naive Bayes": NaiveBayes,
        "Decision Tree": DecisionTreeLearner_Queued,
    }

    print("Precision:\n")
    for name, method in classifiers.items():
        classifier = method(dataset, problem)
        p = precision(classifier, testset)
        print("{:>20} = {:.2}".format(name, p))
# Count the lines of every corpus file. Each file is opened in a `with`
# block so its handle is closed as soon as it has been counted.
N = 0
for _, filename in input_files:
    with open(filename) as corpus_file:
        N += sum(1 for _ in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# `//` keeps M an integer, which random.sample requires.
M = min(N // 10, 10000)
testindexes = set(random.sample(xrange(N), M))
print("Keeping {} examples for testing".format(M))

# Both readers are built over the same files; presumably the predicate
# decides which example indexes each one yields (verify against
# OnlineCorpusReader).
problem = LanguageClassificationProblem()
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)


print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))


print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
print("Counting examples")
# Count the lines of every corpus file. Each file is opened in a `with`
# block so its handle is closed as soon as it has been counted.
N = 0
for _, filename in input_files:
    with open(filename) as corpus_file:
        N += sum(1 for _ in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# Floor division is required here: on Python 3 `N / 10` is a float, and
# random.sample rejects a non-integer sample size.
M = min(N // 10, 10000)
testindexes = set(random.sample(range(N), M))
print("Keeping {} examples for testing".format(M))

# Both readers are built over the same files; presumably the predicate
# decides which example indexes each one yields (verify against
# OnlineCorpusReader).
problem = LanguageClassificationProblem()
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
# Count the lines of every corpus file. Each file is opened in a `with`
# block so its handle is closed as soon as it has been counted.
N = 0
for _, filename in input_files:
    with open(filename) as corpus_file:
        N += sum(1 for _ in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# `//` keeps M an integer, which random.sample requires.
M = min(N // 10, 10000)
testindexes = set(random.sample(xrange(N), M))
print("Keeping {} examples for testing".format(M))

# Both readers are built over the same files; presumably the predicate
# decides which example indexes each one yields (verify against
# OnlineCorpusReader).
problem = LanguageClassificationProblem()
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)


print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
# This variant of precision() also receives the problem's target.
p = precision(classifier, problem.target, test)
print("Precision Naive Bayes = {}".format(p))


print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, problem.target, test)
print("Precision Decision Tree = {}".format(p))