# Count the corpus size: every line of every input file is one example.
N = 0
for _, filename in input_files:
    # Use a context manager so each file is closed as soon as it has been
    # counted, instead of leaking the handle until garbage collection.
    with open(filename) as corpus_file:
        N += sum(1 for _line in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# Explicit floor division keeps M an integer regardless of interpreter
# version; random.sample needs an integer sample size.
M = min(N // 10, 10000)
# xrange (Python 2) keeps the index population lazy instead of building a
# list of N integers just to sample from it.
testindexes = set(random.sample(xrange(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Both readers go over the same input files; the predicate decides which
# indexes each reader keeps, so train and test are complementary.
# NOTE(review): presumably the reader calls the predicate with the running
# example index -- confirm against OnlineCorpusReader.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
print("Counting examples")
# Line count: every line of every input file is one example.
N = 0
for _, filename in input_files:
    # Use a context manager so each file is closed as soon as it has been
    # counted, instead of leaking the handle until garbage collection.
    with open(filename) as corpus_file:
        N += sum(1 for _line in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# Floor division is required here: on Python 3 `N / 10` is a float, and
# random.sample raises TypeError when given a non-integer sample size.
M = min(N // 10, 10000)
testindexes = set(random.sample(range(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Both readers go over the same input files; the predicate decides which
# indexes each reader keeps, so train and test are complementary.
# NOTE(review): presumably the reader calls the predicate with the running
# example index -- confirm against OnlineCorpusReader.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
예제 #3
0
    (False, True, True, False, 'Full', '$', True, False, 'Burger', '>60',
     False),
    (True, True, True, True, 'Full', '$$$', False, True, 'Italian', '10-30',
     False),
    (False, False, False, False, 'None', '$', False, False, 'Thai', '0-10',
     False),
    (True, True, True, True, 'Full', '$', False, False, 'Burger', '30-60',
     True),
]


class RestaurantProblem(ClassificationProblem):
    """Classification problem over positionally indexed restaurant examples.

    Ten named attributes are registered, one per position 0-9 of an
    example; the value at position 10 is used as the classification target.
    """

    def __init__(self):
        """Register one VectorIndexAttribute for each named position."""
        super(RestaurantProblem, self).__init__()
        attribute_names = (
            'Alternative',
            'Bar',
            'Fri',
            'Hungry',
            'Pattern',
            'Price',
            'Rain',
            'Res',
            'Type',
            'Estimate',
        )
        # The enumeration position doubles as the index the attribute is
        # constructed with.
        for position, label in enumerate(attribute_names):
            self.attributes.append(VectorIndexAttribute(position, label))

    def target(self, example):
        """Return the element at index 10 of *example* (the target value)."""
        target_position = 10
        return example[target_position]


# Build the restaurant problem and learn a decision tree from the dataset.
problema = RestaurantProblem()
arbol = DecisionTreeLearner_Queued(RestaurantDataset, problema)

# Show the learned tree, rendered from its root node.
print(tree_to_str(arbol.root))
print("Counting examples")
# Line count: every line of every input file is one example.
N = 0
for _, filename in input_files:
    # Use a context manager so each file is closed as soon as it has been
    # counted, instead of leaking the handle until garbage collection.
    with open(filename) as corpus_file:
        N += sum(1 for _line in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# Explicit floor division keeps M an integer regardless of interpreter
# version; random.sample needs an integer sample size.
M = min(N // 10, 10000)
# xrange (Python 2) keeps the index population lazy instead of building a
# list of N integers just to sample from it.
testindexes = set(random.sample(xrange(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Both readers go over the same input files; the predicate decides which
# indexes each reader keeps, so train and test are complementary.
# NOTE(review): presumably the reader calls the predicate with the running
# example index -- confirm against OnlineCorpusReader.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
# Count the corpus size: every line of every input file is one example.
N = 0
for _, filename in input_files:
    # Use a context manager so each file is closed as soon as it has been
    # counted, instead of leaking the handle until garbage collection.
    with open(filename) as corpus_file:
        N += sum(1 for _line in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# Floor division is required here: on Python 3 `N / 10` is a float, and
# random.sample raises TypeError when given a non-integer sample size.
M = min(N // 10, 10000)
testindexes = set(random.sample(range(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Both readers go over the same input files; the predicate decides which
# indexes each reader keeps, so train and test are complementary.
# NOTE(review): presumably the reader calls the predicate with the running
# example index -- confirm against OnlineCorpusReader.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))