# Count the corpus size: one example per line across every input file.
# (Python 2 script: single-argument print(...) behaves exactly like the
# print statement there, and xrange is kept for lazy index generation.)
N = 0
for _, filename in input_files:
    # Use a context manager so each file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename) as corpus_file:
        N += sum(1 for _ in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# Explicit floor division keeps M an integer even if true division is
# enabled; random.sample() needs an integer sample size.
M = min(N // 10, 10000)
testindexes = set(random.sample(xrange(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Both readers receive the same files plus a predicate on the example index;
# the predicates are complementary, so the two subsets are presumably
# disjoint -- confirm against OnlineCorpusReader.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
print("Counting examples")

# Count the corpus size: one example per line across every input file.
N = 0
for _, filename in input_files:
    # Use a context manager so each file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename) as corpus_file:
        N += sum(1 for _ in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# Floor division is required here: on Python 3 `N / 10` is a float, and
# random.sample() raises TypeError for a non-integer sample size.
M = min(N // 10, 10000)
testindexes = set(random.sample(range(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Both readers receive the same files plus a predicate on the example index;
# the predicates are complementary, so the two subsets are presumably
# disjoint -- confirm against OnlineCorpusReader.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
(False, True, True, False, 'Full', '$', True, False, 'Burger', '>60', False), (True, True, True, True, 'Full', '$$$', False, True, 'Italian', '10-30', False), (False, False, False, False, 'None', '$', False, False, 'Thai', '0-10', False), (True, True, True, True, 'Full', '$', False, False, 'Burger', '30-60', True), ] class RestaurantProblem(ClassificationProblem): def __init__(self): super(RestaurantProblem, self).__init__() names = [ 'Alternative', 'Bar', 'Fri', 'Hungry', 'Pattern', 'Price', 'Rain', 'Res', 'Type', 'Estimate' ] for i, name in enumerate(names): a = VectorIndexAttribute(i, name) self.attributes.append(a) def target(self, example): return example[10] problema = RestaurantProblem() arbol = DecisionTreeLearner_Queued(RestaurantDataset, problema) print tree_to_str(arbol.root)
# (Python 2 script: single-argument print(...) behaves exactly like the
# print statement there, and xrange is kept for lazy index generation.)
print("Counting examples")

# Count the corpus size: one example per line across every input file.
N = 0
for _, filename in input_files:
    # Use a context manager so each file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename) as corpus_file:
        N += sum(1 for _ in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# Explicit floor division keeps M an integer even if true division is
# enabled; random.sample() needs an integer sample size.
M = min(N // 10, 10000)
testindexes = set(random.sample(xrange(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Both readers receive the same files plus a predicate on the example index;
# the predicates are complementary, so the two subsets are presumably
# disjoint -- confirm against OnlineCorpusReader.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
# Count the corpus size: one example per line across every input file.
N = 0
for _, filename in input_files:
    # Use a context manager so each file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename) as corpus_file:
        N += sum(1 for _ in corpus_file)
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whichever is less.
# Floor division is required here: on Python 3 `N / 10` is a float, and
# random.sample() raises TypeError for a non-integer sample size.
M = min(N // 10, 10000)
testindexes = set(random.sample(range(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Both readers receive the same files plus a predicate on the example index;
# the predicates are complementary, so the two subsets are presumably
# disjoint -- confirm against OnlineCorpusReader.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))