def main():
    """Train Naive Bayes on the pro/con corpus and print its precision.

    Counts the lines of both corpus files, draws a random set of line
    indexes to hold out for testing, builds a training and a test
    ``ProConsCorpus`` from complementary index predicates, trains a
    ``NaiveBayes`` classifier and prints the result of ``precision``.
    Progress and the final figure are written to stdout; nothing is returned.
    """
    input_files = {
        "pro": PRO_CORPUS_PATH,
        "con": CON_CORPUS_PATH,
    }

    # Line count: every line of every corpus file is counted as one example.
    N = 0
    for filename in input_files.values():
        # `with` closes each file as soon as it has been counted instead of
        # leaving the handle open until garbage collection.
        with open(filename) as corpus_file:
            for _ in corpus_file:
                N += 1
    print("Corpus has {} examples".format(N))

    # Choose test set: either 10% of the corpus or 1000 examples, whichever
    # is less. Floor division keeps M an integer regardless of the
    # interpreter's `/` semantics.
    M = min(N // 10, 1000)
    testindexes = set(random.sample(xrange(N), M))

    # Complementary predicates over the example index: training keeps what
    # testing rejects. (Presumably the index runs over the concatenated
    # files -- confirm against ProConsCorpus.)
    corpus = ProConsCorpus(input_files, lambda i: i not in testindexes)
    test = ProConsCorpus(input_files, lambda i: i in testindexes)
    print("Corpuses created")

    problem = OpinionProblem(corpus)
    classifier = NaiveBayes(corpus, problem)
    print("Classifier created")

    p = precision(classifier, test)
    print("Precision = {}".format(p))
def main():
    """Evaluate K-Nearest Neighbours on the Iris data and print its precision.

    Counts the lines of ``IRIS_PATH``, holds out a random tenth of the line
    indexes as a test set, builds the classification problem with a custom
    distance, then trains each classifier in the table and prints the value
    returned by ``precision``. Output goes to stdout; nothing is returned.
    """
    # Line count. `with` makes sure the file is closed once it is counted.
    N = 0
    with open(IRIS_PATH) as iris_file:
        for _ in iris_file:
            N += 1

    # Hold out N // 10 randomly chosen indexes; floor division keeps the
    # sample size an integer regardless of the interpreter's `/` semantics.
    testindexes = set(random.sample(xrange(N), N // 10))
    dataset = IrisDataset(IRIS_PATH, lambda i: i not in testindexes)
    testset = IrisDataset(IRIS_PATH, lambda i: i in testindexes)
    problem = VectorDataClassificationProblem(dataset, dataset.target_index)

    # Distance without target: the last component of each vector is dropped
    # before measuring (presumably that is where the target sits -- confirm
    # against dataset.target_index).
    problem.distance = lambda x, y: euclidean_vector_distance(x[:-1], y[:-1])

    # Display name -> classifier factory called as factory(dataset, problem).
    classifiers = {
        "K-Nearest Neighbours": KNearestNeighbors,
    }

    print("Precision:\n")
    for name, method in classifiers.iteritems():
        classifier = method(dataset, problem)
        p = precision(classifier, testset)
        print("{:>20} = {:.2}".format(name, p))
def main():
    """Compare several classifiers on the Iris data and print their precision.

    Counts the lines of ``IRIS_PATH``, holds out a random tenth of the line
    indexes as a test set, builds the classification problem with a custom
    distance, then trains K-Nearest Neighbours, Naive Bayes and a decision
    tree on the same training data and prints the value returned by
    ``precision`` for each. Output goes to stdout; nothing is returned.
    """
    # Line count. `with` makes sure the file is closed once it is counted.
    N = 0
    with open(IRIS_PATH) as iris_file:
        for _ in iris_file:
            N += 1

    # Hold out N // 10 randomly chosen indexes; floor division keeps the
    # sample size an integer regardless of the interpreter's `/` semantics.
    testindexes = set(random.sample(xrange(N), N // 10))
    dataset = IrisDataset(IRIS_PATH, lambda i: i not in testindexes)
    testset = IrisDataset(IRIS_PATH, lambda i: i in testindexes)
    problem = VectorDataClassificationProblem(dataset, dataset.target_index)

    # Distance without target: the last component of each vector is dropped
    # before measuring (presumably that is where the target sits -- confirm
    # against dataset.target_index).
    problem.distance = lambda x, y: euclidean_vector_distance(x[:-1], y[:-1])

    # Display name -> classifier factory called as factory(dataset, problem).
    classifiers = {
        "K-Nearest Neighbours": KNearestNeighbors,
        "Naive Bayes": NaiveBayes,
        "Decision Tree": DecisionTreeLearner_Queued,
    }

    print("Precision:\n")
    for name, method in classifiers.iteritems():
        classifier = method(dataset, problem)
        p = precision(classifier, testset)
        print("{:>20} = {:.2}".format(name, p))
# Evaluate Naive Bayes and a large-data decision tree on the corpus listed in
# `input_files` (defined outside this fragment; iterated here as two-item
# entries whose second item is a file path).

# Line count: every line of every input file is counted as one example.
N = 0
for _, filename in input_files:
    # `with` closes each file as soon as it has been counted instead of
    # leaving the handle open until garbage collection.
    with open(filename) as corpus_file:
        for _line in corpus_file:
            N += 1
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whatever is less.
# Floor division keeps M an integer regardless of the interpreter's `/`
# semantics.
M = min(N // 10, 10000)
testindexes = set(random.sample(xrange(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Complementary index predicates: training keeps what testing rejects.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
# Evaluate Naive Bayes and a large-data decision tree on the corpus listed in
# `input_files` (defined outside this fragment; iterated here as two-item
# entries whose second item is a file path).
print("Counting examples")

# Line count: every line of every input file is counted as one example.
N = 0
for _, filename in input_files:
    # `with` closes each file as soon as it has been counted instead of
    # leaving the handle open until garbage collection.
    with open(filename) as corpus_file:
        for _line in corpus_file:
            N += 1
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whatever is less.
# This must be floor division: with true division M becomes a float, which
# random.sample() rejects as a sample size and which would be reported as
# e.g. "15.0 examples" below.
M = min(N // 10, 10000)
testindexes = set(random.sample(range(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Complementary index predicates: training keeps what testing rejects.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
# Evaluate Naive Bayes and a large-data decision tree on the corpus listed in
# `input_files` (defined outside this fragment; iterated here as two-item
# entries whose second item is a file path). In this variant `precision` is
# called with `problem.target` as its second argument.

# Line count: every line of every input file is counted as one example.
N = 0
for _, filename in input_files:
    # `with` closes each file as soon as it has been counted instead of
    # leaving the handle open until garbage collection.
    with open(filename) as corpus_file:
        for _line in corpus_file:
            N += 1
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whatever is less.
# Floor division keeps M an integer regardless of the interpreter's `/`
# semantics.
M = min(N // 10, 10000)
testindexes = set(random.sample(xrange(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()
# Complementary index predicates: training keeps what testing rejects.
train = OnlineCorpusReader(input_files, lambda i: i not in testindexes)
test = OnlineCorpusReader(input_files, lambda i: i in testindexes)

print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, problem.target, test)
print("Precision Naive Bayes = {}".format(p))

print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))
print("Testing...")
p = precision(classifier, problem.target, test)
print("Precision Decision Tree = {}".format(p))