def est_acc(size=1):
    """Evaluate models trained on progressively larger training slices.

    The corpus in "all_sentiment_shuffled.txt" is split 80/20 into a
    training part and an evaluation part.  For every ``n`` in
    ``range(size)`` a model is trained with ``assignment1.train_nb`` on
    ``train_docs[n:(n + 1) * step]``, where
    ``step = len(train_docs) // size``, and scored on the evaluation part
    with ``assignment1.evaluate_nb``.

    Args:
        size: Number of training slices, and therefore of results.
            Must be at least 1.

    Returns:
        A list with one ``assignment1.evaluate_nb`` result per slice, in
        slice order.

    Raises:
        ValueError: If ``size`` is less than 1.
    """
    # NOTE(review): est_acc is defined twice in this file; the later
    # definition replaces the earlier one at import time.  Consider
    # deleting one of them.

    # Fail fast, before the corpus is read: a zero size cannot be used as
    # a divisor and a negative size would silently produce no results.
    if size < 1:
        raise ValueError("size must be a positive integer, got %r" % (size,))

    all_docs = assignment1.read_corpus("all_sentiment_shuffled.txt")
    all_docs = [(sentiment, doc) for (_, sentiment, doc) in all_docs]

    split_point = int(0.8 * len(all_docs))
    train_docs = all_docs[:split_point]
    eval_docs = all_docs[split_point:]

    # Floor division keeps the slice bounds integral and identical under
    # Python 2 and Python 3.
    step = len(train_docs) // size

    results = []
    for n in range(size):
        # NOTE(review): the slice starts at n (not 0 or n * step), so each
        # successive slice drops one more leading document -- confirm this
        # is intended.
        trained_data = assignment1.train_nb(train_docs[n:(n + 1) * step])
        results.append(assignment1.evaluate_nb(trained_data, eval_docs))
    return results
def cross_val(N=5):
    """Run N-fold cross-validation with the assignment1 classifier.

    The corpus in "all_sentiment_shuffled.txt" is cut into ``N``
    consecutive folds.  Each fold in turn is held out, a model is trained
    with ``assignment1.train_nb`` on the remaining documents, and every
    held-out document is classified with ``assignment1.classify_nb``.

    Before returning, the value of ``acc_ci(results, 0.95)`` is printed
    (per the original documentation, a confidence interval over all
    folds).

    Args:
        N: Number of folds the corpus is divided into.

    Returns:
        A list with one entry per evaluated document, in fold order: the
        result of comparing the document's label with the classifier's
        guess.
    """
    corpus = [
        (label, text)
        for (_, label, text) in assignment1.read_corpus(
            "all_sentiment_shuffled.txt")
    ]
    total = len(corpus)

    outcomes = []
    for fold in range(N):
        # Fold boundaries: the held-out part is corpus[lower:upper].
        lower = int(float(fold) / N * total)
        upper = int(float(fold + 1) / N * total)

        held_out = corpus[lower:upper]
        model = assignment1.train_nb(corpus[:lower] + corpus[upper:])

        outcomes.extend(
            label == assignment1.classify_nb(model, text)
            for (label, text) in held_out
        )

    # Single parenthesised argument: prints the same under Python 2 and 3.
    print(acc_ci(outcomes, 0.95))
    return outcomes
def est_acc(size=1):
    """Evaluate models trained on progressively larger training slices.

    The corpus in "all_sentiment_shuffled.txt" is split 80/20 into a
    training part and an evaluation part.  For every ``n`` in
    ``range(size)`` a model is trained with ``assignment1.train_nb`` on
    ``train_docs[n:(n + 1) * step]``, where
    ``step = len(train_docs) // size``, and scored on the evaluation part
    with ``assignment1.evaluate_nb``.

    Args:
        size: Number of training slices, and therefore of results.
            Must be at least 1.

    Returns:
        A list with one ``assignment1.evaluate_nb`` result per slice, in
        slice order.

    Raises:
        ValueError: If ``size`` is less than 1.
    """
    # NOTE(review): est_acc is defined twice in this file; the later
    # definition replaces the earlier one at import time.  Consider
    # deleting one of them.

    # Fail fast, before the corpus is read: a zero size cannot be used as
    # a divisor and a negative size would silently produce no results.
    if size < 1:
        raise ValueError("size must be a positive integer, got %r" % (size,))

    all_docs = assignment1.read_corpus("all_sentiment_shuffled.txt")
    all_docs = [(sentiment, doc) for (_, sentiment, doc) in all_docs]

    split_point = int(0.8 * len(all_docs))
    train_docs = all_docs[:split_point]
    eval_docs = all_docs[split_point:]

    # Floor division keeps the slice bounds integral and identical under
    # Python 2 and Python 3.
    step = len(train_docs) // size

    results = []
    for n in range(size):
        # NOTE(review): the slice starts at n (not 0 or n * step), so each
        # successive slice drops one more leading document -- confirm this
        # is intended.
        trained_data = assignment1.train_nb(train_docs[n:(n + 1) * step])
        results.append(assignment1.evaluate_nb(trained_data, eval_docs))
    return results
def classify(classifier):
    """Score one classifier on the held-out 20% of the corpus.

    The corpus in "all_sentiment_shuffled.txt" is split 80/20; the first
    part is used for training and the second for evaluation.

    Args:
        classifier: ``"assignment1"`` to train and classify with
            ``assignment1.train_nb`` / ``assignment1.classify_nb``, or
            ``"scikit"`` to use ``ec.train_sk`` / ``ec.classify_sk``.

    Returns:
        A list with one entry per evaluation document: the result of
        comparing the document's label with the classifier's guess.  For
        any other ``classifier`` value a hint is printed and an empty
        list is returned.
    """
    corpus = [
        (label, text)
        for (_, label, text) in assignment1.read_corpus(
            "all_sentiment_shuffled.txt")
    ]
    cut = int(0.8 * len(corpus))
    fit_set, held_out = corpus[:cut], corpus[cut:]

    if classifier == "assignment1":
        model = assignment1.train_nb(fit_set)
        return [
            label == assignment1.classify_nb(model, text)
            for (label, text) in held_out
        ]

    if classifier == "scikit":
        model = ec.train_sk(fit_set)
        # classify_sk takes the document first and the trained model second.
        return [
            label == ec.classify_sk(text, model)
            for (label, text) in held_out
        ]

    # Unknown classifier name: report it and hand back no results.
    print("Please set classifier as assignment1 or scikit")
    return []