def eval_by_labale(lable, docs, trained_data):
    """Calculate precision and recall for one label and return them as a dict.

    Args:
        lable: The label to evaluate.
        docs: Iterable of ``(gold_label, doc)`` pairs.
        trained_data: Trained model, passed unchanged to
            ``assignment1.classify_nb``.

    Returns:
        A dict ``{"precision": float, "recall": float}`` where precision is
        (docs correctly labeled ``lable``) / (docs the classifier labeled
        ``lable``) and recall is (docs correctly labeled ``lable``) /
        (docs whose gold label is ``lable``). A ratio whose denominator is
        zero is reported as 0.0 instead of raising ZeroDivisionError.
    """
    # NOTE(review): this file defines eval_by_labale twice; the later
    # definition is the one in effect at runtime.
    n_correct = 0    # gold label and prediction both equal `lable`
    n_predicted = 0  # classifier predicted `lable`
    n_gold = 0       # gold label equals `lable`
    for gold, doc in docs:
        # Classify each document exactly once and reuse the outcome.
        # assumes classify_nb is deterministic for a given model/doc -- TODO confirm
        predicted_match = lable == assignment1.classify_nb(trained_data, doc)
        gold_match = gold == lable
        if predicted_match:
            n_predicted += 1
        if gold_match:
            n_gold += 1
        if gold_match and predicted_match:
            n_correct += 1

    # float() forces true division; under Python 2 int / int truncates,
    # which would collapse both scores to 0 or 1.
    precision = float(n_correct) / n_predicted if n_predicted else 0.0
    recall = float(n_correct) / n_gold if n_gold else 0.0
    return {"precision": precision, "recall": recall}
def eval_by_labale(lable, docs, trained_data):
    """Calculate precision and recall for one label and return them as a dict.

    Args:
        lable: The label to evaluate.
        docs: Iterable of ``(gold_label, doc)`` pairs.
        trained_data: Trained model, passed unchanged to
            ``assignment1.classify_nb``.

    Returns:
        A dict ``{"precision": float, "recall": float}``. Precision is the
        share of documents predicted as ``lable`` whose gold label is also
        ``lable``; recall is the share of documents with gold label
        ``lable`` that were predicted as ``lable``. When a denominator is
        zero the corresponding score is 0.0 rather than an exception.
    """
    # NOTE(review): an identical definition of eval_by_labale appears
    # earlier in this file; this later one shadows it at runtime.
    true_positives = 0
    predicted_total = 0
    gold_total = 0
    for gold, doc in docs:
        # One classifier call per document; the result feeds all counters.
        # assumes classify_nb is deterministic for a given model/doc -- TODO confirm
        is_predicted = lable == assignment1.classify_nb(trained_data, doc)
        is_gold = gold == lable
        predicted_total += 1 if is_predicted else 0
        gold_total += 1 if is_gold else 0
        true_positives += 1 if (is_gold and is_predicted) else 0

    # Convert to float before dividing: Python 2's int / int floors the
    # result, which would make every score either 0 or 1.
    if predicted_total:
        precision = float(true_positives) / predicted_total
    else:
        precision = 0.0
    if gold_total:
        recall = float(true_positives) / gold_total
    else:
        recall = 0.0
    return {"precision": precision, "recall": recall}
def cross_val(N=5):
    """Run N-fold cross-validation of the naive Bayes classifier.

    The corpus "all_sentiment_shuffled.txt" is read through
    ``assignment1.read_corpus`` and reduced to ``(sentiment, doc)`` pairs.
    For each of the N folds, the fold's slice is held out for evaluation
    and the classifier is trained on the remaining documents.

    Args:
        N: Number of folds the corpus is divided into.

    Returns:
        A list of booleans, one per evaluated document across all folds,
        that is True where the classifier's guess equals the document's
        sentiment. Before returning, the value of ``acc_ci(results, 0.95)``
        is printed (per the original docstring, a confidence interval).
    """
    corpus = assignment1.read_corpus("all_sentiment_shuffled.txt")
    labelled = [(sentiment, doc) for (_, sentiment, doc) in corpus]
    total = len(labelled)

    results = []
    for fold in range(N):
        # Fold boundaries use float arithmetic truncated by int().
        start = int(float(fold) / N * total)
        stop = int(float(fold + 1) / N * total)

        held_out = labelled[start:stop]
        training = labelled[:start] + labelled[stop:]

        model = assignment1.train_nb(training)
        results.extend(
            gold == assignment1.classify_nb(model, doc)
            for (gold, doc) in held_out
        )

    # Single parenthesised argument: prints identically under Python 2.
    print(acc_ci(results, 0.95))
    return results
def classify(classifier):
    """Evaluate one classifier on a fixed 80/20 split of the corpus.

    The corpus "all_sentiment_shuffled.txt" is read through
    ``assignment1.read_corpus`` and reduced to ``(sentiment, doc)`` pairs.
    The first 80% of the documents are used for training and the rest for
    evaluation.

    Args:
        classifier: Either "assignment1" (uses ``assignment1.train_nb`` /
            ``assignment1.classify_nb``) or "scikit" (uses ``ec.train_sk`` /
            ``ec.classify_sk``).

    Returns:
        A list of booleans, one per evaluation document, that is True where
        the classifier's guess equals the document's sentiment. For any
        other ``classifier`` value a hint is printed and an empty list is
        returned.
    """
    corpus = assignment1.read_corpus("all_sentiment_shuffled.txt")
    labelled = [(sentiment, doc) for (_, sentiment, doc) in corpus]

    cut = int(0.8 * len(labelled))
    train_docs = labelled[:cut]
    eval_docs = labelled[cut:]

    if classifier == "assignment1":
        model = assignment1.train_nb(train_docs)
        return [
            gold == assignment1.classify_nb(model, doc)
            for (gold, doc) in eval_docs
        ]

    if classifier == "scikit":
        model = ec.train_sk(train_docs)
        # classify_sk takes the document first and the model second.
        return [
            gold == ec.classify_sk(doc, model)
            for (gold, doc) in eval_docs
        ]

    print("Please set classifier as assignment1 or scikit")
    return []