def run_tests(include_ner_subcat=True):
    """Sanity-check the testing-data annotation files for consistency.

    Checks, for every utterance id returned by get_data:
      * the annotation line in Annotation1.txt has exactly as many labels
        as the utterance in Input1.txt has tokens;
      * every coarse label is in ALLOWED_LABELS_WITHOUT_NERSUB;
      * (when include_ner_subcat) the Ann_NERSub.txt line has the same
        token count, every label is in ALLOWED_LABELS_WITH_NERSUB, and
        every token labelled "NE" in the coarse file carries an
        NE-prefixed sub-category (or "en") in the sub-category file.

    :param include_ner_subcat: also validate Ann_NERSub.txt when True.
    :raises AssertionError: on the first inconsistency found.
    """
    # NOTE(review): `ids` is rebound by each get_data() call — this assumes
    # all three files list the same utterance ids in the same order;
    # confirm against how the TestingData files are produced.
    ids, utters = get_data("./TestingData/Input1.txt")
    ids, labels = get_data("./TestingData/Annotation1.txt")
    if include_ner_subcat:
        ids, labels_ner = get_data("./TestingData/Ann_NERSub.txt")
    for idx in xrange(len(ids)):
        utter_idx = utters[idx].split()
        labels_idx = labels[idx].split()
        if include_ner_subcat:
            labels_ner_idx = labels_ner[idx].split()
        # One label per utterance token, in both annotation files.
        assert len(utter_idx) == len(labels_idx), \
            "mismatch in length of label_length/utter_length in Annotation1.txt at utter %d" % ids[idx]
        if include_ner_subcat:
            assert len(utter_idx) == len(labels_ner_idx), \
                "mismatch in length of label_length/utter_length in Ann_NERSub.txt at utter %d" % ids[idx]
        # Coarse labels must come from the coarse label set.
        for label in labels_idx:
            assert label in ALLOWED_LABELS_WITHOUT_NERSUB, \
                "some invalid label %s found in Annotation1.txt at utter %d" % (label, ids[idx])
        if include_ner_subcat:
            # Sub-category labels must come from the extended label set.
            for label in labels_ner_idx:
                assert label in ALLOWED_LABELS_WITH_NERSUB, \
                    "some invalid label %s found in Ann_NERSub.txt at utter %d" % (label, ids[idx])
            # Every coarse "NE" token must be refined to an NE sub-category
            # (or left as "en") in the sub-category file.
            for sub_idx in xrange(len(labels_idx)):
                if labels_idx[sub_idx] == "NE":
                    # BUG FIX: report the offending sub-category label
                    # (labels_ner_idx[sub_idx]); the original interpolated
                    # labels_idx[sub_idx], which is always the constant "NE"
                    # here, making the error message useless.
                    assert labels_ner_idx[sub_idx].startswith("NE") or labels_ner_idx[sub_idx] == "en", \
                        "some invalid label %s found in Ann_NERSub.txt at utter %d" % (labels_ner_idx[sub_idx], ids[idx])
def run_tests(include_ner_subcat=True):
    """Validate the TestingData annotation files against the input utterances.

    NOTE(review): this is a second, functionally identical definition of
    run_tests in the same file (the earlier copy is just auto-formatted);
    since it is defined later, this is the copy that actually runs.
    Consider deleting one of the two.

    :param include_ner_subcat: also validate Ann_NERSub.txt when True.
    :raises AssertionError: on the first inconsistency found.
    """
    # `ids` is rebound by each call; assumes all files share id order.
    ids, utters = get_data("./TestingData/Input1.txt")
    ids, labels = get_data("./TestingData/Annotation1.txt")
    if include_ner_subcat:
        ids, labels_ner = get_data("./TestingData/Ann_NERSub.txt")
    for idx in xrange(len(ids)):
        utter_idx = utters[idx].split()
        labels_idx = labels[idx].split()
        if include_ner_subcat:
            labels_ner_idx = labels_ner[idx].split()
        # Token/label counts must agree line-by-line.
        assert len(utter_idx) == len(labels_idx), \
            "mismatch in length of label_length/utter_length in Annotation1.txt at utter %d" % ids[idx]
        if include_ner_subcat:
            assert len(utter_idx) == len(labels_ner_idx), \
                "mismatch in length of label_length/utter_length in Ann_NERSub.txt at utter %d" % ids[idx]
        # Labels must belong to their respective allowed sets.
        for label in labels_idx:
            assert label in ALLOWED_LABELS_WITHOUT_NERSUB, \
                "some invalid label %s found in Annotation1.txt at utter %d" % (label, ids[idx])
        if include_ner_subcat:
            for label in labels_ner_idx:
                assert label in ALLOWED_LABELS_WITH_NERSUB, \
                    "some invalid label %s found in Ann_NERSub.txt at utter %d" % (label, ids[idx])
            for sub_idx in xrange(len(labels_idx)):
                if labels_idx[sub_idx] == "NE":
                    # BUG FIX: the message now shows the bad sub-category
                    # label; the original printed labels_idx[sub_idx],
                    # which is always "NE" inside this branch.
                    assert labels_ner_idx[sub_idx].startswith("NE") or labels_ner_idx[sub_idx] == "en", \
                        "some invalid label %s found in Ann_NERSub.txt at utter %d" % (labels_ner_idx[sub_idx], ids[idx])
# NOTE(review): the line below is a whitespace-mangled paste. It contains
# (a) the TAIL of a classifier method (confidence thresholding over
# class_points; joins all classes tied at first_max, keeps the first on a
# tie, falls back to 'en' when confidence_flag is set; logs via
# class_verification_files and returns final_class) and (b) module-level
# driver code (builds UnsupervisedWikipediaClassifier, loads Annotation1/
# Input1, runs ner_sub_category, stores wiki summaries, writes the
# submission file, then runs run_tests and annotation_count_test).
# The enclosing method's header (and the definitions of diff,
# confidence_threshold, class_points, first_max, QUERY) is NOT visible
# here, so the original indentation cannot be reconstructed safely —
# restore it from version control before editing this code.
print "confidence here" if diff >= confidence_threshold: final_class = '/'.join([key for key in class_points.keys() if class_points[key] == first_max]) if len(final_class.split('/'))>1: final_class = final_class.split('/')[0] elif self.confidence_flag: final_class = 'en' else: print "no confidence here" self.confidence_flag = 1 final_class = '/'.join([key for key in class_points.keys() if class_points[key] == first_max]) if len(final_class.split('/'))>1: final_class = final_class.split('/')[0] class_verification_files(QUERY, final_class) print "Final class selected for this token is "+final_class return final_class clf = UnsupervisedWikipediaClassifier() id_list, annotation_list = get_data("%s/Annotation1.txt" % dir_path) id_list, utterance_list = get_data("%s/Input1.txt" % dir_path) final_annotation_list = ner_sub_category(id_list, annotation_list, utterance_list) print "Storing parsed wiki content into .temp file" clf.store_wiki_summary() print "Storing done" print "Preparing Submission format" prepare_submission_nersub(id_list, final_annotation_list) print "Submission format prepared and file saved in specified directory" print "Running tests" print run_tests(include_ner_subcat=True) print annotation_count_test("./TestingData/Ann_NERSub.txt")
# NOTE(review): the line below is a second, overlapping copy of the same
# whitespace-mangled paste as the previous line (this one begins one
# statement further in, mid-`if`, at "final_class = ..."). It duplicates
# the classifier-method tail and the module-level driver code verbatim
# (modulo auto-formatter spacing). Because it starts inside a statement
# of a method whose header is not visible, it cannot be reformatted
# safely here; recover the original file from version control and drop
# one of the two duplicated copies.
final_class = final_class.split('/')[0] elif self.confidence_flag: final_class = 'en' else: print "no confidence here" self.confidence_flag = 1 final_class = '/'.join([ key for key in class_points.keys() if class_points[key] == first_max ]) if len(final_class.split('/')) > 1: final_class = final_class.split('/')[0] class_verification_files(QUERY, final_class) print "Final class selected for this token is " + final_class return final_class clf = UnsupervisedWikipediaClassifier() id_list, annotation_list = get_data("%s/Annotation1.txt" % dir_path) id_list, utterance_list = get_data("%s/Input1.txt" % dir_path) final_annotation_list = ner_sub_category(id_list, annotation_list, utterance_list) print "Storing parsed wiki content into .temp file" clf.store_wiki_summary() print "Storing done" print "Preparing Submission format" prepare_submission_nersub(id_list, final_annotation_list) print "Submission format prepared and file saved in specified directory" print "Running tests" print run_tests(include_ner_subcat=True) print annotation_count_test("./TestingData/Ann_NERSub.txt")