def evaluate_on_trial_taxo():
    """Predict, save and evaluate taxonomies for the trial relations file.

    Works on ``relations.csv`` inside ``RES_DIR``; that file is assumed to
    already contain the "hyper_in_hypo_i" and "hypo2hyper_substract"
    features.  First an unpruned taxonomy is predicted with two global
    threshold passes, written to ``<relations>-taxo.csv`` and evaluated.
    Then, for each ``max_knn`` in 1, 2, 3 and 5, the first pass is replaced
    by a local threshold pass and the result is written to
    ``<relations>-taxo-knn<max_knn>.csv`` and evaluated as well.
    """
    rel_path = join(RES_DIR, "relations.csv")
    unpruned_path = rel_path + "-taxo.csv"
    print(" ".join(("Relations:", rel_path)))
    print(" ".join(("Unpruned taxonomy:", unpruned_path)))

    predictor = TaxonomyPredictor(
        TaxonomyFeatures(TaxonomyResources(), relations_fpath=rel_path, lang="en"))

    # Keyword arguments shared by the global and the local variant of the
    # first pass, and the arguments of the second pass that always follows.
    first_pass = dict(threshold=0, field="hypo2hyper_substract", or_correct_predict=False)
    second_pass = dict(threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)

    predictor.predict_by_global_threshold(**first_pass)
    predictor.predict_by_global_threshold(**second_pass)
    predictor.save(unpruned_path)
    predictor.evaluate(field="correct_predict")

    for max_knn in (1, 2, 3, 5):
        predictor.predict_by_local_threshold(max_knn=max_knn, **first_pass)
        predictor.predict_by_global_threshold(**second_pass)
        knn_path = rel_path + ("-taxo-knn" + unicode(max_knn) + ".csv")
        predictor.save(knn_path)
        predictor.evaluate(field="correct_predict")
def extract_semeval_taxo(input_voc_pattern, language, mode, classifiers_pattern):
    """Compute relation features and predict taxonomies for vocabulary files.

    NOTE(review): a later definition in this file has the same name plus an
    extra ``test_en`` parameter; if both live in the same scope, that one
    replaces this one when the module is loaded -- confirm and remove the
    dead copy.

    Args:
        input_voc_pattern: glob pattern; every matching vocabulary file is
            processed in sorted order.
        language: forwarded to ``load_res`` and, as ``lang``, to
            ``TaxonomyFeatures``.
        mode: ``"simple"`` predicts with global/local thresholds,
            ``"super"`` predicts with every classifier matched by
            ``classifiers_pattern``.  Any other value still builds the
            features object but predicts nothing.
        classifiers_pattern: glob pattern of classifier directories; only
            used when ``mode == "super"``.

    Output files are named after the vocabulary file:
    ``<voc>-relations.csv-taxo.csv`` and ``...-taxo-knn<k>.csv`` in simple
    mode, ``<taxo>-<method>.csv`` and ``<taxo>-<method>-conf.csv`` in super
    mode.
    """
    taxo_res_common, taxo_res_domain = load_res(language, mode)

    for voc_fpath in sorted(glob(input_voc_pattern)):
        # Only the must_have_space=False variant is enabled here.
        for space in [False]:
            s = "-space" if space else ""
            relations_fpath = voc_fpath + s + "-relations.csv"
            taxo_fpath = relations_fpath + "-taxo.csv"

            # Single-argument print() behaves the same on Python 2 and 3.
            # The space before the newline keeps the output byte-identical
            # to the former multi-item print statement.
            print("\n" + voc_fpath + " \n" + "=" * 50)
            print("Relations:" + " " + relations_fpath)
            print("Unpruned taxonomy:" + " " + taxo_fpath)

            # Restrict the domain resources to this vocabulary and merge
            # them with the common resources.
            taxo_res_domain_voc = get_taxo_res_domain_voc(taxo_res_domain, voc_fpath)
            taxo_res_voc = combine_taxo_res(taxo_res_common, taxo_res_domain_voc)
            taxo_features = TaxonomyFeatures(taxo_res_voc, voc_fpath, lang=language)

            if mode == "simple":
                taxo_features.fill_direct_isas()
                taxo_features.fill_substrings(must_have_space=space)
                taxo_features.hypo2hyper_ratio()

                taxo_predict = TaxonomyPredictor(taxo_features)
                taxo_predict.predict_by_global_threshold(
                    threshold=0, field="hypo2hyper_substract", or_correct_predict=False)
                taxo_predict.predict_by_global_threshold(
                    threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
                taxo_predict.save(taxo_fpath)

                for max_knn in [1, 2, 3, 5]:
                    taxo_knn_fpath = relations_fpath + "-taxo-knn" + unicode(max_knn) + ".csv"
                    taxo_predict.predict_by_local_threshold(
                        threshold=0, max_knn=max_knn, field="hypo2hyper_substract",
                        or_correct_predict=False)
                    taxo_predict.predict_by_global_threshold(
                        threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
                    taxo_predict.save(taxo_knn_fpath)

            elif mode == "super":
                taxo_features.fill_super_features()

                for classifier_dir in glob(classifiers_pattern):
                    # Best effort: a failing classifier is reported and the
                    # next one is tried.  ``Exception`` (rather than a bare
                    # ``except``) lets KeyboardInterrupt and SystemExit
                    # propagate so the batch can still be aborted.
                    try:
                        print("Predicting with:" + " " + classifier_dir)
                        taxo_predict = TaxonomyPredictor(taxo_features)
                        method = taxo_predict.predict_by_classifier(classifier_dir)
                        taxo_predict.save(taxo_fpath + "-" + method + ".csv")
                        taxo_predict.save(taxo_fpath + "-" + method + "-conf.csv", conf=True)
                    except Exception:
                        print(format_exc())
def extract_semeval_taxo(input_voc_pattern, language, mode, classifiers_pattern, test_en):
    """Compute relation features and predict taxonomies for vocabulary files.

    Each vocabulary file is processed twice, once with
    ``must_have_space=False`` and once with ``must_have_space=True``; the
    second run writes to file names carrying a ``-space`` infix.

    Args:
        input_voc_pattern: glob pattern; every matching vocabulary file is
            processed in sorted order.
        language: forwarded to ``load_res`` and, as ``lang``, to
            ``TaxonomyFeatures``.
        mode: ``"simple"`` predicts with global/local thresholds,
            ``"super"`` predicts with every classifier matched by
            ``classifiers_pattern``.  Any other value still builds the
            features object but predicts nothing.
        classifiers_pattern: glob pattern of classifier directories; only
            used when ``mode == "super"``.
        test_en: forwarded unchanged to ``load_res``; its meaning is
            defined there.

    Output files are named after the vocabulary file:
    ``<voc>[-space]-relations.csv-taxo.csv`` and ``...-taxo-knn<k>.csv`` in
    simple mode, ``<taxo>-<method>.csv`` and ``<taxo>-<method>-conf.csv`` in
    super mode.
    """
    # Loads all datasets (including all domains, from the vocabularies).
    taxo_res_common, taxo_res_domain = load_res(language, mode, test_en)

    for voc_fpath in sorted(glob(input_voc_pattern)):
        for space in [False, True]:
            s = "-space" if space else ""
            relations_fpath = voc_fpath + s + "-relations.csv"
            taxo_fpath = relations_fpath + "-taxo.csv"

            # Single-argument print() behaves the same on Python 2 and 3.
            # The space before the newline keeps the output byte-identical
            # to the former multi-item print statement.
            print("\n" + voc_fpath + " \n" + "=" * 50)
            print("Relations:" + " " + relations_fpath)
            print("Unpruned taxonomy:" + " " + taxo_fpath)

            # Loads the domain dataset and combines it with the general
            # dataset.
            taxo_res_domain_voc = get_taxo_res_domain_voc(taxo_res_domain, voc_fpath)
            taxo_res_voc = combine_taxo_res(taxo_res_common, taxo_res_domain_voc)
            taxo_features = TaxonomyFeatures(taxo_res_voc, voc_fpath, lang=language)

            if mode == "simple":
                taxo_features.fill_direct_isas()
                taxo_features.fill_substrings(must_have_space=space)
                taxo_features.hypo2hyper_ratio()

                taxo_predict = TaxonomyPredictor(taxo_features)
                taxo_predict.predict_by_global_threshold(
                    threshold=0, field="hypo2hyper_substract", or_correct_predict=False)
                taxo_predict.predict_by_global_threshold(
                    threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
                taxo_predict.save(taxo_fpath)

                for max_knn in [1, 2, 3, 5]:
                    # hypo2hyper is for patterns, hyper_in_hypo_i is for
                    # substrings.
                    taxo_knn_fpath = relations_fpath + "-taxo-knn" + unicode(max_knn) + ".csv"
                    taxo_predict.predict_by_local_threshold(
                        threshold=0, max_knn=max_knn, field="hypo2hyper_substract",
                        or_correct_predict=False)
                    taxo_predict.predict_by_global_threshold(
                        threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
                    taxo_predict.save(taxo_knn_fpath)

            elif mode == "super":
                taxo_features.fill_super_features()

                for classifier_dir in glob(classifiers_pattern):
                    # Best effort: a failing classifier is reported and the
                    # next one is tried.  ``Exception`` (rather than a bare
                    # ``except``) lets KeyboardInterrupt and SystemExit
                    # propagate so the batch can still be aborted.
                    try:
                        print("Predicting with:" + " " + classifier_dir)
                        taxo_predict = TaxonomyPredictor(taxo_features)
                        method = taxo_predict.predict_by_classifier(classifier_dir)
                        taxo_predict.save(taxo_fpath + "-" + method + ".csv")
                        taxo_predict.save(taxo_fpath + "-" + method + "-conf.csv", conf=True)
                    except Exception:
                        print(format_exc())