Example #1
0
def evaluate_on_trial_taxo():
    relations_fpath = join(RES_DIR,"relations.csv")  # assuming features "hyper_in_hypo_i" and "hypo2hyper_substract"
    taxo_fpath = relations_fpath + "-taxo.csv"
    print "Relations:", relations_fpath
    print "Unpruned taxonomy:", taxo_fpath

    taxo_features = TaxonomyFeatures(TaxonomyResources(), relations_fpath=relations_fpath, lang="en")       
    taxo_predict = TaxonomyPredictor(taxo_features)
    taxo_predict.predict_by_global_threshold(threshold=0, field="hypo2hyper_substract", or_correct_predict=False)
    taxo_predict.predict_by_global_threshold(threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
    taxo_predict.save(taxo_fpath)
    taxo_predict.evaluate(field="correct_predict")

    for max_knn in [1, 2, 3, 5]:
        taxo_knn_fpath = relations_fpath + "-taxo-knn" + unicode(max_knn) + ".csv"
        taxo_predict.predict_by_local_threshold(threshold=0, max_knn=max_knn, field="hypo2hyper_substract", or_correct_predict=False)
        taxo_predict.predict_by_global_threshold(threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
        taxo_predict.save(taxo_knn_fpath)
        taxo_predict.evaluate(field="correct_predict")       
Example #2
0
def evaluate_on_trial_taxo():
    relations_fpath = join(RES_DIR,"relations.csv")  # assuming features "hyper_in_hypo_i" and "hypo2hyper_substract"
    taxo_fpath = relations_fpath + "-taxo.csv"
    print "Relations:", relations_fpath
    print "Unpruned taxonomy:", taxo_fpath

    taxo_features = TaxonomyFeatures(TaxonomyResources(), relations_fpath=relations_fpath, lang="en")
    taxo_predict = TaxonomyPredictor(taxo_features)
    taxo_predict.predict_by_global_threshold(threshold=0, field="hypo2hyper_substract", or_correct_predict=False)
    taxo_predict.predict_by_global_threshold(threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
    taxo_predict.save(taxo_fpath)
    taxo_predict.evaluate(field="correct_predict")

    for max_knn in [1, 2, 3, 5]:
        taxo_knn_fpath = relations_fpath + "-taxo-knn" + unicode(max_knn) + ".csv"
        taxo_predict.predict_by_local_threshold(threshold=0, max_knn=max_knn, field="hypo2hyper_substract", or_correct_predict=False)
        taxo_predict.predict_by_global_threshold(threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
        taxo_predict.save(taxo_knn_fpath)
        taxo_predict.evaluate(field="correct_predict")
Example #3
0
def extract_semeval_taxo(input_voc_pattern, language, mode, classifiers_pattern):
    taxo_res_common, taxo_res_domain = load_res(language, mode) 
        
    for voc_fpath in sorted(glob(input_voc_pattern)):
        for space in [False]: #, True]:
            s = "-space" if space else ""
            relations_fpath = voc_fpath + s + "-relations.csv"
            taxo_fpath = relations_fpath + "-taxo.csv"
            print "\n", voc_fpath, "\n", "="*50
            print "Relations:", relations_fpath
            print "Unpruned taxonomy:", taxo_fpath
            
            taxo_res_domain_voc = get_taxo_res_domain_voc(taxo_res_domain, voc_fpath)
            taxo_res_voc = combine_taxo_res(taxo_res_common, taxo_res_domain_voc)
            taxo_features = TaxonomyFeatures(taxo_res_voc, voc_fpath, lang=language)       
            
            if mode == "simple":
                taxo_features.fill_direct_isas()
                taxo_features.fill_substrings(must_have_space=space)
                taxo_features.hypo2hyper_ratio()
                taxo_predict = TaxonomyPredictor(taxo_features)
                taxo_predict.predict_by_global_threshold(threshold=0, field="hypo2hyper_substract", or_correct_predict=False)
                taxo_predict.predict_by_global_threshold(threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
                taxo_predict.save(taxo_fpath)
            
                for max_knn in [1, 2, 3, 5]:
                    taxo_knn_fpath = relations_fpath + "-taxo-knn" + unicode(max_knn) + ".csv"
                    taxo_predict.predict_by_local_threshold(threshold=0, max_knn=max_knn, field="hypo2hyper_substract", or_correct_predict=False)
                    taxo_predict.predict_by_global_threshold(threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
                    taxo_predict.save(taxo_knn_fpath)
                    
            elif mode == "super":
                taxo_features.fill_super_features()

                for classifier_dir in glob(classifiers_pattern):
                    try:
                        print "Predicting with:", classifier_dir
                        taxo_predict = TaxonomyPredictor(taxo_features)
                        method = taxo_predict.predict_by_classifier(classifier_dir)
                        taxo_predict.save(taxo_fpath + "-" + method + ".csv")
                        taxo_predict.save(taxo_fpath + "-" + method + "-conf.csv", conf=True)
                    except:
                        print format_exc()
Example #4
0
def extract_semeval_taxo(input_voc_pattern, language, mode, classifiers_pattern, test_en):
    #Laedt alle Datensaetze(auch alle Domaenen, aus vocabularies)
    taxo_res_common, taxo_res_domain = load_res(language, mode, test_en)

    for voc_fpath in sorted(glob(input_voc_pattern)):
        for space in [False, True]:
            s = "-space" if space else ""
            relations_fpath = voc_fpath + s + "-relations.csv"
            taxo_fpath = relations_fpath + "-taxo.csv"
            print "\n", voc_fpath, "\n", "="*50
            print "Relations:", relations_fpath
            print "Unpruned taxonomy:", taxo_fpath

            #Laedt domain-datenset und kombiniert sie mit dem allgemeinen Datenset
            taxo_res_domain_voc = get_taxo_res_domain_voc(taxo_res_domain, voc_fpath)
            taxo_res_voc = combine_taxo_res(taxo_res_common, taxo_res_domain_voc)
            taxo_features = TaxonomyFeatures(taxo_res_voc, voc_fpath, lang=language)

            if mode == "simple":
                taxo_features.fill_direct_isas()
                taxo_features.fill_substrings(must_have_space=space)
                taxo_features.hypo2hyper_ratio()
                taxo_predict = TaxonomyPredictor(taxo_features)
                taxo_predict.predict_by_global_threshold(threshold=0, field="hypo2hyper_substract", or_correct_predict=False)
                taxo_predict.predict_by_global_threshold(threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
                taxo_predict.save(taxo_fpath)

                for max_knn in [1, 2, 3, 5]:
                    #hypo2hyper fuer pattern
                    #hyperinhypoi feur substring
                    taxo_knn_fpath = relations_fpath + "-taxo-knn" + unicode(max_knn) + ".csv"
                    taxo_predict.predict_by_local_threshold(threshold=0, max_knn=max_knn, field="hypo2hyper_substract", or_correct_predict=False)
                    taxo_predict.predict_by_global_threshold(threshold=0, field="hyper_in_hypo_i", or_correct_predict=True)
                    taxo_predict.save(taxo_knn_fpath)

            elif mode == "super":
                taxo_features.fill_super_features()

                for classifier_dir in glob(classifiers_pattern):
                    try:
                        print "Predicting with:", classifier_dir
                        taxo_predict = TaxonomyPredictor(taxo_features)
                        method = taxo_predict.predict_by_classifier(classifier_dir)
                        taxo_predict.save(taxo_fpath + "-" + method + ".csv")
                        taxo_predict.save(taxo_fpath + "-" + method + "-conf.csv", conf=True)
                    except:
                        print format_exc()