# Test-script setup: load the test lexicon and the Dutch FrameNet Lemon graph,
# parse the NAF file, and pick out the externalRef element of its predicate.
sys.path.insert(0, '../..')

from lxml import etree
from rdflib import Graph
from nltk.corpus import framenet as fn
import FrameNetNLTK
from FrameNetNLTK import add_annotations_from_naf_31
from FrameNetNLTK import load, get_luid
from rdflib.term import URIRef

naf_path = 'test_naf_files/predicate_in_compound.naf'
corpus_name = 'HDD'
corpus_description = 'HistoricalDistanceData'

my_fn = load(folder='test_lexicon', verbose=2)

# load Dutch FrameNet in Lemon
path_dfn_in_lemon = 'stats/dfn_0.1.ttl'
dfn_in_lemon = Graph()
dfn_in_lemon.parse(path_dfn_in_lemon, format='ttl')

# update NAF file with correct LU URIs
parser = etree.XMLParser(remove_blank_text=True, strip_cdata=False)
doc = etree.parse(naf_path, parser)

# ext_ref_el for predicate annotation
ext_ref_els = doc.findall('srl/predicate/externalReferences/externalRef')
# Exactly one externalRef is required because the first (only) match is used
# below; the failure message now states the same count the condition checks.
assert len(ext_ref_els) == 1, f'expected 1 externalRef element, found {len(ext_ref_els)}'
ext_ref_el = ext_ref_els[0]
verbose=2) add_lu_to_info(your_fn=fn, language='en', premon=LexicalDataD2TAnnotationTool.premon, namespace='http://rdf.cltl.nl/efn/', major_version=1, minor_version=7, output_folder=out_dir, verbose=2) add_lemma_to_pos_to_lu_urls(output_folder=out_dir, language='en', verbose=2) fn_nl = load('res/DutchFrameNet-0.1') add_lu_to_info(your_fn=fn_nl, language='nl', premon=LexicalDataD2TAnnotationTool.premon, namespace='http://rdf.cltl.nl/dfn/', major_version=0, minor_version=1, output_folder=out_dir, verbose=2) add_lemma_to_pos_to_lu_urls(output_folder=out_dir, language='nl', verbose=2)
# Test script for remove_lu: one LU is removed outright, and removing a second
# LU is expected to be rejected with an AssertionError.
import sys

import pytest

sys.path.insert(0, '../..')

from FrameNetNLTK import load, remove_lu, get_luid

# NOTE(review): the lexicon is loaded from '../test/test_lexicon' while
# remove_lu is pointed at 'test_lexicon'; presumably the same folder when the
# script runs from the test directory -- confirm.
fn = load('../test/test_lexicon')

# Look up an LU and remove it from the lexicon folder.
lu_id, reason = get_luid(my_fn=fn,
                         frame_label='People_by_origin',
                         lemma='Fransman',
                         pos='N')
remove_lu(your_lexicon_folder='test_lexicon',
          lu_id=lu_id,
          verbose=2)

# Removing this LU must fail. pytest.raises makes the script fail when no
# AssertionError is raised, instead of passing silently as try/except did.
lu_id, reason = get_luid(my_fn=fn,
                         frame_label='Appellations',
                         lemma='president',
                         pos='N')
with pytest.raises(AssertionError):
    remove_lu(your_lexicon_folder='test_lexicon',
              lu_id=lu_id,
              verbose=2)
print(f'AssertionError was correctly raised for lu_id {lu_id} since it is part of an endocentric compound.')
# Print every descriptive-statistics table for the test lexicon, then write
# the HTML statistics report.
import sys

sys.path.insert(0, '../..')

from FrameNetNLTK import (
    load,
    get_frame_stats_df,
    get_lu_stats_df,
    get_lu_per_pos_stats_df,
    get_lexeme_stats_df,
    get_ambiguity_df,
)
from FrameNetNLTK import get_stats_html

my_fn = load('test_lexicon')

# Each builder takes the loaded lexicon; results are printed in this order.
stats_builders = (
    get_frame_stats_df,
    get_lu_stats_df,
    get_lu_per_pos_stats_df,
    get_lexeme_stats_df,
    get_ambiguity_df,
)
for build_stats in stats_builders:
    print(build_stats(my_fn))

get_stats_html(your_fn=my_fn,
               html_path='stats/descriptive_statistics.html')