sys.path.insert(0, '../..')

from lxml import etree
from rdflib import Graph

from nltk.corpus import framenet as fn
import FrameNetNLTK
from FrameNetNLTK import add_annotations_from_naf_31
from FrameNetNLTK import load, get_luid
from rdflib.term import URIRef

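# NAF test file (a predicate that is part of a compound) and the corpus metadata used when annotating it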
naf_path = 'test_naf_files/predicate_in_compound.naf'
corpus_name = 'HDD'
corpus_description = 'HistoricalDistanceData'

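# load the test lexicon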
my_fn = load(folder='test_lexicon', verbose=2)

# load Dutch FrameNet in Lemon
path_dfn_in_lemon = 'stats/dfn_0.1.ttl'
dfn_in_lemon = Graph()
dfn_in_lemon.parse(path_dfn_in_lemon, format='ttl')
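# the Lemon graph holds the LU URIs that the NAF annotations should point to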

# update NAF file with correct LU URIs
parser = etree.XMLParser(remove_blank_text=True, strip_cdata=False)
doc = etree.parse(naf_path, parser)

# ext_ref_el for predicate annotation
ext_ref_els = doc.findall('srl/predicate/externalReferences/externalRef')
assert len(ext_ref_els) == 1, f'expected 1 externalRef element, found {len(ext_ref_els)}'
ext_ref_el = ext_ref_els[0]
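
# a minimal illustrative sketch (lu_uri and its value are assumptions, not taken from this example):
# once the matching LU URI has been found in dfn_in_lemon, the externalRef can be
# repointed at it and the updated NAF file serialized with lxml.
lu_uri = URIRef('http://rdf.cltl.nl/dfn/lu-1')  # hypothetical LU URI
ext_ref_el.set('reference', str(lu_uri))
doc.write(naf_path, pretty_print=True, xml_declaration=True, encoding='utf-8')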
Example No. 2


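# English FrameNet 1.7: add the LU info and the lemma/POS-to-LU-URL mapping to out_dir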
add_lu_to_info(your_fn=fn,
               language='en',
               premon=LexicalDataD2TAnnotationTool.premon,
               namespace='http://rdf.cltl.nl/efn/',
               major_version=1,
               minor_version=7,
               output_folder=out_dir,
               verbose=2)

add_lemma_to_pos_to_lu_urls(output_folder=out_dir,
                            language='en',
                            verbose=2)

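# DutchFrameNet 0.1: the same two exports for the Dutch lexicon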
fn_nl = load('res/DutchFrameNet-0.1')

add_lu_to_info(your_fn=fn_nl,
               language='nl',
               premon=LexicalDataD2TAnnotationTool.premon,
               namespace='http://rdf.cltl.nl/dfn/',
               major_version=0,
               minor_version=1,
               output_folder=out_dir,
               verbose=2)

add_lemma_to_pos_to_lu_urls(output_folder=out_dir,
                            language='nl',
                            verbose=2)
Example No. 3
import sys
import pytest

sys.path.insert(0, '../..')
from FrameNetNLTK import load, remove_lu, get_luid

fn = load('../test/test_lexicon')


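# look up the LU id of the noun Fransman in the People_by_origin frame and remove it from the lexicon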
lu_id, reason = get_luid(my_fn=fn,
                         frame_label='People_by_origin',
                         lemma='Fransman',
                         pos='N')

remove_lu(your_lexicon_folder='test_lexicon',
          lu_id=lu_id,
          verbose=2)

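# the LU for president (Appellations) is part of an endocentric compound, so remove_lu is expected to raise an AssertionError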
lu_id, reason = get_luid(my_fn=fn,
                         frame_label='Appellations',
                         lemma='president',
                         pos='N')

try:
    remove_lu(your_lexicon_folder='test_lexicon',
              lu_id=lu_id,
              verbose=2)
except AssertionError:
    print(f'AssertionError was correctly raised for lu_id {lu_id} since it is part of an endocentric compound.')
Example No. 4
import sys
sys.path.insert(0, '../..')
from FrameNetNLTK import (load, get_frame_stats_df, get_lu_stats_df,
                          get_lu_per_pos_stats_df, get_lexeme_stats_df,
                          get_ambiguity_df)
from FrameNetNLTK import get_stats_html

my_fn = load('test_lexicon')

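# compute and print descriptive statistics: frames, LUs, LUs per POS, lexemes, and ambiguity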
frame_df = get_frame_stats_df(my_fn)

print(frame_df)

lu_stats_df = get_lu_stats_df(my_fn)

print(lu_stats_df)

lu_per_pos_stats_df = get_lu_per_pos_stats_df(my_fn)

print(lu_per_pos_stats_df)

lexeme_stats_df = get_lexeme_stats_df(my_fn)

print(lexeme_stats_df)

ambiguity_df = get_ambiguity_df(my_fn)

print(ambiguity_df)

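# write all descriptive statistics to a single HTML overview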
get_stats_html(your_fn=my_fn, html_path='stats/descriptive_statistics.html')