Beispiel #1
0
def demo_flat():
    from nltk.etree.ElementTree import ElementTree
    import sys

    tree = ElementTree(
        toolbox.xml('iu_mien_samp.db', key='lx', encoding='utf8'))
    tree.write(sys.stdout)
Beispiel #2
0
def demo():
    from nltk.etree.ElementTree import ElementTree

    settings = ToolboxSettings()
    settings.open("demos/MDF_AltH.typ")
    tree = settings.parse(unwrap=False, encoding="gbk")
    print tree.find("expset/expMDF/rtfPageSetup/paperSize").text
    settings_tree = ElementTree(tree)
    settings_tree.write("test.xml")
    print to_settings_string(settings_tree).encode("gbk")
Beispiel #3
0
def demo():
    from nltk.etree.ElementTree import ElementTree

    settings = ToolboxSettings()
    settings.open('demos/MDF_AltH.typ')
    tree = settings.parse(unwrap=False, encoding='gbk')
    print tree.find('expset/expMDF/rtfPageSetup/paperSize').text
    settings_tree = ElementTree(tree)
    settings_tree.write('test.xml')
    print to_settings_string(settings_tree).encode('gbk')
Beispiel #4
0
# -*- coding: utf-8 -*-

# Natural Language Toolkit: Toolbox Data demonstration
#
# Copyright (C) 2001-2006 NLTK Project
# Author: Greg Aumann <*****@*****.**>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
demonstration of grammar parsing
"""

from nltk.etree.ElementTree import ElementTree
from nltk_contrib import toolbox
from nltk.corpus import find_corpus_file
import os.path, sys

grammar = r"""
      lexfunc: {<lf>(<lv><ln|le>*)*}
      example: {<rf|xv><xn|xe>*}
      sense:   {<sn><ps><pn|gv|dv|gn|gp|dn|rn|ge|de|re>*<example>*<lexfunc>*}
      record:   {<lx><hm><sense>+<dt>}
    """

db = toolbox.ToolboxData()
db.open(find_corpus_file('toolbox', 'iu_mien_samp.db'))
lexicon = db.chunk_parse(grammar, encoding='utf8')
toolbox.data.indent(lexicon)
tree = ElementTree(lexicon)
tree.write(sys.stdout, encoding='utf8')
Beispiel #5
0
# Natural Language Toolkit: Toolbox Data demonstration
#
# Copyright (C) 2001-2006 NLTK Project
# Author: Greg Aumann <*****@*****.**>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
corresponds to 
12.3.1   Accessing Toolbox Data
in http://nltk.sourceforge.net/lite/doc/en/data.html
"""

from nltk.corpus import toolbox

lexicon = toolbox.xml('rotokas.dic')

sum_size = num_entries = 0
for entry in lexicon.findall('record'):
    num_entries += 1
    sum_size += len(entry)
print sum_size/num_entries


from nltk.etree.ElementTree import ElementTree
import sys
fourth_entry = lexicon.findall('record')[3]
tree = ElementTree(fourth_entry)
tree.write(sys.stdout)
Beispiel #6
0
# Natural Language Toolkit: Toolbox Data demonstration
#
# Copyright (C) 2001-2006 NLTK Project
# Author: Greg Aumann <*****@*****.**>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
demonstration of grammar parsing
"""

from nltk.etree.ElementTree import ElementTree
from nltk_contrib import toolbox
from nltk.corpus import find_corpus_file
import os.path, sys

grammar = r"""
      lexfunc: {<lf>(<lv><ln|le>*)*}
      example: {<rf|xv><xn|xe>*}
      sense:   {<sn><ps><pn|gv|dv|gn|gp|dn|rn|ge|de|re>*<example>*<lexfunc>*}
      record:   {<lx><hm><sense>+<dt>}
    """

db = toolbox.ToolboxData()
db.open(find_corpus_file("toolbox", "iu_mien_samp.db"))
lexicon = db.chunk_parse(grammar, encoding="utf8")
toolbox.data.indent(lexicon)
tree = ElementTree(lexicon)
tree.write(sys.stdout, encoding="utf8")
Beispiel #7
0
# -*- coding: utf-8 -*-

# Natural Language Toolkit: Toolbox Data demonstration
#
# Copyright (C) 2001-2006 NLTK Project
# Author: Greg Aumann <*****@*****.**>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
corresponds to 
12.3.1   Accessing Toolbox Data
in http://nltk.sourceforge.net/lite/doc/en/data.html
"""

from nltk.corpus import toolbox

lexicon = toolbox.xml('rotokas.dic')

sum_size = num_entries = 0
for entry in lexicon.findall('record'):
    num_entries += 1
    sum_size += len(entry)
print sum_size / num_entries

from nltk.etree.ElementTree import ElementTree
import sys
fourth_entry = lexicon.findall('record')[3]
tree = ElementTree(fourth_entry)
tree.write(sys.stdout)
Beispiel #8
0
    'entry': (('lx', ), ('hm', 'sense', 'dt')),
    'sense': (('sn', 'ps'), ('pn', 'gv', 'dv', 'gn', 'gp', 'dn', 'rn', 'ge',
                             'de', 're', 'example', 'lexfunc')),
    'example': ((
        'rf',
        'xv',
    ), ('xn', 'xe')),
    'lexfunc': (('lf', ), ('lexvalue', )),
    'lexvalue': (('lv', ), ('ln', 'le')),
}

db = toolbox.ToolboxData()
db.open(find_corpus_file('toolbox', 'iu_mien_samp.db'))
lexicon = db.grammar_parse('toolbox', grammar, encoding='utf8')
tree = ElementTree(lexicon)
tree.write('iu_mien_samp.xml', encoding='utf8')
num_lexemes = 0
num_senses = 0
num_examples = 0
for lexeme in lexicon.findall('entry'):
    num_lexemes += 1
    for sense in lexeme.findall('sense'):
        num_senses += 1
        for example in sense.findall('example'):
            num_examples += 1
print 'num. lexemes  =', num_lexemes
print 'num. senses   =', num_senses
print 'num. examples =', num_examples

#another approach
print 'num. examples =', len(lexicon.findall('.//example'))
Beispiel #9
0
def demo_flat():
    from nltk.etree.ElementTree import ElementTree    
    import sys

    tree = ElementTree(toolbox.xml('iu_mien_samp.db', key='lx', encoding='utf8'))
    tree.write(sys.stdout)
Beispiel #10
0
        'toolbox':      (('_sh',), ('_DateStampHasFourDigitYear', 'entry')),
        'entry':          (('lx',), ('hm', 'sense', 'dt')),
        'sense':          (('sn', 'ps'), ('pn', 'gv', 'dv',
                                   'gn', 'gp', 'dn', 'rn',
                                   'ge', 'de', 're',
                                   'example', 'lexfunc')),
        'example':      (('rf', 'xv',), ('xn', 'xe')),
        'lexfunc':      (('lf',), ('lexvalue',)),
        'lexvalue':    (('lv',), ('ln', 'le')),
}

db = toolbox.ToolboxData()
db.open(find_corpus_file('toolbox', 'iu_mien_samp.db'))
lexicon = db.grammar_parse('toolbox', grammar, encoding='utf8')
tree = ElementTree(lexicon)
tree.write('iu_mien_samp.xml', encoding='utf8')
num_lexemes = 0
num_senses = 0
num_examples = 0
for lexeme in lexicon.findall('entry'):
    num_lexemes += 1
    for sense in lexeme.findall('sense'):
        num_senses += 1
        for example in sense.findall('example'):
            num_examples += 1
print 'num. lexemes  =', num_lexemes
print 'num. senses   =', num_senses
print 'num. examples =', num_examples

#another approach
print 'num. examples =', len(lexicon.findall('.//example'))