import sys
sys.path.insert(
    0,
    '/disk/scratch/s1146856/project_codes/tools/sense_stuff/wiktionary-parser-xml/wiktionary_parser'
)
sys.path.insert(
    0,
    '/disk/scratch/s1146856/project_codes/tools/sense_stuff/wiktionary-parser-xml'
)
from wiktionary_parser.xml_parser import XMLPageParser
from wiktionary_parser.languages.fr.page import frPage
from wiktionary_parser.languages.fr.parseText import FrParseText

# The words we want to look up in the French Wiktionary dump.
french_words = set(['sauter'])

# Open the dump inside a ``with`` block so the file handle is closed as soon
# as the lookup is finished (previously it was never closed).
with open(
    '../../../../../datasets/sense_disambiguation_datasets/frwiktionary-20161101-pages-articles-multistream.xml'
) as xml_file:
    xml_parser = XMLPageParser(xml_file, frPage)

    # NOTE(review): each item from ``from_titles`` is bound to a single name
    # here, while the German and simple-English examples unpack
    # ``(title, page)`` pairs -- confirm which shape this parser version yields.
    for page in xml_parser.from_titles(french_words):
        parseData = FrParseText(page.text)
        # Single-argument ``print(...)`` produces the same output on
        # Python 2 and Python 3.
        print('Title %s' % (page.title,))
        # While debugging, ``parseData.view_sections()`` and
        # ``parseData.synonyms`` were inspected here as well.
        print(parseData.verb_translations)
        # Only the first yielded page is processed.
        break
# --- Example no. 2 (0) ---
# -*- coding: utf-8 -*-
"""
This example extracts a number of words from the wiktionary xml file.
"""

from wiktionary_parser.xml_parser import XMLPageParser
from wiktionary_parser.languages.de.page import dePage

# The words we want to extract, and the titles seen so far.
german_words = set([u'Bank', u'Kiefer'])
found_words = set([])

# Open the dump inside a ``with`` block so the file handle is closed when the
# extraction is finished (previously it was never closed).
with open(
        '../../wiktionary_data/dewiktionary-20110504-pages-articles.xml'
) as xml_file:
    xml_parser = XMLPageParser(xml_file, dePage)

    for title, page in xml_parser.from_titles(german_words):
        found_words.add(title)
        page.parse()
        for word in page.words:
            print('')
            print(word.title)
            print('******************')
            # Each section is printed only when the word has content for it.
            sections = (
                ('--Bedeutungen---------------', word.bedeutungen),
                ('--Beispiele-----------------', word.beispiele),
                ('--Gender--------------------', word.gender),
            )
            for header, content in sections:
                if content:
                    print(header)
                    print(content)
        # Stop once every requested title has been seen instead of reading
        # the rest of the dump; this mirrors the simple-English example and
        # gives ``found_words`` a purpose.
        if found_words == german_words:
            break
# --- Example no. 3 (0) ---
# -*- coding: utf-8 -*-
"""
This example extracts a number of words from the simple.wiktionary xml file.
"""

from wiktionary_parser.xml_parser import XMLPageParser
from wiktionary_parser.languages.simple.page import simplePage

# The words we want to extract, and the titles seen so far.
wanted_words = set([u'fish'])
found_words = set([])

# Open the dump inside a ``with`` block so the file handle is closed when the
# extraction is finished (previously it was never closed).
with open(
        '../../wiktionary_data/simplewiktionary-20110514-pages-articles.xml'
) as xml_file:
    xml_parser = XMLPageParser(xml_file, simplePage)

    for title, page in xml_parser.from_titles(wanted_words):
        page.parse()
        # Print out a summary of each word on the page.  Single-argument
        # ``print(...)`` produces the same output on Python 2 and Python 3.
        for word in page.words:
            print(word.summary())
        found_words.add(title)
        # Stop as soon as every wanted title has been seen.
        if wanted_words == found_words:
            break
    
# -*- coding: utf-8 -*-
"""
This example extracts a number of words from the simple.wiktionary xml file.
"""

from wiktionary_parser.xml_parser import XMLPageParser
from wiktionary_parser.languages.simple.page import simplePage

# The words we want to extract, and the titles seen so far.
wanted_words = set([u'fish'])
found_words = set([])

# Use a context manager so the dump's file handle is released once the
# extraction is over (the handle used to stay open for the whole process).
with open(
        '../../wiktionary_data/simplewiktionary-20110514-pages-articles.xml'
) as xml_file:
    xml_parser = XMLPageParser(xml_file, simplePage)

    for title, page in xml_parser.from_titles(wanted_words):
        page.parse()
        # Print out a summary of each word on the page.  ``print(...)`` with
        # one argument is valid and equivalent on Python 2 and Python 3.
        for word in page.words:
            print(word.summary())
        found_words.add(title)
        # Nothing left to look for once all wanted titles were found.
        if wanted_words == found_words:
            break
# --- Example no. 5 (0) ---
"""
This example extracts a number of words from the wiktionary xml file.
"""

from wiktionary_parser.xml_parser import XMLPageParser
from wiktionary_parser.languages.de.page import dePage

# The words we want to extract, and the titles seen so far.
german_words = set([u'Bank', u'Kiefer'])
found_words = set([])

# Use a context manager so the dump's file handle is released once the
# extraction is over (the handle used to stay open for the whole process).
with open(
        '../../wiktionary_data/dewiktionary-20110504-pages-articles.xml'
) as xml_file:
    xml_parser = XMLPageParser(xml_file, dePage)

    for title, page in xml_parser.from_titles(german_words):
        found_words.add(title)
        page.parse()
        for word in page.words:
            print('')
            print(word.title)
            print('******************')
            # A section header and its content are printed only when the
            # word actually has content for that section.
            sections = (
                ('--Bedeutungen---------------', word.bedeutungen),
                ('--Beispiele-----------------', word.beispiele),
                ('--Gender--------------------', word.gender),
            )
            for header, content in sections:
                if content:
                    print(header)
                    print(content)
        # Leave the loop once all requested titles were seen rather than
        # reading the remainder of the dump; the simple-English example does
        # the same, and ``found_words`` was otherwise never consulted.
        if found_words == german_words:
            break
# -*- coding: utf-8 -*-
"""
This example extracts a number of words from the wiktionary xml file.
"""

import sys
sys.path.insert(0, '/disk/scratch/s1146856/project_codes/tools/sense_stuff/wiktionary-parser-xml/wiktionary_parser')
sys.path.insert(0, '/disk/scratch/s1146856/project_codes/tools/sense_stuff/wiktionary-parser-xml')
from wiktionary_parser.xml_parser import XMLPageParser
from wiktionary_parser.languages.nl.page import nlPage
from wiktionary_parser.languages.nl.parseText import NlParseText

# The words we want to look up in the Dutch Wiktionary dump.
dutch_words = set(['springen'])

# Open the dump inside a ``with`` block so the file handle is closed as soon
# as the lookup is finished (previously it was never closed).
with open(
    '../../../../../datasets/sense_disambiguation_datasets/nlwiktionary-20161120-pages-articles-multistream.xml'
) as xml_file:
    xml_parser = XMLPageParser(xml_file, nlPage)

    # NOTE(review): each item from ``from_titles`` is bound to a single name
    # here, while the German and simple-English examples unpack
    # ``(title, page)`` pairs -- confirm which shape this parser version yields.
    for page in xml_parser.from_titles(dutch_words):
        # Dump the raw page text first, then the parsed result.
        # Single-argument ``print(...)`` produces the same output on
        # Python 2 and Python 3.
        print(page.text)
        parseData = NlParseText(page.text)
        print('Title %s' % (page.title,))
        # While debugging, ``parseData.view_sections()`` and
        # ``parseData.synonyms`` were inspected here as well.
        print(parseData.verb_translations)
        # Only the first yielded page is processed.
        break