Python Morfeusz.Morfeusz Examples

Programming Language: Python

Namespace/Package Name: morfeusz2

Class/Type: Morfeusz

Method/Function: Morfeusz

Examples at hotexamples.com: 7

Python Morfeusz.Morfeusz - 7 examples found. These are the top rated real world Python examples of morfeusz2.Morfeusz.Morfeusz extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Morfeusz(7)

analyse(7)

dict_copyright(1)

dict_id(1)

generate(1)

Example #1

Show file

    def __init__(self):
        """Initialise the lemmatizer and make sure the shared analyser exists.

        The Morfeusz object is stored on the class (``PolishLemmatizer._morph``)
        and is only constructed while that attribute is still ``None``, so it
        is built at most once and reused by later instances.

        Raises:
            ImportError: if the ``morfeusz2`` module cannot be imported.
        """
        super(PolishLemmatizer, self).__init__()
        try:
            from morfeusz2 import Morfeusz
        except ImportError:
            message = 'The Polish lemmatizer requires the morfeusz2-python library'
            raise ImportError(message)

        # Nothing to do when an earlier instance already created the analyser.
        if PolishLemmatizer._morph is not None:
            return
        PolishLemmatizer._morph = Morfeusz(dict_name='polimorf')

Example #2

Show file

def process_request(params):
    """Run a Morfeusz analysis or generation request and build the response.

    Args:
        params: raw request parameters, handed to ``MorfeuszOptionParser``.

    Returns:
        A dict that always contains ``'results'`` (a list). When the parser's
        ``validate`` call succeeds it also contains ``'version'``,
        ``'dictionaryId'`` and ``'copyright'``. ``validate`` receives the
        response dict; presumably it records validation errors there -
        confirm against ``MorfeuszOptionParser``.
    """
    parser = MorfeuszOptionParser(params)

    # Boolean switches: (request parameter name, Morfeusz option name).
    boolean_options = (
        ('expandDag', 'expand_dag'),
        ('expandTags', 'expand_tags'),
        ('expandDot', 'expand_dot'),
        ('expandUnderscore', 'expand_underscore'),
    )
    for request_key, option_name in boolean_options:
        parser.parse_bool(request_key, option_name)

    parser.parse_string('agglutinationRules', 'aggl', AGGLUTINATION_RULES)
    parser.parse_string('pastTenseSegmentation', 'praet',
                        PAST_TENSE_SEGMENTATION)

    parser.parse_enum('tokenNumbering', 'separate_numbering',
                      TokenNumbering, TokenNumbering.separate)
    parser.parse_enum('caseHandling', 'case_handling', CaseHandling)
    parser.parse_enum('whitespaceHandling', 'whitespace', WhitespaceHandling)

    parser.parse_actions('action')

    results = []
    response = {'results': results}

    # Invalid requests are answered without ever constructing Morfeusz.
    if not parser.validate(response):
        return response

    parser.set_dictionary_path('MORFEUSZ_DICT_PATH')
    morfeusz = Morfeusz(**parser.get_opts())

    if parser.action == 'analyze':
        for entry in morfeusz.analyse(parser.text):
            if not isinstance(entry, list):
                results.append(tag_items(entry))
                continue
            # A list entry becomes a nested list of tagged items.
            results.append([tag_items(item) for item in entry])
    elif parser.action == 'generate':
        # One nested result list per requested title.
        for title in parser.titles:
            results.append(
                [tag_items(interp) for interp in morfeusz.generate(title)])

    response.update({
        'version': morfeusz2.__version__,
        'dictionaryId': morfeusz.dict_id(),
        'copyright': morfeusz.dict_copyright(),
    })
    return response

Example #3

Show file

from sys import argv, exit
#other imports

# Default corpus; an optional first command-line argument overrides it, but
# only if a file of that name is listed in the current directory.
corpus_filename = 'pl.txt'
if len(argv) > 1:
    filename = argv[1]
    if filename not in listdir('.'):
        print('File %s not found in the current directory' % filename)
        exit(-1)
    corpus_filename = filename

exclude = string.digits
# Single Morfeusz instance, created with default options, used by lemm().
morph = Morfeusz()


def lemm(line):
    """Split *line* into lowercase tokens and return one lemma per token.

    The line is lowercased and split on runs of digits, runs of non-word
    characters and single underscores. Every non-empty token is passed to
    the module-level ``morph.analyse``; tokens for which the analysis is
    empty are dropped.

    Args:
        line: the text to lemmatise.

    Returns:
        A list of strings, in token order. Each string is
        ``w_desc[0][2][1]`` cut at the first ``':'`` - presumably the lemma
        of the first interpretation with its qualifier suffix removed;
        confirm against the Morfeusz result layout.
    """
    # Raw string: '\d' and '\W' are invalid escape sequences in an ordinary
    # literal and trigger warnings on current Python versions.
    tokens = re.split(r'\d+|\W+|_', line.lower(), flags=re.UNICODE)
    norm_sentence = []
    # Iterating directly avoids ``xrange``, which does not exist on Python 3.
    for token in tokens:
        if not token:
            # Adjacent separators produce empty strings; skip them.
            continue
        w_desc = morph.analyse(token)
        if w_desc:
            norm_sentence.append(w_desc[0][2][1].split(':')[0])
    return norm_sentence

Example #4

Show file

#! /usr/bin/python
# -*- coding: utf-8 -*-

from morfeusz2 import Morfeusz
from concraft_pl2 import Concraft, Server

# Bind ``server`` before entering the try block so the cleanup in ``finally``
# cannot raise NameError (and hide the real error) when constructing
# Morfeusz or the Server itself fails.
server = None
try:
  morfeusz = Morfeusz(expand_tags=True)
  server = Server(model_path="/home/kuba/work/ipipan/concraft/pre-trained/Sep-18/model-04-09-2018.gz", port=3001)
  concraft = Concraft(port=3001)

  # Variant 1: pass the analysis result straight to disamb().
  dag = morfeusz.analyse(u'W Szczebrzeszynie chrząszcz brzmi w trzcinie.')
  res = concraft.disamb(dag)
  print(res)

  # Variant 2: convert the analysis result to a string first and use the
  # string-based disamb_str().
  dag = morfeusz.analyse(u'W Szczebrzeszynie chrząszcz brzmi w trzcinie.')
  dag_str = concraft.dag_to_str(dag)
  dag_disamb_str = concraft.disamb_str(dag_str)
  print(dag_disamb_str)
finally:
  # Only terminate a server that was actually created.
  if server is not None:
    server.terminate()

Example #5

Show file

import pathlib, random, re, sys
from typing import Callable, Optional

from morfeusz2 import Morfeusz
from wordnet import query

# Module-level Morfeusz instance, constructed with analyse=False.
# NOTE(review): presumably only generation is needed here, so the analyser
# is switched off - confirm against the morfeusz2 constructor documentation.
morfeusz = Morfeusz(analyse=False)

# Module-level configuration and registries. The dicts start out empty;
# whatever fills them is outside this view.
DATASETS = ["new"]
DICT_LINES = {}
DICT_FUNCTIONS = {}

THESAURUS = {}

# Words from the thesaurus containing these tags will be ignored:
BLACKLISTED_TAGS = [
	"(bardzo potocznie)",
	"(potocznie)",
	"(częściej, ale wg niektórych niepoprawnie)",
	"(eufemistycznie)",  # :(
	#"(nieco potocznie)",  # Eh, it's fine
	"(obraźliwe)",
	"(obraźliwie)",
	#"(pieszczotliwie)",
	"(pogardliwie)",
	"(potoczne)",
	"(potocznie)",
	"(przestarzale)",
	"(ptoocznie)",
	"(regionalnie)",  # Contains some inappropriate words
	"(rzadko, wg niektórych niepoprawnie)",

Example #6

Show file

 def __init__(self):
     """Create a Morfeusz object with default options and keep it as ``self.morf``."""
     self.morf = Morfeusz()

Example #7

Show file

    r'<([a-z]*-[a-z]*)(\s+[a-z]*="\w*((\s+\w*)+)?")?>(.+?)<\/[a-z]*-[a-z]*>',
    letter_contents)
# print('one word tags')
# for tag in tags_word:
#     print(tag)
#     print('\n')
# print('two word tags')
# for tag in tags_words:
#     print(tag)
#     print('\n')


def remove_dashes(text):
    """Return *text* with every ``'-'`` character removed.

    Args:
        text: the string to filter.

    Returns:
        The characters of *text* in their original order, without any
        ``'-'``; an empty string when nothing remains.
    """
    # str.join builds the result in one pass instead of re-copying the
    # accumulated string on every character.
    return ''.join(letter for letter in text if letter != '-')


# Run remove_tags over the contents of the first letter and join the
# resulting pieces with single spaces. remove_tags and letters are defined
# outside this view; the result is passed to ' '.join, so it is presumably
# an iterable of strings - confirm.
letter1_no_tags = remove_tags(letters[0].contents)
letter1_nt_str = ' '.join(letter1_no_tags)
# letter_ntnd_str = remove_dashes(letter1_nt_str.decode('utf-8'))
# print(letter_ntnd_str)

# Analyse the joined text with a default-configured Morfeusz instance and
# print both the text and the raw analysis result.
morf = Morfeusz()
# print(morf)
# NOTE(review): .decode('utf8') implies letter1_nt_str is a byte string
# (Python 2 str); on Python 3 a str has no decode() - confirm the target
# interpreter.
print(letter1_nt_str.decode('utf8'))
letter1_analysed = morf.analyse(letter1_nt_str)
print(letter1_analysed)