# NOTE(review): this ``__init__`` is defined at module level, outside any
# class, and assigns the same three attributes as ``WordStemmer.__init__``
# further down. It looks like a stray duplicate -- confirm nothing calls it
# before removing it.
def __init__(self):
     # This def is not inside a class body, so the double-underscore
     # attribute names below are NOT name-mangled.
     # Start with an empty list of entities.
     self.__entities = []
     # FileCache is defined outside this section of the file.
     self.__fileCache = FileCache()
     # Start with an empty dictionary.
     self.__onlybase = {}
class WordStemmer(object):
    u"""Object providing a word-stemming mechanism.

    A word is stemmed by analogy: the forms listed in "formy.txt" that share
    the longest run of trailing characters with the word are looked up in the
    ``plp`` dictionary, and their inflection pattern is transplanted onto the
    word.
    """

    def __init__(self):
        # Entities built by the most recent stemm() call.
        self.__entities = []
        # Provides the lines of "formy.txt" (see __findSimilarForms).
        self.__fileCache = FileCache()
        # Passed to utility.add_to_dict together with every word for which
        # stemm() returned None.
        self.__onlybase = {}

    def stemm(self, word):
        u"""Perform stemming on the given word.

        Returns a list of entities, one per distinct label among the most
        similar known forms. Returns None (after registering the word via
        utility.add_to_dict) when the word ends with 'x' or when no known
        form shares at least two trailing characters with it; an empty word
        falls into the latter case.
        """
        self.__entities = []
        # endswith() instead of indexing the last character, so an empty
        # word no longer raises IndexError.
        # NOTE(review): why words ending in 'x' are excluded is not visible
        # in this file.
        if word.endswith('x'):
            utility.add_to_dict(self.__onlybase, word)
            return None

        similar_forms = self.__findSimilarForms(word)
        if similar_forms is None:
            utility.add_to_dict(self.__onlybase, word)
            return None

        self.__createEntities(word, similar_forms)
        return self.__entities

    def getOnlyBase(self):
        u"""Return the dictionary of words that could not be stemmed."""
        return self.__onlybase

    def __findSimilarForms(self, word):
        u"""Return the forms from "formy.txt" with the longest common suffix.

        Only suffixes of at least two characters count. The result keeps
        file order and contains no duplicates; None is returned when no form
        qualifies.
        """
        best = -1
        similar_forms = []
        # Mirror of similar_forms used only for O(1) duplicate checks.
        seen = set()
        # Loop-invariant, so resolved once instead of once per line.
        encoding = default_encoding()

        for line in self.__fileCache.grabFile("formy.txt"):
            form = line.decode(encoding)
            common = self.__getCountCommonLettersFromEnd(word, form)
            if common > best and common > 1:
                # Strictly better match: discard everything collected so far.
                best = common
                similar_forms = [form]
                seen = set()
                seen.add(form)
            elif common == best and form not in seen:
                similar_forms.append(form)
                seen.add(form)

        if best > 1:
            return similar_forms
        return None

    def __getCountCommonLettersFromEnd(self, word, form):
        u"""Return the length of the common suffix of ``word`` and ``form``."""
        limit = min(len(word), len(form))
        count = 0
        # Walk both strings backwards from their last character.
        while count < limit and word[-1 - count] == form[-1 - count]:
            count += 1
        return count

    def __createEntities(self, word, similar_forms):
        u"""Append to the result one new entity per distinct label.

        Each similar form is looked up in the dictionary; when several forms
        resolve to the same label only the first one is used.
        """
        seen_labels = []

        for similar_form in similar_forms:
            similar_form_entity = self.__loadEntityFromClp(similar_form)
            if similar_form_entity.label in seen_labels:
                continue

            seen_labels.append(similar_form_entity.label)
            self.__entities.append(
                self.__generateNewWordEntity(word, similar_form,
                                             similar_form_entity))

    def __stripPostfix(self, word, postfix):
        u"""Return ``word`` without its last ``len(postfix)`` characters.

        The cut position is clamped at zero: when the postfix is longer than
        the word the result is an empty string. Without the clamp the
        negative slice index would wrap around and keep part of the word.
        """
        return word[:max(len(word) - len(postfix), 0)]

    def __generateNewWordEntity(self, word, similar_form, similar_form_entity):
        u"""Build an entity for ``word`` modelled on ``similar_form_entity``.

        The ending that ``similar_form`` carries after its entity's prefix is
        cut off ``word`` to obtain the new prefix; the base form and all
        forms are then rebuilt by attaching the model's endings to it.
        """
        word_entity = Entity()
        model_prefix_len = len(similar_form_entity.prefix)
        postfix = similar_form[model_prefix_len:]
        word_entity.prefix = self.__stripPostfix(word, postfix)

        word_entity.base = (word_entity.prefix +
                            similar_form_entity.base[model_prefix_len:])
        word_entity.forms.extend(
            word_entity.prefix + form[model_prefix_len:]
            for form in similar_form_entity.forms)

        word_entity.label = similar_form_entity.label
        word_entity.probability = 100.0
        word_entity.original = similar_form_entity.base

        return word_entity

    def __loadEntityFromClp(self, similar_form):
        u"""Load base form, label and forms of ``similar_form`` from ``plp``.

        Only the first id yielded by ``plp.plp_rec`` is used.
        """
        entity = Entity()
        encoding = default_encoding()
        for clp_id in plp.plp_rec(similar_form.encode(encoding)):
            entity.base = plp.plp_bform(clp_id).decode(encoding)
            entity.label = plp.plp_label(clp_id).decode(encoding)
            for form in plp.plp_forms(clp_id):
                entity.forms.append(form.decode(encoding))
            # Deliberately stop after the first id.
            break
        # NOTE(review): if plp_rec yields nothing, the entity keeps whatever
        # defaults Entity() provides -- confirm those are safe here.
        entity.prefix = self.__getPrefix(entity.base, entity.forms)

        return entity

    def __getPrefix(self, base, forms):
        u"""Return the longest prefix of ``base`` that every form starts with.

        Returns ``base`` unchanged when ``forms`` is empty, and an empty
        string when the forms share no leading characters with ``base``.
        """
        prefix = base
        # Terminates at the latest with the empty string, which every string
        # starts with.
        while not all(form.startswith(prefix) for form in forms):
            prefix = prefix[:-1]
        return prefix