Ejemplo n.º 1
0
class AnalysisProcess:
    """ Drives the automatic morphological analysis of everything an input source provides.
    """

    def __init__(self, dictionary, input_manager, output_manager):
        """ Initializes a new instance of AnalysisProcess.
        :param dictionary: A morphological dictionary, handed to MorphologicalAnalysis.
        :param input_manager: An instance of mosyn.util.io.Input as input method.
        :param output_manager: An instance of mosyn.util.io.Output as output method.
        """
        # Collaborators supplied by the caller.
        self.input_manager = input_manager
        self.output_manager = output_manager

        # Objects created by the process itself.
        self.on_process = EventHook()
        self.analysis = MorphologicalAnalysis(dictionary)
        self.on_start = EventHook()
        self.on_completed = EventHook()

    def start_analysis(self):
        """ Analyses every item read from the input manager and writes each result out.

        on_start is fired before the managers are entered and on_completed after both
        have been exited.
        """
        self.on_start.fire(self)

        with self.input_manager, self.output_manager:
            # Lazy pipeline: a line is only read and analysed when its results are needed,
            # so reading, analysing and writing stay interleaved.
            pending = (
                item
                for text in self.input_manager.read()
                for item in self.analysis.analyze_text(text)
            )
            for item in pending:
                self.output_manager.write(item)

        self.on_completed.fire(self)
Ejemplo n.º 2
0
class MorphologicalAnalysis:
    """ Provides methods to perform a morphological analysis to a text entry.
    """

    def __init__(self, dictionary):
        """ Initializes a new instance of MorphologicalAnalysis.
        :param dictionary: A morphological dictionary; its get_word method is used for lookups.
        """
        self.dictionary = dictionary
        self.on_process = EventHook()
        self._cache_dict = {}  # Lookup results, keyed by the lower-cased token
        self._default_eagles = u'NP00000'  # Fallback tag handed to dictionary.get_word

        self.on_start = EventHook()
        self.on_completed = EventHook()

    def analyze_text(self, value):
        """ Analyzes a piece of text and returns its morphological information.

        This is a generator: on_start fires when iteration begins and on_completed only
        fires once every token has been consumed.
        :param value: The text to analyse.
        :return: An iterable of [token, data] pairs, one per token, where data is whatever
                 the dictionary returned for the lower-cased token.
        """
        self.on_start.fire(self)

        for token in nltk.word_tokenize(value):  # Iterate over all tokens in the text
            # The dictionary is queried with the lower-cased token, so the cache uses the
            # same key: "Casa" and "casa" share one lookup and one cache entry.
            key = token.lower()
            data = self._cache_dict.get(key)

            # Only a missing entry triggers a lookup; an empty result is a valid cached value.
            if data is None:
                data = self.dictionary.get_word(key, self._default_eagles)
                self._cache_dict[key] = data

            yield [token, data]

        self.on_completed.fire(self)
Ejemplo n.º 3
0
    def __init__(self, dictionary):
        """ Builds the analyser around the given dictionary.
        :param dictionary: A morphological dictionary, kept on the instance.
        """
        # Event hooks exposed by the instance.
        self.on_process = EventHook()
        self.on_start = EventHook()
        self.on_completed = EventHook()

        # Analysis state.
        self.dictionary = dictionary
        self._default_eagles = u'NP00000'
        self._cache_dict = {}  # Output Dictionary Cache
Ejemplo n.º 4
0
    def __init__(self, dictionary, input_manager, output_manager):
        """ Stores both I/O managers and builds the analyser for the given dictionary.
        :param dictionary: A morphological dictionary, handed to MorphologicalAnalysis.
        :param input_manager: An instance of mosyn.util.io.Input as input method.
        :param output_manager: An instance of mosyn.util.io.Output as output method.
        """
        # Collaborators supplied by the caller.
        self.input_manager = input_manager
        self.output_manager = output_manager

        # Objects created here.
        self.on_process = EventHook()
        self.analysis = MorphologicalAnalysis(dictionary)
        self.on_start = EventHook()
        self.on_completed = EventHook()
Ejemplo n.º 5
0
    def __init__(self, filename):
        """ Creates an instance with an empty dictionary, bound to the given file.
        :param filename: The dictionary filename.
        """
        self.on_loaded = EventHook()

        # Nothing is read here; the mapping starts out empty.
        self.dictionary = {}
        self.filename = filename
Ejemplo n.º 6
0
class MorphologicalDictionary:
    """ Represents a morphological dictionary.

    The dictionary attribute maps each word to a list of two-element tuples built from the
    second and third fields of the matching lines in the dictionary file.
    """

    def __init__(self, filename):
        """ Initializes a new instance of MorphologicalDictionary.
        :param filename: The dictionary filename.
        """
        self.filename = filename
        self.dictionary = {}

        self.on_loaded = EventHook()

    def load(self):
        """ Loads the dictionary file into memory.

        The first line of the file is treated as a header and skipped. Every other line is
        parsed with _parse_line; its second and third fields are appended, as a tuple, to the
        list kept for the word in its first field. Lines for the same word are merged even
        when they are not adjacent in the file.

        The instance dictionary is only updated once the whole file has been parsed, so a
        malformed line leaves it untouched. on_loaded is fired after a successful load.
        :return: The dictionary mapping each word to its list of tuples.
        :raise Exception: if a line does not agree with the dictionary format.
        """
        entries = {}

        with codecs.open(self.filename, mode='r', encoding='utf-8') as f:
            next(f, None)  # Skip the header line (harmless on an empty file)

            for line in f:
                word, second, third = self._parse_line(line)
                entries.setdefault(word, []).append((second, third))

        self.dictionary.update(entries)
        self.on_loaded.fire(self)

        return self.dictionary

    def get_word(self, value, default=u'NP00000'):
        """ Gets the morphological information of the word.
        :param value: The word value to be found.
        :param default: The eagles default classification if the word is not found.
        :return: The list stored for the word, or a single-element list holding the
                 lower-cased word and the default classification when it is unknown.
        """
        return self.dictionary.get(value, [(value.lower(), default)])

    def _parse_line(self, text):
        """ Converts a line of the dictionary file to its first three fields.
        :param text: The line to be converted.
        :return: A tuple with the first three comma-separated fields of the line.
        :raise Exception: if the line does not agree with the dictionary format.
        """
        fields = text.rstrip().split(u',')

        if text.startswith(u','):
            # The word is the comma itself, so splitting on commas cannot recover the first
            # two fields: both are the comma and the last field is kept as the third one.
            return u',', u',', fields[-1]

        if len(fields) < 3:
            raise Exception('Error on dictionary format')

        return fields[0], fields[1], fields[2]