class AnalysisProcess:
    """
    Runs an automatic morphological analysis over everything an input
    manager provides and forwards each result to an output manager.
    """

    def __init__(self, dictionary, input_manager, output_manager):
        """
        Initializes a new instance of AnalysisProcess.

        :param dictionary: A morphological dictionary, handed to the internal
            MorphologicalAnalysis instance.
        :param input_manager: An instance of mosyn.util.io.Input as input method.
        :param output_manager: An instance of mosyn.util.io.Output as output method.
        """
        # Endpoints of the pipeline.
        self.input_manager = input_manager
        self.output_manager = output_manager

        # NOTE: on_process is created here but never fired by this class.
        self.on_process = EventHook()
        self.analysis = MorphologicalAnalysis(dictionary)

        # Hooks fired by start_analysis() before and after the run.
        self.on_start = EventHook()
        self.on_completed = EventHook()

    def start_analysis(self):
        """
        Starts the analysis of the input instance with the defined dictionary.

        Fires ``on_start``, enters both managers as context managers, writes
        every analysis result to the output manager, and finally fires
        ``on_completed``.
        """
        self.on_start.fire(self)
        with self.input_manager, self.output_manager:
            for item in self._iter_results():
                self.output_manager.write(item)
        self.on_completed.fire(self)

    def _iter_results(self):
        """
        Lazily yields every analysis result for every chunk of input.

        Being a generator, it reads the next chunk only after all results of
        the previous one have been consumed.
        """
        for chunk in self.input_manager.read():
            for item in self.analysis.analyze_text(chunk):
                yield item
class MorphologicalAnalysis:
    """
    Performs the morphological analysis of a text entry, token by token.
    """

    def __init__(self, dictionary):
        """
        Initializes a new instance of MorphologicalAnalysis.

        :param dictionary: A morphological dictionary; its ``get_word`` method
            is used to resolve every token.
        """
        # Lookup state.
        self.dictionary = dictionary
        self._cache_dict = {}  # token -> data already obtained from the dictionary
        self._default_eagles = u'NP00000'  # default passed to get_word

        # Event hooks. NOTE: on_process is never fired by this class.
        self.on_process = EventHook()
        self.on_start = EventHook()
        self.on_completed = EventHook()

    def analyze_text(self, value):
        """
        Analyzes a piece of text and yields its morphological information.

        This is a generator: ``on_start`` fires when iteration begins and
        ``on_completed`` fires only once the caller has consumed every token.

        :param value: The text to analyse.
        :return: An iterable of ``[token, data]`` lists, one per token
            produced by ``nltk.word_tokenize``.
        """
        self.on_start.fire(self)
        for word in nltk.word_tokenize(value):
            # A missing *or falsy* cache entry triggers a fresh lookup.
            yield [word, self._cache_dict.get(word) or self._lookup(word)]
        self.on_completed.fire(self)

    def _lookup(self, word):
        """
        Fetches the data for a token from the dictionary and caches it.

        The dictionary is queried with the lower-cased token, while the cache
        is keyed by the token exactly as it appeared in the text.
        """
        info = self.dictionary.get_word(word.lower(), self._default_eagles)
        self._cache_dict[word] = info
        return info
def __init__(self, dictionary):
    """
    Initializes a new instance of MorphologicalAnalysis.

    :param dictionary: A morphological dictionary.
    """
    # Lookup state.
    self.dictionary = dictionary
    self._cache_dict = dict()  # Output Dictionary Cache
    self._default_eagles = u'NP00000'

    # Event hooks exposed to subscribers.
    self.on_process = EventHook()
    self.on_start = EventHook()
    self.on_completed = EventHook()
def __init__(self, dictionary, input_manager, output_manager):
    """
    Initializes a new instance of AnalysisProcess.

    :param dictionary: A morphological dictionary, handed to the internal
        MorphologicalAnalysis instance.
    :param input_manager: An instance of mosyn.util.io.Input as input method.
    :param output_manager: An instance of mosyn.util.io.Output as output method.
    """
    # Endpoints of the pipeline.
    self.input_manager = input_manager
    self.output_manager = output_manager

    # Collaborators and hooks, constructed in the original order.
    self.on_process = EventHook()
    self.analysis = MorphologicalAnalysis(dictionary)
    self.on_start = EventHook()
    self.on_completed = EventHook()
def __init__(self, filename):
    """
    Initializes a new instance of MorphologicalDictionary.

    Only records the file name and creates an empty mapping; nothing is read
    from disk here.

    :param filename: The dictionary filename.
    """
    self.filename = filename
    self.dictionary = dict()  # starts empty
    self.on_loaded = EventHook()
class MorphologicalDictionary:
    """
    Represents a morphological dictionary.

    ``self.dictionary`` maps each word (first field of a line) to a list of
    2-tuples holding the second and third fields of every line for that word
    (presumably lemma and EAGLES tag -- confirm against the data files).
    """

    def __init__(self, filename):
        """
        Initializes a new instance of MorphologicalDictionary.

        Nothing is read from disk until ``load`` is called.

        :param filename: The dictionary filename.
        """
        self.filename = filename
        self.dictionary = {}
        self.on_loaded = EventHook()

    def load(self):
        """
        Loads the dictionary file into ``self.dictionary``.

        The first line of the file is skipped as a header. Every other line
        is parsed with ``_parse_line`` and its second and third fields are
        appended to the entry list of its word. All lines for a word are
        collected even when they are not adjacent in the file, so a word that
        reappears later no longer discards its earlier entries.

        Keys already present in ``self.dictionary`` are replaced by the
        freshly loaded lists, and only after the whole file parsed cleanly.
        ``on_loaded`` is fired once loading has finished.

        :return: The ``self.dictionary`` mapping (a plain dict).
        :raise Exception: if a line does not agree with the dictionary format.
        """
        entries = {}
        with codecs.open(self.filename, mode='r', encoding='utf-8') as f:
            next(f, None)  # skip the header line; tolerate an empty file
            for line in f:
                word, second, third = self._parse_line(line)
                entries.setdefault(word, []).append((second, third))
        # Fresh lists per load, so loading twice does not duplicate entries.
        self.dictionary.update(entries)
        self.on_loaded.fire(self)
        return self.dictionary

    def get_word(self, value, default=u'NP00000'):
        """
        Gets the morphological information of the word.

        :param value: The word value to be found.
        :param default: The eagles default classification if the word is not found.
        :return: The list of 2-tuples stored for the word, or a one-element
            list ``[(value.lower(), default)]`` when the word is unknown.
        """
        return self.dictionary.get(value, [(value.lower(), default)])

    def _parse_line(self, text):
        """
        Converts a dictionary line to a 3-tuple of fields.

        Trailing whitespace is stripped and the line is split on commas; any
        field beyond the third is ignored. A line that starts with a comma is
        taken to be the entry for the comma character itself: the first two
        fields become ``','`` and the third is the last comma-separated field.

        :param text: The line to be converted.
        :return: A tuple with the first three fields of the line.
        :raise Exception: if the line has fewer than three fields.
        """
        fields = text.rstrip().split(u',')
        if text.startswith(u','):
            # The word is the separator itself, so a plain split cannot
            # recover it; rebuild the first two fields by hand.
            return ',', ',', fields[-1]
        if len(fields) >= 3:
            return fields[0], fields[1], fields[2]
        raise Exception('Error on dictionary format')