def main(file_path):
    """Create the actors, wire them together, start the run and wait for them.

    All configuration is delivered through ``send_message``; the only method
    called directly on an actor is ``join()``.

    Args:
        file_path: Forwarded unchanged to the storage manager in its ``init``
            message.
    """
    # Instantiate all the actors (per the original note, this also starts
    # them). The constructors take no arguments so that instantiation stays
    # separate from initialization.
    storage_manager = DataStorageManager()
    word_freq_manager = WordFrequencyManager()
    word_indexer = WordsIndexer()
    word_index_controller = WordIndexController()

    # Initialization and wiring (a reference to one actor is passed to another).
    send_message(storage_manager, ['init', file_path])
    send_message(word_freq_manager, ['init', FREQUENCY_THRESHOLD])
    send_message(word_indexer, ['init', PAGE_SIZE])
    # Passing references to other actors is not a violation of the style,
    # but directly calling methods of the actors is.
    send_message(
        word_index_controller,
        ['init', storage_manager, word_freq_manager, word_indexer],
    )

    # Start the execution.
    send_message(word_index_controller, ['run'])

    # Block the main thread until every actor has finished. A plain loop is
    # used because join() is called only for its side effect.
    for actor in (
        storage_manager,
        word_freq_manager,
        word_indexer,
        word_index_controller,
    ):
        actor.join()
def _get_pages(self, words):
    """Ask the word indexer for the pages of every word in ``words``.

    Stores ``words`` on the instance, sends one ``get_pages_for_word``
    message per word (passing ``self`` so the indexer knows whom to answer)
    and finishes with an ``eof`` message.

    Args:
        words: Iterable of words to look up; also kept as ``self.words``.
    """
    self.words = words

    # Tell word_indexer to send back the pages corresponding to each word.
    # A plain loop is used because the sends are side effects only.
    for word in words:
        send_message(self.word_indexer, ['get_pages_for_word', self, word])

    # Mark the end of the request stream (open question from the original
    # author: force an "EOF" here, or die?).
    send_message(self.word_indexer, ['eof', self])
def _get_words(self, recipient):
    """Filter the word counts by the threshold and send the sorted words.

    Keeps only the words whose count is less than or equal to
    ``self._frequency_threshold``, replaces ``self._word_freqs`` with that
    filtered mapping, and sends the remaining words in ascending order to
    ``recipient`` in a ``words`` message.

    Args:
        recipient: Actor that receives the ``['words', words]`` message.
    """
    # Filter: a single pass over the items avoids a second lookup per word.
    self._word_freqs = {
        word: count
        for word, count in self._word_freqs.items()
        if count <= self._frequency_threshold
    }

    # Sort and send. TODO: is sorting really necessary? It assumes the
    # medium will deliver the words in order.
    # Sorting the mapping sorts its keys, which is what the reply carries.
    words = sorted(self._word_freqs)
    send_message(recipient, ['words', words])
def _process_lines(self, lines):
    """Feed every word of ``lines`` to the frequency manager and the indexer.

    For each whitespace-separated word, an ``increment_count`` message goes
    to the word frequency manager and an ``index_word`` message, carrying the
    zero-based position of the line within ``lines``, goes to the word
    indexer. Once all lines are dispatched, the frequency manager is asked
    for its words with ``self`` as the reply recipient.

    Args:
        lines: Sequence of text lines to process.
    """
    for line_no, text in enumerate(lines):
        for token in text.split():
            count_msg = ['increment_count', token]
            index_msg = ['index_word', token, line_no]
            send_message(self.word_freq_manager, count_msg)
            send_message(self.word_indexer, index_msg)

    # Open question from the original author: at this point we assume that
    # word_freq_manager has already processed all the words, but how can one
    # be sure about that?
    words_request = ['get_words', self]
    send_message(self.word_freq_manager, words_request)
def _get_lines(self, recipient):
    """Send the stored lines to ``recipient`` in a ``lines`` message."""
    reply = ['lines', self._lines]
    send_message(recipient, reply)
def _run(self):
    """Start the run by sending a ``lines`` message to the storage manager.

    The message carries ``self`` as its only argument.
    """
    # NOTE(review): the other requests visible in this file use a
    # ``get_``-prefixed tag ('get_words', 'get_pages_for_word'); confirm that
    # 'lines' is the tag the storage manager actually dispatches on.
    request = ['lines', self]
    send_message(self.storage_manager, request)
def _done(self):
    """Send a ``die`` message to the three collaborating actors, then to self."""
    recipients = (
        self.storage_manager,
        self.word_freq_manager,
        self.word_indexer,
        self,
    )
    for actor in recipients:
        # A fresh message object is built for every recipient.
        send_message(actor, ['die'])
def _eof(self, recipient):
    """Send an ``eof`` message to ``recipient``."""
    marker = ['eof']
    send_message(recipient, marker)
def _get_pages_for_word(self, recipient, word):
    """Send the pages recorded for ``word`` to ``recipient``.

    Replies with ``['pages_for_word', word, pages]`` when ``word`` is present
    in ``self._words_and_pages``. Nothing is sent for a word that is absent.

    Args:
        recipient: Actor that receives the reply.
        word: Word whose pages are requested.
    """
    # Membership is tested on the mapping itself; going through .keys() adds
    # nothing. Assumes _words_and_pages is a dict-like mapping.
    if word in self._words_and_pages:
        pages = self._words_and_pages[word]
        send_message(recipient, ['pages_for_word', word, pages])