Ejemplo n.º 1
0
def main(file_path):
    """Create the actors, initialise and wire them, start the run, and wait.

    All interaction with the actors goes through ``send_message``; the only
    method called on them directly is ``join()``, used to block until each
    one has finished.

    Args:
        file_path: Forwarded unchanged to the storage manager in its
            ``'init'`` message.
    """
    # Instantiate and start all the actors. The constructors take no
    # arguments so that instantiation stays separate from initialization.
    storage_manager = DataStorageManager()
    word_freq_manager = WordFrequencyManager()
    word_indexer = WordsIndexer()
    word_index_controller = WordIndexController()

    # Initialization and wiring (a reference to one actor is passed to
    # another one inside a message).
    send_message(storage_manager, ['init', file_path])
    send_message(word_freq_manager, ['init', FREQUENCY_THRESHOLD])
    send_message(word_indexer, ['init', PAGE_SIZE])
    # Passing references to other actors is not a violation of the style,
    # but directly calling methods of the actors would be.
    send_message(word_index_controller,
                 ['init', storage_manager, word_freq_manager, word_indexer])

    # Start the execution.
    send_message(word_index_controller, ['run'])

    # Block the main thread until every actor has finished. A plain loop is
    # used because join() is called purely for its side effect.
    for actor in (storage_manager, word_freq_manager, word_indexer,
                  word_index_controller):
        actor.join()
Ejemplo n.º 2
0
 def _get_pages(self, words):
     """Request the pages for every word from the word indexer, then send 'eof'.

     Args:
         words: Iterable of words. It is stored on ``self.words`` and one
             ``['get_pages_for_word', self, word]`` message is sent to
             ``self.word_indexer`` per element, in iteration order.
     """
     self.words = words
     # Ask word_indexer to send back the pages corresponding to each word.
     # A plain loop is used because the sends are pure side effects.
     for word in words:
         send_message(self.word_indexer, ['get_pages_for_word', self, word])
     # Open question from the original author: force an "EOF" here, or die?
     send_message(self.word_indexer, ['eof', self])
Ejemplo n.º 3
0
    def _get_words(self, recipient):
        """Filter the word frequencies and send the remaining words, sorted.

        ``self._word_freqs`` is replaced by a new dict holding only the
        entries whose count is less than or equal to
        ``self._frequency_threshold``. The surviving words are then sent to
        ``recipient`` as ``['words', <list of words in ascending order>]``.

        Args:
            recipient: Target handed to ``send_message`` for the reply.
        """
        # Filter: keep only the words at or below the frequency threshold.
        self._word_freqs = {
            word: count
            for word, count in self._word_freqs.items()
            if count <= self._frequency_threshold
        }

        # Sort and send. TODO: is sorting really necessary? This assumes the
        # medium delivers the words in order.
        # Dict keys are unique, so sorting the keys directly yields the same
        # list as sorting the items by key and extracting the key.
        words = sorted(self._word_freqs)
        send_message(recipient, ['words', words])
Ejemplo n.º 4
0
    def _process_lines(self, lines):
        """Split every line into words and report each word to both managers.

        For each whitespace-separated word, an ``'increment_count'`` message
        goes to ``self.word_freq_manager`` and an ``'index_word'`` message
        (carrying the zero-based position of the line in ``lines``) goes to
        ``self.word_indexer``. Afterwards ``['get_words', self]`` is sent to
        the frequency manager.

        Args:
            lines: Iterable of strings.
        """
        # Lazily flatten the input into (line position, word) pairs so the
        # messages are emitted in the same order as the input is consumed.
        occurrences = (
            (position, token)
            for position, text in enumerate(lines)
            for token in text.split()
        )
        for position, token in occurrences:
            send_message(self.word_freq_manager, ['increment_count', token])
            send_message(self.word_indexer, ['index_word', token, position])

        # Open question from the original author: this assumes that
        # word_freq_manager has already processed all the words by now, but
        # how can one be sure about that?
        send_message(self.word_freq_manager, ['get_words', self])
Ejemplo n.º 5
0
 def _get_lines(self, recipient):
     """Send the stored lines to ``recipient`` as a ``['lines', ...]`` message.

     Args:
         recipient: Target handed to ``send_message``.
     """
     reply = ['lines', self._lines]
     send_message(recipient, reply)
Ejemplo n.º 6
0
 def _run(self):
     """Send a ``['lines', self]`` message to the storage manager.

     The message carries a reference to this actor, presumably so the
     storage manager can reply to it (confirm against its dispatch code).
     """
     request = ['lines', self]
     send_message(self.storage_manager, request)
Ejemplo n.º 7
0
 def _done(self):
     """Send a ``['die']`` message to each collaborating actor, then to self.

     The order is: storage manager, word frequency manager, word indexer,
     and finally this actor.
     """
     targets = (
         self.storage_manager,
         self.word_freq_manager,
         self.word_indexer,
         self,
     )
     for target in targets:
         send_message(target, ['die'])
Ejemplo n.º 8
0
 def _eof(self, recipient):
     """Send an ``['eof']`` message to ``recipient``.

     Args:
         recipient: Target handed to ``send_message``.
     """
     marker = ['eof']
     send_message(recipient, marker)
Ejemplo n.º 9
0
 def _get_pages_for_word(self, recipient, word):
     """Send the pages recorded for ``word`` to ``recipient``.

     When ``word`` is present in ``self._words_and_pages``, the reply is
     ``['pages_for_word', word, <pages stored for word>]``. This branch
     sends nothing for a word that is not present.

     Args:
         recipient: Target handed to ``send_message`` for the reply.
         word: Key looked up in ``self._words_and_pages``.
     """
     # Test membership on the container itself rather than on keys().
     if word in self._words_and_pages:
         send_message(recipient,
                      ['pages_for_word', word, self._words_and_pages[word]])