Example #1
    def tag(self):
        iqueue = parallel.manager.Queue(self.settings.QUEUE_SIZE)

        process = self._start_streaming(iqueue)
        count = parallel.run(do, aggregate, iqueue, self.num_processes)
        process.join()

        return count
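
All five examples share one shape: a streaming process fills iqueue while parallel.run drains it with a pool of workers and reduces their output through aggregate. The parallel module is project-local, so the sketch below is only a guess at the contract it appears to satisfy, not the real implementation; the worker loop, the None sentinels, and the oqueue name are all assumptions.

import multiprocessing

# Assumed module-level manager, mirroring parallel.manager in the examples.
manager = multiprocessing.Manager()

def _worker(do, iqueue, oqueue):
    # Assumed worker loop: apply do() to each queued item until a None
    # sentinel arrives (one per worker, enqueued by the streaming process).
    while True:
        item = iqueue.get()
        if item is None:
            break
        oqueue.put(do(item))

def run(do, aggregate, iqueue, num_procs):
    oqueue = manager.Queue()
    procs = [
        multiprocessing.Process(target=_worker, args=(do, iqueue, oqueue))
        for _ in range(num_procs)
    ]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    # Drain the workers' output and hand it to the caller's reducer.
    results = []
    while not oqueue.empty():
        results.append(oqueue.get())
    return aggregate(results)  # e.g. a count, a dict of scores, or None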
Example #2
    def test_run_without_return(self):
        iqueue = parallel.manager.Queue()

        process = multiprocessing.Process(
            target=stream, args=(list(range(self.count)), iqueue, 2)
        )
        process.start()

        expected = [(i + 10) for i in range(self.count)]
        aggregate = aggregate_without_return
        actual = parallel.run(do, aggregate, iqueue, 2)
        self.assertIsNone(actual)

        process.join()
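
The test depends on three helpers that are not shown: stream, do, and aggregate_without_return. The definitions below are hypothetical stand-ins consistent with how the test uses them; in particular, do adds 10 so the expected list lines up, and the aggregate deliberately returns nothing so parallel.run yields None.

def stream(items, iqueue, num_procs):
    # Producer: enqueue every item, then one sentinel per worker so each
    # worker process knows when to stop (assumed protocol).
    for item in items:
        iqueue.put(item)
    for _ in range(num_procs):
        iqueue.put(None)

def do(item):
    # Mirrors the test's expectation: expected = [(i + 10) for i in range(count)].
    return item + 10

def aggregate_without_return(results):
    # Consumes the worker output but intentionally returns None, which is
    # exactly what test_run_without_return asserts.
    for _ in results:
        pass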
Example #3
File: bug.py Project: andymeneely/sira-nlp
    def load(self):
        """
        Grabs all of the bugs within the specified range of years, parses
        them, cleans them up, and saves them. Returns the total number of
        bugs loaded.
        """
        iqueue = parallel.manager.Queue(self.settings.QUEUE_SIZE)
        process = self._start_streaming(iqueue)
        count = parallel.run(do, aggregate, iqueue, self.num_processes)
        process.join()

        return count
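
Examples #1, #3, and #5 all rely on a _start_streaming helper that is not shown. A minimal sketch, assuming it spawns a producer process over the loader's work items and returns it for the later join(); self.items and the stream function are placeholders for whatever the loader actually iterates.

    def _start_streaming(self, iqueue):
        # Hypothetical: launch the producer that feeds iqueue and hand the
        # process back so load() can join() it once parallel.run finishes.
        process = multiprocessing.Process(
            target=stream, args=(self.items, iqueue, self.num_processes)
        )
        process.start()
        return process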
Example #4
def compute(review_ids, idf, num_procs, key='lemma'):
    if not isinstance(idf, dict):
        raise ValueError('Argument IDF must be a dictionary!')

    # Share the IDF table and lookup key with the worker processes through
    # module-level globals (inherited on fork).
    global IDF, KEY
    IDF = idf
    KEY = key

    iqueue = parallel.manager.Queue()
    proc = multiprocessing.Process(target=stream,
                                   args=(review_ids, iqueue, num_procs))
    proc.start()
    tfidfs = parallel.run(do, aggregate, iqueue, num_procs)
    proc.join()

    return tfidfs
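
A usage sketch for compute; the review IDs and the IDF table are made-up values, and the shape of the returned tfidfs depends on this project's do and aggregate, which are not shown.

idf = {'fix': 1.2, 'bug': 0.8, 'crash': 2.5}      # hypothetical IDF table
tfidfs = compute([1001, 1002, 1003], idf, num_procs=2)

# Passing anything other than a dict fails fast:
# compute([1001], None, num_procs=2)
# ValueError: Argument IDF must be a dictionary!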
Example #5
    def load(self):
        """
        Grabs all of the reviews created within the specified range of years,
        parses them, cleans them up, and saves them. Returns the total number
        of loaded reviews.
        """
        iqueue = parallel.manager.Queue(self.settings.QUEUE_SIZE)
        process = self._start_streaming(iqueue)
        count = parallel.run(do, aggregate, iqueue, self.num_processes)
        process.join()

        self._cluster()

        return count