Example #1
    def _multi_channel_apply_disk_parallel(self, function, cleanup_function,
                                           output_path, from_time, to_time,
                                           channels, cast_dtype,
                                           pass_batch_info, pass_batch_results,
                                           processes, **kwargs):

        self.logger.debug('Starting parallel operation...')

        if pass_batch_results:
            raise NotImplementedError("pass_batch_results is not "
                                      "implemented on 'disk' mode")

        # need to convert to a list, otherwise it cannot be pickled
        data = list(
            self.multi_channel(from_time, to_time, channels,
                               return_data=False))
        n_batches = self.indexer.n_batches(from_time, to_time, channels)

        self.logger.info('Data will be split into %s batches', n_batches)

        output_path = Path(output_path)

        # create local variables to avoid pickling problems
        _path_to_recordings = copy(self.path_to_recordings)
        _dtype = copy(self.dtype)
        _n_channels = copy(self.n_channels)
        _data_order = copy(self.data_order)
        _loader = copy(self.loader)
        _buffer_size = copy(self.buffer_size)

        reader = partial(RecordingsReader,
                         path_to_recordings=_path_to_recordings,
                         dtype=_dtype,
                         n_channels=_n_channels,
                         data_order=_data_order,
                         loader=_loader,
                         return_data_index=True)

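        # shared state visible to every worker: the result dtype and the index
        # of the next batch that is allowed to write to the output file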
        m = Manager()
        mapping = m.dict()
        next_to_write = m.Value('i', 0)

        def parallel_runner(element):
            i, _ = element

            res = util.batch_runner(element,
                                    function,
                                    reader,
                                    pass_batch_info,
                                    cast_dtype,
                                    kwargs,
                                    cleanup_function,
                                    _buffer_size,
                                    save_chunks=False,
                                    output_path=output_path)

            if i == 0:
                mapping['dtype'] = str(res.dtype)

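            # spin until it is this batch's turn, so results land in the file in order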
            while True:
                if next_to_write.value == i:
                    with open(str(output_path), 'wb' if i == 0 else 'ab') as f:
                        res.tofile(f)

                    next_to_write.value += 1
                    break

        # run jobs
        self.logger.debug('Creating processes pool...')

        p = Pool(processes)
        res = p.map_async(parallel_runner, enumerate(data))

        finished = 0

        if self.show_progress_bar:
            pbar = tqdm(total=n_batches)

            # poll the shared counter so the bar tracks batches already written
            while True:
                if next_to_write.value > finished:
                    pbar.update(next_to_write.value - finished)
                    finished = next_to_write.value

                if next_to_write.value == n_batches:
                    break

            pbar.close()
        else:
            res.get()

        # save metadata
        params = util.make_metadata(channels, self.n_channels,
                                    mapping['dtype'], output_path)

        return output_path, params
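
What makes this method work is the write coordination: workers compute batches in parallel, but the shared Manager Value forces the appends to output_path to happen strictly in batch order. Below is a minimal, self-contained sketch of just that pattern, using toy data and illustrative names (process_batch, out.bin) rather than the RecordingsReader pipeline itself.

import time
from functools import partial
from multiprocessing import Manager, Pool

import numpy as np


def process_batch(element, next_to_write, output_path):
    i, batch = element
    res = np.asarray(batch, dtype='float32') * 2  # stand-in for the real work

    # busy-wait until it is this batch's turn, then append to the single file
    while next_to_write.value != i:
        time.sleep(0.01)

    with open(output_path, 'wb' if i == 0 else 'ab') as f:
        res.tofile(f)

    next_to_write.value += 1


if __name__ == '__main__':
    batches = [[1, 2], [3, 4], [5, 6]]

    manager = Manager()
    next_to_write = manager.Value('i', 0)  # index of the next batch to write

    worker = partial(process_batch, next_to_write=next_to_write,
                     output_path='out.bin')

    with Pool(2) as pool:
        pool.map(worker, enumerate(batches))

    # contents are in batch order no matter which worker finished first
    print(np.fromfile('out.bin', dtype='float32'))
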
Example #2
    def writeEventsToCsv(self, urls, processedUrlsFName, batchSize=20):
        numUrls = len(urls)
        origNumUrls = numUrls
        urlsWithEvents = 0
        totalEvents = 0
        processedListings = 0
        numTimeouts = 0

        try:
            with open(processedUrlsFName, 'r') as pus:
                pUrls = set(pus.read().splitlines())
            logging.info(
                'Already processed {0} of {1} urls. Picking up where we'
                ' left off.'.format(len(pUrls), numUrls))
            urls = [url for url in urls if url not in pUrls]
            numUrls = len(urls)
        except IOError:
            pass

        with open(processedUrlsFName, 'a+') as pus:
            pUrls_writer = csv.writer(pus)
            with open(self.eventFile, 'a+') as f:
                writer = csv.writer(f)
                sttm = time.time()

                if self.eventMode == 'parallel':
                    batches = [
                        urls[x:x + batchSize]
                        for x in range(0, len(urls), batchSize)]
                    for b, batch in enumerate(batches):
                        logging.info('Starting batch {0} of {1}'.format(
                            b + 1, len(batches)))
                        manager = Manager()
                        batchQueue = Queue()
                        batchTimeoutList = manager.list()
                        batchProcessedUrls = manager.list()
                        batchEventQueue = manager.Queue()
                        batchEventsSaved = manager.Value('i', 0)
                        jobs = []
                        for i, url in enumerate(batch):
                            batchQueue.put(
                                [self.eventMode, url, batchEventQueue,
                                 batchProcessedUrls, batchTimeoutList])
                        for i in range(len(batch)):
                            proc = Process(
                                target=self.eventWorker, args=(batchQueue,))
                            proc.start()
                            jobs.append(proc)
                        writeProc = Process(
                            target=self.writeToCsvWorker, args=(
                                batchEventQueue, batchEventsSaved))
                        time.sleep(2)
                        writeProc.start()
                        for j, job in enumerate(jobs):
                            # allow 5 seconds per url (with a 60 s floor) before timing out
                            job.join(max(60, 5 * len(batch)))
                            if job.is_alive():
                                job.terminate()
                                logging.info(
                                    'Subprocess {0} of {1} timed out'.format(
                                        j + 1, len(jobs)))
                        writeProc.join(max(60, 8 * len(batch)))
                        totalEvents += batchEventsSaved.value
                        processedListings += len(batch)
                        for url in set(list(batchProcessedUrls)):
                            pUrls_writer.writerow([url])
                        urlsWithEvents += len(set(list(batchProcessedUrls)))
                        numTimeouts += len(set(list(batchTimeoutList)))
                        durMins, minsLeft = self.timeElapsedLeft(
                            sttm, b + 1, len(batches))
                        logging.info(
                            'Saved {0} new events from {1} of {2} listings. '
                            '\nEstimated time to '
                            'completion: ~{3} min.'.format(
                                batchEventsSaved.value,
                                len(batchProcessedUrls), len(batch), minsLeft))
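                        # clear out any leftover Chrome processes before the next batch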
                        os.system(
                            "ps aux | grep chrome | awk ' { print $2 } ' |"
                            " xargs kill -9")

                elif self.eventMode == 'series':
                    for i, url in enumerate(urls):
                        numEvents = 0
                        events = self.getEventsFromListingUrl(
                            self.eventMode, url, None, urls, [])
                        if events is None:
                            durMins, minsLeft = self.timeElapsedLeft(
                                sttm, i + 1, numUrls)
                            logging.info(
                                'No sales events scraped from listing'
                                ' {0} of {1}. Check url: {2}. {3} min.'
                                ' elapsed. {4} min. remaining.'.format(
                                    i + 1, numUrls, url, durMins,
                                    minsLeft))
                            continue
                        for event in events:
                            totalEvents += 1
                            numEvents += 1
                            writer.writerow(event)
                        urlsWithEvents += 1
                        pUrls_writer.writerow([url])
                        durMins, minsLeft = self.timeElapsedLeft(
                            sttm, i + 1, numUrls)
                        logging.info(
                            'Scraped {0} sales events from listing {1}'
                            ' of {2}. Scraped {3} total sales events in'
                            ' {4} min. Estimated time to completion:'
                            ' ~{5} min.'.format(
                                numEvents, i + 1, numUrls, totalEvents,
                                durMins, minsLeft))
                else:
                    raise ValueError(
                        'Must specify valid event scraping '
                        'mode: ["parallel", "series"]')
        if numUrls > 0:
            self.pctUrlsWithEvents = round(
                100.0 * urlsWithEvents / origNumUrls, 1)
        else:
            self.pctUrlsWithEvents = -999

        logging.info('#' * 100)
        logging.info('#' * 100)
        logging.info(
            'Scraped events from {0} of {1} ({2}%) urls.'.format(
                urlsWithEvents, numUrls, self.pctUrlsWithEvents).center(
                90, ' ').center(100, '#').upper())
        logging.info(
            ('{0} of {1} urls timed out while scraping events.'.format(
                numTimeouts, numUrls).upper().center(90, ' ').center(
                100, '#')))
        logging.info(
            ('Saved {0} events to {1}'.format(
                totalEvents, self.eventFile).upper().center(
                90, ' ').center(100, '#')))
        logging.info('#' * 100)
        logging.info('#' * 100)
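
The 'parallel' branch above boils down to a batch-of-processes skeleton: the urls for one batch go into a Queue, one worker Process per url drains it, results accumulate in Manager-backed containers, and anything still alive after the join timeout gets terminated. Here is a stripped-down, self-contained sketch of that skeleton with an illustrative scrape_worker and toy urls; it leaves out the Selenium scraping, CSV writing, and progress logging of the class above.

import queue
from multiprocessing import Manager, Process, Queue


def scrape_worker(task_queue, results):
    # pull urls until the queue is drained
    while True:
        try:
            url = task_queue.get(timeout=1)
        except queue.Empty:
            break
        results.append((url, len(url)))  # stand-in for the real scraping


if __name__ == '__main__':
    urls = ['http://a.example', 'http://b.example', 'http://c.example']
    batch_size = 2
    batches = [urls[x:x + batch_size] for x in range(0, len(urls), batch_size)]

    for batch in batches:
        manager = Manager()
        results = manager.list()  # shared across the worker processes
        task_queue = Queue()

        for url in batch:
            task_queue.put(url)

        jobs = [Process(target=scrape_worker, args=(task_queue, results))
                for _ in batch]
        for job in jobs:
            job.start()

        for job in jobs:
            job.join(max(60, 5 * len(batch)))  # bounded wait per process
            if job.is_alive():
                job.terminate()  # kill anything still hung before the next batch

        print(list(results))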