Example 1
    def _parse(self, input, parser_class):
        queues_callbacks = {}
        if self.coords_callback:
            queues_callbacks['coords'] = (multiprocessing.JoinableQueue(512),
                                          self.coords_callback)
        if self.nodes_callback:
            queues_callbacks['nodes'] = (multiprocessing.JoinableQueue(128),
                                         self.nodes_callback)
        if self.ways_callback:
            queues_callbacks['ways'] = (multiprocessing.JoinableQueue(128),
                                        self.ways_callback)
        if self.relations_callback:
            queues_callbacks['relations'] = (multiprocessing.JoinableQueue(128),
                                             self.relations_callback)

        def parse_it():
            setproctitle('imposm parser')
            queues = dict((name, q) for name, (q, c) in queues_callbacks.items())
            
            parser = parser_class(self.concurrency,
                ways_queue=queues.get('ways'),
                coords_queue=queues.get('coords'),
                nodes_queue=queues.get('nodes'),
                relations_queue=queues.get('relations'),
                marshal_elem_data=self.marshal_elem_data
            )
            parser.nodes_tag_filter = self.nodes_tag_filter
            parser.ways_tag_filter = self.ways_tag_filter
            parser.relations_tag_filter = self.relations_tag_filter
            parser.parse(input)
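            # one None sentinel per queue tells the consumer loop in _parse to stop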
            for q in queues.values():
                q.put(None)
            
        proc = multiprocessing.Process(target=parse_it)
        proc.start()
        
        while queues_callbacks:
            processed = False
            for items_type, (queue, callback) in list(queues_callbacks.items()):
                try:
                    while True:
                        items = queue.get_nowait()
                        if items is None:
                            # sentinel: this element type is finished
                            queue.task_done()
                            del queues_callbacks[items_type]
                            break
                        callback(items)
                        # record progress so the outer loop skips its sleep
                        processed = True
                except Empty:
                    pass
            if not processed:
                # wait a ms if all queues were empty
                # to give the parser a chance to fill them up
                time.sleep(0.001)
        proc.join()
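
Example 1 polls several JoinableQueues without blocking and stops each one on a
None sentinel. The same idea reduces to a small standalone sketch (the
producer/consumer names here are illustrative, not from the source):

    import multiprocessing
    import time
    from Queue import Empty  # Python 2; on Python 3: from queue import Empty

    def producer(q):
        for batch in (['a', 'b'], ['c']):
            q.put(batch)
        q.put(None)  # sentinel: no more batches

    def consume(q, callback):
        while True:
            try:
                batch = q.get_nowait()
            except Empty:
                time.sleep(0.001)  # queue momentarily empty; let the producer catch up
                continue
            if batch is None:
                q.task_done()
                break
            callback(batch)
            q.task_done()

    if __name__ == '__main__':
        q = multiprocessing.JoinableQueue(128)
        proc = multiprocessing.Process(target=producer, args=(q,))
        proc.start()
        consume(q, lambda batch: None)  # replace the no-op callback as needed
        proc.join()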
Example 2
    def parse(self, filename):
        pos_queue = multiprocessing.JoinableQueue(32)
        pool = []
        for _ in xrange(self.pool_size):
            proc = PBFParserProcess(
                pos_queue,
                nodes_callback=self.nodes_callback,
                coords_callback=self.coords_callback,
                ways_callback=self.ways_callback,
                relations_callback=self.relations_callback,
                nodes_tag_filter=self.nodes_tag_filter,
                ways_tag_filter=self.ways_tag_filter,
                relations_tag_filter=self.relations_tag_filter,
                marshal=self.marshal)
            pool.append(proc)
            proc.start()

        reader = PBFFile(filename)

        for pos in reader.blob_offsets():
            pos_queue.put(pos)

    # join() returns once every queued offset has been marked task_done() by a worker
    pos_queue.join()

        for proc in pool:
            pos_queue.put(None)
        for proc in pool:
            proc.join()
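
Example 2 combines the two standard JoinableQueue idioms: join() to wait for
all queued work, then one None sentinel per worker for shutdown. A minimal
self-contained version of that protocol (the worker body is assumed for
illustration; PBFParserProcess plays that role above):

    import multiprocessing

    def worker(q):
        while True:
            item = q.get()
            if item is None:
                q.task_done()
                break
            # ... process item ...
            q.task_done()

    if __name__ == '__main__':
        q = multiprocessing.JoinableQueue(32)
        pool = [multiprocessing.Process(target=worker, args=(q,))
                for _ in range(4)]
        for p in pool:
            p.start()
        for item in range(100):
            q.put(item)
        q.join()            # blocks until every item is task_done()
        for p in pool:
            q.put(None)     # one sentinel per worker
        for p in pool:
            p.join()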
Example 3
    def run(self):
        tasks = mp.Queue()
        results = mp.JoinableQueue()
        interim = []
        args = (tasks, results)
        # n_procs = min(mp.cpu_count(), len(self._videos))
        n_procs = mp.cpu_count()
        all_jobs = []

        for video_gt_pair in self._videos:
            gt = video_gt_pair[0]
            fp = video_gt_pair[1]

            for func in self._funcs:
                func_name = func[0]
                func_ptr = func[1]

                base_params = {
                    'gt_path': gt,
                    'video_path': fp,
                    'metric_func': func_ptr,
                    'init': False
                }

                for classifier in self._classifiers:
                    params = base_params.copy()
                    params.update(self._experiment_args)
                    params['classifier'] = classifier(metric=func_name)
                    log.info("Params ({}): {}".format(id(params), params))

                    all_jobs.append((params, self._experiment))

        for job in all_jobs:
            tasks.put(job)

        for _ in range(n_procs):
            # Process.start() returns None, so don't bind its result
            mp.Process(target=train_classifier, args=args).start()

        # collect exactly one result per job; task_done() here is what lets
        # results.join() below return once everything is accounted for
        for _ in range(len(all_jobs)):
            interim.append(results.get())
            results.task_done()

        for _ in range(n_procs):
            tasks.put(None)

        results.join()
        tasks.close()
        results.close()

        return interim
Example 4
 def __init__(self,
              pool_size,
              nodes_queue=None,
              ways_queue=None,
              relations_queue=None,
              coords_queue=None,
              marshal_elem_data=False):
     self.pool_size = pool_size
     self.pool = []
     self.nodes_callback = nodes_queue.put if nodes_queue else None
     self.ways_callback = ways_queue.put if ways_queue else None
     self.relations_callback = relations_queue.put if relations_queue else None
     self.coords_callback = coords_queue.put if coords_queue else None
     xml_chunk_size = READ_SIZE
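     # pre-allocated shared mmaps let the reader hand XML chunks to worker
     # processes without pickling them through a pipe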
     self.mmap_pool = MMapPool(pool_size * 8, xml_chunk_size * 8)
     self.mmap_queue = multiprocessing.JoinableQueue(8)
     self.marshal_elem_data = marshal_elem_data
Example 5
    def __init__(self, blocking=True, db_path=None, ncpu=1):
        """
        Init function

        Parameter
        ---------
        blocking: bool
            determines whether join() blocks or not
        db_path: str
            the string to a LevelDB for command persistence
        """
        self.__blocking = blocking
        self.__broker_queue = mp.Queue()
        self.__job_queue = mp.JoinableQueue()
        self.__manager = mp.Manager()  # keep a reference so the manager process stays alive
        self.__pending_dict = self.__manager.dict()
        self.__results_queue = mp.Queue()
        self.__results_queue_worker = mp.Queue()

        if db_path is None:
            tmp_db = NamedTemporaryFile(delete=False,
                                        dir=os.getcwd(),
                                        suffix=".db")
            tmp_db.close()
            self.__is_temp_db = True
            self.__db_path = tmp_db.name
        else:
            self.__is_temp_db = False
            self.__db_path = db_path

        self.__broker = _Broker(self.__broker_queue,
                                self.__job_queue,
                                self.__results_queue,
                                self.__results_queue_worker,
                                self.__pending_dict,
                                db_path=self.__db_path)
        self.__broker.daemon = False
        self.__broker.start()

        self.__worker = []
        for i in range(ncpu):
            p = _Worker(self.__broker_queue, self.__job_queue,
                        self.__results_queue_worker)
            p.daemon = False
            self.__worker.append(p)
            p.start()
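
The _Broker and _Worker classes are outside this excerpt; a worker consuming
from the JoinableQueue here would plausibly follow the usual sentinel loop
(sketch only, names assumed, not from the source):

    import multiprocessing as mp

    class _WorkerSketch(mp.Process):
        # illustrative stand-in for _Worker, which is not shown in the excerpt
        def __init__(self, job_queue, results_queue):
            super(_WorkerSketch, self).__init__()
            self.job_queue = job_queue
            self.results_queue = results_queue

        def run(self):
            while True:
                job = self.job_queue.get()
                if job is None:
                    self.job_queue.task_done()
                    break
                # ... execute the job and report its result ...
                self.results_queue.put(job)
                self.job_queue.task_done()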
Example 6
    def count_proc(elem_type, queue):
        # build a consumer that tallies elements until the None sentinel
        def count():
            total = 0
            while True:
                elements = queue.get()
                if elements is None:
                    queue.task_done()
                    break
                total += len(elements)
                queue.task_done()
            print elem_type, total

        return count

    nodes_queue = multiprocessing.JoinableQueue(128)
    ways_queue = multiprocessing.JoinableQueue(128)
    relations_queue = multiprocessing.JoinableQueue(128)

    procs = [
        multiprocessing.Process(target=count_proc('nodes', nodes_queue)),
        multiprocessing.Process(target=count_proc('ways', ways_queue)),
        multiprocessing.Process(
            target=count_proc('relations', relations_queue))
    ]
    for proc in procs:
        proc.start()

    parser = PBFMultiProcParser(2,
                                nodes_queue=nodes_queue,
                                ways_queue=ways_queue,
                                relations_queue=relations_queue)
Example 7
    def run_exp_for_all_classifiers(save_dir=DIR_CLASSIFIERS, parallel=True):
        """
        Runs all the saved classifiers that are located in save_dir.
        parallel, if True, will use the multiprocessing module to run
        multiple experiments at the same time.

        At present, however, this is broken because of how Python processes
        interact with C-library extensions: OpenCV crashes when experiments
        are run across multiple processes this way.

        A fix is under investigation -- until then, run sequentially or
        via threads.
        """
        classifiers = EXPClassifierHandler.get_all_saved_classifiers(
            DIR_CLASSIFIERS)
        classifiers = [x for x in classifiers if not x.endswith(".csv")]

        if len(classifiers) == 0:
            log.info("No more experiments to run, exiting.")
            return

        if parallel:
            videos_to_classifiers = {}

            for c in classifiers:
                clf = load_saved_classifier(save_dir + c)
                file_name = clf.video_path.split("/")[-1]

                if file_name not in videos_to_classifiers:
                    videos_to_classifiers[file_name] = []

                clfid = (clf.identifier, c)
                videos_to_classifiers[file_name].append(clfid)

            # So now we've mapped video_file: [classifiers], multiproc by k
            tasks = mp.Queue()
            results = mp.JoinableQueue()
            interim = []
            args = (tasks, results, save_dir)
            n_procs = min(mp.cpu_count(), len(videos_to_classifiers.keys()))

            for k in videos_to_classifiers.keys():
                these_classifiers = videos_to_classifiers[k]
                tasks.put(these_classifiers)

            delegator = EXPClassifierHandler.run_exp_from_mp_queue

            for _ in range(n_procs):
                # Process.start() returns None, so don't bind its result
                mp.Process(target=delegator, args=args).start()

            for _ in range(len(videos_to_classifiers.keys())):
                interim.append(results.get())
                results.task_done()

            for _ in range(n_procs):
                tasks.put(None)

            results.join()
            tasks.close()
            results.close()
        else:
            for c in classifiers:
                EXPClassifierHandler.run_exp_for_classifier(c, save_dir)

        # Maybe by the time we get here more will be waiting... keep going
        EXPClassifierHandler.run_exp_for_all_classifiers(save_dir, parallel)
Example 8
 def __init__(self, n, mmap_size):
     self.n = n
     self.mmap_size = mmap_size
     self.pool = [mmap.mmap(-1, mmap_size) for _ in range(n)]
     self.free_mmaps = set(range(n))
     self.free_queue = multiprocessing.JoinableQueue()
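
Only the constructor is shown; free_mmaps and free_queue suggest a
checkout/return API. A hedged sketch of what such methods could look like on
top of this state (method names assumed, not confirmed by the excerpt):

    def get(self):
        # hand out a free mmap index, first recycling any indices that
        # other processes have returned through free_queue
        while not self.free_mmaps:
            idx = self.free_queue.get()
            self.free_queue.task_done()
            self.free_mmaps.add(idx)
        return self.free_mmaps.pop()

    def free(self, idx):
        # give an mmap index back to the pool; safe to call from other processes
        self.free_queue.put(idx)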