Example #1
def spin_crawl_threads(state, classifiers, MAX_BIT_SIZE, MAX_DL_THREADS, image_path):
    print("Running threads...")
    manager = Manager()

    location_q = manager.Queue(maxsize=16)
    image_q = manager.Queue(maxsize=64)
    state_lock = manager.Lock()

    generate_location = Process(target=generate_location_thread,
                                args=(location_q, MAX_BIT_SIZE),
                                name="generate_location")
    classification = Process(target=classification_thread,
                             args=(image_q, classifiers, image_path,
                                   state, state_lock), name="classification")
    download_image_t = Process(target=download_image_thread,
                               args=(location_q, image_q, MAX_DL_THREADS),
                               name="download_image")

    download_image_t.start()
    classification.start()
    generate_location.start()

    def kill_threads():
        for thread in active_children():
            thread.terminate()

    atexit.register(kill_threads)

    download_image_t.join()
    classification.join()
    generate_location.join()
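The three target functions live elsewhere in the original project. A minimal sketch of the shapes they would need, with the bodies reduced to placeholders and every helper treated as an assumption, could look like this:

import random

def generate_location_thread(location_q, max_bit_size):
    # Producer: put() blocks once the bounded queue holds 16 pending locations.
    while True:
        location_q.put(random.getrandbits(max_bit_size))

def download_image_thread(location_q, image_q, max_dl_threads):
    # Middle stage: pull a location, fetch its image, pass it downstream.
    while True:
        location = location_q.get()
        image_q.put(location)  # a real implementation would download the image here

def classification_thread(image_q, classifiers, image_path, state, state_lock):
    # Consumer: classify each downloaded image and update shared state under the lock.
    while True:
        image = image_q.get()
        with state_lock:
            pass  # run the classifiers on image and record the result in state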
Example #2
    def _save_data(self, path, cb, concat):
        if not os.path.exists(path):
            os.makedirs(path)
        target_filename = os.path.join(path, "{0}.jb".format(self.name))
        if self._saving_cache:
            while any(p.is_alive() for p in self._saving_ps):
                time.sleep(1e-3)

            p = Process(
                target=_caches_to_file,
                args=(
                    self._save_path,
                    self._start_data_segment,
                    self._cur_data_segment,
                    target_filename,
                    cb,
                    concat,
                ),
            )
            p.start()
            self._start_data_segment = self._cur_data_segment
        else:
            data = self._extract_q(0)
            p = Process(target=_dump, args=(data, target_filename, cb))
            p.start()
Example #3
    def _launch_aggregators(self):
        """
        Launches the various data aggregators, which serialize data from all processes.
        * DataAggregator - sqlite database for crawl data
        * LevelDBAggregator - leveldb database for javascript files
        """
        # DataAggregator
        self.aggregator_status_queue = Queue()
        self.data_aggregator = Process(target=DataAggregator.DataAggregator,
                                       args=(self.manager_params,
                                             self.aggregator_status_queue))
        self.data_aggregator.daemon = True
        self.data_aggregator.start()
        self.manager_params[
            'aggregator_address'] = self.aggregator_status_queue.get(
            )  # socket location: (address, port)

        # LevelDB Aggregator
        if self.ldb_enabled:
            self.ldb_status_queue = Queue()
            self.ldb_aggregator = Process(
                target=LevelDBAggregator.LevelDBAggregator,
                args=(self.manager_params, self.ldb_status_queue))
            self.ldb_aggregator.daemon = True
            self.ldb_aggregator.start()
            self.manager_params['ldb_address'] = self.ldb_status_queue.get(
            )  # socket location: (address, port)
Example #4
def test2():
    """
    Launch planetary_imager and process its data
    """
    import os
    Process(target=worker_process, args=()).start()
    Process(target=sink_process, args=()).start()
    os.system('planetary_imager &')
Example #5
def test_worker_sink():
    """
    Launch planetary_imager and process its data
    """
    from multiprocess import Process
    import os
    Process(target=worker_process, args=()).start()
    Process(target=sink_process, args=()).start()
Example #6
def test1():
    """
    Run the ventilator defined in this function.
    """
    Process(target=start_ventilator, args=()).start()
    #time.sleep(0.1)
    Process(target=worker_process, args=()).start()
    Process(target=sink_process, args=()).start()
Example #7
def test_client():
    """
    Launch planetary_imager and process its data
    """
    Process(target=start_ventilator, args=()).start()
    Process(target=worker_process, args=()).start()
    Process(target=sink_process, args=()).start()
    Process(target=client_process, args=()).start()
Example #8
    def mp_process(self,nprocs,func,*args):
        images=args[0]
#        def worker(imgs,i,chunksize, out_q,func,*args):
#            """ The worker function, invoked in a process. 'images' is a
#                list of images to span the process upon. The results are placed in
#                a dictionary that's pushed to a queue.
#            """
#            outdict = {}
#            for imn in range(len(imgs)-1):
#                print(i*chunksize+imn)
#                outdict[i*chunksize+imn] = func(imgs[imn],imgs[imn+1],*args[1:],i*chunksize+imn)
#            out_q.put(outdict)
    
        # Each process will get 'chunksize' images and a queue to put its
        # output dict into.
        out_q = Queue()
        chunksize = int(math.ceil((len(images)-1) / float(nprocs)))
        procs = []
        print("Chunks of size:",chunksize)
        for i in range(nprocs):
            if i == nprocs-1:
                p = Process(
                        target=worker,
                        args=(images[chunksize * i:len(images)-1],i,chunksize,out_q,func,*args))
                procs.append(p)
                p.start()
                self.loading.progress2['value']+=chunksize
                self.update()
            else:                
                p = Process(
                        target=worker,
                        args=(images[chunksize * i:chunksize * (i + 1)+1],i,chunksize,out_q,func,*args))
                procs.append(p)
                p.start()
                self.loading.progress2['value']+=chunksize
                self.update()
    
        # Collect all results into a single result dict. We know how many dicts
        # with results to expect.
        resultdict = {}
        for i in range(nprocs):
            resultdict.update(out_q.get())
    
        # Wait for all worker processes to finish
        for p in procs:
            p.join()
            
        results=[]
        for j in range(len(resultdict)):
            results.append(resultdict[j])

        return results
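The worker targeted above appears in the snippet only as a commented-out draft. Reconstructed from that draft, a runnable version would be:

def worker(imgs, i, chunksize, out_q, func, *args):
    # Apply func to each pair of consecutive images in this chunk and collect
    # the results in a dict keyed by the global frame index.
    outdict = {}
    for imn in range(len(imgs) - 1):
        outdict[i * chunksize + imn] = func(imgs[imn], imgs[imn + 1],
                                            *args[1:], i * chunksize + imn)
    out_q.put(outdict)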
Example #9
 def __run_processes(target, inqueue, outqueue=None):
     threads = max(1, (NUM_THREADS - 1))
     for n in range(threads):
         if outqueue:
             p = Process(target=target, args=(n, inqueue, outqueue))
         else:
             p = Process(target=target, args=(n, inqueue))
         p.daemon = True
         p.start()
     pp = Process(target=__progress, args=(-1, inqueue))
     pp.daemon = True
     pp.start()
     inqueue.close()
     inqueue.join()
Example #10
def main(args):
    lines = []
    word2int = {}
    int2word = {}
    count = 0
    line_count = 0
    pid = 0
    readFile = open(args.FILE_PATH, 'r')
    writeFile = open(args.OUTPUT_FILE_PATH, 'w')
    start = time()
    cpu_count = mp.cpu_count()
    pool = Pool(cpu_count - 1)
    processes = []
    print('Starting everything...')
    lock = mp.Lock()
    for line in readFile:
        print('line count: {}'.format(line_count))
        word1 = line.split('\n')[0].split('\t')[0].split('/')[0]
        word2 = line.split('\n')[0].split('\t')[1].split('/')[1]
        weight = line.split('\n')[0].split('\t')[-1]
        lines.append([word1, word2, weight])
        for word in [word1, word2]:
            if not word in word2int:
                word2int[word] = count
                int2word[count] = word
                count += 1
        line_count += 1
        if line_count % 8000000 == 0:
            # send the lines to be written to the new file
            p = Process(target=assignNumbers,
                        args=(pid, args, word2int, writeFile, lock, lines,
                              len(lines)))
            processes.append(p)
            p.start()
            pid += 1
            lines = []
    # flush any lines left over from the final, partially filled batch
    if lines:
        p = Process(target=assignNumbers,
                    args=(pid, args, word2int, writeFile, lock, lines,
                          len(lines)))
        processes.append(p)
        p.start()
    for process in processes:
        process.join()
    end = time()
    print('Total time for the whole process : {} seconds'.format(end - start))
    print('proceeding with writing mappings')
    # pool.map(writeFileModule, [(word2int, 'word2int.eng'), (int2word, 'int2word.eng')])
    P = Process(target=writeFileModule, args=(word2int, 'word2int.eng'))
    Q = Process(target=writeFileModule, args=(int2word, 'int2word.eng'))
    P.start()
    Q.start()
    P.join()
    Q.join()
    readFile.close()
    writeFile.close()
Example #11
    def mp_processTR(self, nprocs, func, *args):
        images = args[0]
        TrMat = args[1]
        TrMatNull = np.array([[[1., 0., 0.], [0., 1., 0.]]])
        TrM = np.vstack((TrMatNull, TrMat))
        fnames = args[2]
        print(len(images), len(TrM), len(fnames))
        out_q = Queue()
        chunksize = int(math.ceil((len(images) - 1) / float(nprocs)))
        procs = []
        print("Chunks of size:", chunksize)
        for i in range(nprocs):
            if i == nprocs - 1:
                p = Process(target=workerTR,
                            args=(images[chunksize * i:len(images)],
                                  TrM[chunksize * i:len(images)],
                                  fnames[chunksize * i:len(images)], i,
                                  chunksize, out_q, func, *args[3:]))
                procs.append(p)
                p.start()
                self.loading.progress2['value'] += chunksize
                self.update()
            else:
                p = Process(target=workerTR,
                            args=(images[chunksize * i:chunksize * (i + 1)],
                                  TrM[chunksize * i:chunksize * (i + 1)],
                                  fnames[chunksize * i:chunksize * (i + 1)], i,
                                  chunksize, out_q, func, *args[3:]))
                procs.append(p)
                p.start()
                self.loading.progress2['value'] += chunksize
                self.update()

        # Collect all results into a single result dict. We know how many dicts
        # with results to expect.
        resultdict = {}
        for i in range(nprocs):
            resultdict.update(out_q.get())

        # Wait for all worker processes to finish
        for p in procs:
            p.join()

        results = []
        for j in range(len(resultdict)):
            results.append(resultdict[j])

        return results
Example #12
def api_curtains_control(status):
    # read the aggregate status up front so it is defined for the error return below
    curtain_status_all = get_curtain_status_all()
    if status in STATES:
        if status == "open" and (curtain_status_all == "closed"
                                 or curtain_status_all == "partlyopen"):
            curtain_status_right = get_curtain_status("right")
            p_right = None
            if curtain_status_right == "closed":
                p_right = Process(target=open_curtain, args=('right', ))
                p_right.start()

            curtain_status_left = get_curtain_status("left")
            p_left = None
            if curtain_status_left == "closed":
                p_left = Process(target=open_curtain, args=('left', ))
                p_left.start()

            if p_right is not None:
                p_right.join()
            if p_left is not None:
                p_left.join()
            return get_curtain_status_all()

        elif status == "closed" and (curtain_status_all == "open"
                                     or curtain_status_all == "partlyopen"):
            curtain_status_right = get_curtain_status("right")
            p_right = None
            if curtain_status_right == "open":
                p_right = Process(target=close_curtain, args=('right', ))
                p_right.start()

            curtain_status_left = get_curtain_status("left")
            p_left = None
            if curtain_status_left == "open":
                p_left = Process(target=close_curtain, args=('left', ))
                p_left.start()

            if p_right is not None:
                p_right.join()
            if p_left is not None:
                p_left.join()
            return get_curtain_status_all()

    return {
        "error": 400,
        "curtain_status_all": curtain_status_all,
        "status": status
    }, 400
Example #13
    def _launch_aggregators(self):
        """Launch the necessary data aggregators"""
        if self.manager_params["output_format"] == "local":
            self.data_aggregator = SqliteAggregator.SqliteAggregator(
                self.manager_params, self.browser_params)
        elif self.manager_params["output_format"] == "s3":
            self.data_aggregator = S3Aggregator.S3Aggregator(
                self.manager_params, self.browser_params)
        else:
            raise Exception("Unrecognized output format: %s" %
                            self.manager_params["output_format"])
        self.data_aggregator.launch()
        self.manager_params[
            'aggregator_address'] = self.data_aggregator.listener_address

        # open connection to aggregator for saving crawl details
        self.sock = clientsocket(serialization='dill')
        self.sock.connect(*self.manager_params['aggregator_address'])

        # TODO refactor ldb aggregator to use new base classes
        if self.ldb_enabled:
            self.ldb_status_queue = Queue()
            self.ldb_aggregator = Process(
                target=LevelDBAggregator.LevelDBAggregator,
                args=(self.manager_params, self.ldb_status_queue))
            self.ldb_aggregator.daemon = True
            self.ldb_aggregator.start()
            # socket location: (address, port)
            self.manager_params['ldb_address'] = self.ldb_status_queue.get()
Example #14
    def start(self, initializer=None, initargs=()):
        '''
        Spawn a server process for this manager object
        '''
        assert self._state.value == State.INITIAL

        if initializer is not None and not hasattr(initializer, '__call__'):
            raise TypeError('initializer must be a callable')

        # pipe over which we will retrieve address of server
        reader, writer = connection.Pipe(duplex=False)

        # spawn process which runs a server
        self._process = Process(
            target=type(self)._run_server,
            args=(self._registry, self._address, self._authkey,
                  self._serializer, writer, initializer, initargs),
            )
        ident = ':'.join(str(i) for i in self._process._identity)
        self._process.name = type(self).__name__ + '-' + ident
        self._process.start()

        # get address of server
        writer.close()
        self._address = reader.recv()
        reader.close()

        # register a finalizer
        self._state.value = State.STARTED
        self.shutdown = util.Finalize(
            self, type(self)._finalize_manager,
            args=(self._process, self._address, self._authkey,
                  self._state, self._Client),
            exitpriority=0
            )
Example #15
def test():
    NUMBER_OF_PROCESSES = 4
    TASKS1 = [(mul, (i, 7)) for i in range(20)]
    TASKS2 = [(plus, (i, 8)) for i in range(10)]

    # Create queues
    task_queue = Queue()
    done_queue = Queue()

    # Submit tasks
    list(map(task_queue.put, TASKS1))

    # Start worker processes
    for i in range(NUMBER_OF_PROCESSES):
        Process(target=worker, args=(task_queue, done_queue)).start()

    # Get and print results
    print('Unordered results:')
    for i in range(len(TASKS1)):
        print('\t', done_queue.get())

    # Add more tasks using `put()` instead of `putMany()`
    for task in TASKS2:
        task_queue.put(task)

    # Get and print some more results
    for i in range(len(TASKS2)):
        print('\t', done_queue.get())

    # Tell child processes to stop
    for i in range(NUMBER_OF_PROCESSES):
        task_queue.put('STOP')
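This mirrors the task-queue example from the multiprocessing documentation; the helpers it assumes are roughly:

def mul(a, b):
    return a * b

def plus(a, b):
    return a + b

def worker(input, output):
    # Pull (function, args) tasks until the 'STOP' sentinel arrives.
    for func, args in iter(input.get, 'STOP'):
        output.put(func(*args))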
Example #16
    def _solve(self, X, Y, batch_size):
        '''
        Solve the multi-objective problem by multiple scalarized single-objective solvers.
        '''
        # generate scalarization weights
        weights = np.random.random((batch_size, self.problem.n_obj))
        weights /= np.expand_dims(np.sum(weights, axis=1), 1)

        # initial solutions
        X = np.vstack([X, lhs(X.shape[1], batch_size)])
        F = self.problem.evaluate(X, return_values_of=['F'])

        # optimization
        xs, ys = [], []
        queue = Queue()
        n_active_process = 0
        for i in range(batch_size):
            x0 = X[np.argmin(augmented_tchebicheff(F, weights[i]))]
            Process(target=optimization,
                    args=(self.problem, x0, weights[i], queue)).start()
            n_active_process += 1
            if n_active_process >= self.n_process:
                x, y = queue.get()
                xs.append(x)
                ys.append(y)
                n_active_process -= 1

        # gather result
        for _ in range(n_active_process):
            x, y = queue.get()
            xs.append(x)
            ys.append(y)

        return np.array(xs), np.array(ys)
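The optimization target is expected to solve one scalarized subproblem and push a single (x, y) pair onto the queue. The actual solver is not shown in the snippet; a placeholder honouring that contract might be:

def optimization(problem, x0, weight, queue):
    # Placeholder: a real implementation would refine x0 against the
    # weight-scalarized objective before reporting back.
    y = problem.evaluate(x0.reshape(1, -1), return_values_of=['F'])[0]
    queue.put((x0, y))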
Example #17
    def fit(self, X, Y):
        assert not self.fit_done
        assert len(X) == len(Y)

        possible_labels = list(set(y_val for y in Y for y_val in y))
        job_labels = np.array_split(possible_labels, self.n_jobs)

        with Manager() as manager:
            X_proxy = manager.list(X)
            Y_proxy = manager.list(Y)
            output_queue = Queue()
            processes = [
                Process(target=sequential_execute,
                        args=(output_queue, get_binary_clf_from_multilabel,
                              [{'X': X_proxy, 'Y': Y_proxy,
                                'label': lbl, 'return_label': True}
                               for lbl in job]))
                for job in job_labels
            ]
            [p.start() for p in processes]
            results = [output_queue.get()
                       for lbl in possible_labels]  # needs to be flattened
            [p.join() for p in processes]

        self.classifiers = dict(results)
        self.fit_done = True
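sequential_execute is assumed to call one function repeatedly with different keyword arguments and report every result through the queue, e.g.:

def sequential_execute(output_queue, func, kwargs_list):
    # Run func once per kwargs dict; the parent drains the queue to
    # collect exactly len(possible_labels) results.
    for kwargs in kwargs_list:
        output_queue.put(func(**kwargs))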
Example #18
    def make_csv(self, lock):
        file1 = open(self.out_csv1, "w")
        file1.write("id" + ',' + "level" + '\n')
        file2 = open(self.out_csv2, "w")
        file2.write("id" + ',' + "object_name" + '\n')
        file1.close()
        file2.close()

        if __name__ == '__main__':
            list_of_process = []
            list_of_queue1 = []
            list_of_queue2 = []
            for i in range(len(self.list_of_zips)):
                list_of_queue1.append(Queue())
                list_of_queue2.append(Queue())
                list_of_process.append(
                    Process(target=self.parse_Zip,
                            args=(i, lock, list_of_queue1[i],
                                  list_of_queue2[i])))
            for i in range(len(self.list_of_zips)):
                list_of_process[i].start()

            file1 = open(self.out_csv1, "a")
            for i in range(len(self.list_of_zips)):
                while not list_of_queue1[i].empty():
                    # fetch each queued (id, level) row once instead of calling get() twice
                    row = list_of_queue1[i].get()
                    file1.write(row[0] + ',' + row[1] + '\n')
            file1.close()
Example #19
    def multiprocessor(inpipe, outpipe, controlpipe):
        def returner_process(inp, outp, task):
            args, kwargs = inp.get()
            outpipe.put(task(*args, **kwargs))
            return True

        jobs = []
        while True:
            done = [x for x in jobs if x.ready()]
            if done:
                jobs = [x for x in jobs
                        if x not in done]  # Avoids race condition!
            else:
                sleep(0.1)

            for thing in done:
                thing.successful()
                assert thing.get()
            while len(jobs) < process_count:
                cmd = controlpipe.get()
                if cmd == stop_signal:
                    break
                elif cmd == True:
                    newjob = Process(target=returner_process,
                                     args=(inpipe, outpipe))
                    newjob.start()
                    jobs.append(newjob)
                    # I *think* the pipes have to be passed explicitly,
                    # but I haven't checked.
                else:
                    raise Exception
        outpipe.put(stop_signal)
Example #20
def test_replace(delay=0.):
    zwo.init_workers()
    zwo.init_sink()
    Process(target=start_ventilator, args=(delay, )).start()
    while True:
        time.sleep(10)
        zwo.replace_workers(zwo.dummy_worker)
Example #21
def run_parkinglot_expt(net, n):
    "Run experiment"

    seconds = args.time

    # Start the bandwidth and cwnd monitors in the background
    monitor = Process(target=monitor_devs_ng,
                      args=('%s/bwm.txt' % args.dir, 1.0))
    monitor.start()
    start_tcpprobe()

    # Get receiver and clients
    recvr = net.getNodeByName('receiver')
    #sender1 = net.getNodeByName('h1')

    # Start the receiver
    port = 5001
    recvr.cmd('iperf -s -p', port, '> %s/iperf_server.txt' % args.dir, '&')

    #waitListening(sender1, recvr, port)

    # TODO: start the sender iperf processes and wait for the flows to finish
    # Hint: Use getNodeByName() to get a handle on each sender.
    # Hint: Use sendCmd() and waitOutput() to start iperf and wait for them to finish
    # Hint: waitOutput waits for the command to finish allowing you to wait on a particular process on the host
    # iperf command to start flow: 'iperf -c %s -p %s -t %d -i 1 -yc > %s/iperf_%s.txt' % (recvr.IP(), 5001, seconds, args.dir, node_name)
    # Hint (not important): You may use progress(t) to track your experiment progress

    ### begin my code

    # get list of all hosts
    h = []  # Python list of hosts
    for i in range(n):
        #print "DEBUG", 'h%s' % (i+1)
        h.append(net.getNodeByName('h%s' % (i + 1)))

    # wait for ports on all iperf clients
    for i in range(n):
        waitListening(h[i], recvr, port)

    # send iperf cmd to all hosts
    for i in range(n):
        node_name = 'h%s' % (i + 1)
        h[i].sendCmd('iperf -c %s -p %s -t %d -i 1 -yc > %s/iperf_%s.txt' %
                     (recvr.IP(), port, seconds, args.dir, node_name))

    # wait for commands to finish
    iperf_results = {}
    progress(seconds)  # show progress while waiting
    for i in range(n):
        iperf_results[h[i].name] = h[i].waitOutput()

    ### end my code

    recvr.cmd('kill %iperf')

    # Shut down monitors
    monitor.terminate()
    stop_tcpprobe()
Example #22
def start_httpd(handler_class=SimpleHTTPRequestHandler):
    clear_httpd_messages()

    httpd_process = Process(target=run_httpd_forever, args=(handler_class,))
    httpd_process.start()

    httpd_url = HTTPD_MESSAGE_QUEUE.get()
    return httpd_process, httpd_url
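run_httpd_forever has to publish the server's URL on the module-level HTTPD_MESSAGE_QUEUE before it starts serving; one way it could be written (address and port are illustrative) is:

def run_httpd_forever(handler_class):
    # Bind to an ephemeral port, announce the URL, then serve until terminated.
    from http.server import HTTPServer
    httpd = HTTPServer(('127.0.0.1', 0), handler_class)
    HTTPD_MESSAGE_QUEUE.put('http://127.0.0.1:%d/' % httpd.server_port)
    httpd.serve_forever()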
Example #23
 def _launch_loggingserver(self):
     """ sets up logging server """
     self.logging_status_queue = Queue()
     loggingserver = Process(target=MPLogger.loggingserver,
                             args=(self.manager_params['log_file'],
                                   self.logging_status_queue))
     loggingserver.daemon = True
     loggingserver.start()
     return loggingserver
Example #24
    def call_job_fn(self, key, job_fn, args):
        # pylint: disable-next=import-outside-toplevel,no-name-in-module,import-error
        from multiprocess import Process

        # pylint: disable-next=not-callable
        proc = Process(target=job_fn,
                       args=(key, self._make_progress_key(key), args))
        proc.start()
        return proc.pid
Example #25
 def launch(self, listener_process_runner, *args):
     """Launch the aggregator listener process"""
     args = (self.manager_params, self.status_queue,
             self.shutdown_queue) + args
     self.listener_process = Process(target=listener_process_runner,
                                     args=args)
     self.listener_process.daemon = True
     self.listener_process.start()
     self.listener_address = self.status_queue.get()
Example #26
def test_replace_decluster(delay=0.):
    worker = zwo.make_worker_function(1, decluster=True)
    #Process(target = worker).start()
    zwo.init_workers(worker)
    zwo.init_sink()
    Process(target=start_ventilator_bigarr, args=(delay, )).start()
    while True:
        time.sleep(10)
        zwo.replace_workers(worker)
Example #27
 def launch(self, listener_process_runner):
     """Launch the aggregator listener process"""
     self.status_queue = Queue()
     self.listener_process = Process(target=listener_process_runner,
                                     args=(self.manager_params,
                                           self.status_queue))
     self.listener_process.daemon = True
     self.listener_process.start()
     self.listener_address = self.status_queue.get()
Example #28
 def submit(self, func, *args, **kwargs):
     # noinspection PyUnresolvedReferences
     from multiprocess import Process, Pipe
     from concurrent.futures import Future
     fut, (c0, c1) = Future(), Pipe(False)
     task = Process(target=self._target, args=(c1.send, func, args, kwargs))
     self.tasks[fut] = task
     task.start()
     return self._set_future(fut, c0.recv())
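The _target and _set_future helpers are not shown in the snippet; on the same executor class they could plausibly be:

    @staticmethod
    def _target(send, func, args, kwargs):
        # Runs in the child process: report either the result or the exception.
        try:
            send((True, func(*args, **kwargs)))
        except Exception as exc:
            send((False, exc))

    def _set_future(self, fut, outcome):
        # Resolve the Future from whatever the child sent back.
        ok, value = outcome
        if ok:
            fut.set_result(value)
        else:
            fut.set_exception(value)
        return fut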
Example #29
def test_kill():
    zwo.init_workers()
    zwo.init_sink()
    Process(target=start_ventilator).start()
    time.sleep(1)
    while True:
        zwo.kill_workers()
        time.sleep(0.5)
        zwo.init_workers()
        time.sleep(0.5)
Example #30
def runpool(address, number_of_processes):    
    # create a single server object -- children will each inherit a copy
    server = HTTPServer(address, RequestHandler)
    
    # create child processes to act as workers
    for i in range(number_of_processes-1):
        Process(target=serve_forever, args=(server,)).start()

    # main process also acts as a worker
    serve_forever(server)
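The serve_forever helper shared by the parent and every child can be as small as:

def serve_forever(server):
    # Each process accepts requests on the same inherited listening socket;
    # the OS hands every incoming connection to exactly one of them.
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        pass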