Example #1
 def __init__(self, dataset, maxsize=2):
     self.queue = mp.Queue(maxsize=maxsize)
     self.dataset = dataset
     self.is_done = mp.Event()
     self.is_shutdown = mp.Event()
     self.process = mp.Process(target=self._run)
     self.process.start()
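
The snippet above only shows the constructor; the _run target and the consuming side are not included. Below is a minimal, self-contained sketch of the same pattern (PrefetchingLoader and _producer are placeholder names, not the project's code): a child process fills a bounded queue, is_shutdown lets the parent stop it early, and is_done marks that production has finished.

import torch.multiprocessing as mp


def _producer(dataset, queue, is_shutdown, is_done):
    for item in dataset:
        if is_shutdown.is_set():   # parent asked the worker to stop early
            break
        queue.put(item)            # blocks while the bounded queue is full
    is_done.set()                  # nothing more will be produced


class PrefetchingLoader:
    def __init__(self, dataset, maxsize=2):
        self.queue = mp.Queue(maxsize=maxsize)
        self.dataset = dataset
        self.is_done = mp.Event()
        self.is_shutdown = mp.Event()
        self.process = mp.Process(
            target=_producer,
            args=(dataset, self.queue, self.is_shutdown, self.is_done))
        self.process.start()


if __name__ == "__main__":
    loader = PrefetchingLoader(list(range(5)))
    print([loader.queue.get() for _ in range(5)])   # drain everything produced
    loader.is_done.wait()
    loader.process.join()
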
Example #2
    def __init__(self, env, hidden_layer=[64, 64]):
        self.env = env
        #self.env.env.disableViewer = False
        self.num_inputs = env.observation_space.shape[0]
        self.num_outputs = env.action_space.shape[0]
        self.hidden_layer = hidden_layer

        self.params = Params()

        self.Net = ActorCriticNet
        self.model = self.Net(self.num_inputs, self.num_outputs,
                              self.hidden_layer)
        self.model.share_memory()
        self.shared_obs_stats = Shared_obs_stats(self.num_inputs)
        self.memory = ReplayMemory(10000000)
        self.value_memory = ReplayMemory(10000000)
        self.test_mean = []
        self.test_std = []

        self.noisy_test_mean = []
        self.noisy_test_std = []
        self.fig = plt.figure()
        #self.fig2 = plt.figure()
        self.lr = self.params.lr
        plt.show(block=False)

        self.test_list = []
        self.noisy_test_list = []
        self.queue = mp.Queue()
        self.value_queue = mp.Queue()

        self.mpdone = [mp.Event(), mp.Event(), mp.Event(), mp.Event()]

        self.process = []
        self.traffic_light = TrafficLight()
        self.counter = Counter()

        self.best_trajectory = ReplayMemory(5000)
        self.best_score_queue = mp.Queue()
        self.best_score = mp.Value("f", 0)
        self.max_reward = mp.Value("f", 1)

        self.expert_trajectory = ReplayMemory(1e7)

        self.validation_trajectory = ReplayMemory(6000 * 9)

        self.best_validation = 1.0
        self.current_best_validation = 1.0

        self.return_obs_stats = Shared_obs_stats(1)

        self.gpu_model = self.Net(self.num_inputs, self.num_outputs,
                                  self.hidden_layer)

        self.base_controller = None
Example #3
 def __init__(self, num_processes: int):
     self.writer = SummaryWriter("tensorboard/{}".format(uuid.uuid4()))
     self.queue = mp.Queue()
     self.evaluation_queue = mp.Queue()
     self.buffer = deque(maxlen=config.BUFFER_SIZE)
     self.trigger = mp.Event()
     self.network = create_network().to(th.device("cuda:0"))
     self.optimizer = torch.optim.SGD(self.network.parameters(),
                                      lr=config.LEARNING_RATE,
                                      momentum=config.MOMENTUM)
     self.scheduler = LambdaLR(self.optimizer, self.scheduler_fn)
     self.evaluator = Evaluator(create_gomoku(),
                                create_network().to(th.device("cuda:0")),
                                self.network,
                                self.trigger,
                                self.evaluation_queue,
                                num_rounds=config.NUM_ROUNDS)
     self.selfplayers = []
     self.rounds_selfplay = 0
     self.loss = None
     self.steps = 0
     for i in range(num_processes):
         game = create_gomoku()
         network = create_network().to(th.device("cuda:1"))
         player = AlphaPlayer(game, network)
         selfplayer = SelfPlayer(game, player, network, self.network,
                                 self.queue)
         self.selfplayers.append(selfplayer)
Example #4
def _test_proper_exit(use_workers, pin_memory, exit_method, hold_iter_reference,
                      loader_setup_event, tester_setup_event):
    num_workers = 2 if use_workers else 0

    if exit_method == 'worker_error' or exit_method == 'worker_kill':
        assert use_workers is True

    if exit_method == 'worker_error':
        worker_error_event = mp.Event()
    else:
        worker_error_event = None

    ds = TestProperExitDataset(12, worker_error_event)

    loader = DataLoader(ds, batch_size=1, shuffle=False,
                        num_workers=num_workers, pin_memory=pin_memory)
    error_it = 2

    if use_workers:
        # 2 is the magical per-worker prefetch number...
        # FIXME: change this after the number becomes configurable.
        assert len(loader) > (error_it + 2 + 1) * num_workers

    it = iter(loader)
    if use_workers:
        workers = it.workers

    def kill_pid(pid):
        psutil_p = psutil.Process(pid)
        psutil_p.kill()
        psutil_p.wait(JOIN_TIMEOUT)
        assert not psutil_p.is_running()

    for i, _ in enumerate(it):
        if i == 0:
            if not hold_iter_reference:
                del it
            loader_setup_event.set()
            tester_setup_event.wait()
            # ensure that the workers are still alive
            if use_workers:
                for w in workers:
                    assert w.is_alive()
            if worker_error_event is not None:
                worker_error_event.set()

        if i == error_it:
            if exit_method == 'loader_error':
                raise RuntimeError('Loader error')
            elif exit_method == 'loader_kill':
                kill_pid(os.getpid())
            elif exit_method == 'worker_kill':
                kill_pid(workers[0].pid)

    if not hold_iter_reference:
        # Tries to trigger the __del__ clean-up rather than the automatic
        # exiting of daemonic children. Technically it should be automatically
        # triggered, but I don't want to rely on the implementation detail of
        # Python gc.
        gc.collect()
Example #5
    def __init__(self,
                 port='2222',
                 width=640,
                 height=480,
                 depth=3,
                 num=2,
                 VERBOSE=False,
                 BGR2RGB=False,
                 saveRoot=None,
                 feedProxy=None):

        super(VideoStreamClient, self).__init__()

        self.port = port
        self.width = width
        self.height = height
        self.depth = depth
        self.num = num
        self.bufsize = width * height * depth * num
        self.ts = 0
        self.VERBOSE = VERBOSE
        self.BGR2RGB = BGR2RGB
        self.saveRoot = saveRoot
        self.frameLock = mp.Lock()
        self.frameNotifier = mp.Event()
        self.sharedFrame = torch.ByteTensor(height, width, depth)
        self.sharedFrame.storage().share_memory_()

        # If RobotCameraFeed Pyro4 proxy has been provided
        if feedProxy is not None:
            self.feed = feedProxy
        else:
            self.feed = None
Example #6
    def _collect_traj_parallel(self, task, debug=False):
        workers = []
        event = mp.Event()
        queue = mp.Queue()
        rollout_nums = np.full(self.num_threads,
                               self.rollout_num // self.num_threads,
                               dtype=int)
        rollout_nums[:self.rollout_num % self.num_threads] += 1
        for pid, rollout_num_per_thread in zip(range(self.num_threads),
                                               rollout_nums):
            if rollout_num_per_thread > 0:
                worker_args = (pid, event, queue, task, self.controller,
                               self.theta, rollout_num_per_thread,
                               self.rollout_len, self.M, self.phi,
                               self.adaptation_update_num, self.loss_func,
                               debug)
                workers.append(
                    mp.Process(target=_collect_traj_per_thread,
                               args=worker_args))
        for worker in workers:
            worker.start()

        rollouts = []
        for _ in workers:
            (rollouts_per_thread, _n_model_steps_total,
             _n_task_steps_total) = queue.get()
            rollouts.extend(rollouts_per_thread)
            self._n_model_steps_total += _n_model_steps_total
            self._n_task_steps_total += _n_task_steps_total
        event.set()
        return rollouts
Example #7
    def test_main_process_unclean_exit(self):
        '''There might be a ConnectionResetError or leaked semaphore warnings (due to the dirty process exit), \
but they are all safe to ignore'''
        worker_pids = mp.Array('i', [0] * 4)

        manager_exit_event = mp.Event()
        p = mp.Process(target=TestDataLoader._manager_process,
                       args=(self.dataset, worker_pids, manager_exit_event))
        p.start()

        manager_exit_event.wait()

        exit_status = [False] * len(worker_pids)
        start_time = time.time()
        pname = 'python'
        while True:
            for i in range(len(worker_pids)):
                pid = worker_pids[i]
                if not exit_status[i]:
                    if not TestDataLoader._is_process_alive(pid, pname):
                        exit_status[i] = True
            if all(exit_status):
                break
            else:
                time.sleep(1)
                self.assertFalse(time.time() - start_time > MANAGER_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT,
                                 'subprocess not terminated')
Example #8
    def __init__(
        self,
        id: int,
        hparams: utils.Hyperparameters,
        policy: MlpPolicy,
        learner: Learner,
        q: mp.Queue,
        update_counter: utils.Counter,
        log_path: Union[Path, str, None] = None,
        timeout=10,
    ):
        self.id = id
        self.hp = hparams
        self.policy = policy
        for p in self.policy.parameters():
            p.requires_grad = False
        self.learner = learner
        self.timeout = timeout
        self.q = q
        self.update_counter = update_counter
        self.log_path = log_path
        if self.log_path is not None:
            self.log_path = Path(self.log_path) / Path(f"a{self.id}")
            self.log_path.mkdir(parents=True, exist_ok=False)

        self.completion = mp.Event()
        self.p = mp.Process(target=self._act, name=f"actor_{self.id}")
        print(f"[main] actor_{self.id} Initialized")
Example #9
 def __init__(self, data_dir, batch_size, queue):
     super().__init__()
     logging.basicConfig(level=logging.INFO)
     self.logger = logging.getLogger(__name__)
     self.generator = DataGenerator(data_dir, batch_size, self.logger)
     self.queue = queue
     self.exit = mp.Event()
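
Only __init__ of this producer process is shown. A rough sketch of how the rest could look, assuming the generator yields batches (BatchProducer, batch_source and make_batches are hypothetical names, not the project's code): run() streams batches into the queue, and the exit event asks it to stop.

import torch.multiprocessing as mp


def make_batches():
    # stand-in for the project's DataGenerator
    return range(100)


class BatchProducer(mp.Process):
    def __init__(self, batch_source, queue):
        super().__init__()
        self.batch_source = batch_source
        self.queue = queue
        self.exit = mp.Event()

    def run(self):
        for batch in self.batch_source():
            if self.exit.is_set():    # parent requested a shutdown
                break
            self.queue.put(batch)

    def shutdown(self):
        self.exit.set()               # noticed by run() on its next iteration
        self.join()


if __name__ == "__main__":
    q = mp.Queue()
    producer = BatchProducer(make_batches, q)
    producer.start()
    print([q.get() for _ in range(3)])   # consume a few batches
    producer.shutdown()
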
Example #10
    def __init__(self, loader):
        self.dataset = loader.dataset
        self.scale = loader.scale
        self.collate_fn = loader.collate_fn
        self.batch_sampler = loader.batch_sampler
        self.num_workers = loader.num_workers
        self.pin_memory = loader.pin_memory and torch.cuda.is_available()
        self.timeout = loader.timeout

        self.sample_iter = iter(self.batch_sampler)

        base_seed = torch.LongTensor(1).random_().item()

        if self.num_workers > 0:
            self.worker_init_fn = loader.worker_init_fn
            self.worker_queue_idx = 0
            self.worker_result_queue = multiprocessing.Queue()
            self.batches_outstanding = 0
            self.worker_pids_set = False
            self.shutdown = False
            self.send_idx = 0
            self.rcvd_idx = 0
            self.reorder_dict = {}
            self.done_event = multiprocessing.Event()

            base_seed = torch.LongTensor(1).random_()[0]

            self.index_queues = []
            self.workers = []
            for i in range(self.num_workers):
                index_queue = multiprocessing.Queue()
                index_queue.cancel_join_thread()
                w = multiprocessing.Process(
                    target=_ms_loop,
                    args=(self.dataset, index_queue, self.worker_result_queue,
                          self.done_event, self.collate_fn, self.scale,
                          base_seed + i, self.worker_init_fn, i))
                w.start()
                self.index_queues.append(index_queue)
                self.workers.append(w)

            if self.pin_memory:
                self.data_queue = queue.Queue()
                pin_memory_thread = threading.Thread(
                    target=_pin_memory_loop,
                    args=(self.worker_result_queue, self.data_queue,
                          torch.cuda.current_device(), self.done_event))
                pin_memory_thread.daemon = True
                pin_memory_thread.start()
                self.pin_memory_thread = pin_memory_thread
            else:
                self.data_queue = self.worker_result_queue

            _update_worker_pids(id(self), tuple(w.pid for w in self.workers))
            _set_SIGCHLD_handler()
            self.worker_pids_set = True

            for _ in range(2 * self.num_workers):
                self._put_indices()
Example #11
    def __init__(self, loader):
        self.dataset = loader.dataset
        self.collate_fn = loader.collate_fn
        self.batch_sampler = loader.batch_sampler
        self.num_workers = loader.num_workers
        self.pin_memory = loader.pin_memory and torch.cuda.is_available()
        self.timeout = loader.timeout

        self.sample_iter = iter(self.batch_sampler)

        base_seed = torch.LongTensor(1).random_().item()

        if self.num_workers > 0:
            self.worker_init_fn = loader.worker_init_fn
            self.index_queues = [
                multiprocessing.Queue() for _ in range(self.num_workers)
            ]
            self.worker_queue_idx = 0
            self.worker_result_queue = multiprocessing.Queue()
            self.batches_outstanding = 0
            self.worker_pids_set = False
            self.shutdown = False
            self.send_idx = 0
            self.rcvd_idx = 0
            self.reorder_dict = {}
            self.done_event = multiprocessing.Event()

            self.workers = [
                multiprocessing.Process(
                    target=_worker_loop,
                    args=(self.dataset, self.index_queues[i],
                          self.worker_result_queue, self.done_event,
                          self.collate_fn, base_seed + i, self.worker_init_fn,
                          i)) for i in range(self.num_workers)
            ]

            if self.pin_memory:
                self.data_queue = queue.Queue()
                self.pin_memory_thread = threading.Thread(
                    target=_pin_memory_loop,
                    args=(self.worker_result_queue, self.data_queue,
                          self.done_event, self.pin_memory,
                          torch.cuda.current_device()))
                self.pin_memory_thread.daemon = True
                self.pin_memory_thread.start()
            else:
                self.data_queue = self.worker_result_queue

            for w in self.workers:
                w.daemon = True  # ensure that the worker exits on process exit
                w.start()

            _update_worker_pids(id(self), tuple(w.pid for w in self.workers))
            _set_SIGCHLD_handler()
            self.worker_pids_set = True

            # prime the prefetch loop
            for _ in range(2 * self.num_workers):
                self._put_indices()
Example #12
    def __init__(self, wait=15):
        self._wait = wait
        self.canceller = mp.Event()

        self._coroutines = {}
        self._exited = []

        self.serial = True
Example #13
    def __init__(self, wait=15):
        self._wait = wait
        self._processes = {}
        self._references = []
        self.canceller = mp.Event()
        set_start_method()

        self.serial = False
Example #14
    def test_needs_reset(self):

        outdir = tempfile.mkdtemp()

        agent = mock.Mock()
        env = mock.Mock()
        # First episode: 0 -> 1 -> 2 -> 3 (reset)
        # Second episode: 4 -> 5 -> 6 -> 7 (done)
        env.reset.side_effect = [("state", 0), ("state", 4)]
        env.step.side_effect = [
            (("state", 1), 0, False, {}),
            (("state", 2), 0, False, {}),
            (("state", 3), 0, False, {
                "needs_reset": True
            }),
            (("state", 5), -0.5, False, {}),
            (("state", 6), 0, False, {}),
            (("state", 7), 1, True, {}),
        ]

        counter = mp.Value("i", 0)
        episodes_counter = mp.Value("i", 0)
        stop_event = mp.Event()
        exception_event = mp.Event()
        train_loop(
            process_idx=0,
            env=env,
            agent=agent,
            steps=5,
            outdir=outdir,
            counter=counter,
            episodes_counter=episodes_counter,
            stop_event=stop_event,
            exception_event=exception_event,
        )

        self.assertEqual(agent.act.call_count, 5)
        self.assertEqual(agent.observe.call_count, 5)
        # done=False and reset=True at state 3
        self.assertFalse(agent.observe.call_args_list[2][0][2])
        self.assertTrue(agent.observe.call_args_list[2][0][3])

        self.assertEqual(env.reset.call_count, 2)
        self.assertEqual(env.step.call_count, 5)
Example #15
    def __init__(self, trainable_gan, devices):
        self.trainable_gan = trainable_gan
        self.sync = 0
        self.processes = []
        self.report_weights_event = []
        self.report_weights_queue = []
        self.set_weights_queue = []
        loaded_events = []
        head_device = torch.device(
            list(self.trainable_gan.parameters())[0].device).index
        self.head_device = head_device
        if devices == "-1":
            print("Running on all available devices: ",
                  torch.cuda.device_count())
            devices = list(range(torch.cuda.device_count()))
        else:
            devices = [int(d) for d in devices.split(",")]
        self.devices = devices
        print("Devices:", devices)
        save_complete_event = mp.Event()
        save_event = mp.Event()
        self.save_event = save_event
        self.save_complete_event = save_complete_event

        for device in devices:
            loaded_event = mp.Event()
            report_weights_event = mp.Event()
            set_weights_queue = mp.Queue()
            report_weights_queue = mp.Queue()
            inputs = self.trainable_gan.gan.inputs.to(device)
            p = mp.Process(target=train,
                           args=(device, head_device, trainable_gan.gan,
                                 inputs, loaded_event, report_weights_queue,
                                 set_weights_queue, report_weights_event,
                                 save_event, save_complete_event,
                                 self.trainable_gan.save_file))
            p.start()
            self.processes.append(p)
            self.report_weights_event.append(report_weights_event)
            self.report_weights_queue.append(report_weights_queue)
            self.set_weights_queue.append(set_weights_queue)
            loaded_event.wait()
            save_event = None
Example #16
 def __init__(self, dataset, num_workers):
     # Publics
     self.dataset = dataset
     self.num_workers = num_workers
     self.job_queue = mp.Queue()
     self.result_queue = mp.Queue()
     # Privates
     self._processes = []
     self._workers_started = False
     self._workers_killed = False
     self._interrupt_event = mp.Event()
     self._samples_outstanding = len(dataset)
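
The worker target for these queues is not part of the snippet. A small sketch under that assumption (_worker is a hypothetical name): each worker pulls a sample index from job_queue, pushes the loaded sample to result_queue, and polls with a timeout so it notices _interrupt_event promptly.

from queue import Empty

import torch.multiprocessing as mp


def _worker(dataset, job_queue, result_queue, interrupt_event):
    while not interrupt_event.is_set():
        try:
            index = job_queue.get(timeout=0.1)   # poll so the event is noticed
        except Empty:
            continue
        result_queue.put((index, dataset[index]))


if __name__ == "__main__":
    data = ["a", "b", "c"]
    jobs, results, interrupt = mp.Queue(), mp.Queue(), mp.Event()
    for i in range(len(data)):
        jobs.put(i)
    w = mp.Process(target=_worker, args=(data, jobs, results, interrupt))
    w.start()
    print(sorted(results.get() for _ in range(len(data))))
    interrupt.set()   # all samples accounted for, let the worker exit
    w.join()
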
Example #17
    def _test_autograd_sharing(self, var):
        ready = mp.Event()
        master_modified = mp.Event()
        queue = mp.Queue()
        p = mp.Process(target=autograd_sharing, args=(queue, ready, master_modified))
        p.start()
        queue.put(var)

        ready.wait()
        var.data[0,0] = 1000
        if var.grad is not None:
            var.grad.data[:] = torch.ones(5, 5) * 4
        master_modified.set()

        worker_ok = queue.get()
        self.assertTrue(worker_ok)

        self.assertEqual(var.data, torch.ones(5, 5))
        if var.grad is not None:
            self.assertEqual(var.grad.data, torch.ones(5, 5))
        p.join()
Example #18
    def _test_autograd_sharing(self, var):
        ready = mp.Event()
        master_modified = mp.Event()
        queue = mp.Queue()
        p = mp.Process(target=autograd_sharing,
                       args=(queue, ready, master_modified))
        p.start()
        var._grad = Variable(torch.zeros(5, 5), requires_grad=False)
        queue.put(var)

        ready.wait()
        var.data[0, 0] = 1000
        var.grad.data[:] = torch.ones(5, 5) * 4
        master_modified.set()

        worker_ok = queue.get()
        self.assertTrue(worker_ok)

        self.assertEqual(var.data, torch.ones(5, 5))
        self.assertEqual(var.grad.data, torch.ones(5, 5) * 4)
        p.join()
Example #19
def train(rank,
          model,
          train_data,
          val_data,
          test_data,
          optimizer,
          epochs=2,
          log_every=100):
    ema_loss = None
    criterion = nn.BCEWithLogitsLoss()
    best_iter = (0., 0, 0)
    best_test = 0.
    embeds = None
    for epoch in range(epochs):
        random.shuffle(train_data)
        for i, batch in enumerate(train_data):
            _, text, users, subs, lengths, metafeats, labels = batch
            text, users, subs, metafeats, labels = Variable(text), Variable(
                users), Variable(subs), Variable(metafeats), Variable(labels)
            # if os.path.isfile('checkpoint.pt'):
            #     checkpoint = torch.load('checkpoint.pt')
            #     model.load_state_dict(checkpoint['model'])
            #     optimizer.load_state_dict(checkpoint['optimizer'])
            optimizer.zero_grad()
            if constants.CUDA:
                outputs = model(text, users, subs, metafeats, lengths).cuda()
            else:
                outputs = model(text, users, subs, metafeats, lengths)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()  # compute gradients before the optimizer update
            optimizer.step()

            if ema_loss is None:
                ema_loss = loss.data
            else:
                ema_loss = 0.01 * loss.data + 0.99 * ema_loss

            if i % 10 == 0:
                print((epoch, i, ema_loss))
            if i % log_every == 0:
                auc = evaluate_auc(model, val_data)
                print(("Val AUC", epoch, i, auc))
                if auc > best_iter[0]:
                    best_iter = (auc, epoch, i)
                    print(("New best val!", best_iter))
                    best_test = evaluate_auc(model, test_data)
                    if auc > 0.7:
                        ids, embeds = get_embeddings(train_data + val_data +
                                                     test_data)
    print(("Overall best val:", best_iter))
    # save_checkpoint(model, optimizer)
    mp.Event().wait
    return best_iter[0]
Example #20
def validate(evm_model, args_evm, class_partition, feature_dic, data_name,
             gpu):
    with torch.no_grad():
        print(f"start evaluating {data_name}")
        t1 = time.time()
        NG = min(len(gpu), number_of_classes)
        assert NG > 0
        classes_to_process = list(range(1, int(number_of_classes) + 1))
        list_acc = [0.0] * NG
        list_count = [0] * NG
        Q = mp.Queue()
        done_event = [mp.Event() for k in range(NG)]

        process_list = []
        if data_name != "unknown":
            for k in range(NG):
                p = mp.Process(
                    target=val_process,
                    args=(class_partition[k], feature_dic[k], evm_model,
                          args_evm, gpu[k], Q, done_event[k]),
                )
                p.start()
                process_list.append(p)
        else:
            for k in range(NG):
                p = mp.Process(
                    target=val_process_unknown,
                    args=(class_partition[k], feature_dic[k], evm_model,
                          args_evm, gpu[k], Q, done_event[k]),
                )
                p.start()
                process_list.append(p)

        for k in range(NG):
            g, a, c = Q.get()
            print(g, a, c)
            i = gpu.index(g)
            list_acc[i] = a
            list_count[i] = c
            done_event[i].set()

        for p in process_list:
            p.join()

        print(data_name, "total accuracy = ",
              np.average(np.array(list_acc), weights=np.array(list_count)))

        del Q, done_event
        del p, process_list, g, a, c, list_acc, list_count
        t2 = time.time()
        print("validation time = ", t2 - t1)
        return
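
val_process and val_process_unknown are not shown. A sketch of their assumed shape (not the project's code) illustrates the handshake used above: each worker reports (gpu, accuracy, count) through the queue and then blocks on its own done_event, so it stays alive until the parent has read the result.

def val_process(classes, features, evm_model, args_evm, gpu, Q, done_event):
    acc, count = 0.0, 0
    # ... evaluate `classes`/`features` with `evm_model` on device `gpu`,
    #     accumulating accuracy into `acc` and the sample count into `count` ...
    Q.put((gpu, acc, count))
    done_event.wait()   # stay alive until validate() has read the result
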
Example #21
 def do_test():
     x = torch.zeros(5, 5)
     q = mp.Queue()
     e = mp.Event()
     data = [x, x[:, 1]]
     q.put(data)
     p = mp.Process(target=simple_fill, args=(q, e))
     lc.check_pid(p.pid)
     p.start()
     e.wait()
     self.assertTrue(data[0].eq(4).all())
     self.assertTrue(data[1].eq(4).all())
     p.join(1)
     self.assertFalse(p.is_alive())
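
simple_fill is not shown; the following sketch is an assumption reconstructed from the assertions above: the child receives the shared tensors through the queue, fills them in place, and signals completion via the event.

def simple_fill(queue, event):
    data = queue.get()   # [x, x[:, 1]] still shares storage with the parent
    data[0][:] = 4       # the in-place write is visible in the parent process
    event.set()          # tell the parent the fill is finished
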
Example #22
def sample(env, policy, batchsz, process_num, warm_up=False):
    """
    Given batchsz tasks in total, batchsz is split equally across the processes;
    when the processes return, their data is merged and returned.
    :param env:
    :param policy:
    :param batchsz:
    :param process_num:
    :return: batch
    """

    # batchsz is split across the processes, so the final
    # batch size may be slightly larger than the batchsz parameter
    process_batchsz = np.ceil(batchsz / process_num).astype(np.int32)
    # buffer to save all data
    queue = mp.Queue()

    # start one process per pid in range(process_num).
    # When a tensor is put on a Queue, the producing process must stay alive
    # until Queue.get() is called; see
    # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847
    # CUDA tensors on a multiprocessing queue remain problematic; see
    # https://discuss.pytorch.org/t/cuda-tensors-on-multiprocessing-queue/28626
    # so tensors are converted to numpy before being put on the queue.
    evt = mp.Event()
    processes = []
    for i in range(process_num):
        process_args = (i, queue, evt, env, policy, process_batchsz)
        if warm_up:
            processes.append(
                mp.Process(target=warmupsampler, args=process_args))
        else:
            processes.append(mp.Process(target=sampler, args=process_args))
    for p in processes:
        # run the process as a daemon so it is killed once the main process stops.
        p.daemon = True
        p.start()

    # get the first Memory object, then merge the others into it via its append function.
    pid0, buff0 = queue.get()
    for _ in range(1, process_num):
        pid, buff_ = queue.get()
        buff0.append(buff_)  # merge current Memory into buff0
    evt.set()

    # now buff saves all the sampled data
    buff = buff0

    return buff
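
sampler and warmupsampler are not shown. A sketch of the assumed worker side makes the role of evt explicit: the collected data goes onto the queue as plain Python or numpy objects, and the worker then waits on evt so it is still alive when sample() calls queue.get().

def sampler(pid, queue, evt, env, policy, batchsz):
    buff = []   # stand-in for the project's Memory buffer
    # ... roll out `policy` in `env` until roughly `batchsz` transitions are
    #     collected, converting tensors to numpy before storing them ...
    queue.put((pid, buff))
    evt.wait()  # released by evt.set() once sample() has drained the queue
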
Example #23
 def __init__(self, args, runner_maker):
     self.state_queues = []
     self.prev = []
     self.episodes = []
     self.stop_queueing = mp.Event()
     self.done = mp.Event()
     self.stop_queueing.clear()
     self.done.clear()
     self.num = args.nthreads
     self.processes = []
     for i in range(self.num):
         # queue size of 1 seems to be pretty ideal, longer queue and the resetting takes time
         queue = mp.Queue(1)
         self.state_queues.append(queue)
         self.prev.append(None)
         self.episodes.append([])
         worker = ThreadedWorker(runner_maker, queue, self.done,
                                 self.stop_queueing)
         worker.start()
         self.processes.append(worker)
     self.args = args
     runner_temp = runner_maker()
     self.policy_net = runner_temp.policy_net
     self.value_net = runner_temp.value_net
Example #24
 def __init__(self, env, actor, config, memory = None, logwriter = None, name = ""):
   self.config = deepcopy(config)
   self.logwriter = logwriter
   self.memory = memory
   self.episode_num = mp.Value(c_uint)
   self.runcondition = mp.Event()
   self.memory_queue = mp.Queue(maxsize = config['replay_skip'] + 1) if memory is not None else None
   self.logqueue = mp.Queue(maxsize = 1) if logwriter is not None else None
   with self.episode_num.get_lock():
     self.episode_num.value = 1
   self.runner = mp.Process(target=envrun, 
     args=(actor, env, self.episode_num, config, self.runcondition),
     kwargs = {'iterations': config['replay_skip'] + 1, 
       'memoryqueue' : self.memory_queue, 'logqueue' : self.logqueue, 'name' : name})
   self.runner.start()
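
envrun itself is not shown. A sketch of its assumed outline, based only on the arguments passed above: the child waits on runcondition before collecting a block of experience, so the trainer can pace data collection with set()/clear().

def envrun(actor, env, episode_num, config, runcondition,
           iterations=1, memoryqueue=None, logqueue=None, name=""):
    while True:
        runcondition.wait()   # block until the trainer asks for more experience
        transitions = []
        # ... step `env` with `actor` for `iterations` steps, appending
        #     (state, action, reward, next_state, done) tuples ...
        if memoryqueue is not None:
            memoryqueue.put(transitions)
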
Example #25
 def __init__(self,
              n_processes: int = 1,
              process_cls=None,
              cancel=None,
              verbose: bool = False,
              regular_get: bool = True,
              tracker=None):
     store_attr(but='process_cls')
     self.process_cls = ifnone(process_cls, DataFitProcess)
     self.queue = mp.JoinableQueue(maxsize=self.n_processes)
     self.cancel = ifnone(self.cancel, mp.Event())
     self.pipe_in, self.pipe_out = mp.Pipe(False) if self.verbose else (
         None, None)
     self.cached_items = []
     self._place_holder_out = None
     self.step_idx = 0
Example #26
    def sample(self, batchsz):
        """
        Given batchsz tasks in total, batchsz is split equally across the threads;
        when the threads return, their data is merged and returned.
        :param batchsz:
        :return: batch
        """

        # batchsz is split across the threads, so the final
        # batch size may be slightly larger than the batchsz parameter
        thread_batchsz = np.ceil(batchsz / self.thread_num).astype(np.int32)
        # buffer to save all data
        queue = multiprocessing.Queue()

        # start one worker process per pid in range(self.thread_num).
        # When a tensor is put on a Queue, the producing process must stay alive
        # until Queue.get() is called; see
        # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847/2
        evt = multiprocessing.Event()
        threads = []
        for i in range(self.thread_num):
            thread_args = (i, queue, self.env_list[i], self.policy,
                           thread_batchsz)
            threads.append(
                multiprocessing.Process(target=sampler, args=thread_args))
        for t in threads:
            # run the worker as a daemon so it is killed once the main process stops.
            t.daemon = True
            t.start()

        # get the first ReplayMemory object, then merge the others into it via its append function.
        pid0, buff0, avg_reward0 = queue.get()
        avg_reward = [avg_reward0]
        for _ in range(1, self.thread_num):
            pid, buff_, avg_reward_ = queue.get()
            buff0.append(buff_)  # merge current ReplayMemory into buff0
            avg_reward.append(avg_reward_)

        # now buff saves all the sampled data and avg_reward is the average reward of current sampled data
        buff = buff0
        avg_reward = np.array(avg_reward).mean()

        print('avg reward:', avg_reward)

        return buff.sample()
Example #27
 def init(self, evaluator, ckpt_dir):
     self.ckpt_dir = os.path.abspath(ckpt_dir)
     self.logger.info("checkpoint dir: %s", self.ckpt_dir)
     self.evaluator = evaluator
     self.stop_event = multiprocessing.Event()
     self.req_queue = multiprocessing.Queue()
     self.ans_queue = multiprocessing.Queue()
     self._register_signal_handler()
     backup_handlers = _logger.handlers
     _logger.handlers = [logging.NullHandler()]
     for gpu_id in self.gpu_ids:
         worker_p = multiprocessing.Process(target=self._worker, args=(
             self.evaluator, gpu_id, self.ckpt_dir,
             self.stop_event, self.req_queue, self.ans_queue))
         self.workers.append(worker_p)
         worker_p.start()
     _logger.handlers = backup_handlers
     self._inited = True
Example #28
 def __init__(self,
              model: torch.nn.Module,
              device: torch.device,
              input_queues: mp.Queue,
              output_queues: mp.Queue,
              scenes,
              h5_file_path,
              evt):
     super(GPUThread, self).__init__()
     self.model = model.eval()
     self.model = self.model.to(device)
     self.device = device
     self.i_queues = input_queues
     self.o_queues = output_queues
     self.exit = mp.Event()
     self.scenes = scenes
     self.evt = evt
     self.preprocess = transforms.Normalize(mean=[123.68, 116.779, 103.939],
                                            std=[1.0, 1.0, 1.0])
Example #29
def get_queue_feeder(batch_size=1, maxsize_queue=10, n_process=1):
    if batch_size != 1:
        raise NotImplementedError()

    if n_process < 1:
        raise ValueError(
            "n_process should be positive. Got {}".format(n_process))

    queue_feed = mp.Queue(maxsize=maxsize_queue)
    stop_event = mp.Event()

    batch_loaders = []
    for _ in range(n_process):
        batch_loader = mp.Process(target=feeder,
                                  args=(queue_feed, stop_event, batch_size))
        batch_loader.start()
        batch_loaders.append(batch_loader)

    return queue_feed, stop_event, batch_loaders
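
The feeder target is not shown. A minimal sketch of its assumed shape: it keeps the bounded queue topped up until the caller sets stop_event.

def feeder(queue_feed, stop_event, batch_size):
    while not stop_event.is_set():
        batch = [0.0] * batch_size   # stand-in for the real batch loading
        queue_feed.put(batch)        # blocks while the queue is at maxsize_queue
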
Example #30
    def test_main_process_unclean_exit(self):
        r'''There might be a ConnectionResetError or leaked semaphore warnings (due to the dirty process exit), \
but they are all safe to ignore'''

        # `raise_error` controls if the main process is KILL-ed by OS or just
        # simply raises an error. Both cases are interesting because
        # 1. In case of it is KILL-ed by OS, the workers need to automatically
        #    discover that their parent is dead and exit gracefully.
        # 2. In case of it raises an error itself, the parent process needs to
        #    take care of exiting the worker and then exits itself gracefully.
        for raise_error in (True, False):
            worker_pids = mp.Array('i', [0] * 4)

            main_exit_event = mp.Event()
            p = mp.Process(target=TestDataLoader._main_process,
                           args=(self.dataset, worker_pids, main_exit_event,
                                 raise_error))
            p.start()
            worker_pids[-1] = p.pid

            main_exit_event.wait()

            exit_status = [False] * len(worker_pids)
            start_time = time.time()
            pname = 'python'
            while True:
                for i in range(len(worker_pids)):
                    pid = worker_pids[i]
                    if not exit_status[i]:
                        if not TestDataLoader._is_process_alive(pid, pname):
                            exit_status[i] = True
                if all(exit_status):
                    break
                else:
                    if (time.time() - start_time >
                            MANAGER_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT):
                        self.fail('subprocess not terminated')
                    time.sleep(1)
            p.join(MANAGER_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT -
                   (time.time() - start_time))
            self.assertFalse(p.is_alive(), 'main process not terminated')