Code example #1
    def __init__(self, *args, **kwargs):
        self._shared_size = mp.Value(ctypes.c_long, 0)
        ObsDictRelabelingBuffer.__init__(self, *args, **kwargs)

        self._mp_array_info = {}
        self._shared_obs_info = {}
        self._shared_next_obs_info = {}

        for obs_key, obs_arr in self._obs.items():
            ctype = ctypes.c_double
            if obs_arr.dtype == np.uint8:
                ctype = ctypes.c_uint8

            self._shared_obs_info[obs_key] = (
                mp.Array(ctype, obs_arr.size),
                obs_arr.dtype,
                obs_arr.shape,
            )
            self._shared_next_obs_info[obs_key] = (
                mp.Array(ctype, obs_arr.size),
                obs_arr.dtype,
                obs_arr.shape,
            )

            self._obs[obs_key] = to_np(*self._shared_obs_info[obs_key])
            self._next_obs[obs_key] = to_np(
                *self._shared_next_obs_info[obs_key])
        self._register_mp_array("_actions")
        self._register_mp_array("_terminals")
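
Note: the buffer above converts each shared mp.Array back into a NumPy view through a to_np helper that is not shown on this page. A minimal sketch of what such a helper might look like, assuming it simply wraps np.frombuffer over the (array, dtype, shape) tuples stored above:

import ctypes
import multiprocessing as mp

import numpy as np


def to_np(shared_arr, dtype, shape):
    # Wrap the raw ctypes buffer of a multiprocessing.Array as a NumPy view.
    return np.frombuffer(shared_arr.get_obj(), dtype=dtype).reshape(shape)


# Quick check: a (3, 2) float64 buffer; writes through the view land in shared memory.
shared = mp.Array(ctypes.c_double, 6)
view = to_np(shared, np.float64, (3, 2))
view[0, 0] = 1.0
print(shared[0])  # -> 1.0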
Code example #2
    def __init__(self,
                 make_env,
                 array_dim,
                 batchsize,
                 max_q_size,
                 num_threads,
                 collate_fn,
                 epoch=0):
        num_threads = max(num_threads, 1)
        self.readyQs = [
            mp.Queue(maxsize=max_q_size) for i in range(num_threads)
        ]
        self.array_dim = array_dim
        self.num_threads = num_threads
        self.num_videos_per_thread = batchsize // num_threads
        self.max_q_size = max_q_size
        self.batchsize = batchsize
        self.make_env = make_env
        self.batch = np.zeros(
            (self.num_threads, self.num_videos_per_thread, *array_dim),
            dtype=np.float32)

        array_dim2 = (self.max_q_size, self.num_videos_per_thread, *array_dim)

        self.m_arrays = (mp.Array('f',
                                  int(np.prod(array_dim2)),
                                  lock=mp.Lock()) for _ in range(num_threads))
        self.arrays = [(m, np.frombuffer(m.get_obj(),
                                         dtype='f').reshape(array_dim2))
                       for m in self.m_arrays]
        self.dataset = make_env(proc_id=0, num_procs=0, num_envs=0)
        self.max_iter = self.dataset.max_iter
        self.collate_fn = collate_fn
        self.epoch = epoch
Code example #3
File: test_dataloader.py Project: yidongVSI/pytorch
    def test_main_process_unclean_exit(self):
        '''There might be ConnectionResetError or leaked semaphore warning (due to dirty process exit), \
but they are all safe to ignore'''
        worker_pids = mp.Array('i', [0] * 4)

        manager_exit_event = mp.Event()
        p = mp.Process(target=TestDataLoader._manager_process,
                       args=(self.dataset, worker_pids, manager_exit_event))
        p.start()

        manager_exit_event.wait()

        exit_status = [False] * len(worker_pids)
        start_time = time.time()
        pname = 'python'
        while True:
            for i in range(len(worker_pids)):
                pid = worker_pids[i]
                if not exit_status[i]:
                    if not TestDataLoader._is_process_alive(pid, pname):
                        exit_status[i] = True
            if all(exit_status):
                break
            else:
                time.sleep(1)
                self.assertFalse(time.time() - start_time > MANAGER_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT,
                                 'subprocess not terminated')
Code example #4
File: zip.py Project: vzinche/inferno
    def remove_rejected(self):
        # remove the indices belonging to samples that were rejected from the dataset
        # this changes the length of the dataset
        rejected = np.array(self.rejected[:])
        self.index_mapping = np.argwhere(1 - rejected)[:, 0]
        self.rejected = mp.Array('b', len(self))
        # just in case of num_workers == 0
        self.available_indices = None
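
Note: np.argwhere(1 - rejected)[:, 0] keeps the positions whose rejection flag is still 0. A tiny self-contained demonstration of that indexing step (plain NumPy, hypothetical flag values):

import numpy as np

rejected = np.array([0, 1, 0, 0, 1], dtype='b')   # 1 marks a rejected sample
index_mapping = np.argwhere(1 - rejected)[:, 0]
print(index_mapping)  # -> [0 2 3], only the non-rejected positions survive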
Code example #5
def main():
    params = Params()

    mp.set_start_method('spawn')
    lock = mp.Lock()

    actions = mp.Array('i', [-1] * params.n_process, lock=lock)
    count = mp.Value('i', 0)
    best_acc = mp.Value('d', 0.0)

    state_Queue = mp.JoinableQueue()
    action_done = mp.SimpleQueue()
    reward_Queue = mp.JoinableQueue()

    # shared_model = A3C_LSTM_GA()
    # shared_model = shared_model.share_memory()
    #
    # shared_optimizer = SharedAdam(shared_model.parameters(), lr=params.lr, amsgrad=params.amsgrad, weight_decay=params.weight_decay)
    # shared_optimizer.share_memory()
    #run_sim(0, params, shared_model, None,  count, lock)
    #test(params, shared_model, count, lock, best_acc)

    processes = []

    train_process = 0
    test_process = 0

    p = mp.Process(target=learning,
                   args=(
                       params,
                       state_Queue,
                       action_done,
                       actions,
                       reward_Queue,
                   ))
    p.start()
    processes.append(p)
    # test_process += 1

    for rank in range(params.n_process):
        p = mp.Process(target=run_sim,
                       args=(
                           train_process,
                           params,
                           state_Queue,
                           action_done,
                           actions,
                           reward_Queue,
                           lock,
                       ))

        train_process += 1
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
Code example #6
    def __init__(self, array_dim, max_q_size, num_videos_per_thread, dtype):
        self.array_dim = array_dim
        self.num_videos_per_thread = num_videos_per_thread
        self.max_q_size = max_q_size
        array_dim2 = (self.max_q_size, self.num_videos_per_thread, *array_dim)

        mp_dtype = "f" if dtype == np.float32 else "b"
        self.m = mp.Array(mp_dtype, int(np.prod(array_dim2)), lock=mp.Lock())
        self.n = np.frombuffer(self.m.get_obj(),
                               dtype=dtype).reshape(array_dim2)
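
Note: several examples on this page pair mp.Array with np.frombuffer so that parent and worker processes read and write the same memory. A minimal, self-contained sketch of that pattern (the names here are illustrative, not taken from the projects above):

import multiprocessing as mp

import numpy as np


def fill(shared, shape, value):
    # Re-create the NumPy view inside the child; it aliases the same shared buffer.
    view = np.frombuffer(shared.get_obj(), dtype=np.float32).reshape(shape)
    view[:] = value


if __name__ == '__main__':
    shape = (2, 3)
    shared = mp.Array('f', int(np.prod(shape)), lock=mp.Lock())
    p = mp.Process(target=fill, args=(shared, shape, 7.0))
    p.start()
    p.join()
    parent_view = np.frombuffer(shared.get_obj(), dtype=np.float32).reshape(shape)
    print(parent_view)  # every entry is 7.0, written by the child process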
Code example #7
File: zip.py Project: vzinche/inferno
    def __init__(self,
                 *datasets,
                 sync=False,
                 transforms=None,
                 rejection_dataset_indices,
                 rejection_criterion,
                 random_jump_after_reject=True):
        """
        Parameters
        ----------
        datasets : list or tuple
            Datasets to zip.
        sync : bool
            Whether to synchronize zipped datasets if a synchronization primitive is available.
        transforms : callable
            Transforms to apply on the fetched batch.
        rejection_dataset_indices : int or list or tuple
            Indices (or index) corresponding to the datasets which are used to determine whether
            a batch should be rejected.
        rejection_criterion : callable
            Criterion for rejection of batch. Must be a callable that accepts one or more
            arrays / tensors and returns True if the corresponding batch should be rejected,
            False otherwise. Should accept as many inputs as the number of elements in
            `rejection_dataset_indices` if the latter is a list, and 1 otherwise. Note that
            the order of the inputs to the `rejection_criterion` is the same as the order of
            the indices in `rejection_dataset_indices`.
        random_jump_after_reject: bool
            Whether to try a random index or the rejected index incremented by one after rejection.
        """
        super(ZipReject, self).__init__(*datasets,
                                        sync=sync,
                                        transforms=transforms)
        for rejection_dataset_index in pyu.to_iterable(
                rejection_dataset_indices):
            assert_(
                rejection_dataset_index < len(datasets),
                "Index of the dataset to be used for rejection (= {}) is larger "
                "than the number of datasets (= {}) minus one.".format(
                    rejection_dataset_index, len(datasets)), IndexError)
        self.rejection_dataset_indices = pyu.to_iterable(
            rejection_dataset_indices)
        assert_(callable(rejection_criterion),
                "Rejection criterion is not callable as it should be.",
                TypeError)
        # return true if fetched should be rejected
        self.rejection_criterion = rejection_criterion
        # Array shared over processes to keep track of which indices have been rejected
        self.rejected = mp.Array('b', len(self))
        self.available_indices = None
        # optional index mapping to exclude rejected indices, reducing dataset size (see remove_rejected())
        self.index_mapping = None

        self.random_jump_after_reject = random_jump_after_reject
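
Note: as the docstring says, rejection_criterion is just a callable over the fetched arrays. A hypothetical criterion (not taken from the inferno project) that rejects batches whose label array contains no foreground could look like this:

import numpy as np


def reject_if_empty(labels):
    # Return True (reject the batch) when the label batch is entirely zero.
    return not np.any(labels)


print(reject_if_empty(np.zeros((4, 4))))  # True  -> batch rejected
print(reject_if_empty(np.eye(4)))         # False -> batch kept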
Code example #8
    def __init__(self, array_dim, num_threads):
        self.batch = np.zeros(
            (self.num_threads, self.num_videos_per_thread, *array_dim),
            dtype=np.float32)

        array_dim2 = (self.max_q_size, self.num_videos_per_thread, *array_dim)
        self.m_arrays = (mp.Array('f',
                                  int(np.prod(array_dim2)),
                                  lock=mp.Lock()) for _ in range(num_threads))
        self.arrays = [(m, np.frombuffer(m.get_obj(),
                                         dtype='f').reshape(array_dim2))
                       for m in self.m_arrays]
Code example #9
    def _register_mp_array(self, arr_instance_var_name):
        """
        Use this function to register an array to be shared. This will wipe arr.
        """
        assert hasattr(self, arr_instance_var_name), arr_instance_var_name
        arr = getattr(self, arr_instance_var_name)

        ctype = ctypes.c_double
        if arr.dtype == np.uint8:
            ctype = ctypes.c_uint8

        self._mp_array_info[arr_instance_var_name] = (
            mp.Array(ctype, arr.size),
            arr.dtype,
            arr.shape,
        )
        setattr(self, arr_instance_var_name,
                to_np(*self._mp_array_info[arr_instance_var_name]))
Code example #10
File: test_dataloader.py Project: yzqcjdtc/pytorch
    def test_main_process_unclean_exit(self):
        r'''There might be ConnectionResetError or leaked semaphore warning (due to dirty process exit), \
but they are all safe to ignore'''

        # `raise_error` controls if the main process is KILL-ed by OS or just
        # simply raises an error. Both cases are interesting because
        # 1. In case of it is KILL-ed by OS, the workers need to automatically
        #    discover that their parent is dead and exit gracefully.
        # 2. In case of it raises an error itself, the parent process needs to
        #    take care of exiting the worker and then exits itself gracefully.
        for raise_error in (True, False):
            worker_pids = mp.Array('i', [0] * 4)

            main_exit_event = mp.Event()
            p = mp.Process(target=TestDataLoader._main_process,
                           args=(self.dataset, worker_pids, main_exit_event,
                                 raise_error))
            p.start()
            worker_pids[-1] = p.pid

            main_exit_event.wait()

            exit_status = [False] * len(worker_pids)
            start_time = time.time()
            pname = 'python'
            while True:
                for i in range(len(worker_pids)):
                    pid = worker_pids[i]
                    if not exit_status[i]:
                        if not TestDataLoader._is_process_alive(pid, pname):
                            exit_status[i] = True
                if all(exit_status):
                    break
                else:
                    if time.time(
                    ) - start_time > MANAGER_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT:
                        self.fail('subprocess not terminated')
                    time.sleep(1)
            p.join(MANAGER_STATUS_CHECK_INTERVAL + JOIN_TIMEOUT -
                   (time.time() - start_time))
            self.assertFalse(p.is_alive(), 'main process not terminated')
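
Note: both PyTorch test snippets call a TestDataLoader._is_process_alive(pid, pname) helper that is not reproduced on this page. One plausible stand-in (an assumption, not the actual PyTorch implementation) asks ps for the command name of the pid, which also guards against pid reuse:

import subprocess


def is_process_alive(pid, pname):
    # `ps -p <pid> -o comm=` prints the command name only if the pid still exists.
    try:
        out = subprocess.check_output(['ps', '-p', str(pid), '-o', 'comm='])
    except subprocess.CalledProcessError:
        return False
    return pname in out.decode('utf-8', errors='replace')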
Code example #11
    # value_target = np.array(value_target)
    # value_target = (value_target - value_target.min()) / (value_target.max() - value_target.min())

    loss = l_net.loss_func(vwrap(np.vstack(bs)), vwrap(np.vstack(ba)),
                           vwrap(np.vstack(value_target)))

    opt.zero_grad()
    loss.backward()
    for lp, gp in zip(l_net.parameters(), g_net.parameters()):
        gp._grad = lp.grad
    opt.step()

    l_net.load_state_dict(g_net.state_dict())


if __name__ == "__main__":
    g_net = Net(N_S, N_A)
    g_net.share_memory()
    opt = SharedAdam(g_net.parameters(), lr=ADAM_LR)
    g_ep = mp.Value('i', 0)
    g_rewards = mp.Array(ctypes.c_double, MAX_EP * 2)

    workers = [
        Worker(g_net, opt, g_ep, g_rewards, i) for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]
    [w.join() for w in workers]
    save_rewards = np.frombuffer(g_rewards.get_obj())
    plt.plot(save_rewards[:g_ep.value])
    plt.show()
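
Note: g_ep above is an mp.Value that the Worker processes presumably increment. The usual way to bump such a counter without losing updates (a generic sketch, not code from this project) is to take its lock first:

import multiprocessing as mp


def bump(counter, times):
    for _ in range(times):
        with counter.get_lock():  # serialize the read-modify-write
            counter.value += 1


if __name__ == '__main__':
    g_ep = mp.Value('i', 0)
    procs = [mp.Process(target=bump, args=(g_ep, 1000)) for _ in range(4)]
    [p.start() for p in procs]
    [p.join() for p in procs]
    print(g_ep.value)  # 4000, no lost updates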
Code example #12
File: mp.py Project: wlxer/DC-DRL
def mp_agent(args):

    assert True, 'annotate the code in agent preceive'

    if args.seed != -1:
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    if args.gpu and False:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device('cpu')

    # TensorboardX is incompatible with asynchronous event writing:
    # https://github.com/lanpa/tensorboardX/issues/123
    # writer = SummaryWriter()
    writer = None

    agent_population = []
    for i_population in range(args.n_population):
        agent_i = init_agent(args, device, writer, i_population)
        agent_population.append(agent_i)

    best_history_agent = init_agent(args, device, writer, args.n_population)
    best_history = -100000

    fitness_values = [0. for i in range(args.n_process)]
    fitness_values = torch_mp.Array('f', fitness_values)
    done_signals = torch_mp.Array(
        'i', [0 for i in range(args.n_process)])  # as a compromise
    share_signals = torch_mp.Value('i', 0)
    collected_exp = []
    evo = Evolution(args)

    for i_process in range(args.n_process):  # change to population:
        q = torch_mp.Queue()  # change according to local_interval
        collected_exp.append(q)

    for i_generation in range(args.n_generation):
        processes = []
        done_signals[:] = [0 for i in range(args.n_process)]
        share_signals.value = 0

        for i_process in range(args.n_process):
            p = torch_mp.Process(
                target=train_agent,
                args=(args, agent_population[i_process], i_process, i_process,
                      i_generation, device, fitness_values,
                      collected_exp[i_process], done_signals, share_signals))
            p.start()
            processes.append(p)

        # for p in processes:
        #     p.join()

        while not all(done_signals[:]):
            # continues to check
            time.sleep(1)

        for i_process in range(args.n_process):
            q = collected_exp[i_process]
            q_counter = 0
            while q.qsize() > 0:
                q_counter += 1
                state, action, mask, next_state, reward = q.get()
                for agent_i in agent_population:
                    agent_i.preceive(state, action, mask, next_state, reward)
                    if q_counter % agent_i.eval_env.spec.timestep_limit == 0:
                        agent_i.reset_storage()

        share_signals.value = 1
        time.sleep(2)  # wait for the subprocess to close
        all_fitness = fitness_values[:]

        best_pop_fitness = max(all_fitness)
        best_index = all_fitness.index(max(all_fitness))

        if best_history < best_pop_fitness:
            best_history = best_pop_fitness
            best_history_agent.copy_model_mp(agent_population[best_index])

        print('eval_performance:', best_history_agent.eval_performance())

        for i_fitness in range(len(all_fitness)):  # manual syncs
            # change to your own environment
            if all_fitness[i_fitness] < best_history + best_history * 0.1:
                agent_population[i_fitness].copy_model_mp(best_history_agent)

        # evo.next_g(agent_population, all_fitness)
        evo.torch_next_g(agent_population, all_fitness)

    if writer:
        writer.close()
Code example #13
if __name__ == '__main__':
    os.system('cls')

    vis.close()

    num_processes = 1

    shared_queue = mp.Queue()
    shared_state = dict()

    shared_state["p"] = Actor(s_dim, a_dim, dev).share_memory()
    shared_state["q"] = QCritic(s_dim, a_dim, dev).share_memory()
    shared_state["v"] = VCritic(s_dim, a_dim, dev).share_memory()

    shared_state["update"] = mp.Array('i', [0 for i in range(num_processes)])
    #    shared_state["wait"] = mp.Array('i', [0 for i in range(num_processes)])
    shared_state["vis"] = mp.Value('i', 0)
    shared_state["wait"] = mp.Value('i', 0)
    shared_state["wait"].value = start_frame * 10

    act = actor_worker(0, num_frames, shared_state, shared_queue, 0.1, False)
    act.run()
    act.run()
    act.run()
    lea = learner_worker(1, num_frames, shared_state, shared_queue, False)
    lea.push_buffer()
    lea.push_buffer()
    lea.push_buffer()

    #    for i in range(100):
Code example #14
    torch.save({'model': net.state_dict(), 'best_idx': best_idx,
                'opt': optimizer.state_dict()}, file_name)

    print("Net evaluation started")
    net.eval()
    if os.name == 'nt' and args.cuda:
        cd = torch.device("cpu")
        net.to(cd)
        best_net.to(cd)
        cpuf = True
    else: cpuf = False

    mp.set_start_method("spawn", force=True)
    lock = mp.Lock()
    processes = []
    mar = mp.Array('i', 3)
    mar[0] = 1
    for i in range(NUM_PROC):
        p = mp.Process(target=eval, args=(mar, lock, net, best_net, device, cpuf), daemon=True)
        p.start()
        processes.append(p)
    while 1:
        lock.acquire()
        if mar[0] > 0 and (mar[1] >= EVALUATION_ROUNDS*BEST_NET_WIN_RATIO or
                           mar[2]>EVALUATION_ROUNDS*(1-BEST_NET_WIN_RATIO)):
            mar[0] = 0
        lock.release()
        running = any(p.is_alive() for p in processes)
        if not running:
            break
        time.sleep(0.5)
Code example #15
File: test_dataloader.py Project: zraocn/pytorch
    def test_proper_exit(self):
        (r'''There might be ConnectionResetError or leaked semaphore warning '''
         r'''(due to dirty process exit), but they are all safe to ignore''')

        # TODO: test the case where the pin_memory_thread triggers an
        #       error/fatal signal. I haven't found out how to properly do that.

        # Array to store the worker pids.
        worker_pids = mp.Array('i', [-1 for _ in range(10)])

        def wait_pids(pids, timeout):
            r"""Wait for all process specified in pids to exit in given timeout."""
            exit_status = [False for _ in pids]
            start_time = time.time()
            pname = 'python'
            while True:
                for i in range(len(pids)):
                    pid = pids[i]
                    if not exit_status[i]:
                        if not TestDataLoader._is_process_alive(pid, pname):
                            exit_status[i] = True
                if all(exit_status):
                    break
                else:
                    if time.time() - start_time > timeout:
                        break
                    time.sleep(0.5)
            return exit_status

        for use_workers, pin_memory, hold_iter_reference in itertools.product(
            [True, False], repeat=3):
            # `hold_iter_reference` specifies whether we hold a reference to the
            # iterator. This is interesting because Python3 error traces holds a
            # reference to the frames, which hold references to all the local
            # variables including the iterator, and then the iterator dtor may
            # not be called before process end. It is important to see that the
            # processes still exit in both cases.

            if pin_memory and (not TEST_CUDA or NO_MULTIPROCESSING_SPAWN):
                # Can't use CUDA without spawn
                continue

            # `exit_method` controls the way the loader process ends.
            #   - `*_kill` means that `*` is killed by OS.
            #   - `*_error` means that `*` raises an error.
            #   - `None` means that no error happens.
            # In all cases, all processes should end properly.
            if use_workers:
                exit_methods = [
                    None, 'main_error', 'main_kill', 'worker_kill',
                    'worker_error'
                ]
            else:
                exit_methods = [None, 'main_error', 'main_kill']

            for exit_method in exit_methods:

                # clear pids array first
                for i in range(len(worker_pids)):
                    worker_pids[i] = -1

                # Event that the loader process uses to signal testing process
                # that various things are setup, including that the worker pids
                # are specified in `worker_pids` array.
                setup_event = mp.Event()

                p = ErrorTrackingProcess(
                    target=_test_proper_exit,
                    args=(use_workers, pin_memory, exit_method,
                          hold_iter_reference, worker_pids, setup_event))
                p.start()

                # Wait for loader process to set everything up, i.e., filling
                # worker pids in `worker_pids`.
                setup_event.wait(timeout=JOIN_TIMEOUT)
                self.assertTrue(setup_event.is_set(),
                                'loader process setup timed out')

                pids = [pid for pid in worker_pids if pid > 0]

                try:
                    exit_status = wait_pids(pids,
                                            timeout=(MP_STATUS_CHECK_INTERVAL +
                                                     JOIN_TIMEOUT))
                    if not all(exit_status):
                        self.fail(
                            'subprocess (pid(s) {}) not terminated'.format(
                                ', '.join(
                                    str(p) for p, exited in zip(pids, exit_status)
                                    if not exited)))
                    p.join(JOIN_TIMEOUT + MP_STATUS_CHECK_INTERVAL)
                    self.assertFalse(p.is_alive(),
                                     'loader process not terminated')
                    if exit_method is None:
                        self.assertEqual(p.exitcode, 0)
                    else:
                        self.assertNotEqual(p.exitcode, 0)
                finally:
                    p.terminate()
Code example #16
                    type=int,
                    default=3,
                    help='number of channels of output data')
parser.add_argument('--cuda',
                    type=bool,
                    default=True,
                    help='use GPU computation')
parser.add_argument(
    '--n_cpu',
    type=int,
    default=16,
    help='number of cpu threads to use during batch generation')
opt = parser.parse_args()

population = 32
current_fitness_base_A2B = mp.Array('f', range(population))
current_fitness_A2B = np.asarray(current_fitness_base_A2B.get_obj(),
                                 dtype=np.float32)

current_fitness_base_B2A = mp.Array('f', range(population))
current_fitness_B2A = np.asarray(current_fitness_base_B2A.get_obj(),
                                 dtype=np.float32)

# Losses
criterion_GAN = torch.nn.MSELoss()
criterion_cycle = torch.nn.L1Loss()
criterion_identity = torch.nn.L1Loss()


def caculate_fitness_for_first_time(mask_input, gpu_id, fitness_id,
                                    A2B_or_B2A):
Code example #17
                if '-' not in command:
                    target_ids = [int(i.strip()) for i in command.split(",")]
                else:
                    target_ids = list(
                        range(int(command.split('-')[0]),
                              int(command.split('-')[1]) + 1))

                chosen_objects = [
                    all_visible_objects[target_id] for target_id in target_ids
                ]
                check_phase = lambda c: 'train' if os.path.isfile(
                    os.path.join(args.folder, "net_{}.pth".format(c))
                ) else 'test'
                chosen_phases = [check_phase(c) for c in chosen_objects]

                results = mp.Array('f', len(chosen_objects))
                processes = []
                for rank, obj in enumerate(chosen_objects):
                    p = mp.Process(target=test, args=(training_scene, obj, rank, shared_model, \
                                    results, config, arguments))
                    p.start()
                    processes.append(p)

                for p in processes:
                    p.join()

                print("Testing accuracies:",
                      list(zip(chosen_objects, chosen_phases, results[:])))

            else:
                arguments['test'] = 1
Code example #18
File: self-play.py Project: cjssh1002/AlphaJanggi
             best_idx=best_idx,
             url=URL,
             username=username,
             device=device)
         game_nodes = len(mcts_store)
         dt = time.time() - t
         speed_steps = game_steps / dt
         speed_nodes = game_nodes / dt
         step_idx += 1
         print(
             "Step %d, steps %3d, leaves %4d, steps/s %5.2f, leaves/s %6.2f, best_idx %d"
             % (step_idx, game_steps, game_nodes, speed_steps,
                speed_nodes, best_idx))
 else:
     processes = []
     mar = mp.Array('i', 2)
     mar[0] = 1
     for i in range(num_proc):
         mcts_store = mcts.MCTS()
         p = mp.Process(target=play,
                        args=(mar, lock, mcts_store, net, best_idx,
                              username, device, step_idx),
                        daemon=True)
         p.start()
         processes.append(p)
     while 1:
         lock.acquire()
         if mar[0] > 0 and mar[1] >= PLAY_EPISODE * num_proc: mar[0] = 0
         lock.release()
         running = any(p.is_alive() for p in processes)
         if not running:
Code example #19
File: main.py Project: Cute77/Adaptive-Lambda
        env.observation_space.shape[0], env.action_space)
    shared_model.share_memory()

    if args.no_shared:
        optimizer = None
    else:
        optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
        optimizer.share_memory()

    processes = []

    counter = mp.Value('i', 0)
    num_done = mp.Value('i', 0)
    num_episode = mp.Value('i', 0)
    reward_sum = mp.Value('i', 0)
    arr = mp.Array('i', [])
    lock = mp.Lock()
    writer = SummaryWriter("logs/fig"+str(args.gae_lambda1)+"_"+ str(args.gae_lambda2), max_queue = 1)    

    p = mp.Process(target=test, args=(args.num_processes, args, shared_model, counter, num_done, num_episode, reward_sum, lock))
    p.start()
    processes.append(p)
    def write(a,b,c):
        i = 0
        while counter.value < 120000000:
            print(a.value, b.value, c.value, counter.value / 10000)
            writer.add_scalar("test/reward", a.value, counter.value / 10000)
            writer.add_scalar("train/rate", b.value * 1.0 / c.value, counter.value / 10000)
            i = i + 1
            time.sleep(10)
    for rank in range(0, args.num_processes):
Code example #20
File: async_runner.py Project: dohnala/GridWorld
    def train(self, train_steps, eval_every_sec, eval_episodes, goal=None):
        """
        Train agent for given number of steps.

        :param train_steps: number of steps to train agent
        :param eval_every_sec: evaluate agent every `eval_every_sec` seconds
        :param eval_episodes: number of episodes to evaluate the agent for
        :param goal: goal which can terminate training if it is reached
        :return: result
        """
        # Set one thread per core
        os.environ['OMP_NUM_THREADS'] = '1'

        # Flag indicating that training is finished
        stop_flag = mp.Event()

        # Number of steps for each worker
        workers_train_steps = int(train_steps / self.num_processes)

        # Workers' current steps
        workers_steps = mp.Array('i', self.num_processes)

        # Queue where the final result is put
        result_queue = mp.Queue()

        processes = []

        start = timer()

        # Create and start evaluation process
        eval_process = EvalProcess(
            env_fn_serialized=serialize(self.env_fn),
            agent=self.agent,
            seed=self.seed +
            self.num_processes if self.seed is not None else None,
            train_steps=train_steps,
            eval_every_sec=eval_every_sec,
            eval_episodes=eval_episodes,
            goal_serialized=serialize(goal),
            stop_flag=stop_flag,
            workers_steps=workers_steps,
            result_queue=result_queue)

        eval_process.start()
        processes.append(eval_process)

        # Create and start worker processes
        for worker in self.agent.create_workers(self.num_processes):
            worker_process = WorkerProcess(
                env_fn_serialized=serialize(self.env_fn),
                worker=worker,
                seed=self.seed +
                worker.worker_id if self.seed is not None else None,
                train_steps=workers_train_steps,
                workers_steps=workers_steps,
                stop_flag=stop_flag)

            worker_process.start()
            processes.append(worker_process)

        # Wait until all processes finish execution
        [process.join() for process in processes]

        # Get result from queue
        result = result_queue.get()
        result.train_time = timer() - start

        return result
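
Note: workers_steps gives every worker its own slot in the shared array, so the evaluation process can poll total progress by summing the slots. A stripped-down sketch of that idea (illustrative names, not the GridWorld code):

import time
import multiprocessing as mp


def worker(worker_id, steps, workers_steps):
    for step in range(1, steps + 1):
        time.sleep(0.001)                # stand-in for one training step
        workers_steps[worker_id] = step  # each worker writes only its own slot


if __name__ == '__main__':
    num_workers, steps = 4, 200
    workers_steps = mp.Array('i', num_workers)
    procs = [mp.Process(target=worker, args=(i, steps, workers_steps))
             for i in range(num_workers)]
    [p.start() for p in procs]
    while any(p.is_alive() for p in procs):
        print('total steps so far:', sum(workers_steps[:]))
        time.sleep(0.1)
    [p.join() for p in procs]
    print('final total:', sum(workers_steps[:]))  # num_workers * steps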