Beispiel #1
0
        args.num_traj_buffers,
        args.num_steps,
    )
    
    def _shared_float32(shape, fill=np.zeros):
        """Build a float32 array of `shape` with `fill` and pass it through share_memory."""
        return share_memory(fill(shape, dtype=np.float32))

    # `dimensions[:-1]` drops the trailing axis of `dimensions`; the "next_*"
    # buffers therefore hold one entry per slot rather than one per element
    # of that last axis.
    leading_dims = dimensions[:-1]
    next_obs = _shared_float32(leading_dims + env.observation_space.shape)
    next_done = _shared_float32(leading_dims)

    # Trajectory storage covering the full `dimensions` layout.
    obs = _shared_float32(dimensions + env.observation_space.shape)
    # Actions keep the dtype declared by the environment's action space.
    actions = share_memory(
        np.zeros(dimensions + env.action_space.shape, dtype=env.action_space.dtype)
    )
    logprobs, rewards, dones, values = (
        _shared_float32(dimensions) for _ in range(4)
    )
    # Initialised to ones (every other buffer starts at zero).
    # NOTE(review): presumably 1 marks a trajectory slot as free - confirm
    # against the code that reads/writes this array.
    traj_availables = _shared_float32(dimensions, fill=np.ones)

    # Process handles and inter-process queues.
    actor_processes, policy_workers = [], []
    stats_queue = MpQueue()
    # One task queue per rollout worker, one request queue per policy worker.
    rollout_task_queues = [MpQueue() for _ in range(args.num_rollout_workers)]

    policy_request_queues = [MpQueue() for _ in range(args.num_policy_workers)]
    learner_request_queue = MpQueue()
    data_process_back_queues = []


    # Start one rollout worker process per configured worker. Each process
    # gets its own task queue (rollout_task_queues[i]) and shares every other
    # buffer and queue with its siblings.
    for i in range(args.num_rollout_workers):
        actor = mp.Process(
            target=act,
            # The outer list is the positional-argument list; the inner list is
            # the single argument `act` receives.
            args=[[args, experiment_name, i, lock, stats_queue, 0,
                  next_obs, next_done, obs, actions, logprobs, rewards, dones, values, traj_availables,
                  rollout_task_queues[i], policy_request_queues, learner_request_queue]],
        )
        actor.start()
        # Keep the handle: without it the process can never be joined or
        # terminated, and `actor_processes` would stay empty.
        actor_processes.append(actor)
    num_steps = 32

    lock = mp.Lock()
    # Leading axes of the shared observation buffer: one slot per CPU, then
    # per environment, two singleton axes, and finally the step index.
    dimensions = (mp.cpu_count(), num_envs, 1, 1, num_steps)
    # Each buffer element carries a trailing (84, 84, 3) block.
    # NOTE(review): presumably an HxWxC image frame, and the literal 5 is
    # interpreted by share_memory_torch_numpy_mixed - confirm both against
    # that helper's definition.
    obs = share_memory_torch_numpy_mixed(np.zeros(dimensions + (84, 84, 3)), 5)

    actor_processes = []
    # NOTE(review): this forkserver context is created but not used below;
    # the workers are built with mp.Process, i.e. the default start method.
    ctx = mp.get_context("forkserver")
    stats_queue = MpQueue()
    # NOTE(review): the buffer is sized with mp.cpu_count() while the loop
    # runs num_cpus times - verify the two agree.
    for i in range(num_cpus):
        actor = mp.Process(target=act, args=[obs, num_envs, num_steps])
        actor.start()
        actor_processes.append(actor)
    import timeit

    # Clock used for interval measurements, distinct from the time.time()
    # start stamp taken below.
    timer = timeit.default_timer
    existing_video_files = []
    # Step and update counters all start from zero.
    global_step = global_step_increment = update_step = 0
    start_time = time.time()
    try: