Code Example #1
    # env = UnityEnvironment(file_name="Reacher_Linux/Reacher.x86_64", no_graphics=args.no_graphics)
    env = UnityEnvironment(file_name="Reacher.app",
                           no_graphics=args.no_graphics)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=False)[brain_name]  # train_mode=False runs the simulation at normal speed with rendering
    action_size = brain.vector_action_space_size
    state = env_info.vector_observations[0]
    state_size = len(state)

    b_agent = Agent(args.model_name, state_size, action_size)
    try:
        b_agent.load()  # load a saved checkpoint to continue training
    except Exception:
        pass  # no checkpoint yet; start from a fresh model

    for epx in range(1, args.episodes + 1):
        at_step = 0
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]  # refresh the state after each reset
        b_agent.reset_episode()
        while True:
            action = b_agent.act(state)
            env_info = env.step(action)[brain_name]
            at_step += 1
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            b_agent.step(state, action, reward, next_state, done)  # assumed Agent hook, not in the original excerpt; see the sketch below
            state = next_state
            if at_step % 100 == 0:
                print(f'episode {epx}  step {at_step}  reward {reward:.3f}')
            if done:
                break
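
The example above leans on a project-specific Agent class. The sketch below shows the interface the calls imply: act, load, and reset_episode appear in the snippet, while step and all of the method bodies are assumptions added for illustration, not code from the project.

import numpy as np

class Agent:
    """Interface sketch only; the project's real Agent will differ."""

    def __init__(self, model_name, state_size, action_size):
        self.model_name = model_name
        self.state_size = state_size
        self.action_size = action_size
        # actor/critic networks, optimizers, and a replay buffer would live here

    def act(self, state):
        # placeholder policy: a random continuous action clipped to [-1, 1]
        return np.clip(np.random.randn(self.action_size), -1.0, 1.0)

    def step(self, state, action, reward, next_state, done):
        # hypothetical hook: store the transition and run a learning update
        pass

    def reset_episode(self):
        # e.g. reset per-episode exploration noise
        pass

    def load(self):
        # raising here is what the try/except guard in the snippet absorbs
        raise FileNotFoundError(f'{self.model_name}.pth not found')

    def save(self):
        pass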
Code Example #2
File: main.py  Project: moliqingwa/coppeliasim_deeprl
    args = parser.parse_args()
    is_training = args.mode == 'training'

    env = RLBenchEnv("ReachTarget", state_type_list=STATE_TYPE_LIST)
    state = env.reset()
    action_dim = env.action_space.shape[0]
    state_space = env.observation_space

    agent = Agent(state_space, HIDDEN_SIZE, action_dim, 1,
                  seed=SEED, buffer_size=MEMORY_BUFFER_SIZE,
                  actor_lr=ACTOR_LR, actor_hidden_sizes=ACTOR_HIDDEN_UNITS, actor_weight_decay=ACTOR_WEIGHT_DECAY,
                  critic_lr=CRITIC_LR, critic_hidden_sizes=CRITIC_HIDDEN_UNITS, critic_weight_decay=CRITIC_WEIGHT_DECAY,
                  batch_size=BATCH_SIZE, gamma=GAMMA, tau=TAU
                  )
    print(agent)
    agent.load()

    scores, actor_losses, critic_losses = run_ddpg(n_episodes=N_EPISODES, is_training=is_training,
                                                   eps_start=EPS_START if is_training else EPS_END,
                                                   eps_decay=EPS_DECAY, eps_end=EPS_END,
                                                   max_t=MAX_STEPS, learn_every_step=LEARN_EVERY_STEP)

    if is_training:
        agent.save()

    fig = plt.figure()
    ax1 = fig.add_subplot(311)
    ax1.plot(np.arange(1, len(scores) + 1), scores)
    ax1.set_ylabel('Score')
    ax1.set_xlabel('Episode #')
    # the 3x1 layout leaves room for the two loss curves run_ddpg returns
    ax2 = fig.add_subplot(312)
    ax2.plot(np.arange(1, len(actor_losses) + 1), actor_losses)
    ax2.set_ylabel('Actor loss')
    ax3 = fig.add_subplot(313)
    ax3.plot(np.arange(1, len(critic_losses) + 1), critic_losses)
    ax3.set_ylabel('Critic loss')
    ax3.set_xlabel('Episode #')
    plt.show()
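
run_ddpg itself is not part of this excerpt. The sketch below shows the convention its eps_start/eps_decay/eps_end arguments usually imply, a per-episode multiplicative decay of the exploration noise floored at eps_end. It assumes env and agent are module-level as above; the memorize and learn methods, the noise_scale argument, and the env.step return shape are all hypothetical names for illustration, not the project's API.

def run_ddpg(n_episodes, is_training, eps_start, eps_decay, eps_end,
             max_t, learn_every_step):
    """Sketch only: drives the env/agent defined above."""
    scores, actor_losses, critic_losses = [], [], []
    eps = eps_start
    for episode in range(1, n_episodes + 1):
        state = env.reset()
        score = 0.0
        for t in range(1, max_t + 1):
            action = agent.act(state, noise_scale=eps)  # hypothetical noise argument
            next_state, reward, done = env.step(action)  # assumed 3-tuple return
            if is_training:
                agent.memorize(state, action, reward, next_state, done)  # hypothetical
                if t % learn_every_step == 0:
                    a_loss, c_loss = agent.learn()  # hypothetical
                    actor_losses.append(a_loss)
                    critic_losses.append(c_loss)
            state = next_state
            score += reward
            if done:
                break
        scores.append(score)
        eps = max(eps_end, eps * eps_decay)  # multiplicative decay, floored at eps_end
    return scores, actor_losses, critic_losses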