Esempio n. 1
0
def test(args):
    """Run a local evaluation episode with a saved DDPG model.

    Loads the model from ``args.model`` (replay memory is not loaded),
    creates a ``RunEnv`` configured from ``args.visualize`` and
    ``args.max_obs``, seeds NumPy with ``args.seed``, and then steps the
    environment with actions from ``ddpg.select_action`` until it reports
    ``done``.  The action and the running episode reward are printed at
    every step, followed by a one-line episode summary.

    Args:
        args: Namespace-like object providing ``model``, ``visualize``,
            ``max_obs`` and ``seed`` attributes.
    """
    print('start testing')

    ddpg = DDPG()
    ddpg.load_model(args.model, load_memory=False)
    env = RunEnv(visualize=args.visualize, max_obstacles=args.max_obs)

    # NOTE(review): seeding happens after the model and environment are
    # constructed, so it only affects NumPy randomness used from here on.
    np.random.seed(args.seed)
    for i in range(1):
        step = 0
        ep_reward = 0

        # A fresh FeatureGenerator is created for every episode; raw
        # observations are always passed through it before reaching the agent.
        state = env.reset(difficulty=2)
        fg = FeatureGenerator()
        state = fg.gen(state)

        done = False
        while not done:
            action = ddpg.select_action(list(state))
            next_state, reward, done, _ = env.step(action.tolist())
            state = fg.gen(next_state)

            # Each action component keeps its trailing ", " so the line
            # matches the historical log format exactly.
            formatted_action = ''.join(
                '{0:.3f}, '.format(act) for act in action)
            print('step: {0:03d}, action: {1}'.format(step, formatted_action))

            ep_reward += reward
            step += 1

            print('reward:', ep_reward)

        print('\nEpisode: {} Reward: {}, n_steps: {}'.format(
            i, ep_reward, step))
Esempio n. 2
0
def submit(args):
    """Run a saved DDPG model against the remote grader and submit.

    Connects a ``Client`` to the hard-coded grader URL, loads the model from
    ``args.model`` (replay memory is not loaded), creates the remote
    environment with ``TOKEN``, and then repeatedly selects an action and
    steps the remote environment.  Whenever an episode finishes the
    environment is reset; when the reset returns a falsy value the loop ends
    and ``client.submit()`` is called.

    Args:
        args: Namespace-like object providing a ``model`` attribute.
    """
    print('start submitting')

    remote_base = 'http://grader.crowdai.org:1733'
    client = Client(remote_base)

    ddpg = DDPG()
    ddpg.load_model(args.model, load_memory=False)

    state = client.env_create(TOKEN)
    fg = FeatureGenerator()
    state = fg.gen(state)

    step = 0
    ep_reward = 0

    while True:
        print('selecting action ...', end=' ')
        action = ddpg.select_action(list(state))

        print('client.env_step ...')
        next_state, reward, done, _ = client.env_step(action.tolist())
        state = fg.gen(next_state)

        # Accumulate before logging so the printed return includes the
        # reward of the step being reported (previously it lagged one step
        # and the last reward of an episode was never shown).
        ep_reward += reward
        print('step: {0:03d}, ep_reward: {1:02.08f}'.format(step, ep_reward))
        step += 1

        if done:
            print('done')
            state = client.env_reset()
            # A falsy reset result ends the run (presumably the grader has
            # no further episodes - confirm against the client docs).
            if not state:
                break

            # Start the next episode with clean counters and a fresh
            # feature generator.
            step = 0
            ep_reward = 0

            fg = FeatureGenerator()
            state = fg.gen(state)

    client.submit()