import os
from osim.redis.client import Client
from osim.env import ProstheticsEnv
"""
NOTE: For testing your submission scripts, you first need to ensure that redis-server is running in the background
and you can locally run the grading service by running this script : https://github.com/crowdAI/osim-rl/blob/master/osim/redis/service.py

The client and the grading service communicate with each other by pointing to the same redis server.
"""
"""
Please ensure that `visualize=False`, else there might be unexpected errors 
in your submission
"""
env = ProstheticsEnv(visualize=False)
"""
Define evaluator end point from Environment variables
The grader will pass these env variables when evaluating
"""
REMOTE_HOST = os.getenv("CROWDAI_EVALUATOR_HOST", "127.0.0.1")
REMOTE_PORT = int(os.getenv("CROWDAI_EVALUATOR_PORT", 6379))  # env vars arrive as strings
client = Client(remote_host=REMOTE_HOST, remote_port=REMOTE_PORT)
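# The grader is expected to inject the endpoint through these environment
# variables, e.g. (hypothetical invocation):
#   CROWDAI_EVALUATOR_HOST=10.0.0.5 CROWDAI_EVALUATOR_PORT=6379 python submission.py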

# Create environment
observation = client.env_create()
"""
The grader runs N simulations of at most 1000 steps each. 
We stop after the last one
A new simulation starts when `clinet.env_step` returns `done==True`
and all the simulations end when the subsequent `client.env_reset()` 
returns a False
"""
while True:
    _action = env.action_space.sample().tolist()
    [observation, reward, done, info] = client.env_step(_action)
    print(observation)
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
Example #2
#!/usr/bin/env python

#  Script location : /home/round2_submit.py

import opensim as osim
from osim.redis.client import Client
from osim.env import *
import numpy as np
import argparse
import os
import tensorflow as tf
import json

env = RunEnv(visualize=False)
client = Client()

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# Restore the trained policy graph and look up its input/output tensors by name
saver = tf.train.import_meta_graph('/home/model-1480000.ckpt.meta')
saver.restore(sess, "/home/model-1480000.ckpt")
obs = tf.get_default_graph().get_tensor_by_name("observation_for_act:0")
act = tf.get_default_graph().get_tensor_by_name("taking_action/actor_action:0")
num_actions = 18


def preprocess_obs(obs):
    obs = np.array(obs)
    x = obs[1]  # pelvis x position in the RunEnv observation vector
    y = obs[2]  # pelvis y position
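# The example is truncated above. A minimal sketch of how the restored graph
# would typically drive the grading loop (assumed, not from the original;
# `preprocess_obs` is taken to return a flat feature vector):
#
#   observation = client.env_create()
#   while True:
#       state = preprocess_obs(observation)
#       action = sess.run(act, feed_dict={obs: np.atleast_2d(state)})
#       observation, reward, done, info = client.env_step(action[0].tolist())
#       if done:
#           observation = client.env_reset()
#           if not observation:
#               break
#   client.submit()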
Example #3
from osim.redis.client import Client
from osim.env import *
import numpy as np
import argparse
import os
"""
NOTE: For testing your submission scripts, you first need to ensure that redis-server is running in the background
and you can locally run the grading service by running this script : https://github.com/crowdAI/osim-rl/blob/master/osim/redis/service.py

The client and the grading service communicate with each other by pointing to the same redis server.
"""
"""
Please ensure that `visualize=False`, else there might be unexpected errors in your submission
"""
env = RunEnv(visualize=False)
client = Client()

# Create environment
observation = client.env_create()
"""
The grader runs N simulations of at most 1000 steps each. We stop after the last one
A new simulation start when `clinet.env_step` returns `done==True`
and all the simulatiosn end when the subsequent `client.env_reset()` returns a False
"""
while True:
    _action = env.action_space.sample().tolist()
    [observation, reward, done, info] = client.env_step(_action)
    print(observation)
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
Example #4
def main(args, extra_args):
    # env and algorithm config; update defaults with extra_args
    if args.load_path:
        extra_args.__dict__.update(
            load_json(
                os.path.join(os.path.dirname(args.load_path),
                             'config_alg.json')))
        if args.explore:
            extra_args.__dict__.update(
                load_json(
                    os.path.join(os.path.dirname(args.load_path),
                                 'config_exp.json')))
    env_args = get_env_config(args.env, extra_args.__dict__)
    alg_args = get_alg_config(args.alg, args.env, extra_args.__dict__)
    expl_args = None
    if args.explore:
        expl_args = getattr(import_module('algs.explore'),
                            'defaults')(args.explore)
        expl_args.update(
            {k: v
             for k, v in extra_args.__dict__.items() if k in expl_args})

    # mpi config
    args.rank = 0 if MPI is None else MPI.COMM_WORLD.Get_rank()
    args.world_size = 1 if MPI is None else MPI.COMM_WORLD.Get_size()
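    # `MPI` is assumed to be resolved at module import time with a fallback,
    # along these lines (not shown in this excerpt):
    #   try:
    #       from mpi4py import MPI
    #   except ImportError:
    #       MPI = None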

    # logging config
    if args.load_path:
        args.output_dir = os.path.dirname(args.load_path)
    if not args.output_dir:  # if not given use results/file_name/time_stamp
        logdir = args.exp_name + '_' + args.env + '_' + time.strftime(
            "%Y-%m-%d_%H-%M")
        args.output_dir = os.path.join('results', logdir)
        if args.rank == 0: os.makedirs(args.output_dir)

    # build environment
    env = build_env(args, env_args)

    # build exploration module and defaults
    exploration = None
    if args.explore:
        exploration = getattr(import_module('algs.explore'), args.explore)
        exploration = exploration(env.observation_space.shape,
                                  env.action_space.shape, **expl_args)

    # init session
    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               inter_op_parallelism_threads=1,
                               intra_op_parallelism_threads=1)
    tf_config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=tf_config)

    # print and save all configs
    if args.rank == 0:
        print_and_save_config(args, env_args, alg_args, expl_args)

    # build and train agent
    learn = getattr(import_module('algs.' + args.alg), 'learn')
    agent = learn(env, exploration, args.seed, args.n_total_steps,
                  args.max_episode_length, alg_args, args)

    if args.play:
        if env_args: env_args['visualize'] = True
        env = make_single_env(args.env, args.rank, args.n_env + 100, args.seed,
                              env_args, args.output_dir)
        obs = env.reset()
        episode_rewards = 0
        episode_steps = 0
        while True:
            # if episode_steps % 5 == 0: i = input('press key to continue ...')
            action = agent.get_actions(obs)  # (n_samples, batch_size, action_dim)
            action = exploration.select_best_action(np.atleast_2d(obs), action)
            next_obs, rew, done, info = env.step(action.flatten())
            r_bonus = exploration.get_exploration_bonus(
                np.atleast_2d(obs), action, np.atleast_2d(next_obs)).squeeze()
            episode_rewards += rew
            episode_steps += 1
            # print('q value: {:.4f}; reward: {:.2f}; aug_rewards: {:.2f}; bonus: {:.2f}; reward so far: {:.2f}'.format(
            #     agent.get_action_value(np.atleast_2d(obs), action).squeeze(), rew, info.get('rewards', 0), r_bonus, episode_rewards))
            obs = next_obs
            env.render()
            if done:
                print('Episode length {}; cumulative reward: {:.2f}'.format(
                    episode_steps, episode_rewards))
                episode_rewards = 0
                episode_steps = 0
                i = input('enter random seed: ')
                obs = env.reset(seed=int(i) if i != '' else None)

    if args.submission:
        import opensim as osim
        from osim.redis.client import Client

        REMOTE_HOST = os.getenv("AICROWD_EVALUATOR_HOST", "127.0.0.1")
        REMOTE_PORT = int(os.getenv("AICROWD_EVALUATOR_PORT", 6379))  # env vars arrive as strings
        client = Client(remote_host=REMOTE_HOST, remote_port=REMOTE_PORT)

        env = L2M2019ClientWrapper(client)
        env = ActionAugEnv(env)
        env = PoolVTgtEnv(env, **env_args)
        env = SkipEnv(env)
        env = Obs2VecEnv(env)

        obs = env.create()

        while True:
            action = agent.get_actions(obs)
            action = exploration.select_best_action(np.atleast_2d(obs), action)
            next_obs, rew, done, _ = env.step(action.flatten())
            obs = next_obs
            if done:
                obs = env.reset()
                if not obs:
                    break

        env.submit()

    return agent
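

# A hypothetical entry point for this excerpt. The flag names below are
# inferred from the attributes `main` reads off `args`; names, defaults, and
# the handling of `extra_args` are all assumptions:
#
#   if __name__ == '__main__':
#       parser = argparse.ArgumentParser()
#       parser.add_argument('--env', required=True)
#       parser.add_argument('--alg', required=True)
#       parser.add_argument('--explore', default=None)
#       parser.add_argument('--load_path', default=None)
#       parser.add_argument('--output_dir', default=None)
#       parser.add_argument('--exp_name', default='exp')
#       parser.add_argument('--seed', type=int, default=0)
#       parser.add_argument('--n_total_steps', type=int, default=1000000)
#       parser.add_argument('--max_episode_length', type=int, default=1000)
#       parser.add_argument('--n_env', type=int, default=1)
#       parser.add_argument('--play', action='store_true')
#       parser.add_argument('--submission', action='store_true')
#       args, _unknown = parser.parse_known_args()
#       main(args, argparse.Namespace())  # parsing of algorithm overrides omitted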