from osim.redis.client import Client
from osim.env import *
import numpy as np
import os

"""
NOTE: For testing your submission scripts, you first need to ensure that
redis-server is running in the background, and you can locally run the
grading service by running this script:
https://github.com/crowdAI/osim-rl/blob/master/osim/redis/service.py

The client and the grading service communicate with each other by
pointing to the same redis server.
"""

"""
Please ensure that `visualize=False`, else there might be unexpected
errors in your submission.
"""
env = ProstheticsEnv(visualize=False)

"""
Define the evaluator endpoint from environment variables.
The grader will pass these env variables when evaluating.
"""
REMOTE_HOST = os.getenv("CROWDAI_EVALUATOR_HOST", "127.0.0.1")
REMOTE_PORT = os.getenv("CROWDAI_EVALUATOR_PORT", 6379)
client = Client(remote_host=REMOTE_HOST, remote_port=REMOTE_PORT)

# Create environment
observation = client.env_create()

"""
The grader runs N simulations of at most 1000 steps each. We stop after
the last one. A new simulation starts when `client.env_step` returns
`done == True`, and all the simulations end when the subsequent
`client.env_reset()` returns False.
"""
while True:
    _action = env.action_space.sample().tolist()
    [observation, reward, done, info] = client.env_step(_action)
    print(observation)
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
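The template above samples random actions; a real submission replaces that sampling with a trained controller. A minimal sketch follows, where the weight matrix is a stand-in for a trained model (it is not part of osim-rl and produces random behavior):

import numpy as np

# Illustrative stand-in for a trained policy: a fixed random linear map
# from observation to muscle activations, squashed into [0, 1].
obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]
W = np.random.uniform(-0.1, 0.1, size=(act_dim, obs_dim))

def policy(observation):
    activations = 1.0 / (1.0 + np.exp(-W.dot(np.asarray(observation))))
    return activations.tolist()

# Inside the grading loop, replace `env.action_space.sample().tolist()` with:
#     _action = policy(observation)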
#!/usr/bin/env python
# Script location : /home/round2_submit.py
import opensim as osim
from osim.redis.client import Client
from osim.env import *
import numpy as np
import argparse
import os
import tensorflow as tf
import json

env = RunEnv(visualize=False)
client = Client()

# TF session that only grabs GPU memory as needed
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

# Restore the trained actor graph from the checkpoint
saver = tf.train.import_meta_graph('/home/model-1480000.ckpt.meta')
saver.restore(sess, "/home/model-1480000.ckpt")
obs = tf.get_default_graph().get_tensor_by_name("observation_for_act:0")
act = tf.get_default_graph().get_tensor_by_name("taking_action/actor_action:0")

num_actions = 18

def preprocess_obs(obs):
    obs = np.array(obs)
    x = obs[1]  # pelvis x coordinate
    y = obs[2]  # pelvis y coordinate
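The excerpt cuts off inside `preprocess_obs`; the rest of the script is not shown. A minimal sketch of how the restored `obs` and `act` tensors would typically be used to act in the grading loop (the preprocessing call and loop structure are assumptions, mirroring the submission template):

# Sketch (assumed continuation): run the restored actor graph to map a
# preprocessed observation to the 18 muscle activations.
def get_action(observation):
    processed = preprocess_obs(observation)  # assumed to return a flat vector
    action = sess.run(act, feed_dict={obs: [processed]})[0]
    return np.clip(action, 0.0, 1.0).tolist()  # activations live in [0, 1]

observation = client.env_create()
while True:
    [observation, reward, done, info] = client.env_step(get_action(observation))
    if done:
        observation = client.env_reset()
        if not observation:
            break
client.submit()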
from osim.redis.client import Client
from osim.env import *
import numpy as np
import argparse
import os

"""
NOTE: For testing your submission scripts, you first need to ensure that
redis-server is running in the background, and you can locally run the
grading service by running this script:
https://github.com/crowdAI/osim-rl/blob/master/osim/redis/service.py

The client and the grading service communicate with each other by
pointing to the same redis server.
"""

"""
Please ensure that `visualize=False`, else there might be unexpected
errors in your submission.
"""
env = RunEnv(visualize=False)
client = Client()

# Create environment
observation = client.env_create()

"""
The grader runs N simulations of at most 1000 steps each. We stop after
the last one. A new simulation starts when `client.env_step` returns
`done == True`, and all the simulations end when the subsequent
`client.env_reset()` returns False.
"""
while True:
    _action = env.action_space.sample().tolist()
    [observation, reward, done, info] = client.env_step(_action)
    print(observation)
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
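To test this locally, start `redis-server` and the grading service from the NOTE above, then run this script. A small variation on the loop that also prints each simulation's return (the accumulator variable is purely illustrative):

# Variation: accumulate per-simulation return so local runs print a summary.
observation = client.env_create()
episode_return = 0.0
while True:
    _action = env.action_space.sample().tolist()
    [observation, reward, done, info] = client.env_step(_action)
    episode_return += reward
    if done:
        print("simulation done, return = {:.2f}".format(episode_return))
        episode_return = 0.0
        observation = client.env_reset()
        if not observation:
            break

client.submit()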
def main(args, extra_args):
    # env and algorithm config; update defaults with extra_args
    if args.load_path:
        extra_args.__dict__.update(
            load_json(
                os.path.join(os.path.dirname(args.load_path), 'config_alg.json')))
        if args.explore:
            extra_args.__dict__.update(
                load_json(
                    os.path.join(os.path.dirname(args.load_path), 'config_exp.json')))
    env_args = get_env_config(args.env, extra_args.__dict__)
    alg_args = get_alg_config(args.alg, args.env, extra_args.__dict__)
    expl_args = None
    if args.explore:
        expl_args = getattr(import_module('algs.explore'), 'defaults')(args.explore)
        expl_args.update(
            {k: v for k, v in extra_args.__dict__.items() if k in expl_args})

    # mpi config
    args.rank = 0 if MPI is None else MPI.COMM_WORLD.Get_rank()
    args.world_size = 1 if MPI is None else MPI.COMM_WORLD.Get_size()

    # logging config
    if args.load_path:
        args.output_dir = os.path.dirname(args.load_path)
    if not args.output_dir:  # if not given, use results/file_name/time_stamp
        logdir = args.exp_name + '_' + args.env + '_' + time.strftime(
            "%Y-%m-%d_%H-%M")
        args.output_dir = os.path.join('results', logdir)
    if args.rank == 0:
        os.makedirs(args.output_dir)

    # build environment
    env = build_env(args, env_args)

    # build exploration module and defaults
    exploration = None
    if args.explore:
        exploration = getattr(import_module('algs.explore'), args.explore)
        exploration = exploration(env.observation_space.shape,
                                  env.action_space.shape, **expl_args)

    # init session
    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               inter_op_parallelism_threads=1,
                               intra_op_parallelism_threads=1)
    tf_config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=tf_config)

    # print and save all configs
    if args.rank == 0:
        print_and_save_config(args, env_args, alg_args, expl_args)

    # build and train agent
    learn = getattr(import_module('algs.' + args.alg), 'learn')
    agent = learn(env, exploration, args.seed, args.n_total_steps,
                  args.max_episode_length, alg_args, args)

    if args.play:
        if env_args:
            env_args['visualize'] = True
        env = make_single_env(args.env, args.rank, args.n_env + 100, args.seed,
                              env_args, args.output_dir)
        obs = env.reset()
        episode_rewards = 0
        episode_steps = 0
        while True:
            # if episode_steps % 5 == 0: i = input('press key to continue ...')
            action = agent.get_actions(obs)  # (n_samples, batch_size, action_dim)
            action = exploration.select_best_action(np.atleast_2d(obs), action)
            next_obs, rew, done, info = env.step(action.flatten())
            r_bonus = exploration.get_exploration_bonus(
                np.atleast_2d(obs), action, np.atleast_2d(next_obs)).squeeze()
            episode_rewards += rew
            episode_steps += 1
            # print('q value: {:.4f}; reward: {:.2f}; aug_rewards: {:.2f}; bonus: {:.2f}; reward so far: {:.2f}'.format(
            #     agent.get_action_value(np.atleast_2d(obs), action).squeeze(),
            #     rew, info.get('rewards', 0), r_bonus, episode_rewards))
            obs = next_obs
            env.render()
            if done:
                print('Episode length {}; cumulative reward: {:.2f}'.format(
                    episode_steps, episode_rewards))
                episode_rewards = 0
                episode_steps = 0
                i = input('enter random seed: ')
                obs = env.reset(seed=int(i) if i != '' else None)

    if args.submission:
        import opensim as osim
        from osim.redis.client import Client

        REMOTE_HOST = os.getenv("AICROWD_EVALUATOR_HOST", "127.0.0.1")
        REMOTE_PORT = os.getenv("AICROWD_EVALUATOR_PORT", 6379)
        client = Client(remote_host=REMOTE_HOST, remote_port=REMOTE_PORT)

        # wrap the grading client in the same transforms used during training
        env = L2M2019ClientWrapper(client)
        env = ActionAugEnv(env)
        env = PoolVTgtEnv(env, **env_args)
        env = SkipEnv(env)
        env = Obs2VecEnv(env)

        obs = env.create()
        while True:
            action = agent.get_actions(obs)
            action = exploration.select_best_action(np.atleast_2d(obs), action)
            next_obs, rew, done, _ = env.step(action.flatten())
            obs = next_obs
            if done:
                obs = env.reset()
                if not obs:
                    break
        env.submit()

    return agent
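`L2M2019ClientWrapper` and the other wrappers are defined elsewhere in the repository. A minimal sketch of what the client wrapper plausibly looks like, adapting the redis `Client` calls from the templates above to the `create`/`step`/`reset`/`submit` interface used here (the class body is an assumption; only the `Client` methods come from osim-rl):

class L2M2019ClientWrapper(object):
    """Hypothetical sketch: expose the grading Client through the same
    gym-style interface the training wrappers expect."""

    def __init__(self, client):
        self.client = client

    def create(self):
        return self.client.env_create()

    def step(self, action):
        # The redis client expects plain lists, not numpy arrays.
        action = action.tolist() if hasattr(action, 'tolist') else list(action)
        return self.client.env_step(action)

    def reset(self):
        # Returns False once the grader has no more simulations to run.
        return self.client.env_reset()

    def submit(self):
        self.client.submit()

Layering the same wrappers (`ActionAugEnv`, `PoolVTgtEnv`, `SkipEnv`, `Obs2VecEnv`) over the client means the submission sees exactly the observation and action transforms the agent was trained with.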