def main():
    """Train or evaluate a DDPG agent on the bullet cartpole environment.

    Reads all configuration from the module-level `opts` namespace.
    """
    env = bullet_cartpole.BulletCartpole(
        gui=opts.gui, action_force=opts.action_force,
        max_episode_len=opts.max_episode_len, initial_force=opts.initial_force,
        delay=opts.delay, discrete_actions=False, event_log_file=opts.event_log)

    with tf.Session() as sess:
        agent = DeepDeterministicPolicyGradientAgent(env=env, agent_opts=opts)

        # Decide where checkpoints live: an explicit run id takes precedence
        # over a raw checkpoint directory; neither means no checkpointing.
        if opts.run_id is not None:
            checkpoint_dir = "ckpts/%s" % opts.run_id
        elif opts.ckpt_dir is not None:
            checkpoint_dir = opts.ckpt_dir
        else:
            checkpoint_dir = None

        # SaverUtil either loads the latest checkpoint or initialises variables
        # itself; without one we must initialise variables explicitly.
        saver = None
        if checkpoint_dir is not None:
            saver = util.SaverUtil(sess, checkpoint_dir, opts.ckpt_freq)
        else:
            sess.run(tf.initialize_all_variables())

        # Target networks are wired up only after variables have values
        # (either freshly initialised or restored from a checkpoint).
        agent.hook_up_target_networks(opts.target_update_rate)

        # Evaluation mode takes precedence over training when requested.
        if opts.num_eval > 0:
            agent.run_eval(opts.num_eval)
        else:
            agent.run_training(opts.max_num_actions, opts.batch_size,
                               saver, opts.run_id)
            # NOTE(review): final save placed after training, mirroring the
            # parallel policy-gradient driver — confirm against original layout.
            if saver is not None:
                saver.force_save()
def main():
    """Train or evaluate a likelihood-ratio policy-gradient agent.

    Reads all configuration from the module-level `opts` namespace.
    """
    env = bullet_cartpole.BulletCartpole(
        gui=opts.gui, action_force=opts.action_force,
        max_episode_len=opts.max_episode_len, initial_force=opts.initial_force,
        delay=opts.delay, discrete_actions=True)

    with tf.Session() as sess:
        agent = LikelihoodRatioPolicyGradientAgent(
            env=env, gui=opts.gui, hidden_dim=opts.num_hidden,
            optimiser=tf.train.AdamOptimizer())

        # Checkpoint location: an explicit run id takes precedence over a
        # raw checkpoint directory; neither means no checkpointing at all.
        if opts.run_id is not None:
            checkpoint_dir = "ckpts/%s" % opts.run_id
        elif opts.ckpt_dir is not None:
            checkpoint_dir = opts.ckpt_dir
        else:
            checkpoint_dir = None

        # SaverUtil loads the latest checkpoint, or inits variables if there
        # is none; with no checkpoint dir we initialise explicitly instead.
        saver = None
        if checkpoint_dir is not None:
            saver = util.SaverUtil(sess, checkpoint_dir, opts.ckpt_freq)
        else:
            sess.run(tf.initialize_all_variables())

        # Evaluation mode takes precedence over training when requested.
        if opts.num_eval > 0:
            agent.run_eval(opts.num_eval)
        else:
            agent.run_training(opts.num_train_batches,
                               opts.rollouts_per_batch, saver)
            # NOTE(review): final save inferred to follow training in the
            # else branch — confirm against the original (indentation lost).
            if saver is not None:
                saver.force_save()
type=str, default='discrete', help="either 'discrete' or 'continuous'") bullet_cartpole.add_opts(parser) opts = parser.parse_args() actions = map(int, opts.actions.split(",")) if opts.action_type == 'discrete': discrete_actions = True elif opts.action_type == 'continuous': discrete_actions = False else: raise Exception("Unknown action type [%s]" % opts.action_type) env = bullet_cartpole.BulletCartpole(opts=opts, discrete_actions=discrete_actions) for _ in xrange(opts.num_eval): env.reset() done = False total_reward = 0 steps = 0 while not done: if discrete_actions: action = random.choice(actions) else: action = env.action_space.sample() _state, reward, done, info = env.step(action) steps += 1 total_reward += reward if opts.max_episode_len is not None and steps > opts.max_episode_len:
type=str, default='discrete', help="either 'discrete' or 'continuous'") opts = parser.parse_args() actions = map(int, opts.actions.split(",")) if opts.action_type == 'discrete': discrete_actions = True elif opts.action_type == 'continuous': discrete_actions = False else: raise Exception("Unknown action type [%s]" % opts.action_type) env = bullet_cartpole.BulletCartpole(gui=opts.gui, initial_force=opts.initial_force, discrete_actions=discrete_actions, event_log_file=opts.event_log) for _ in xrange(opts.num_eval): env.reset() done = False steps = 0 while not done: if discrete_actions: action = random.choice(actions) else: action = env.action_space.sample() _state, _reward, done, info = env.step(action) steps += 1 if opts.delay > 0: time.sleep(opts.delay)
type=float, default=50.0, help="magnitude of action push") parser.add_argument('--num-train', type=int, default=100) parser.add_argument('--num-eval', type=int, default=0) parser.add_argument('--load-file', type=str, default=None) parser.add_argument('--save-file', type=str, default=None) parser.add_argument('--delay', type=float, default=0.0) opts = parser.parse_args() print "OPTS", opts ENV_NAME = 'BulletCartpole' # Get the environment and extract the number of actions. env = bullet_cartpole.BulletCartpole(gui=opts.gui, action_force=opts.action_force, initial_force=opts.initial_force, delay=opts.delay) nb_actions = env.action_space.n # Next, we build a very simple model. model = Sequential() model.add(Flatten(input_shape=(1, ) + env.observation_space.shape)) model.add(Dense(32)) model.add(Activation('tanh')) #model.add(Dense(16)) #model.add(Activation('relu')) #model.add(Dense(16)) #model.add(Activation('relu')) model.add(Dense(nb_actions)) model.add(Activation('linear')) print(model.summary())