Example #1
    def __init__(self,
                 seed=0,
                 env_name='MultiGoal',
                 mdn_hidden_spec=None,
                 v_epochs=50,
                 v_hdim=32,
                 v_lr=1e-3,
                 p_epochs=30,
                 p_hdim=32,
                 p_lr=3e-4,
                 clip_range=0.2,
                 alpha=40.0,
                 batch_size=128,
                 episode_size=100,
                 nupdates=150,
                 gamma=0.99,
                 max_step=1000):
        # Fix the numpy random seed using the seed argument
        np.random.seed(seed)

        # Set session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
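        # Note: the env_name argument is stored on the instance but not used
        # here; the environment is hard-coded to MultiGoalEnv.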
        env = MultiGoalEnv()

        # Get environment information
        obs_dim = env.observation_space.shape[0]
        act_dim = env.action_space.shape[0]

        # Set network spec
        if mdn_hidden_spec is None:
            mdn_hidden_spec = [{
                'dim': 32,
                'activation': tf.nn.tanh
            }, {
                'dim': 32,
                'activation': tf.nn.tanh
            }]

        # Reset the default Tensorflow graph if needed (left disabled here)
        #tf.reset_default_graph()

        # Gen value network
        value_func = network.Value(sess,
                                   obs_dim,
                                   epochs=v_epochs,
                                   hdim=v_hdim,
                                   lr=v_lr,
                                   seed=seed)

        # Set the Tensorflow graph-level random seed (for fully reproducible
        # variable initialization this should be called before the networks
        # are built)
        tf.set_random_seed(seed)

        # Gen policy function
        policy_func = policy.Policy(sess,
                                    obs_dim,
                                    act_dim,
                                    epochs=p_epochs,
                                    hdim=p_hdim,
                                    lr=p_lr,
                                    clip_range=clip_range,
                                    seed=seed,
                                    alpha=alpha)

        sess.run(tf.global_variables_initializer())

        # Store All Variable to Class
        self.seed = seed
        self.env_name = env_name
        self.mdn_hidden_spec = mdn_hidden_spec

        self.v_epochs = v_epochs
        self.v_hdim = v_hdim
        self.v_lr = v_lr

        self.p_epochs = p_epochs
        self.p_hdim = p_hdim
        self.p_lr = p_lr
        self.clip_range = clip_range
        self.alpha = alpha

        self.batch_size = batch_size
        self.episode_size = episode_size
        self.nupdates = nupdates
        self.gamma = gamma

        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.env = env
        self.value_func = value_func
        self.policy_func = policy_func
        self.sess = sess
        self.config = config
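
A minimal usage sketch for the constructor above. The enclosing class name is
not shown in this example, so PPOAgent below is a placeholder, and the train()
call is an assumption rather than code from the source.

# Usage sketch: PPOAgent and train() are placeholders, not names taken
# from the example above.
agent = PPOAgent(seed=0, env_name='MultiGoal', nupdates=150, batch_size=128)
agent.train()
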
Example #2
            episode_length += 1

            #print('re:', reward)
            score += reward
            observation = observation_
        paths.append(path)

        # Assuming a fixed 200-step episode, convert the summed reward into a
        # per-step average before recording it
        score = score / 200
        score_history.append(score)
        avg_score = np.mean(score_history[-20:])

    env.render_rollouts(paths, fout="test_%d.png" % i)


if __name__ == '__main__':
    env = MultiGoalEnv()
    # print(env.observation_space.shape)
    # print(env.action_space.shape)
    agent = Agent(input_dims=env.observation_space.shape,
                  env=env,
                  n_actions=env.action_space.shape[0])
    n_games = 500

    # uncomment the line below and run mkdir -p tmp/video if you want to
    # record a video of the agent playing the game.
    #env = wrappers.Monitor(env, 'tmp/video', video_callable=lambda episode_id: True, force=True)
    filename = 'inverted_pendulum.png'
    figure_file = 'plots/' + filename

    #print(env.action_space.high)
Example #3
import gym
import numpy as np
import pybulletgym
from SQL_torch import Agent
from gym import wrappers
import math
from multigoal import MultiGoalEnv
import torch as T
from plotter import QFPolicyPlotter

from copy import deepcopy

if __name__ == "__main__":

    env = MultiGoalEnv()

    agent = Agent(env,
                  hidden_dim=[256, 256],
                  replay_size=int(1e6),
                  pi_lr=1e-3,
                  q_lr=1e-3,
                  batch_size=100,
                  n_particles=16,
                  gamma=0.99,
                  polyak=0.995)
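    # In the call above, n_particles is the number of sampled action particles
    # used by the soft Q-learning agent, and polyak is the target-network
    # averaging coefficient for soft target updates.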

    epochs = 100
    update_after = 0
    max_ep_len = 30
    steps_per_epoch = 400
    # Prepare for interaction with environment
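    # Sketch of the interaction loop that typically follows here. The calls
    # agent.get_action(), agent.store_transition(), and agent.update() are
    # assumed method names, not shown in the original; only env.reset() and
    # env.step() are standard gym API.
    total_steps = steps_per_epoch * epochs
    obs, ep_len = env.reset(), 0
    for t in range(total_steps):
        act = agent.get_action(obs)                    # assumed method
        next_obs, reward, done, _ = env.step(act)
        ep_len += 1
        agent.store_transition(obs, act, reward, next_obs, done)  # assumed
        obs = next_obs
        if done or ep_len == max_ep_len:
            obs, ep_len = env.reset(), 0
        if t >= update_after:
            agent.update()                             # assumed method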