Example no. 1
    def __init__(self, env, configuration=config, logger=None):
        # directory for training outputs
        if not os.path.exists(configuration.output_path):
            os.makedirs(configuration.output_path)

        # store hyper-params
        self.config = configuration
        self.logger = logger
        if logger is None:
            self.logger = get_logger(self.config.log_path)

        self.env = env
        self.action_dim = self.env.action_space[0].n
        self.observation_dim = self.env.observation_space[0].shape[0]

        # create n DDPGActorCritic objects for n agents
        agent_networks = []
        for i in range(env.n):
            agent_networks.append(
                DDPGActorCritic(i,
                                env,
                                configuration=self.config,
                                logger=logger))

        self.agent_networks = agent_networks
Example no. 2
    def __init__(self, config, logger=None):
        """
        Initialize Policy Gradient Class

        Args:
            config: class with hyperparameters
            logger: logger instance from logging module
        """
        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyper-params
        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)

        # discrete action space or continuous action space
        self.discrete = False
        self.observation_dim = 19 + 3
        self.action_dim = 1 + int(self.config.throttle)
        self.learning_rate = self.config.learning_rate

        # build model
        self.build()

        #Enable saving the model
        self.saver = tf.train.Saver()
Example no. 3
  def __init__(self, agent_idx, env, configuration, logger=None):
    self.agent_idx = agent_idx
    self.config = configuration

    # directory for training outputs
    if not os.path.exists(self.config.output_path):
      os.makedirs(self.config.output_path)
            
    # store hyper-params
    self.logger = logger
    if logger is None:
      self.logger = get_logger(self.config.log_path)
    self.env = env
  
    # action space for a given agent - is Discrete(5) for simple_spread
    # NOTE: assumes that all agents have the same action space for now
    # TODO: action_dim as an argument to this function, so it can vary by agent
    
    # TODO: for simple_spread we don't need to worry about communication space;
    # however, for scenarios with communication channels, we will need to revisit this,
    # as action_space seems to be a tuple of movement space and comm space
    self.action_dim = self.env.action_space[0].n

    # observation space for a given agent - is Box(18) for simple_spread
    # NOTE: assumes that all agents have the same observation space for now
    # TODO: observation_dim as an argument to this function, so it can vary by agent
    self.observation_dim = self.env.observation_space[0].shape[0]
    
    self.lr = self.config.learning_rate
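The TODO comments above anticipate scenarios with a communication channel, where the per-agent action space is reportedly a tuple of a movement space and a comm space rather than a single Discrete space. A minimal sketch of how the per-agent dimensions could then be read out (per_agent_action_dims is a hypothetical helper, assuming gym.spaces with Discrete sub-spaces):

import gym

def per_agent_action_dims(space):
    """Hypothetical helper: return (movement_dim, comm_dim) for one agent's action space."""
    if isinstance(space, gym.spaces.Tuple):
        move_space, comm_space = space.spaces
        return move_space.n, comm_space.n
    # simple_spread case: movement only, no communication channel
    return space.n, 0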
Example no. 4
    def __init__(self, env, config, logger=None):
        '''
        Initialize the Policy Gradient class
        Args:
            env: an OpenAI Gym environment, or a custom environment that follows the gym interface
            config: class with hyperparameters
            logger: logger instance from logging module
        '''
        # directory where the model is saved
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)
        self.config = config

        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)

        self.env = env
        # determine whether the environment's action space is discrete or continuous
        self.discrete = isinstance(env.action_space, gym.spaces.Discrete)
        # for CartPole-v0, observation_dim = 4
        # for InvertedPendulum-v2, observation_dim = 4
        self.observation_dim = self.env.observation_space.shape[0]
        # discrete case: the number of actions; continuous case: the action vector dimension
        # for CartPole-v0, action_dim = 2
        # for InvertedPendulum-v2, action_dim = 1
        self.action_dim = (self.env.action_space.n
                           if self.discrete else self.env.action_space.shape[0])
        self.lr = self.config.learning_rate
        # build model
        self.build()
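As a quick sanity check of the discrete/continuous branching above, the following sketch (assuming gym and the CartPole-v0 environment are available) reproduces the dimensions mentioned in the comments:

import gym

env = gym.make("CartPole-v0")
discrete = isinstance(env.action_space, gym.spaces.Discrete)              # True
obs_dim = env.observation_space.shape[0]                                  # 4
act_dim = env.action_space.n if discrete else env.action_space.shape[0]   # 2
print(discrete, obs_dim, act_dim)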
Example no. 5
    def __init__(self, env, config, logger=None, name=None):
        """
        Initialize Q Network and env

        Args:
            config: class with hyperparameters
            logger: logger instance from logging module
        """
        # directory for training outputs
        self.name = name
        self.action_space = 3
        if name is None:
            raise Exception("Must supply network name")
        name = time.strftime("_%m%d_%H%M") + "/" + name

        config.output_path = config.output_path.format(name)
        config.model_output = config.model_output.format(name)
        config.log_path = config.log_path.format(name)
        config.plot_output = config.plot_output.format(name)
        config.record_path = config.record_path.format(name)

        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyper params
        # Customise the config

        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)
        self.env = env

        # build model
        self.build()
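The .format(name) calls above assume each configured path contains a "{}" placeholder that is filled with a timestamped network name. A minimal illustration of that templating (the path template here is hypothetical, since the actual config values are not shown):

import time

name = time.strftime("_%m%d_%H%M") + "/" + "q1"   # e.g. "_0312_1745/q1"
output_path = "results/{}/".format(name)          # hypothetical template
# -> "results/_0312_1745/q1/"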
Example no. 6
    def __init__(self, env, config, logger=None, student=False):
        """
        Initialize Q Network and env

        Args:
            config: class with hyperparameters
            logger: logger instance from logging module
        """
        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyper params
        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)
        self.env = env
        self.student = student

        # build model
        self.build(student=student)

        self.size = sum(v.get_shape().num_elements()
                        for v in tf.trainable_variables())
        if self.student:
            self.size -= self.teachermodel.size
        self.logger.info('Num params: %d' % self.size)
Example no. 7
    def __init__(self, env, config, logger=None):
        """
        Initialize Q Network and env

        Args:
            config: class with hyperparameters
            logger: logger instance from logging module
        """

        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyper params
        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)
        self.env = env

        # double q learning conf
        self.is_double_q = getattr(self.config, 'double_q', False)

        # my flag to control whether to use original code
        # or modified with respect to schedule update
        self.original_schedule = getattr(self.config, 'original_schedule',
                                         True)

        # build model
        self.build()
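The two getattr calls above let older config classes that lack the double_q or original_schedule attributes keep working by falling back to a default. A small illustration with a hypothetical config class:

class MinimalConfig:
    double_q = True            # present, used as-is
    # original_schedule is absent, so getattr falls back to the default below

cfg = MinimalConfig()
print(getattr(cfg, "double_q", False))           # True
print(getattr(cfg, "original_schedule", True))   # True (the default)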
Example no. 8
    def __init__(self, config, logger=None):
        """
        Initialize Q Network and env

        Args:
            config: class with hyperparameters
            logger: logger instance from logging module
        """
        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyper params
        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)
        envs = []
        for env_name in config.env_names:
            env = gym.make(env_name)
            env = wrap_dqn(env)
            env = PreproWrapper(env,
                                prepro=greyscale,
                                shape=(84, 84, 1),
                                overwrite_render=config.overwrite_render)
            envs.append(env)
        self.envs = envs

        self.recon = self.config.recon

        # build model
        self.build()
Example no. 9
    def __init__(self):
        """
        Creates output directories if they don't exist and load vocabulary
        Defines attributes that depends on the vocab.
        Look for the __init__ comments in the class attributes
        """
        # check that the reload directory exists
        if self.dir_reload is not None and not os.path.exists(self.dir_reload):
            print("Weights directory not found ({})".format(self.dir_reload))
            self.dir_reload = None

        # directory for training outputs
        if not os.path.exists(self.dir_output):
            os.makedirs(self.dir_output)

        if not os.path.exists(self.model_output):
            os.makedirs(self.model_output)

        if not os.path.exists(self.dir_plots):
            os.makedirs(self.dir_plots)

        # create the results files for answers if they don't exist yet
        with open(self.path_results, "a") as f:
            pass

        with open(self.path_results_final, "a") as f:
            pass

        self.vocab = load_vocab(self.path_vocab)
        self.vocab_size = len(self.vocab)
        self.attn_cell_config["num_proj"] = self.vocab_size
        self.id_PAD = self.vocab[PAD]
        self.id_END = self.vocab[END]
        self.logger = get_logger(self.path_log)
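The two empty "with open(..., 'a')" blocks above only create the answer files if they are missing, without truncating existing content. An equivalent, arguably clearer sketch using pathlib (ensure_file is a hypothetical helper, not part of this codebase):

from pathlib import Path

def ensure_file(path):
    """Create an empty file if it does not exist; leave existing contents untouched."""
    Path(path).touch(exist_ok=True)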
Example no. 10
    def __init__(self, env, config, logger=None):

        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        self.config = config
        self.logger = logger
        self.batch_counter = 0
        self.seed = None
        if logger is None:
            self.logger = get_logger(config.log_path)
        self.env = env

        self.discrete = isinstance(env.action_space, gym.spaces.Discrete)

        if str(config.env_name).startswith("Fourrooms"):
            self.observation_dim = 1
        else:
            self.observation_dim = self.env.observation_space.shape[0]

        self.action_dim = self.env.action_space.n if self.discrete else \
            self.env.action_space.shape[0]

        self.lr = self.config.learning_rate

        self.build()
Example no. 11
    def __init__(self, env, config, logger=None):
        """
    Initialize Policy Gradient Class

    Args:
            env: an OpenAI Gym environment
            config: class with hyperparameters
            logger: logger instance from the logging module

    You do not need to implement anything in this function. However,
    you will need to use self.discrete, self.observation_dim,
    self.action_dim, and self.lr in other methods.

    """
        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyperparameters
        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)
        self.env = env

        # discrete vs continuous action space
        self.discrete = isinstance(env.action_space, gym.spaces.Discrete)
        self.observation_dim = self.env.observation_space.shape[0]
        self.action_dim = (self.env.action_space.n
                           if self.discrete else self.env.action_space.shape[0])

        self.lr = self.config.learning_rate

        # build model
        self.build()
Example no. 12
    def __init__(self):
        self.lr = 5e-2
        self.controller_cells = 64
        self.batch_size = 500
        self.num_batches = 500
        self.observation_dim = 1000
        self.action_dim = 1000
        self.num_layers = 2

        self.action_buffer = []
        self.state_buffer = []
        self.logprob_buffer = []
        self._dict = {}
        self._used_dict = {}
        self.log_acc = []
        self.logger = get_logger('./log.txt')
        self.baseline = -1000.0

        self._num_used_models = []

        #self._initial_baseline =
        '''
    with open('./action_average_reward_dict.json', 'r') as f:
      self._raw_dict = json.load(f)
    temp_map = {30:0,  60:1, 100:2, 144:3}
    for key in self._raw_dict.keys():
      actions = [temp_map[int(a)] for a in key[1:-1].split(',')]
      temp = str(actions).replace(",","")
      accuracy = float(self._raw_dict[key]) / 10000
      self._dict[temp] = accuracy
      self._used_dict[temp] = 0
    '''
        self._dict = self.build_reward_function()
        self._used_dict = np.zeros_like(self._dict)
        self.build()
Example no. 13
    def __init__(self, config: dict, dirs: dict, device):
        self.parallel = isinstance(device, list)
        self.config = config
        self.logger = get_logger(self.config["general"]["project_name"])
        self.is_debug = self.config["general"]["debug"]
        if self.is_debug:
            self.logger.info("Running in debug mode")

        if self.parallel:
            self.device = torch.device(
                f"cuda:{device[0]}" if torch.cuda.is_available() else "cpu")
            self.all_devices = device
            self.logger.info("Running experiment on multiple gpus!")
        else:
            self.device = device
            self.all_devices = [device]
        self.dirs = dirs
        if torch.cuda.is_available():
            os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([
                str(dev.index if self.parallel else dev)
                for dev in self.all_devices
            ])

        if self.config["general"]["restart"]:
            self.logger.info(
                f'Resume training run with name "{self.config["general"]["project_name"]}" on device(s) {self.all_devices}'
            )
        else:
            self.logger.info(
                f'Start new training run with name "{self.config["general"]["project_name"]}" on device(s) {self.all_devices}'
            )

        ########## seed setting ##########
        torch.manual_seed(self.config["general"]["seed"])
        torch.cuda.manual_seed(self.config["general"]["seed"])
        np.random.seed(self.config["general"]["seed"])
        # random.seed(opt.seed)
        torch.backends.cudnn.deterministic = True
        torch.manual_seed(self.config["general"]["seed"])
        rng = np.random.RandomState(self.config["general"]["seed"])

        if self.config["general"]["mode"] == "train":

            project = "visual_poking_unsupervised"
            wandb.init(
                dir=self.dirs["log"],
                project=project,
                name=self.config["general"]["project_name"],
                group=self.config["general"]["experiment"],
            )

            # log parameters
            self.logger.info("Training parameters:")
            for key in self.config:
                if key != "testing":
                    self.logger.info(
                        f"{key}: {self.config[key]}")  # print to console
                wandb.config.update({key:
                                     self.config[key]})  # update wandb config
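The seeding block above seeds torch (twice), CUDA, and NumPy, and sets cuDNN to deterministic mode. A compact helper covering the same sources of randomness (a sketch, not the project's actual utility) could look like this:

import numpy as np
import torch

def set_global_seed(seed):
    """Seed torch (CPU and all CUDA devices) and NumPy, and make cuDNN deterministic."""
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True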
Example no. 14
  def __init__(self):
    self.lr = 10e-2
    self.batch_size = 64
    self.controller_cells = 64
    self.num_iterations = 100
    self.observation_dim = 4
    self.action_dim_1 = 4
    self.action_dim_2 = 2
    self.action_dim_3 = 2
    self.num_layers = 3
    self.num_actions_per_layer = 3

    self.hasConstraint = True
    self.hardConstraint = True
    self.reg_weight = 1e-5
    self.reg_op = 1e-8
    self.weight_limit = 8000
    self.op_limit = 1e8

    self.temp1 = []
    self.temp2 = []

    self.action_buffer = []
    self.state_buffer = []
    self.logprob_buffer = []
    self._dict = {}
    self._used_dict = {}
    self.log_acc = []
    self.logger = get_logger('./log.txt')

    self._num_used_models = []

    self._initial_baseline = 0.05

    #with open('./unormdata.json', 'r') as f:
    with open('./normalizedata.json', 'r') as f:
      self._raw_dict = json.load(f)
    filter_nums_map = {10:0, 50:1, 100:2, 200:3}
    kernel_sizes_map = {3:0, 5:1}
    strides_map = {1:0, 2:1}
    for key in self._raw_dict.keys():
      params = key[1:-1].split(',')
      temp = []
      for i in range(9):
        if i%3 == 0: temp.append(filter_nums_map[int(params[i])])
        elif i%3 == 1: temp.append(kernel_sizes_map[int(params[i])])
        else: temp.append(strides_map[int(params[i])])

      self._dict[str(temp)] = np.mean(self._raw_dict[key])
      self._used_dict[str(temp)] = 0
    self.build()
Example no. 15
def stats(cfg_dict):

    logger = get_logger("stats_calculation")

    cfg_dict['data']['normalize_flows'] = True

    transforms = tt.Compose(
        [tt.ToTensor(), tt.Lambda(lambda x: (x * 2.0) - 1.0)])

    datakeys = ["flow", "images"]

    dataset, _ = get_dataset(config=cfg_dict["data"])
    test_dataset = dataset(transforms, datakeys, cfg_dict["data"], train=True)

    all_frange_data = []
    for l in tqdm(range(test_dataset.data['flow_paths'].shape[-1])):
        logger.info(
            f'Calculating stats for lag of {(l+1) * cfg_dict["flow_delta"]} frames...'
        )
        in_data = [(f, i, l, test_dataset)
                   for f, i in zip(test_dataset.data["flow_paths"][:, l],
                                   test_dataset.data["img_path"])]
        out_data = parallel_data_prefetch(process_flows,
                                          in_data[:100],
                                          n_proc=20,
                                          cpu_intensive=True,
                                          target_data_type="list")
        all_frange_data.append(out_data)
        n_error = np.count_nonzero(out_data[:, 2])

        logger.info(f"While loading the data, {n_error} errors occurred.")

    all_frange_data = np.stack(all_frange_data, axis=-1)

    assert all_frange_data.shape[-1] == test_dataset.datadict[
        'flow_paths'].shape[-1]

    with open(
            path.join(test_dataset.datapath, f"{test_dataset.metafilename}.p"),
            "rb") as f:
        datadict = pickle.load(f)

    #assert out_data.shape[0] == len(datadict["img_path"])

    key = "flow_range"
    name_key = "frange"

    datadict.update({key: all_frange_data})
    with open(
            path.join(test_dataset.datapath,
                      f"{test_dataset.metafilename}_{name_key}.p"), "wb") as f:
        pickle.dump(datadict, f, protocol=pickle.HIGHEST_PROTOCOL)
Example no. 16
    def __init__(self, config, dir_output):
        """Defines self._config

        Args:
            config: (Config instance) class with hyper parameters,
                vocab and embeddings

        """
        self._config = config
        self._dir_output = dir_output
        init_dir(self._dir_output)
        self.logger = get_logger(self._dir_output + "model.log")
        tf.reset_default_graph()  # safeguard if a previous model was defined
Example no. 17
    def __init__(self, env, config):
        name = time.strftime("%m%d_%H%M")

        config.output_path = "./elo_scores/{}/".format(name)
        print("Outputting to ", config.output_path)
        config.log_path = config.output_path + "log.log"
        self.config = config
        self.env = env

        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        self.logger = get_logger(
            config.log_path.format(time.strftime("_%m%d_%H%M")))
Example no. 18
    def __init__(self, env, config):
        self.config = config
        if not os.path.exists(self.config.output_path):
            os.makedirs(self.config.output_path)
        # define the critic network
        self.critic = Critic(S_DIM, A_DIM, self.config.num_units)
        # define the policy (actor) networks
        self.actor = Actor(S_DIM, A_DIM, self.config.num_units)
        self.actor_old = Actor(S_DIM, A_DIM, self.config.num_units)
        # store the environment
        self.env = env
        # logger
        self.logger = get_logger(self.config.log_path)
        # tensorboard: create a SummaryWriter for the training outputs
        self.writer = SummaryWriter(self.config.output_path)
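Example no. 18 keeps a separate actor_old network alongside the current actor; in PPO-style training this "old" policy is periodically synchronized with the current one before new rollouts are collected. A minimal sketch of that synchronization in PyTorch (not this project's actual update code, which is not shown here):

import torch.nn as nn

def sync_old_policy(actor: nn.Module, actor_old: nn.Module) -> None:
    """Copy the current policy weights into the frozen 'old' policy."""
    actor_old.load_state_dict(actor.state_dict())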
Example no. 19
    def __init__(self, agent_idx, env, configuration, logger=None):
        self.agent_idx = agent_idx # the index of this agent
        self.config = configuration

        # directory for training outputs
        if not os.path.exists(self.config.output_path):
            os.makedirs(self.config.output_path)

        # store hyper-params
        self.logger = logger
        if logger is None:
            self.logger = get_logger(self.config.log_path)
        self.env = env

        # action space for a given agent - is Discrete(5) for simple_spread
        # NOTE: assumes that all agents have the same action space for now
        # TODO: action_dim as an argument to this function, so it can vary by agent

        # TODO: for simple_spread we don't need to worry about communication space;
        # however, for scenarios with communication channels, we will need to revisit this,
        # as action_space seems to be a tuple of movement space and comm space
        self.action_dim = self.env.action_space[0].n

        # observation space for a given agent - is Box(18) for simple_spread
        # NOTE: assumes that all agents have the same observation space for now
        # TODO: observation_dim as an argument to this function, so it can vary by agent
        self.observation_dim = self.env.observation_space[0].shape[0]

        self.lr = self.config.learning_rate

        # top level scopes
        self.policy_approx_networks_scope = "policy_approx_networks"
        self.actor_network_scope = "actor_network"
        self.critic_network_scope = "critic_network"

        # Noise to simulate the random process
        # TODO: consider making this an input parameter so we can tweak it?
        self.noise = OrnsteinUhlenbeckActionNoise(
            mu=np.zeros(env.action_space[0].n),
            sigma=0.3,
            theta=0.15,
            dt=1e-2,
            x0=None)

        self.param_noise_stddev = tf.placeholder(tf.float32, shape=(), name='param_noise_stddev')
        self.param_noise = AdaptiveParamNoiseSpec(initial_stddev=0.6, desired_action_stddev=0.2)

        self.t = 0
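For reference, the Ornstein-Uhlenbeck noise created above is a mean-reverting random process; one discrete update step with the same parameters (theta=0.15, sigma=0.3, dt=1e-2) would look like the following sketch, which is independent of the OrnsteinUhlenbeckActionNoise implementation used here:

import numpy as np

def ou_step(x, mu, theta=0.15, sigma=0.3, dt=1e-2):
    """One Euler-Maruyama step of an Ornstein-Uhlenbeck process (x, mu: numpy arrays)."""
    return x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.normal(size=x.shape)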
Example no. 20
    def __init__(self, env, config, logger=None):
        """
    Initialize Policy Gradient Class

    Args:
            env: an OpenAI Gym environment
            config: class with hyperparameters
            use_mask (read from config): at train time, omit velocity features from the state
            logger: logger instance from the logging module

    You do not need to implement anything in this function. However,
    you will need to use self.discrete, self.observation_dim,
    self.action_dim, and self.lr in other methods.

    """
        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyperparameters
        self.config = config
        if self.config.use_mask:
            print('Using mask...')
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)
        self.env = env

        # discrete vs continuous action space
        self.discrete = isinstance(env.action_space, gym.spaces.Discrete)
        self.observation_dim = get_obs_dims(self.config.env_name,
                                            self.config.use_mask)
        self.action_dim = (self.env.action_space.n
                           if self.discrete else self.env.action_space.shape[0])
        self.lr = self.config.learning_rate

        # for milestone: capture raw tuple embedding
        self.memory_dim = 6  #self.observation_dim * 2 + self.action_dim + 1 + 1 # (s, a, r, s', done_mask)
        self.replay_buffer = ReplayBuffer(self.config.memory_len + 1,
                                          1,
                                          action_dim=self.action_dim)
        self.percolated_buffer = ReplayBuffer(self.config.percolate_len + 1,
                                              1,
                                              action_dim=self.action_dim)

        # build model
        self.build()
Example no. 21
  def __init__(self, env, record_env, network, FLAGS, logger=None):
    # Directory for training outputs
    if not os.path.exists(FLAGS.output_path):
      os.makedirs(FLAGS.output_path)

    # Store hyper params
    self.FLAGS       = FLAGS
    self.env         = env
    self.record_env  = record_env
    self.network     = network
    self.summary     = Summary()

    # Setup Logger
    if logger is None:
      self.logger = get_logger(FLAGS.log_path)
    else:
      self.logger = logger

    # Create network
    self.network.build()
Example no. 22
    def __init__(self, model_0, model_1, env, config):
        self.model_0 = model_0
        self.model_1 = model_1
        self.env = env
        self.config = config
        name = time.strftime("_%m%d_%H%M")

        config.output_path = config.output_path.format(name)
        config.model_output = config.model_output.format(name)
        config.log_path = config.log_path.format(name)
        config.plot_output = config.plot_output.format(name)
        config.record_path = config.record_path.format(name)

        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        self.logger = get_logger(
            config.log_path.format(time.strftime("_%m%d_%H%M")))
Example no. 23
  def __init__(self, env, configuration=config, logger=None):
    # directory for training outputs
    if not os.path.exists(configuration.output_path):
      os.makedirs(configuration.output_path)

    # store hyper-params
    self.config = configuration
    self.logger = logger
    if logger is None:
      self.logger = get_logger(self.config.log_path)

    self.env = env

    # create n PG objects for n agents
    temp = []
    for i in range(self.env.n):
      temp.append(PG(i, self.env, configuration=self.config, logger=logger))
    self.agents = temp
Example no. 24
def train():
    #s, _, loss, y = autoencoder()
    s, _, loss, y, recon_loss, KL = vae()
    train_op, grad_norm = add_optimizer_op(loss)

    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)
    logger = get_logger(config.log_path)
    
    train_data, eval_data = load_data() 

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    for i in range(0, config.epoch_num):
        # each epoch

        #train
        prog = Progbar(target=1 + int(len(train_data) / config.batch_size)) 
        step = 1
        for batch in minibatches(train_data, config.batch_size):
            loss_eval, grad_norm_eval, y_train, _, recon_loss_train, KL_train = sess.run([loss, grad_norm, y, train_op, recon_loss, KL], feed_dict={s: batch})
            #prog.update(step, [("train loss", loss_eval), ("grad norm", grad_norm_eval)])
            prog.update(step, [("train loss", loss_eval), ("grad norm", grad_norm_eval), ('recon loss', recon_loss_train), ('VLBO', KL_train)])
            step += 1
        # save a reconstruction from the last training batch of this epoch
        plt.imshow(y_train[0, :, :, 0], cmap='Greys')
        plt.savefig('y.png')

        #eval
        #prog = Progbar(target=1 + int(len(eval_data) / config.batch_size)) 
        #step = 1
        #losses = []
        #for batch in minibatches(eval_data, config.batch_size):
        #    loss_eval = sess.run(loss, feed_dict={s: batch})
        #    prog.update(step, [("eval loss", loss_eval)])
        #    losses.append(loss_eval)
        #    step += 1
        #avg_loss = np.mean(losses)
        #sigma_loss = np.sqrt(np.var(losses) / len(losses))
        #print ""
        #msg = "Average loss: {:04.2f} +/- {:04.2f}".format(avg_loss, sigma_loss)
        #logger.info(msg)

        save(sess)
Example no. 25
    def __init__(self,
                 env,
                 config,
                 parent_scope,
                 q_network_sizes,
                 logger=None,
                 student=False):
        """
        Initialize Q Network and env

        Args:
            env: environment
            config: class with hyperparameters
            parent_scope: parent scope under which this model is constructed
            logger: logger instance from logging module
        """
        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyper params
        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(self.config.log_path)
        self.env = env
        self.student = student
        self.parent_scope = parent_scope
        self.q_network_sizes = q_network_sizes

        # log the config
        self.logger.info('CONFIG VARIABLES:')
        self.logger.info(self.config.get_config())

        # build model
        self.build(student=student)

        self.size = sum(v.get_shape().num_elements()
                        for v in tf.trainable_variables(self.parent_scope))
        # subtracting the size of the teacher models is no longer necessary
        # because tf.trainable_variables is using the scope of this student model only
        # if self.student:
        #     self.size -= sum([teachermodel.size for teachermodel in self.teachermodels])
        self.logger.info('Num params: %d' % self.size)
Example no. 26
    def __init__(self, env, config, logger=None):
        """
        Initialize Q Network and env

        Args:
            config: class with hyperparameters
            logger: logger instance from logging module
        """
        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyper params
        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)
        self.env = env
        self.build()
Example no. 27
    def __init__(self, env, config, run=0, logger=None):
        """
        Initialize Policy Gradient Class

        Args:
                env: an OpenAI Gym environment
                config: class with hyperparameters
                logger: logger instance from the logging module

        """
        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        # store hyperparameters
        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)
        self.env = env

        # discrete vs continuous action space
        self.discrete = isinstance(env.action_space, gym.spaces.Discrete)
        self.observation_shape = self.env.observation_space.shape
        self.observation_dim = self.env.observation_space.shape[0]
        self.action_dim = (self.env.action_space.n
                           if self.discrete else self.env.action_space.shape[0])

        self.actor_lr = self.config.learning_rate
        self.critic_lr = self.config.critic_learning_rate
        self.tau = self.config.target_update_weight
        self.gamma = self.config.gamma

        if not self.discrete:
            self.action_high = float(self.env.action_space.high[0])
            self.action_low = float(self.env.action_space.low[0])
            self.obs_high = self.env.observation_space.high
            self.obs_low = self.env.observation_space.low

        self.agent_name = "pg"
        self.run = run
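The continuous-action branch above reads scalar action bounds and the observation bounds from the environment. A quick check of those values (a sketch, assuming gym and the Pendulum-v0 environment are available):

import gym

env = gym.make("Pendulum-v0")
print(env.action_space.high[0], env.action_space.low[0])  # 2.0 -2.0
print(env.observation_space.high)                          # [1. 1. 8.]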
Example no. 28
    def __init__(self):
        self.lr = 5e-2
        self.batch_size = 500
        self.controller_cells = 128
        self.num_iterations = 5000
        self.observation_dim = 100
        self.action_dim_1 = 1
        self.action_dim_2 = 2
        self.action_dim_3 = 2
        self.num_layers = 3
        self.num_actions_per_layer = 3

        self.hasConstraint = False
        self.hardConstraint = False
        self.reg_weight = 1e-5
        self.reg_op = 1e-8
        self.weight_limit = 8000
        self.op_limit = 1e8

        self.temp1 = []
        self.temp2 = []

        self.action_buffer = []
        self.state_buffer = []
        self.logprob_buffer = []
        self._dict = {}
        self._used_dict = {}
        self.log_acc = []
        self.logger = get_logger('./log.txt')

        self._num_used_models = []

        self._initial_baseline = 0
        self.max_filter = 100
        self._used_models = []

        with open('./norm_inter_acc.json', 'r') as f:
            self._raw_dict = json.load(f)

        self.build()
Example no. 29
    def __init__(self, env, config, current_graph, logger=None):
        """
        Initialize Q Network and env

        Args:
            config: class with hyperparameters
            logger: logger instance from logging module
        """
        # directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)
            
        # store hyper params
        self.config = config
        self.logger = logger
        if logger is None:
            self.logger = get_logger(config.log_path)
        self.env = env
        self.current_graph = current_graph
        self.state_shape = list([3, 3, len(self.env.state.xmap.item_class_id)+2])
        # build model
        self.build()
Example no. 30
    def __init__(self, env, config, exp_schedule, lr_schedule, is_training_agent, train_from_scratch=False,
                 reward_after_somebody_died=False,
                 logger=None):
        """
        Initialize Q Network and env

        :param env: Game environment
        :param config: config(hyper-parameters) instance
        :param logger: logger instance from logging module
        :param exp_schedule: exploration strategy for epsilon
        :param lr_schedule: schedule for learning rate
        """
        super(DQNAgent, self).__init__()

        # Variables initialized in _build
        self._states = None
        self._actions = None
        self._rewards = None
        self._next_states = None
        self._done_mask = None
        self._learning_rate = None
        self._q_values = None
        self._target_q_values = None
        self._next_q_values = None
        self._update_target_op = None
        self._loss = None
        self._train_op = None
        self._grad_norm = None

        # Variables initialized in init_agent
        self._session = None
        self._avg_reward_placeholder = None
        self._max_reward_placeholder = None
        self._std_reward_placeholder = None
        self._avg_q_placeholder = None
        self._max_q_placeholder = None
        self._std_q_placeholder = None
        # TODO: Commented due to lack of evaluate()
        # self._eval_reward_placeholder = None
        self._merged = None
        self._file_writer = None
        self._saver = None
        self._train_replay_buffer = None
        self._train_rewards = None
        self._train_max_q_values = None
        self._train_q_values = None
        self._avg_reward = None
        self._max_reward = None
        self._std_reward = None
        self._avg_q = None
        self._max_q = None
        self._std_q = None
        # TODO: Commented due to lack of evaluate()
        # self._eval_reward = None
        self._time_step = None
        self._progress_bar = None
        self._has_episode_started = None

        # Variables initialized in act.
        self._last_action = None
        self._last_idx = None
        self._enemy_count = None

        # Directory for training outputs
        if not os.path.exists(config.output_path):
            os.makedirs(config.output_path)

        self._logger = logger
        if logger is None:
            self._logger = get_logger(config.log_path)

        self._config = config
        self._env = env
        self._exp_schedule = exp_schedule
        self._lr_schedule = lr_schedule
        self._is_training_agent = is_training_agent
        self._train_from_scratch = train_from_scratch
        self._reward_after_somebody_died = reward_after_somebody_died
        self._total_reward = 0

        # Build model.
        self._build()