Example #1
0
    def __init__(self,
                 model_func,
                 exploration_schedule,
                 obs_shape,
                 input_type,
                 lr=1e-4,
                 buffer_size=1000000,
                 num_actions=6,
                 latent_dim=32,
                 gamma=0.99,
                 knn=4,
                 eval_epsilon=0.01,
                 queue_threshold=5e-5,
                 batch_size=32,
                 tf_writer=None):
        """Create the buffers, settings and contrastive train functions.

        Args:
            model_func: Forwarded unchanged to ``build_train_contrast``.
            exploration_schedule: Stored as ``self.exploration_schedule``.
            obs_shape: Shape of one observation. It is concatenated onto
                ``(buffer_size,)`` to size the replay array, so it must be
                a tuple.
            input_type: Callable invoked as ``input_type(obs_shape,
                name=name)`` to build the observation placeholder.
            lr: Learning rate handed to the Adam optimizer.
            buffer_size: Leading dimension of the replay array; also passed
                to ``PSLearningProcess``.
            num_actions: Stored, and passed to both ``PSLearningProcess``
                and ``build_train_contrast``.
            latent_dim: Stored, and passed to ``PSLearningProcess``.
            gamma: Stored, and passed to both ``PSLearningProcess`` and
                ``build_train_contrast`` (presumably the discount factor --
                confirm against those callees).
            knn: Stored as ``self.knn``; not otherwise used here.
            eval_epsilon: Stored as ``self.eval_epsilon``; not otherwise
                used here.
            queue_threshold: Stored as ``self.queue_threshold``; not
                otherwise used here.
            batch_size: Stored as ``self.batch_size``; not otherwise used
                here.
            tf_writer: Stored as ``self.writer``; may be ``None``.
        """
        # Keep one end of the pipe; the other end goes to PSLearningProcess.
        own_end, remote_end = Pipe()
        self.conn = own_end

        replay_shape = (buffer_size, ) + obs_shape
        self.replay_buffer = np.empty(replay_shape, np.float32)
        self.ec_buffer = PSLearningProcess(num_actions, buffer_size,
                                           latent_dim, obs_shape, remote_end,
                                           gamma)

        # Values copied straight from the constructor arguments.
        self.writer = tf_writer
        self.exploration_schedule = exploration_schedule
        self.eval_epsilon = eval_epsilon
        self.queue_threshold = queue_threshold
        self.batch_size = batch_size
        self.num_actions = num_actions
        self.latent_dim = latent_dim
        self.gamma = gamma
        self.knn = knn

        # Hard-coded settings (their use is outside this constructor).
        self.rmax = 100000
        self.alpha = 1
        self.train_step = 4
        self.burnin = 2000
        self.burnout = 1000000

        # Mutable state, initialised to "nothing seen yet".
        self.obs = None
        self.z = None
        self.h = None
        self.ind = -1
        self.steps = 0
        self.sequence = []

        self.logger = logging.getLogger("ecbp")

        self.loss_type = ["contrast"]

        adam = tf.train.AdamOptimizer(learning_rate=lr, epsilon=1e-4)
        built = build_train_contrast(
            make_obs_ph=lambda name: input_type(obs_shape, name=name),
            model_func=model_func,
            num_actions=num_actions,
            optimizer=adam,
            gamma=gamma,
            grad_norm_clipping=10,
            loss_type=self.loss_type)
        # The builder is expected to return exactly four callables.
        (self.hash_func,
         self.train_func,
         self.eval_func,
         self.norm_func) = built

        # Start the buffer object only after the functions are built.
        self.ec_buffer.start()
    def __init__(self,
                 encoder_func,
                 decoder_func,
                 exploration_schedule,
                 obs_shape,
                 vector_input=True,
                 lr=1e-4,
                 buffer_size=1000000,
                 num_actions=6,
                 latent_dim=32,
                 gamma=0.99,
                 knn=4,
                 eval_epsilon=0.1,
                 queue_threshold=5e-5,
                 batch_size=32,
                 density=True,
                 trainable=True,
                 num_neg=10,
                 tf_writer=None):
        """Create the buffers, settings and attention train functions.

        Args:
            encoder_func: Forwarded unchanged to
                ``build_train_mer_bvae_attention``.
            decoder_func: Forwarded unchanged to
                ``build_train_mer_bvae_attention``.
            exploration_schedule: Stored as ``self.exploration_schedule``.
            obs_shape: Shape of one observation. It is concatenated onto
                ``(buffer_size + 10,)`` to size the replay array, so it
                must be a tuple.
            vector_input: When truthy the replay array is ``float32`` and
                ``U.Float32Input`` is used; otherwise ``uint8`` and
                ``U.Uint8Input``.
            lr: Learning rate handed to the Adam optimizer.
            buffer_size: Passed to ``PSLearningProcess``; the replay array
                gets ``buffer_size + 10`` rows.
            num_actions: Stored, and passed to both ``PSLearningProcess``
                and the graph builder.
            latent_dim: Stored and passed to the graph builder;
                ``PSLearningProcess`` receives ``latent_dim * 2``.
            gamma: Stored, and passed to both ``PSLearningProcess`` and the
                graph builder (presumably the discount factor -- confirm
                against those callees).
            knn: Stored as ``self.knn``; not otherwise used here.
            eval_epsilon: Stored as ``self.eval_epsilon``; not otherwise
                used here.
            queue_threshold: Stored as ``self.queue_threshold``; not
                otherwise used here.
            batch_size: Stored, and passed to the graph builder.
            density: Forwarded to ``PSLearningProcess`` as a keyword.
            trainable: Stored as ``self.trainable``; not otherwise used
                here.
            num_neg: Stored, and passed to the graph builder.
            tf_writer: Stored as ``self.writer``; may be ``None``.
        """
        # Keep one end of the pipe; the other end goes to PSLearningProcess.
        own_end, remote_end = Pipe()
        self.conn = own_end

        if vector_input:
            replay_dtype = np.float32
        else:
            replay_dtype = np.uint8
        replay_shape = (buffer_size + 10, ) + obs_shape
        self.replay_buffer = np.empty(replay_shape, replay_dtype)

        self.ec_buffer = PSLearningProcess(num_actions,
                                           buffer_size,
                                           latent_dim * 2,
                                           obs_shape,
                                           remote_end,
                                           gamma,
                                           density=density)

        # Everything assigned before the self.log() call below is kept ahead
        # of it, since log() is defined elsewhere and may read these.
        self.writer = tf_writer
        self.exploration_schedule = exploration_schedule
        self.queue_threshold = queue_threshold
        self.batch_size = batch_size
        self.num_actions = num_actions
        self.latent_dim = latent_dim
        self.gamma = gamma
        self.knn = knn
        self.rmax = 100000

        self.obs = None
        self.z = None
        self.ind = -1
        self.steps = 0
        self.cur_capacity = 0
        self.sequence = []

        self.logger = logging.getLogger("ecbp")
        self.log("psmp learning agent here")

        # Remaining argument-derived values.
        self.eval_epsilon = eval_epsilon
        self.trainable = trainable
        self.num_neg = num_neg

        # Hard-coded settings (their use is outside this constructor).
        self.alpha = 1
        self.train_step = 4
        self.burnin = 2000
        self.burnout = 10000000000
        self.update_target_freq = 10000
        self.buffer_capacity = 0

        self.loss_type = ["attention"]

        if vector_input:
            input_type = U.Float32Input
        else:
            input_type = U.Uint8Input

        adam = tf.train.AdamOptimizer(learning_rate=lr, epsilon=1e-4)
        built = build_train_mer_bvae_attention(
            input_type=input_type,
            obs_shape=obs_shape,
            encoder_func=encoder_func,
            decoder_func=decoder_func,
            num_actions=num_actions,
            optimizer=adam,
            gamma=gamma,
            grad_norm_clipping=10,
            latent_dim=latent_dim,
            loss_type=self.loss_type,
            batch_size=batch_size,
            num_neg=num_neg,
            c_loss_type="sqmargin",
        )
        # The builder is expected to return exactly nine callables.
        (self.hash_func,
         self.unmask_z_func,
         self.train_func,
         self.eval_func,
         self.norm_func,
         self.attention_func,
         self.value_func,
         self.reconstruct_func,
         self.update_target_func) = built

        self.finds = [0, 0]

        # Start the buffer object only after the functions are built.
        self.ec_buffer.start()