Python PSLearningProcess.PSLearningProcess Examples

Programming Language: Python

Namespace/Package Name: baselines.ecbp.agents.buffer.ps_learning_process

Method/Function: PSLearningProcess

Examples at hotexamples.com: 2

Python PSLearningProcess.PSLearningProcess - 2 examples found. These are the top-rated real-world Python examples of baselines.ecbp.agents.buffer.ps_learning_process.PSLearningProcess.PSLearningProcess, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

start(5)

PSLearningProcess(2)

Frequently Used Methods

start (5)

PSLearningProcess (2)

Example #1

Show file

    def __init__(self,
                 model_func,
                 exploration_schedule,
                 obs_shape,
                 input_type,
                 lr=1e-4,
                 buffer_size=1000000,
                 num_actions=6,
                 latent_dim=32,
                 gamma=0.99,
                 knn=4,
                 eval_epsilon=0.01,
                 queue_threshold=5e-5,
                 batch_size=32,
                 tf_writer=None):
        """Allocate buffers, store hyper-parameters and build the contrastive functions.

        The constructor creates a pipe, hands one end to a new
        ``PSLearningProcess`` (kept as ``self.ec_buffer``), allocates the
        observation replay buffer, builds the hash/train/eval/norm functions
        with ``build_train_contrast`` and, as its very last step, calls
        ``start()`` on ``self.ec_buffer``.

        Args:
            model_func: Forwarded unchanged to ``build_train_contrast``.
            exploration_schedule: Stored on the instance; not used here.
            obs_shape: Shape of one observation. It is concatenated onto a
                tuple, so it must itself be a tuple.
            input_type: Callable invoked as ``input_type(obs_shape, name=...)``
                to create the observation placeholders.
            lr: Learning rate given to the Adam optimizer.
            buffer_size: Number of observation slots in the replay buffer;
                also passed to ``PSLearningProcess``.
            num_actions: Passed to ``PSLearningProcess`` and to
                ``build_train_contrast``; also stored on the instance.
            latent_dim: Passed to ``PSLearningProcess``; also stored.
            gamma: Passed to ``PSLearningProcess`` and to
                ``build_train_contrast``; also stored (presumably the
                discount factor -- confirm against the callers).
            knn: Stored on the instance; not used here.
            eval_epsilon: Stored on the instance; not used here.
            queue_threshold: Stored on the instance; not used here.
            batch_size: Stored on the instance; not used here.
            tf_writer: Stored as ``self.writer``; may be ``None``.
        """
        # self.conn keeps one end of the pipe, the other end is handed to
        # the PSLearningProcess below.
        local_end, remote_end = Pipe()
        self.conn = local_end

        # One float32 slot per stored observation (contents uninitialised).
        storage_shape = (buffer_size, ) + obs_shape
        self.replay_buffer = np.empty(storage_shape, np.float32)

        self.ec_buffer = PSLearningProcess(num_actions, buffer_size,
                                           latent_dim, obs_shape, remote_end,
                                           gamma)

        # Per-step state, filled in later by other methods.
        self.obs = self.z = self.h = None
        self.ind = -1
        self.writer = tf_writer
        self.sequence = []

        # Hyper-parameters taken from the arguments, interleaved with a few
        # fixed settings.
        self.gamma = gamma
        self.queue_threshold = queue_threshold
        self.num_actions = num_actions
        self.exploration_schedule = exploration_schedule
        self.latent_dim = latent_dim
        self.knn = knn
        self.steps = 0
        self.batch_size = batch_size
        self.rmax = 100000
        self.logger = logging.getLogger("ecbp")
        self.eval_epsilon = eval_epsilon

        # Fixed training settings (not exposed as constructor arguments).
        self.train_step = 4
        self.alpha = 1
        self.burnin = 2000
        self.burnout = 1000000

        self.loss_type = ["contrast"]

        def make_obs_ph(name):
            # Placeholder factory expected by build_train_contrast.
            return input_type(obs_shape, name=name)

        adam = tf.train.AdamOptimizer(learning_rate=lr, epsilon=1e-4)
        built_funcs = build_train_contrast(
            make_obs_ph=make_obs_ph,
            model_func=model_func,
            num_actions=num_actions,
            optimizer=adam,
            gamma=gamma,
            grad_norm_clipping=10,
            loss_type=self.loss_type)
        (self.hash_func, self.train_func, self.eval_func,
         self.norm_func) = built_funcs

        # Started only after everything above has been set up.
        self.ec_buffer.start()

Example #2

Show file

File: mer_bvae_attention_agent.py Project: MouseHu/emdqn

    def __init__(self,
                 encoder_func,
                 decoder_func,
                 exploration_schedule,
                 obs_shape,
                 vector_input=True,
                 lr=1e-4,
                 buffer_size=1000000,
                 num_actions=6,
                 latent_dim=32,
                 gamma=0.99,
                 knn=4,
                 eval_epsilon=0.1,
                 queue_threshold=5e-5,
                 batch_size=32,
                 density=True,
                 trainable=True,
                 num_neg=10,
                 tf_writer=None):
        """Allocate buffers, store hyper-parameters and build the attention functions.

        The constructor creates a pipe, hands one end to a new
        ``PSLearningProcess`` (kept as ``self.ec_buffer``), allocates the
        observation replay buffer, builds the model functions with
        ``build_train_mer_bvae_attention`` and, as its very last step, calls
        ``start()`` on ``self.ec_buffer``.

        Args:
            encoder_func: Forwarded to ``build_train_mer_bvae_attention``.
            decoder_func: Forwarded to ``build_train_mer_bvae_attention``.
            exploration_schedule: Stored on the instance; not used here.
            obs_shape: Shape of one observation. It is concatenated onto a
                tuple, so it must itself be a tuple.
            vector_input: When truthy the replay buffer is ``float32`` and the
                input type is ``U.Float32Input``; otherwise ``uint8`` and
                ``U.Uint8Input``.
            lr: Learning rate given to the Adam optimizer.
            buffer_size: Passed to ``PSLearningProcess``; the replay buffer
                holds ``buffer_size + 10`` observations.
            num_actions: Passed to ``PSLearningProcess`` and to the build
                function; also stored on the instance.
            latent_dim: Passed to the build function and stored;
                ``PSLearningProcess`` receives ``latent_dim * 2``.
            gamma: Passed to ``PSLearningProcess`` and to the build function;
                also stored (presumably the discount factor -- confirm).
            knn: Stored on the instance; not used here.
            eval_epsilon: Stored on the instance; not used here.
            queue_threshold: Stored on the instance; not used here.
            batch_size: Passed to the build function; also stored.
            density: Forwarded to ``PSLearningProcess`` as ``density``.
            trainable: Stored on the instance; not used here.
            num_neg: Passed to the build function; also stored.
            tf_writer: Stored as ``self.writer``; may be ``None``.
        """
        # self.conn keeps one end of the pipe, the other end is handed to
        # the PSLearningProcess below.
        local_end, remote_end = Pipe()
        self.conn = local_end

        # Ten slots more than buffer_size are allocated.
        # NOTE(review): the reason for the extra 10 is not visible here.
        storage_shape = (buffer_size + 10, ) + obs_shape
        if vector_input:
            storage_dtype = np.float32
        else:
            storage_dtype = np.uint8
        self.replay_buffer = np.empty(storage_shape, storage_dtype)

        # NOTE(review): the doubled latent size presumably covers two
        # concatenated latent_dim-sized vectors -- confirm against the model.
        self.ec_buffer = PSLearningProcess(num_actions,
                                           buffer_size,
                                           latent_dim * 2,
                                           obs_shape,
                                           remote_end,
                                           gamma,
                                           density=density)

        # Per-step state, filled in later by other methods.
        self.obs = self.z = None
        self.cur_capacity = 0
        self.ind = -1
        self.writer = tf_writer
        self.sequence = []

        # Hyper-parameters taken from the arguments, interleaved with a few
        # fixed settings.
        self.gamma = gamma
        self.queue_threshold = queue_threshold
        self.num_actions = num_actions
        self.exploration_schedule = exploration_schedule
        self.latent_dim = latent_dim
        self.knn = knn
        self.steps = 0
        self.batch_size = batch_size
        self.rmax = 100000

        # The logger is assigned immediately before the first self.log()
        # call; keep the two together.
        self.logger = logging.getLogger("ecbp")
        self.log("psmp learning agent here")

        self.eval_epsilon = eval_epsilon

        # Fixed training settings (not exposed as constructor arguments).
        self.train_step = 4
        self.alpha = 1
        self.burnin = 2000
        self.burnout = 10000000000
        self.update_target_freq = 10000
        self.buffer_capacity = 0

        self.trainable = trainable
        self.num_neg = num_neg
        self.loss_type = ["attention"]

        if vector_input:
            input_type = U.Float32Input
        else:
            input_type = U.Uint8Input

        adam = tf.train.AdamOptimizer(learning_rate=lr, epsilon=1e-4)
        built_funcs = build_train_mer_bvae_attention(
            input_type=input_type,
            obs_shape=obs_shape,
            encoder_func=encoder_func,
            decoder_func=decoder_func,
            num_actions=num_actions,
            optimizer=adam,
            gamma=gamma,
            grad_norm_clipping=10,
            latent_dim=latent_dim,
            loss_type=self.loss_type,
            batch_size=batch_size,
            num_neg=num_neg,
            c_loss_type="sqmargin",
        )
        # The build function returns nine callables, unpacked in this order.
        (self.hash_func,
         self.unmask_z_func,
         self.train_func,
         self.eval_func,
         self.norm_func,
         self.attention_func,
         self.value_func,
         self.reconstruct_func,
         self.update_target_func) = built_funcs

        self.finds = [0, 0]

        # Started only after everything above has been set up.
        self.ec_buffer.start()