Example #1
 def __init__(self, hyperparams, policy, sess, actions):
     self.hyperparams = hyperparams
     self.sess = sess
     # Build a frozen (non-trainable) copy of the policy network on the CPU.
     with tf.device('/cpu:0'):
         self.policy = policy.create_policy(trainable=False)
     # Pre-TF-1.0 initializer; tf.global_variables_initializer() in later releases.
     self.sess.run(tf.initialize_all_variables())
     self.input_vars = get_input_vars(self.policy)
     self.y_hat = get_output(self.policy)
     # Experience/episode buffers and epsilon-greedy exploration state.
     self.experience_cache = []
     self.episode_cache = []
     self.prepare_epsilon()
     self.training = True
     self.actions = actions
     self.greedy_ind = None
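This constructor (and the nearly identical one in the next example) follows the TF 1.x graph-mode pattern of building a frozen, non-trainable copy of a network under tf.device('/cpu:0'), initializing its variables through the session, and keeping handles to the network's inputs and output. The sketch below is a minimal, self-contained illustration of that pattern only; the create_net helper, layer shapes, and names are assumptions, not taken from these examples.

 import tensorflow as tf

 def create_net(trainable=True):
     # Illustrative linear Q-network: 4-dimensional state in, 2 action values out.
     x = tf.placeholder(tf.float32, shape=[None, 4], name='state')
     w = tf.Variable(tf.random_normal([4, 2]), trainable=trainable, name='w')
     b = tf.Variable(tf.zeros([2]), trainable=trainable, name='b')
     return x, tf.matmul(x, w) + b

 # Frozen evaluation copy pinned to the CPU, as in the constructor above.
 with tf.device('/cpu:0'):
     input_var, y_hat = create_net(trainable=False)

 with tf.Session() as sess:
     # tf.initialize_all_variables() is the pre-1.0 spelling of this op.
     sess.run(tf.global_variables_initializer())
     print(sess.run(y_hat, feed_dict={input_var: [[0.0, 0.0, 0.0, 0.0]]}))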
Example #2
 def __init__(self, hyperparams, q_model, sess, actions):
     self.hyperparams = hyperparams
     self.sess = sess
     with tf.device('/cpu:0'):
         self.net = q_model.create_net(trainable=False)
     self.sess.run(tf.initialize_all_variables())
     self.input_vars = get_input_vars(self.net)
     self.y_hat = get_output(self.net)
     self.experience_cache = []
     # Bounded buffers over the last `num_recent_steps` training / evaluation Q-values.
     self.recent_train_q = deque(
         maxlen=self.hyperparams['num_recent_steps'])
     self.recent_eval_q = deque(maxlen=self.hyperparams['num_recent_steps'])
     self.prepare_epsilon()
     self.training = True
     self.actions = actions
     self.greedy_ind = None
Example #3
 def setup_net(self):
     self.build_net()
     self.input_vars = get_input_vars(self.get_net())
     self.y_hat = get_output(self.get_net())
Example #4
 def build_net(self):
     self.net = self.create_net()
     self.input_vars = get_input_vars(self.net)
     self.target_net = self.create_net(trainable=False)
     self.target_input_vars = get_input_vars(self.target_net)
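The build_net method above creates two copies of the same network: a trainable online net and a frozen target net (trainable=False), which is the usual DQN-style arrangement. The sketch below shows, under assumed variable scopes ('online' / 'target') and an assumed linear Q head, how such a frozen copy is typically refreshed from the trainable one; the sync op itself is not part of these examples.

 import tensorflow as tf

 def make_q_net(scope, trainable):
     # Illustrative shape: 4-dimensional state in, 2 action values out.
     with tf.variable_scope(scope):
         x = tf.placeholder(tf.float32, [None, 4], name='state')
         w = tf.get_variable('w', [4, 2], trainable=trainable)
         b = tf.get_variable('b', [2], initializer=tf.zeros_initializer(),
                             trainable=trainable)
         return x, tf.matmul(x, w) + b

 online_in, online_q = make_q_net('online', trainable=True)   # updated by the optimizer
 target_in, target_q = make_q_net('target', trainable=False)  # frozen copy

 # Copy every online variable into the matching target variable.
 online_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='online')
 target_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target')
 sync_target = tf.group(*[t.assign(o) for o, t in zip(online_vars, target_vars)])

 with tf.Session() as sess:
     sess.run(tf.global_variables_initializer())
     sess.run(sync_target)  # run periodically during training to refresh the target

Because the target copy is created with trainable=False, it is excluded from the default trainable-variables collection, so an optimizer never touches it and only the explicit sync op changes its weights.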
Example #5
 def setup_net(self):
     self.build_net()
     self.policy_y_hat = get_output(self.policy)
     self.value_y_hat = get_output(self.value)
     self.policy_input_vars = get_input_vars(self.policy)
     self.value_input_vars = get_input_vars(self.value)