def __init__(self, hyperparams, policy, sess, actions):
    """Build the policy network, initialise variables and reset agent state.

    Args:
        hyperparams: Configuration object; stored unchanged on the instance.
        policy: Object exposing ``create_policy(trainable=...)``; the
            network it returns is stored as ``self.policy``.
        sess: Session-like object exposing ``run()``; used here to run the
            variable initializer (presumably a ``tf.Session`` -- confirm).
        actions: Stored unchanged on the instance (presumably the set of
            available actions -- confirm against callers).
    """
    self.hyperparams = hyperparams
    self.sess = sess

    # NOTE(review): only network construction is pinned to the CPU here;
    # confirm the initializer op was not meant to share this device scope.
    with tf.device('/cpu:0'):
        self.policy = policy.create_policy(trainable=False)

    # ``tf.initialize_all_variables`` is deprecated in favour of
    # ``tf.global_variables_initializer``.  Prefer the newer name when this
    # TensorFlow build provides it and fall back to the old one otherwise,
    # so older installations keep working exactly as before.
    make_init_op = getattr(tf, 'global_variables_initializer', None)
    if make_init_op is None:
        make_init_op = tf.initialize_all_variables
    self.sess.run(make_init_op())

    self.input_vars = get_input_vars(self.policy)
    self.y_hat = get_output(self.policy)

    # Both caches start out empty.
    self.experience_cache = []
    self.episode_cache = []

    # prepare_epsilon() runs before the remaining attributes are assigned,
    # matching the original statement order.
    self.prepare_epsilon()
    self.training = True
    self.actions = actions
    self.greedy_ind = None
def __init__(self, hyperparams, q_model, sess, actions):
    """Build the Q network, initialise variables and reset agent state.

    Args:
        hyperparams: Mapping-like configuration; stored on the instance.
            ``hyperparams['num_recent_steps']`` bounds the two recent-Q
            deques created here.
        q_model: Object exposing ``create_net(trainable=...)``; the network
            it returns is stored as ``self.net``.
        sess: Session-like object exposing ``run()``; used here to run the
            variable initializer (presumably a ``tf.Session`` -- confirm).
        actions: Stored unchanged on the instance (presumably the set of
            available actions -- confirm against callers).
    """
    self.hyperparams = hyperparams
    self.sess = sess

    # NOTE(review): only network construction is pinned to the CPU here;
    # confirm the initializer op was not meant to share this device scope.
    with tf.device('/cpu:0'):
        self.net = q_model.create_net(trainable=False)

    # ``tf.initialize_all_variables`` is deprecated in favour of
    # ``tf.global_variables_initializer``.  Prefer the newer name when this
    # TensorFlow build provides it and fall back to the old one otherwise,
    # so older installations keep working exactly as before.
    make_init_op = getattr(tf, 'global_variables_initializer', None)
    if make_init_op is None:
        make_init_op = tf.initialize_all_variables
    self.sess.run(make_init_op())

    self.input_vars = get_input_vars(self.net)
    self.y_hat = get_output(self.net)

    self.experience_cache = []

    # Bounded deques: once full, appending drops the oldest entry.
    window = self.hyperparams['num_recent_steps']
    self.recent_train_q = deque(maxlen=window)
    self.recent_eval_q = deque(maxlen=window)

    # prepare_epsilon() runs before the remaining attributes are assigned,
    # matching the original statement order.
    self.prepare_epsilon()
    self.training = True
    self.actions = actions
    self.greedy_ind = None
def __init__(self, hyperparams, q_model, sess, actions):
    """Build the Q network, initialise variables and reset agent state.

    Args:
        hyperparams: Mapping-like configuration; stored on the instance.
            ``hyperparams['num_recent_steps']`` bounds the two recent-Q
            deques created here.
        q_model: Object exposing ``create_net(trainable=...)``; the network
            it returns is stored as ``self.net``.
        sess: Session-like object exposing ``run()``; used here to run the
            variable initializer (presumably a ``tf.Session`` -- confirm).
        actions: Stored unchanged on the instance (presumably the set of
            available actions -- confirm against callers).
    """
    self.hyperparams = hyperparams
    self.sess = sess

    # NOTE(review): only network construction is pinned to the CPU here;
    # confirm the initializer op was not meant to share this device scope.
    with tf.device('/cpu:0'):
        self.net = q_model.create_net(trainable=False)

    # ``tf.initialize_all_variables`` is deprecated in favour of
    # ``tf.global_variables_initializer``.  Prefer the newer name when this
    # TensorFlow build provides it and fall back to the old one otherwise,
    # so older installations keep working exactly as before.
    make_init_op = getattr(tf, 'global_variables_initializer', None)
    if make_init_op is None:
        make_init_op = tf.initialize_all_variables
    self.sess.run(make_init_op())

    self.input_vars = get_input_vars(self.net)
    self.y_hat = get_output(self.net)

    self.experience_cache = []

    # Bounded deques: once full, appending drops the oldest entry.
    window = self.hyperparams['num_recent_steps']
    self.recent_train_q = deque(maxlen=window)
    self.recent_eval_q = deque(maxlen=window)

    # prepare_epsilon() runs before the remaining attributes are assigned,
    # matching the original statement order.
    self.prepare_epsilon()
    self.training = True
    self.actions = actions
    self.greedy_ind = None
def setup_net(self):
    """Build the network, then cache its input variables and output.

    Calls ``build_net()`` first, then stores ``get_input_vars`` and
    ``get_output`` of whatever ``get_net()`` returns as ``self.input_vars``
    and ``self.y_hat`` respectively.
    """
    self.build_net()

    # get_net() is queried separately for each attribute, as in the
    # original, so the call sequence is unchanged.
    self.input_vars = get_input_vars(self.get_net())
    self.y_hat = get_output(self.get_net())
def build_net(self):
    """Create two networks via ``create_net`` and record their input vars.

    The first network is built with ``create_net()``'s defaults and stored
    as ``self.net``; the second is built with ``trainable=False`` and stored
    as ``self.target_net``.  The input variables of each are stored as
    ``self.input_vars`` and ``self.target_input_vars``.
    """
    # Creation order is preserved: default network first, then the
    # trainable=False one.
    primary = self.create_net()
    self.net = primary
    self.input_vars = get_input_vars(primary)

    frozen = self.create_net(trainable=False)
    self.target_net = frozen
    self.target_input_vars = get_input_vars(frozen)
def setup_net(self):
    """Build the networks, then cache policy/value outputs and input vars.

    Calls ``build_net()`` first (which is expected to have set
    ``self.policy`` and ``self.value`` -- confirm), then stores the output
    and the input variables of each network on the instance.
    """
    self.build_net()

    policy_net = self.policy
    value_net = self.value

    # Outputs are resolved before input variables, as in the original.
    self.policy_y_hat = get_output(policy_net)
    self.value_y_hat = get_output(value_net)
    self.policy_input_vars = get_input_vars(policy_net)
    self.value_input_vars = get_input_vars(value_net)