def hidden_to_reward(self, head, config, Collection):
    """Build the reward head on top of a hidden representation.

    Stacks a 256-unit ReLU layer ("r_relu1") and a linear layer
    ("r_linear2", weights named "r_W") on `head`, then registers a
    histogram summary of the result under "<Collection>_summaries".

    Args:
        head: tensor feeding the reward head.
        config: object exposing `action_num`, passed as the size argument
            of the linear layer (presumably one output per action).
        Collection: collection name forwarded to the `cops` layer helpers;
            it is concatenated with "_summaries", so it must be a string.

    Returns:
        The output tensor of the final linear layer.
    """
    hidden = cops.add_relu_layer(
        head,
        size=256,
        layer_name="r_relu1",
        Collection=Collection,
    )

    # The output layer is kept linear: no ReLU is applied after it.
    reward = cops.add_linear_layer(
        hidden,
        config.action_num,
        Collection,
        layer_name="r_linear2",
        weight_name="r_W",
    )

    summaries_key = Collection + "_summaries"
    reward_histogram = tf.histogram_summary("r", reward)
    tf.add_to_collection(summaries_key, reward_histogram)
    return reward
def Q_network(self, input_state, Collection=None):
    """Build a dueling Q-network and return its Q-value tensor.

    The input is scaled by 1/256, passed through a three-layer
    convolutional stack and flattened. Two separate 512-unit ReLU layers
    then feed a single-output linear layer `V` and a
    `self.config.action_num`-output linear layer `A`. The two streams are
    combined as

        Q = A + (V - mean(A over axis 1))

    Scalar summaries are registered for the first row of V, Q and A.

    Args:
        input_state: input tensor (presumably raw image frames with pixel
            values in [0, 255] -- TODO confirm against the caller).
        Collection: collection name forwarded to the `cops` helpers.

    Returns:
        The Q tensor produced by combining the V and A streams.
    """
    # Each tuple is handed to cops.conv_stack unchanged; presumably
    # (filters, kernel size, stride) -- confirm against cops.conv_stack.
    conv_stack_shape = [(32, 8, 4), (64, 4, 2), (64, 3, 1)]

    head = tf.div(input_state, 256., name="normalized_input")
    cops.build_activation_summary(head, Collection)
    head = cops.conv_stack(head, conv_stack_shape, self.config, Collection)
    head = cops.flatten(head)

    # NOTE(review): both ReLU layers rely on add_relu_layer's default
    # layer name -- confirm the helper keeps their variables distinct.
    V_head = cops.add_relu_layer(head, size=512, Collection=Collection)
    V = cops.add_linear_layer(V_head, 1, Collection, layer_name="V")

    A_head = cops.add_relu_layer(head, size=512, Collection=Collection)
    A = cops.add_linear_layer(
        A_head, self.config.action_num, Collection, layer_name="A")

    # tf.reduce_mean already divides by the number of entries along axis 1.
    # The previous code divided by action_num a second time, which shrank
    # the baseline to sum(A) / action_num**2 instead of the mean advantage.
    A_mean = tf.expand_dims(tf.reduce_mean(A, axis=1), axis=1)
    Q = tf.add(A, V - A_mean)

    # Summaries track only the first row (index 0) of each output.
    cops.build_scalar_summary(V[0], Collection, "Q/V_0")
    for i in range(self.config.action_num):
        cops.build_scalar_summary(Q[0, i], Collection, "Q/Q_0_" + str(i))
        cops.build_scalar_summary(A[0, i], Collection, "Q/A_0_" + str(i))
    return Q
def hidden_to_Q(self, head, config, Collection):
    """Build the Q head on top of a hidden representation.

    Adds a 512-unit ReLU layer followed by a linear layer sized by
    `config.action_num`. When the current variable scope has `reuse` set,
    layer names and summary tags get a "_prediction" suffix. One scalar
    summary per action index is registered for the first row of Q; these
    always go to the "Q_summaries" collection, independent of `Collection`.

    Args:
        head: tensor feeding the Q head.
        config: object exposing `action_num`.
        Collection: collection name forwarded to the `cops` layer helpers.

    Returns:
        The output tensor of the final linear layer.
    """
    # Reused scopes get their own layer and summary names.
    suffix = "_prediction" if tf.get_variable_scope().reuse else ""

    hidden = cops.add_relu_layer(
        head,
        size=512,
        Collection=Collection,
        layer_name="final_relu_layer" + suffix,
        weight_name="final_linear_Q_W",
    )
    q_values = cops.add_linear_layer(
        hidden,
        config.action_num,
        Collection,
        layer_name="Q" + suffix,
        weight_name="Q_W",
    )

    tag_prefix = "DQN/action" + suffix + "_"
    for action_idx in range(config.action_num):
        tf.scalar_summary(
            tag_prefix + str(action_idx),
            q_values[0, action_idx],
            collections=["Q_summaries"],
        )
    return q_values
def Q_network(self, input_state, config, Collection=None):
    """Build a plain Q-network and return its Q-value tensor.

    The input is scaled by 1/256, passed through a three-layer
    convolutional stack, flattened, and fed through a 512-unit ReLU layer
    into a linear layer named "Q" sized by `self.num_actions`.

    Args:
        input_state: input tensor (presumably raw image frames -- TODO
            confirm against the caller).
        config: accepted for interface compatibility; not read here.
        Collection: collection name forwarded to the `cops` helpers.

    Returns:
        The output tensor of the final linear layer.
    """
    # NOTE(review): `config` is unused and the output size comes from
    # self.num_actions -- confirm this is intentional.
    # Tuples are handed to cops.conv_stack unchanged; presumably
    # (filters, kernel size, stride) -- confirm against cops.conv_stack.
    conv_layers = [(32, 8, 4), (64, 4, 2), (64, 3, 1)]

    normalized = tf.div(input_state, 256., name='normalized_input')

    # NOTE(review): conv_stack receives Collection as its third positional
    # argument and no config object -- verify against the helper signature.
    features = cops.flatten(cops.conv_stack(normalized, conv_layers, Collection))

    hidden = cops.add_relu_layer(features, size=512, Collection=Collection)
    return cops.add_linear_layer(
        hidden, self.num_actions, Collection, layer_name="Q")
def Q_network(self, input_state, Collection):
    """Build a plain Q-network with per-action summaries.

    The input is scaled by 1/256 (with an activation summary on the scaled
    tensor), passed through a three-layer convolutional stack, flattened,
    and fed through a 512-unit ReLU layer into a linear layer named "Q"
    sized by `self.config.action_num`. A scalar summary is registered for
    every action index of the first row of Q.

    Args:
        input_state: input tensor (presumably raw image frames -- TODO
            confirm against the caller).
        Collection: collection name forwarded to the `cops` helpers.

    Returns:
        The output tensor of the final linear layer.
    """
    # Tuples are handed to cops.conv_stack unchanged; presumably
    # (filters, kernel size, stride) -- confirm against cops.conv_stack.
    conv_layers = [(32, 8, 4), (64, 4, 2), (64, 3, 1)]

    normalized = tf.div(input_state, 256., name="normalized_input")
    cops.build_activation_summary(normalized, Collection)

    conv_out = cops.conv_stack(normalized, conv_layers, self.config, Collection)
    features = cops.flatten(conv_out)
    hidden = cops.add_relu_layer(features, size=512, Collection=Collection)

    action_count = self.config.action_num
    q_values = cops.add_linear_layer(
        hidden, action_count, Collection, layer_name="Q")

    # DQN summary: track the first row (index 0) of Q for each action.
    for action_idx in range(action_count):
        cops.build_scalar_summary(
            q_values[0, action_idx],
            Collection,
            "Q/Q_0_{}".format(action_idx),
        )
    return q_values