def add_logits_op_train(self):
    """Assemble the ``*_train`` network graph and publish its outputs.

    The graph is: three ReLU conv layers -> flatten -> ``stateful_lstm`` ->
    linear output layer with ``self.n_actions`` units.

    Reads ``self.state``, ``self.cnn_format``, ``self.num_lstm_layers``,
    ``self.lstm_size``, ``self.lstm_state_train`` and ``self.n_actions``.

    Writes:
        self.image_summary: list of the summaries returned by each conv layer.
        self.w: entries ``wc1..wc3`` / ``bc1..bc3`` (conv) and ``wout`` /
            ``bout`` (output layer).
        self.state_output_c, self.state_output_h: ``state[0][0]`` and
            ``state[0][1]`` of the LSTM state returned by ``stateful_lstm``.
        self.q_out: output of the final layer.
        self.q_action: ``tf.argmax`` of ``self.q_out`` along axis 1.
    """
    # For "NHWC" the input is permuted with [0, 2, 3, 1]; presumably
    # self.state is stored channels-first (NCHW) -- TODO confirm.
    if self.cnn_format == "NHWC":
        net = tf.transpose(self.state, [0, 2, 3, 1])
    else:
        net = self.state

    self.image_summary = []

    # (scope_name, summary_tag, weight key, bias key, filters, kernel, stride)
    conv_specs = (
        ("conv1_train", "conv1_out", "wc1", "bc1", 32, [8, 8], [4, 4]),
        ("conv2_train", "conv2_out", "wc2", "bc2", 64, [4, 4], [2, 2]),
        ("conv3_train", "conv3_out", "wc3", "bc3", 64, [3, 3], [1, 1]),
    )
    for scope, tag, w_key, b_key, n_filters, kernel, stride in conv_specs:
        weights, biases, net, conv_summary = conv2d_layer(
            net, n_filters, kernel, stride,
            scope_name=scope,
            summary_tag=tag,
            activation=tf.nn.relu,
            data_format=self.cnn_format)
        self.w[w_key] = weights
        self.w[b_key] = biases
        self.image_summary.append(conv_summary)

    # Collapse the three trailing static dims into one and insert a middle
    # axis of size 1 (presumably a single time step for the LSTM); the
    # leading dim is taken dynamically from the tensor.
    conv_dims = net.get_shape().as_list()
    lstm_input = tf.reshape(
        net,
        [tf.shape(net)[0], 1, conv_dims[1] * conv_dims[2] * conv_dims[3]])

    lstm_out, state = stateful_lstm(
        lstm_input,
        self.num_lstm_layers,
        self.lstm_size,
        (self.lstm_state_train,),
        scope_name="lstm_train")
    # Only the first entry of the returned state is exposed.
    self.state_output_c = state[0][0]
    self.state_output_h = state[0][1]

    # Drop the middle axis again: [dim0, 1, dim2] -> [dim0, dim2].
    lstm_dims = lstm_out.get_shape().as_list()
    lstm_flat = tf.reshape(lstm_out, [tf.shape(lstm_out)[0], lstm_dims[2]])

    # NOTE(review): this unpacks fully_connected_layer as (w, b, out), while
    # wavenet_logits_target unpacks it as (out, w, b) -- confirm which order
    # matches the helper.
    weights, biases, q_values = fully_connected_layer(
        lstm_flat, self.n_actions, scope_name="out_train", activation=None)
    self.w["wout"] = weights
    self.w["bout"] = biases

    self.q_out = q_values
    self.q_action = tf.argmax(self.q_out, axis=1)
def wavenet_logits_target(self):
    """Assemble the ``target`` network graph and publish its outputs.

    The graph is: ``self.get_inputs`` -> width-1 temporal convolution
    (8 units) -> ``lstm_layer`` -> two ReLU time-distributed dense layers
    (128, then 32 units) -> flatten -> ``stateful_lstm`` -> linear output
    layer with ``self.n_actions`` units.

    Writes:
        self.w_target: entries ``wcnn1`` / ``bcnn1``, ``wtf1`` / ``btf1``,
            ``wtf2`` / ``btf2`` and ``wout`` / ``bout``.
        self.state_output_target_c, self.state_output_target_h:
            ``state[0][0]`` and ``state[0][1]`` of the state returned by
            ``stateful_lstm``.
        self.q_target_out: output of the final layer.
        self.q_target_action: ``tf.argmax`` of ``self.q_target_out`` along
            axis 1.
    """
    features = self.get_inputs(
        self.opens_, self.highs_, self.lows_, self.closes_, self.volumes_,
        self.positions_, self.order_prices_, self.current_prices_,
        self.time_since_, self.todays_)

    conv_out, weights, biases = temporal_convolution_layer(
        inputs=features,
        output_units=8,
        convolution_width=1,
        scope='target-CNN-1x1')
    self.w_target["wcnn1"] = weights
    self.w_target["bcnn1"] = biases

    encoded = lstm_layer(
        conv_out, self.lengths, self.lstm_size, scope="series-lstm-target")

    # (scope, units, weight key, bias key) for the dense encoder stack.
    dense_specs = (
        ('target-dense-encode-1', 128, "wtf1", "btf1"),
        ('target-dense-encode-2', 32, "wtf2", "btf2"),
    )
    for scope, units, w_key, b_key in dense_specs:
        encoded, weights, biases = time_distributed_dense_layer(
            encoded, units,
            scope=scope,
            activation=tf.nn.relu,
            reuse=tf.AUTO_REUSE)
        self.w_target[w_key] = weights
        self.w_target[b_key] = biases

    # Merge the two trailing static dims and insert a middle axis of size 1
    # (presumably a single time step for the stateful LSTM). This needs both
    # static dims to be known -- a None entry would fail the multiplication.
    enc_dims = encoded.get_shape().as_list()
    lstm_input = tf.reshape(
        encoded, [tf.shape(encoded)[0], 1, enc_dims[1] * enc_dims[2]])

    lstm_out, state = stateful_lstm(
        lstm_input,
        self.num_lstm_layers,
        self.lstm_size,
        (self.lstm_state_target,),
        scope_name="lstm_target")
    # Only the first entry of the returned state is exposed.
    self.state_output_target_c = state[0][0]
    self.state_output_target_h = state[0][1]

    # Drop the middle axis again: [dim0, 1, dim2] -> [dim0, dim2].
    lstm_dims = lstm_out.get_shape().as_list()
    lstm_flat = tf.reshape(lstm_out, [tf.shape(lstm_out)[0], lstm_dims[2]])

    # NOTE(review): the scope name below duplicates the second dense
    # encoder's scope ('target-dense-encode-2'); add_logits_op_train uses a
    # dedicated "out_train" scope for its output layer. Looks like a
    # copy-paste slip -- left unchanged because renaming would change
    # variable names; confirm before fixing.
    # NOTE(review): this unpacks fully_connected_layer as (out, w, b), while
    # add_logits_op_train unpacks it as (w, b, out) -- confirm which order
    # matches the helper.
    q_values, weights, biases = fully_connected_layer(
        lstm_flat, self.n_actions,
        scope_name='target-dense-encode-2',
        activation=None)
    self.w_target["wout"] = weights
    self.w_target["bout"] = biases

    self.q_target_out = q_values
    self.q_target_action = tf.argmax(self.q_target_out, axis=1)