    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        kernel_size=3,
        n_attn_heads=16,
        dropout=0.9,
    ):
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))

        encoder_embedded = tf.layers.dense(self.X, size_layer)
        encoder_embedded += position_encoding(encoder_embedded)

        e = tf.identity(encoder_embedded)
        for i in range(num_layers):
            dilation_rate = 2**i
            pad_sz = (kernel_size - 1) * dilation_rate
            with tf.variable_scope('block_%d' % i):
                encoder_embedded += cnn_block(encoder_embedded, dilation_rate,
                                              pad_sz, size_layer, kernel_size)

        encoder_output, output_memory = encoder_embedded, encoder_embedded + e
        g = tf.identity(encoder_embedded)

        for i in range(num_layers):
            dilation_rate = 2**i
            pad_sz = (kernel_size - 1) * dilation_rate
            with tf.variable_scope('decode_%d' % i):
                attn_res = h = cnn_block(encoder_embedded, dilation_rate,
                                         pad_sz, size_layer, kernel_size)

            C = []
            for j in range(n_attn_heads):
                h_ = tf.layers.dense(h, size_layer // n_attn_heads)
                g_ = tf.layers.dense(g, size_layer // n_attn_heads)
                zu_ = tf.layers.dense(encoder_output,
                                      size_layer // n_attn_heads)
                ze_ = tf.layers.dense(output_memory,
                                      size_layer // n_attn_heads)

                d = tf.layers.dense(h_, size_layer // n_attn_heads) + g_
                dz = tf.matmul(d, tf.transpose(zu_, [0, 2, 1]))
                a = tf.nn.softmax(dz)
                c_ = tf.matmul(a, ze_)
                C.append(c_)

            c = tf.concat(C, 2)
            h = tf.layers.dense(attn_res + c, size_layer)
            h = tf.nn.dropout(h, keep_prob=dropout)
            encoder_embedded += h

        encoder_embedded = tf.sigmoid(encoder_embedded[-1])
        self.logits = tf.layers.dense(encoder_embedded, output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost)
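The constructor above depends on two helpers, position_encoding and cnn_block, that are defined elsewhere in the source. The sketch below is an assumption about what they might look like, not the original implementation: a sinusoidal positional encoding broadcast over the batch, and a length-preserving causal dilated 1-D convolution so the residual additions keep their shape.

import numpy as np
import tensorflow as tf  # TensorFlow 1.x API, matching the code above


def position_encoding(inputs):
    # Hypothetical sketch: sinusoidal encoding of shape (1, time, dim),
    # broadcastable against (batch, time, dim); assumes dim is even.
    T = tf.shape(inputs)[1]
    dim = inputs.get_shape().as_list()[-1]
    pos = tf.cast(tf.range(T), tf.float32)[:, None]                  # (T, 1)
    i = np.arange(0, dim, 2, dtype=np.float32)                       # (dim/2,)
    denom = np.power(10000.0, i / dim)[None, :]                      # (1, dim/2)
    enc = tf.concat([tf.sin(pos / denom), tf.cos(pos / denom)], 1)   # (T, dim)
    return tf.expand_dims(enc, 0)


def cnn_block(x, dilation_rate, pad_sz, hidden_dim, kernel_size):
    # Hypothetical sketch: causal dilated convolution. Left-padding by
    # pad_sz = (kernel_size - 1) * dilation_rate keeps the output length
    # equal to the input length and avoids looking at future time steps.
    pad = tf.zeros([tf.shape(x)[0], pad_sz, hidden_dim])
    x = tf.layers.conv1d(tf.concat([pad, x], 1),
                         filters=hidden_dim,
                         kernel_size=kernel_size,
                         dilation_rate=dilation_rate,
                         padding='valid')
    return tf.nn.relu(x)
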
    def __init__(self, state_size, window_size, trend, skip):
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        tf.reset_default_graph()
        self.X = tf.placeholder(tf.float32, (None, self.state_size))
        self.Y = tf.placeholder(tf.float32, (None, self.state_size))
        self.ACTION = tf.placeholder(tf.float32, (None))
        self.REWARD = tf.placeholder(tf.float32, (None))
        self.batch_size = tf.shape(self.ACTION)[0]

        with tf.variable_scope('curiosity_model'):
            action = tf.reshape(self.ACTION, (-1, 1))
            state_action = tf.concat([self.X, action], axis=1)
            save_state = tf.identity(self.Y)

            feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
            self.curiosity_logits = tf.layers.dense(feed, self.state_size)
            self.curiosity_cost = tf.reduce_sum(
                tf.square(save_state - self.curiosity_logits), axis=1)

            self.curiosity_optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    tf.reduce_mean(self.curiosity_cost))

        total_reward = tf.add(self.curiosity_cost, self.REWARD)

        with tf.variable_scope("q_model"):
            with tf.variable_scope("eval_net"):
                x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
                self.logits = tf.layers.dense(x_action, self.OUTPUT_SIZE)

            with tf.variable_scope("target_net"):
                y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
                y_q = tf.layers.dense(y_action, self.OUTPUT_SIZE)

            q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
            action = tf.cast(self.ACTION, tf.int32)
            action_indices = tf.stack(
                [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
            q = tf.gather_nd(params=self.logits, indices=action_indices)
            self.cost = tf.losses.mean_squared_error(labels=q_target,
                                                     predictions=q)
            self.optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    self.cost,
                    var_list=tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))

        t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/target_net')
        e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/eval_net')
        self.target_replace_op = [
            tf.assign(t, e) for t, e in zip(t_params, e_params)
        ]

        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
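This constructor wires up the whole graph: a curiosity model whose prediction error is added to the environment reward, an evaluation Q-network, a target Q-network, and an op that copies the former into the latter. A hypothetical usage sketch, assuming the method belongs to a class named Agent whose LEARNING_RATE, GAMMA and OUTPUT_SIZE constants are defined at class level (all names and shapes below are illustrative only):

import numpy as np

agent = Agent(state_size=10, window_size=10, trend=close_prices, skip=1)

# Greedy action for a single state vector.
state = np.random.rand(1, 10).astype(np.float32)
action = int(np.argmax(agent.sess.run(agent.logits, feed_dict={agent.X: state})))

# One training step on a replay batch of (states, next_states, actions, rewards).
cost, _, _ = agent.sess.run(
    [agent.cost, agent.optimizer, agent.curiosity_optimizer],
    feed_dict={agent.X: states, agent.Y: next_states,
               agent.ACTION: actions, agent.REWARD: rewards})

# Periodically sync the target network with the evaluation network.
agent.sess.run(agent.target_replace_op)
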
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        kernel_size=3,
        n_attn_heads=16,
        dropout=0.9,
    ):
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))

        encoder_embedded = tf.layers.dense(self.X, size_layer)

        e = tf.identity(encoder_embedded)
        for i in range(num_layers):
            z = layer(
                encoder_embedded,
                encoder_block,
                kernel_size,
                size_layer * 2,
                encoder_embedded,
            )
            z = tf.nn.dropout(z, keep_prob=dropout)
            encoder_embedded = z

        encoder_output, output_memory = z, z + e
        g = tf.identity(encoder_embedded)

        for i in range(num_layers):
            attn_res = h = layer(
                encoder_embedded,
                decoder_block,
                kernel_size,
                size_layer * 2,
                residual=tf.zeros_like(encoder_embedded),
            )
            C = []
            for j in range(n_attn_heads):
                h_ = tf.layers.dense(h, size_layer // n_attn_heads)
                g_ = tf.layers.dense(g, size_layer // n_attn_heads)
                zu_ = tf.layers.dense(encoder_output,
                                      size_layer // n_attn_heads)
                ze_ = tf.layers.dense(output_memory,
                                      size_layer // n_attn_heads)

                d = tf.layers.dense(h_, size_layer // n_attn_heads) + g_
                dz = tf.matmul(d, tf.transpose(zu_, [0, 2, 1]))
                a = tf.nn.softmax(dz)
                c_ = tf.matmul(a, ze_)
                C.append(c_)

            c = tf.concat(C, 2)
            h = tf.layers.dense(attn_res + c, size_layer)
            h = tf.nn.dropout(h, keep_prob=dropout)
            encoder_embedded = h

        encoder_embedded = tf.sigmoid(encoder_embedded[-1])
        self.logits = tf.layers.dense(encoder_embedded, output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost)
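This variant delegates its convolutions to layer, encoder_block and decoder_block helpers that are not shown here. Reading the call sites, each block produces size_layer * 2 channels that are gated back down to size_layer, which is consistent with a gated-linear-unit design; the sketch below is an assumption about the missing helpers, not the original code, and it assumes an odd kernel width such as the default kernel_size=3.

def glu(x):
    # Gated linear unit: split channels in half, gate one half with the
    # sigmoid of the other; channel count goes from 2n back to n.
    a, b = tf.split(x, 2, axis=2)
    return a * tf.sigmoid(b)


def encoder_block(inp, n_hidden, filter_size):
    # Same-length convolution over time with symmetric zero padding.
    inp = tf.expand_dims(inp, 2)
    pad = (filter_size[0] - 1) // 2
    inp = tf.pad(inp, [[0, 0], [pad, pad], [0, 0], [0, 0]])
    conv = tf.layers.conv2d(inp, n_hidden, filter_size, padding='VALID')
    return tf.squeeze(conv, 2)


def decoder_block(inp, n_hidden, filter_size):
    # Causal convolution over time: pad only on the left.
    inp = tf.expand_dims(inp, 2)
    inp = tf.pad(inp, [[0, 0], [filter_size[0] - 1, 0], [0, 0], [0, 0]])
    conv = tf.layers.conv2d(inp, n_hidden, filter_size, padding='VALID')
    return tf.squeeze(conv, 2)


def layer(inp, conv_block, kernel_width, n_hidden, residual=None):
    # Convolve, gate, and optionally add a residual connection.
    z = conv_block(inp, n_hidden, (kernel_width, 1))
    z = glu(z)
    return z if residual is None else z + residual
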
    def __init__(self, state_size, window_size, trend, skip):
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        tf.reset_default_graph()
        self.INITIAL_FEATURES = np.zeros((4, self.state_size))
        self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
        self.Y = tf.placeholder(tf.float32, (None, None, self.state_size))
        self.hidden_layer = tf.placeholder(tf.float32,
                                           (None, 2 * self.LAYER_SIZE))
        self.ACTION = tf.placeholder(tf.float32, (None))
        self.REWARD = tf.placeholder(tf.float32, (None))
        self.batch_size = tf.shape(self.ACTION)[0]
        self.seq_len = tf.shape(self.X)[1]

        with tf.variable_scope('curiosity_model'):
            action = tf.reshape(self.ACTION, (-1, 1, 1))
            repeat_action = tf.tile(action, [1, self.seq_len, 1])
            state_action = tf.concat([self.X, repeat_action], axis=-1)
            save_state = tf.identity(self.Y)
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                           state_is_tuple=False)
            self.rnn, last_state = tf.nn.dynamic_rnn(
                inputs=state_action,
                cell=cell,
                dtype=tf.float32,
                initial_state=self.hidden_layer)
            self.curiosity_logits = tf.layers.dense(self.rnn[:, -1],
                                                    self.state_size)
            self.curiosity_cost = tf.reduce_sum(
                tf.square(save_state[:, -1] - self.curiosity_logits), axis=1)

            self.curiosity_optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    tf.reduce_mean(self.curiosity_cost))

        total_reward = tf.add(self.curiosity_cost, self.REWARD)

        with tf.variable_scope("q_model"):
            with tf.variable_scope("eval_net"):
                cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                               state_is_tuple=False)
                rnn, self.last_state = tf.nn.dynamic_rnn(
                    inputs=self.X,
                    cell=cell,
                    dtype=tf.float32,
                    initial_state=self.hidden_layer)
                self.logits = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

            with tf.variable_scope("target_net"):
                cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                               state_is_tuple=False)
                rnn, last_state = tf.nn.dynamic_rnn(
                    inputs=self.Y,
                    cell=cell,
                    dtype=tf.float32,
                    initial_state=self.hidden_layer)
                y_q = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

            q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
            action = tf.cast(self.ACTION, tf.int32)
            action_indices = tf.stack(
                [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
            q = tf.gather_nd(params=self.logits, indices=action_indices)
            self.cost = tf.losses.mean_squared_error(labels=q_target,
                                                     predictions=q)
            self.optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    self.cost,
                    var_list=tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))

        t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/target_net')
        e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/eval_net')
        self.target_replace_op = [
            tf.assign(t, e) for t, e in zip(t_params, e_params)
        ]

        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
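Compared with the feed-forward agent above, this recurrent version threads an LSTM state through the graph via self.hidden_layer and keeps a short rolling window of past states in self.INITIAL_FEATURES. A hypothetical usage sketch, again assuming a class named Agent with LAYER_SIZE, OUTPUT_SIZE, LEARNING_RATE and GAMMA defined at class level (names and shapes are illustrative only):

import numpy as np

agent = Agent(state_size=10, window_size=10, trend=close_prices, skip=1)

# The LSTM state placeholder expects c and h stacked: (batch, 2 * LAYER_SIZE).
init_state = np.zeros((1, 2 * agent.LAYER_SIZE), dtype=np.float32)
window = np.expand_dims(agent.INITIAL_FEATURES, axis=0)  # (1, 4, state_size)

q_values, next_hidden = agent.sess.run(
    [agent.logits, agent.last_state],
    feed_dict={agent.X: window, agent.hidden_layer: init_state})
action = int(np.argmax(q_values[0]))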