Example 1
    def calculate_outputs(self, x):
        h = lstm_layer(x, self.history_length, self.lstm_size)
        c = wavenet(x, self.dilations, self.filter_widths, self.skip_channels,
                    self.residual_channels)
        h = tf.concat([h, c, x], axis=2)

        self.h_final = time_distributed_dense_layer(h,
                                                    50,
                                                    activation=tf.nn.relu,
                                                    scope='dense-1')
        y_hat = time_distributed_dense_layer(self.h_final,
                                             1,
                                             activation=tf.nn.sigmoid,
                                             scope='dense-2')
        y_hat = tf.squeeze(y_hat, 2)

        final_temporal_idx = tf.stack([
            tf.range(tf.shape(self.history_length)[0]), self.history_length - 1
        ],
                                      axis=1)
        self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)
        self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'product_ids': self.product_id,
            'final_states': self.final_states,
            'predictions': self.final_predictions
        }

        return y_hat
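
Every snippet on this page calls a time_distributed_dense_layer helper that the source repositories define themselves: a dense projection applied with the same weights at each timestep of a (batch, seq_len, channels) tensor (a few examples use a variant that also returns the weights and biases). A minimal sketch of what such a helper might look like, assuming an einsum-based TF1 implementation; names and defaults here are illustrative, not the original code:

    import tensorflow as tf  # TensorFlow 1.x, as in the snippets

    def time_distributed_dense_layer(inputs, output_units, activation=None,
                                     scope='time-distributed-dense', reuse=False):
        # inputs: (batch_size, seq_len, input_units); the same w and b are
        # applied independently at every timestep
        with tf.variable_scope(scope, reuse=reuse):
            input_units = inputs.get_shape().as_list()[-1]
            w = tf.get_variable('weights', [input_units, output_units])
            b = tf.get_variable('biases', [output_units],
                                initializer=tf.zeros_initializer())
            z = tf.einsum('btu,uo->bto', inputs, w) + b
            return activation(z) if activation is not None else z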
Example 2
    def calculate_outputs(self, x):
        h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm-1')
        h = tf.concat([h, x], axis=2)
        h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-1')

        n_components = 1
        params = time_distributed_dense_layer(h_final, n_components*2, scope='dense-2', activation=None)
        ps, mixing_coefs = tf.split(params, 2, axis=2)

        # this is implemented incorrectly, but it still helped...
        mixing_coefs = tf.nn.softmax(mixing_coefs - tf.reduce_min(mixing_coefs, 2, keep_dims=True))
        ps = tf.nn.sigmoid(ps)

        labels = tf.tile(tf.expand_dims(self.next_is_ordered, 2), (1, 1, n_components))
        losses = tf.reduce_sum(mixing_coefs*log_loss(labels, ps), axis=2)
        sequence_mask = tf.cast(tf.sequence_mask(self.history_length, maxlen=100), tf.float32)
        avg_loss = tf.reduce_sum(losses*sequence_mask) / tf.cast(tf.reduce_sum(self.history_length), tf.float32)

        final_temporal_idx = tf.stack([tf.range(tf.shape(self.history_length)[0]), self.history_length - 1], axis=1)
        self.final_states = tf.gather_nd(h_final, final_temporal_idx)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'product_ids': self.product_id,
            'final_states': self.final_states
        }

        return avg_loss
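
log_loss above is another repo-local helper, apparently an elementwise binary cross-entropy judging by how its output is weighted by the mixing coefficients and then reduced. A minimal sketch under that reading (the epsilon is an assumed guard value):

    def log_loss(labels, predictions, eps=1e-7):
        # elementwise binary cross-entropy; eps guards against log(0)
        return -(labels * tf.log(predictions + eps)
                 + (1 - labels) * tf.log(1 - predictions + eps))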
Example 3
    def wavenet_logits_target(self):

        x = self.get_inputs(self.opens_, self.highs_, self.lows_, self.closes_,
                            self.volumes_, self.positions_, self.order_prices_,
                            self.current_prices_, self.time_since_,
                            self.todays_)

        inputs, w, b = temporal_convolution_layer(inputs=x,
                                                  output_units=8,
                                                  convolution_width=1,
                                                  scope='target-CNN-1x1')

        self.w_target["wcnn1"] = w
        self.w_target["bcnn1"] = b

        outputs = lstm_layer(inputs,
                             self.lengths,
                             self.lstm_size,
                             scope="series-lstm-target")

        h, w, b = time_distributed_dense_layer(outputs,
                                               128,
                                               scope='target-dense-encode-1',
                                               activation=tf.nn.relu,
                                               reuse=tf.AUTO_REUSE)
        self.w_target["wtf1"] = w
        self.w_target["btf1"] = b

        out, w, b = time_distributed_dense_layer(h,
                                                 32,
                                                 scope='target-dense-encode-2',
                                                 activation=tf.nn.relu,
                                                 reuse=tf.AUTO_REUSE)
        self.w_target["wtf2"] = w
        self.w_target["btf2"] = b

        shape = out.get_shape().as_list()
        out_flat = tf.reshape(out, [tf.shape(out)[0], 1, shape[1] * shape[2]])
        out, state = stateful_lstm(out_flat,
                                   self.num_lstm_layers,
                                   self.lstm_size,
                                   tuple([self.lstm_state_target]),
                                   scope_name="lstm_target")
        self.state_output_target_c = state[0][0]
        self.state_output_target_h = state[0][1]

        shape = out.get_shape().as_list()

        out = tf.reshape(out, [tf.shape(out)[0], shape[2]])

        out, w, b = fully_connected_layer(out,
                                          self.n_actions,
                                          scope_name='target-dense-encode-2',
                                          activation=None)

        self.w_target["wout"] = w
        self.w_target["bout"] = b

        self.q_target_out = out
        self.q_target_action = tf.argmax(self.q_target_out, axis=1)
Example 4
    def calculate_outputs(self, x):
        # lstm
        h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm-1')

        # cnn
        c = time_distributed_dense_layer(x, self.lstm_size, activation=tf.nn.relu, scope='dense-1')
        for i in range(6):
            c_i = temporal_convolution_layer(
                inputs=c,
                output_units=self.lstm_size,
                convolution_width=2,
                activation=tf.nn.relu,
                causal=True,
                dilation_rate=[2**i],
                scope='cnn-exp-{}'.format(i)
            )
            c += c_i

        h = tf.concat([h, c, x], axis=2)
        self.h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-2')
        y_hat = time_distributed_dense_layer(self.h_final, 1, activation=tf.nn.sigmoid, scope='dense-3')
        y_hat = tf.squeeze(y_hat, 2)

        final_temporal_idx = tf.stack([tf.range(tf.shape(self.history_length)[0]), self.history_length - 1], axis=1)
        self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)
        self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'product_ids': self.product_id,
            'final_states': self.final_states,
            'predictions': self.final_predictions
        }

        return y_hat
Example 5
    def calculate_outputs(self, x):
        h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm1')
        h = tf.concat([h, x], axis=2)

        self.h_final = time_distributed_dense_layer(h,
                                                    50,
                                                    activation=tf.nn.relu,
                                                    scope='dense1')
        y_hat = tf.squeeze(
            time_distributed_dense_layer(self.h_final,
                                         1,
                                         activation=tf.nn.sigmoid,
                                         scope='dense2'), 2)

        final_temporal_idx = tf.stack([
            tf.range(tf.shape(self.history_length)[0]), self.history_length - 1
        ],
                                      axis=1)
        self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)
        self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'aisle_ids': self.aisle_id,
            'final_states': self.final_states,
            'predictions': self.final_predictions
        }

        return y_hat
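
The final_temporal_idx idiom that recurs in these examples pairs each batch row with its last valid timestep so that tf.gather_nd picks the state at each sequence's true end instead of at the padded maximum length. A self-contained toy illustration (shapes and values are made up):

    import tensorflow as tf

    history_length = tf.constant([2, 4])            # true lengths of two padded sequences
    h = tf.reshape(tf.range(2 * 4 * 3), (2, 4, 3))  # (batch=2, max_len=4, units=3)

    final_temporal_idx = tf.stack(
        [tf.range(tf.shape(history_length)[0]), history_length - 1],
        axis=1)                                     # [[0, 1], [1, 3]]
    last_states = tf.gather_nd(h, final_temporal_idx)

    with tf.Session() as sess:
        print(sess.run(last_states))                # [[ 3  4  5] [21 22 23]]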
Example 6
    def calculate_outputs(self, x):
        h = lstm_layer(self.x,
                       self.history_length,
                       self.lstm_size,
                       scope='lstm1')

        self.h_final = time_distributed_dense_layer(h,
                                                    50,
                                                    activation=tf.nn.relu,
                                                    scope='dense0')

        n_components = 3
        params = time_distributed_dense_layer(self.h_final,
                                              n_components * 3,
                                              scope='dense1')
        means, variances, mixing_coefs = tf.split(params, 3, axis=2)

        mixing_coefs = tf.nn.softmax(
            mixing_coefs - tf.reduce_min(mixing_coefs, 2, keep_dims=True))
        variances = tf.exp(variances) + 1e-5

        labels = tf.cast(
            tf.tile(tf.expand_dims(self.next_reorder_size, 2),
                    (1, 1, n_components)), tf.float32)
        n_likelihoods = 1.0 / (tf.sqrt(2 * np.pi * variances)) * tf.exp(
            -tf.square(labels - means) / (2 * variances))
        log_likelihood = -tf.log(
            tf.reduce_sum(mixing_coefs * n_likelihoods, axis=2) + 1e-10)

        self.means = means
        self.variances = variances
        self.mixing_coefs = mixing_coefs
        self.nll = log_likelihood

        samples = tf.cast(tf.reshape(tf.range(25), (1, 1, 1, 25)), tf.float32)

        means = tf.tile(tf.expand_dims(means, 3), (1, 1, 1, 25))
        variances = tf.tile(tf.expand_dims(variances, 3), (1, 1, 1, 25))
        mixing_coefs = tf.tile(tf.expand_dims(mixing_coefs, 3), (1, 1, 1, 25))
        sample_n_likelihoods = 1.0 / (tf.sqrt(2 * np.pi * variances)) * tf.exp(
            -tf.square(samples - means) / (2 * variances))

        self.sample_log_likelihoods = tf.reduce_sum(mixing_coefs *
                                                    sample_n_likelihoods,
                                                    axis=2)
        final_temporal_idx = tf.stack([
            tf.range(tf.shape(self.history_length)[0]), self.history_length - 1
        ],
                                      axis=1)
        self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'final_states': self.final_states,
            'predictions': self.sample_log_likelihoods
        }

        return self.nll
Example 7
    def calculate_outputs(self, x):
        h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm-1')
        h_final = time_distributed_dense_layer(h,
                                               50,
                                               activation=tf.nn.relu,
                                               scope='dense-1')

        n_components = 3
        params = time_distributed_dense_layer(h_final,
                                              n_components * 3,
                                              scope='dense-2')
        means, variances, mixing_coefs = tf.split(params, 3, axis=2)

        mixing_coefs = tf.nn.softmax(
            mixing_coefs - tf.reduce_min(mixing_coefs, 2, keep_dims=True))
        variances = tf.exp(variances) + 1e-5

        labels = tf.cast(
            tf.tile(tf.expand_dims(self.next_reorder_size, 2),
                    (1, 1, n_components)), tf.float32)
        n_likelihoods = 1.0 / (tf.sqrt(2 * np.pi * variances)) * tf.exp(
            -tf.square(labels - means) / (2 * variances))
        nlls = -tf.log(
            tf.reduce_sum(mixing_coefs * n_likelihoods, axis=2) + 1e-10)

        sequence_mask = tf.cast(
            tf.sequence_mask(self.history_length, maxlen=100), tf.float32)
        nll = tf.reduce_sum(nlls * sequence_mask) / tf.cast(
            tf.reduce_sum(self.history_length), tf.float32)

        # evaluate likelihood at a sample of discrete points
        samples = tf.cast(tf.reshape(tf.range(25), (1, 1, 1, 25)), tf.float32)
        means = tf.tile(tf.expand_dims(means, 3), (1, 1, 1, 25))
        variances = tf.tile(tf.expand_dims(variances, 3), (1, 1, 1, 25))
        mixing_coefs = tf.tile(tf.expand_dims(mixing_coefs, 3), (1, 1, 1, 25))
        n_sample_likelihoods = 1.0 / (tf.sqrt(2 * np.pi * variances)) * tf.exp(
            -tf.square(samples - means) / (2 * variances))
        sample_nlls = -tf.log(
            tf.reduce_sum(mixing_coefs * n_sample_likelihoods, axis=2) + 1e-10)

        final_temporal_idx = tf.stack([
            tf.range(tf.shape(self.history_length)[0]), self.history_length - 1
        ],
                                      axis=1)
        final_states = tf.gather_nd(h_final, final_temporal_idx)
        final_sample_nlls = tf.gather_nd(sample_nlls, final_temporal_idx)
        self.final_states = tf.concat([final_states, final_sample_nlls],
                                      axis=1)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'final_states': self.final_states
        }

        return nll
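
Both mixture-density variants above return only likelihood terms. If a point forecast is wanted, the mean of a Gaussian mixture is the coefficient-weighted mean of its components; a hedged one-liner that could be computed right after mixing_coefs is normalized, before the tiling for the sample grid (not part of the original code):

    # E[y | x] for a Gaussian mixture: sum_k pi_k * mu_k, shape (batch, seq_len)
    expected_reorder_size = tf.reduce_sum(mixing_coefs * means, axis=2)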
Example 8
    def initialize_decode_params(self, x, features):
        # x shape: (batch_size, num_decode_steps, 1)
        # features shape: (batch_size, num_decode_steps, 79)
        # after concat, x shape: (batch_size, num_decode_steps, 80)
        x = tf.concat([x, features], axis=2)

        # shape (batch_size, num_decode_step, residual_channels)
        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-decode')

        skip_outputs = []
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(
                zip(self.dilations, self.filter_widths)):
            # convolution with dilation
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2 * self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-decode-{}'.format(i))
            # split into filter and gate
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            # combine by multiplying
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            # change shape from (batch_size, num_decode_step, residual_channel) to
            # (batch_size, num_decode_step, residual_channel + skip_channel)
            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-decode-{}'.format(i))
            # split
            # skips shape (batch_size, num_decode_step, skip_channels)
            # residual shape (batch_size, num_decode_step, residual_channels)
            skips, residuals = tf.split(
                outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        # Turn skip_outputs into y_hat
        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
        h = time_distributed_dense_layer(skip_outputs,
                                         128,
                                         scope='dense-decode-1',
                                         activation=tf.nn.relu)
        y_hat = time_distributed_dense_layer(h, 1, scope='dense-decode-2')
        return y_hat
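
temporal_convolution_layer with causal=True is the other shared helper: a 1-D convolution that left-pads the sequence by (convolution_width - 1) * dilation so that no output position can see the future. A sketch of that idea assuming a plain tf.nn.convolution backend; this is illustrative, not the repositories' exact code:

    def temporal_convolution_layer(inputs, output_units, convolution_width,
                                   causal=False, dilation_rate=[1],
                                   activation=None, scope='temporal-conv',
                                   reuse=False):
        with tf.variable_scope(scope, reuse=reuse):
            if causal:
                # left-pad so position t never sees inputs later than t
                pad = (convolution_width - 1) * dilation_rate[0]
                inputs = tf.pad(inputs, [[0, 0], [pad, 0], [0, 0]])
            w = tf.get_variable(
                'weights',
                [convolution_width, inputs.get_shape().as_list()[-1], output_units])
            b = tf.get_variable('biases', [output_units],
                                initializer=tf.zeros_initializer())
            z = tf.nn.convolution(inputs, w,
                                  padding='VALID' if causal else 'SAME',
                                  dilation_rate=dilation_rate) + b
            return activation(z) if activation is not None else z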
Example 9
    def encode(self, x, features):
        # shape (batch_size, seq_len, 1 + 17 = 18 )
        x = tf.concat([x, features], axis=2)

        # Use tf.einsum to change shape (batch_size, seq_len, 18) to (batch_size, seq_len, residual_channels)
        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-encode')

        # collect the skip connections that form the encoding result
        skip_outputs = []
        # collect the inputs to each convolution layer
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(
                zip(self.dilations, self.filter_widths)):
            # convolution with dilation
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2 * self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-encode-{}'.format(i))
            # split dilated conv into filter and gate, and combine them by multiplying
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            # change dilated_conv's shape from (batch_size, seq_len, residual_channels)
            # to (batch_size, seq_len, residual_channels + skip_channels)
            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-encode-{}'.format(i))
            # split into skips and residuals
            skips, residuals = tf.split(
                outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        # skip_outputs shape (batch_size, seq_len, 32*24=768) -> (batch_size, seq_len, 1)
        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
        h = time_distributed_dense_layer(skip_outputs,
                                         128,
                                         scope='dense-encode-1',
                                         activation=tf.nn.relu)
        y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2')

        # conv_inputs shape(batch_size, seq_len, residual_channel) * 25
        return y_hat, conv_inputs[:-1]
Example 10
    def encode(self, x, features):
        """
        Encode 
        :param x: time series values
        :param features: extra features
        :return: 
        """
        x = tf.concat([x, features], axis=2)

        # output of the time-distributed dense layer, used as the input to the convolution stack
        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-encode'
        )

        skip_outputs = []
        conv_inputs = [inputs]

        # stack multiple convolutions
        for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2*self.residual_channels,  # double the convolution channels
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-encode-{}'.format(i)
            )

            # gated activation units based on output from dilated convolutions
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter)*tf.nn.sigmoid(conv_gate)

            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-encode-{}'.format(i)
            )
            skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        # skip connections from each layer to the final dense layer
        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
        h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-encode-1', activation=tf.nn.relu)
        y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2')

        return y_hat, conv_inputs[:-1]
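
None of the snippets show the hyperparameters behind these encoders; the 32*24=768 shape comment in Example 9 suggests roughly 24 stacked layers with 32 skip channels. A plausible configuration sketch under that assumption (values are inferred, not confirmed):

    # 8 exponentially dilated layers, repeated 3 times => 24 layers total
    dilations = [2 ** i for i in range(8)] * 3   # 1, 2, 4, ..., 128, then repeat
    filter_widths = [2] * len(dilations)         # width-2 causal filters throughout
    residual_channels = 32
    skip_channels = 32                           # 24 layers * 32 = 768 concatenated units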
Example 11
    def encode(self, x, features):
        """
        返回值:
        y_hat:skip(每次残差) concat后全连接成输出的预测值
        conv_inputs=[inputs] :每层残差与输入的和 组成的数组(去除最后一层)
        :param x: log_x_encode 销量的对数
        :param features: 需要encoding的其他特征
        :return:
        """
        # shape: (batch, seq, 1 + 17)
        x = tf.concat([x, features], axis=2)

        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-encode'
        )
        # collect the skip output of each layer
        skip_outputs = []
        # collect the residual-augmented input of each layer
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2 * self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-encode-{}'.format(i)
            )
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-encode-{}'.format(i)
            )
            skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)
            # accumulate the residuals into the input of the next layer
            inputs += residuals
            conv_inputs.append(inputs)
            # collect the skips
            skip_outputs.append(skips)
        # merge the skip outputs
        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
        h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-encode-1', activation=tf.nn.relu)
        y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2')

        return y_hat, conv_inputs[:-1]
Example 12
    def encode(self, x, features):
        x = tf.concat([x, features], axis=2)

        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-encode',
            reuse=tf.AUTO_REUSE)

        skip_outputs = []
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(
                zip(self.dilations, self.filter_widths)):
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2 * self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-encode-{}'.format(i),
                reuse=tf.AUTO_REUSE)
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-encode-{}'.format(i),
                reuse=tf.AUTO_REUSE)
            skips, residuals = tf.split(
                outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))

        h = time_distributed_dense_layer(skip_outputs,
                                         128,
                                         scope='dense-encode-1',
                                         activation=tf.nn.relu,
                                         reuse=tf.AUTO_REUSE)
        y_hat = time_distributed_dense_layer(h,
                                             1,
                                             scope='dense-encode-2',
                                             reuse=tf.AUTO_REUSE)

        return y_hat, conv_inputs[:-1]
Example 13
    def calculate_outputs(self, x):
        h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm-1')
        h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-1')
        y_hat = tf.squeeze(time_distributed_dense_layer(h_final, 1, scope='dense2'), 2)

        final_temporal_idx = tf.stack([tf.range(tf.shape(self.history_length)[0]), self.history_length - 1], axis=1)
        self.final_states = tf.gather_nd(h_final, final_temporal_idx)
        self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'final_states': self.final_states,
            'predictions': self.final_predictions
        }

        return y_hat
Example 14
    def initialize_decode_params(self, x, features):
        x = tf.concat([x, features], axis=2)

        h = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='h-init-decode',
        )
        c = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='c-init-decode',
        )

        skip_outputs = []
        conv_inputs = [h]
        for i, (dilation, filter_width) in enumerate(
                zip(self.dilations, self.filter_widths)):
            dilated_conv = temporal_convolution_layer(
                inputs=h,
                output_units=4 * self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-decode-{}'.format(i),
            )
            input_gate, conv_filter, conv_gate, emit_gate = tf.split(
                dilated_conv, 4, axis=2)

            c = tf.nn.sigmoid(input_gate) * c + tf.nn.tanh(
                conv_filter) * tf.nn.sigmoid(conv_gate)
            h = tf.nn.sigmoid(emit_gate) * tf.nn.tanh(c)

            skip_outputs.append(h)
            conv_inputs.append(h)

        skip_outputs = tf.concat(skip_outputs, axis=2)
        h = time_distributed_dense_layer(skip_outputs,
                                         128,
                                         scope='dense-decode-1',
                                         activation=tf.nn.relu)
        y_hat = time_distributed_dense_layer(h, 2, scope='dense-decode-2')
        return y_hat
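
Unlike the tanh/sigmoid gating in the other WaveNet-style examples, this decoder threads a persistent cell c through the layer stack, so each dilated layer updates state like an LSTM cell unrolled over depth rather than time. The recurrence, restated on plain arrays to make it explicit (numpy used only for illustration):

    import numpy as np

    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))

    def gated_update(c, input_gate, conv_filter, conv_gate, emit_gate):
        # mirrors the tf.split branches above: c is gated in, h is emitted
        c = sigmoid(input_gate) * c + np.tanh(conv_filter) * sigmoid(conv_gate)
        h = sigmoid(emit_gate) * np.tanh(c)
        return c, h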
Example 15
    def calculate_outputs(self, x):
        h = lstm_layer(x, self.history_length, self.lstm_size, scope='lstm-1')
        h = tf.concat([h, x], axis=2)
        h_final = time_distributed_dense_layer(h,
                                               50,
                                               activation=tf.nn.relu,
                                               scope='dense-1')

        n_components = 1
        params = time_distributed_dense_layer(h_final,
                                              n_components * 2,
                                              scope='dense-2',
                                              activation=None)
        ps, mixing_coefs = tf.split(params, 2, axis=2)

        # this is implemented incorrectly, but it still helped...
        mixing_coefs = tf.nn.softmax(
            mixing_coefs - tf.reduce_min(mixing_coefs, 2, keep_dims=True))
        ps = tf.nn.sigmoid(ps)

        labels = tf.tile(tf.expand_dims(self.next_is_ordered, 2),
                         (1, 1, n_components))
        losses = tf.reduce_sum(mixing_coefs * log_loss(labels, ps), axis=2)
        sequence_mask = tf.cast(
            tf.sequence_mask(self.history_length, maxlen=100), tf.float32)
        avg_loss = tf.reduce_sum(losses * sequence_mask) / tf.cast(
            tf.reduce_sum(self.history_length), tf.float32)

        final_temporal_idx = tf.stack([
            tf.range(tf.shape(self.history_length)[0]), self.history_length - 1
        ],
                                      axis=1)
        self.final_states = tf.gather_nd(h_final, final_temporal_idx)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'product_ids': self.product_id,
            'final_states': self.final_states
        }

        return avg_loss
Example 16
    def calculate_outputs(self, x):
        h = lstm_layer(x, self.history_length, self.lstm_size)
        c = wavenet(x, self.dilations, self.filter_widths, self.skip_channels, self.residual_channels)
        h = tf.concat([h, c, x], axis=2)

        self.h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-1')
        y_hat = time_distributed_dense_layer(self.h_final, 1, activation=tf.nn.sigmoid, scope='dense-2')
        y_hat = tf.squeeze(y_hat, 2)

        final_temporal_idx = tf.stack([tf.range(tf.shape(self.history_length)[0]), self.history_length - 1], axis=1)
        self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)
        self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'product_ids': self.product_id,
            'final_states': self.final_states,
            'predictions': self.final_predictions
        }

        return y_hat
Example 17
    def calculate_loss(self):
        self.x = tf.placeholder(tf.float32, [None, None, 3])
        self.y = tf.placeholder(tf.float32, [None, None, 3])
        self.x_len = tf.placeholder(tf.int32, [None])
        self.c = tf.placeholder(tf.int32, [None, None])
        self.c_len = tf.placeholder(tf.int32, [None])

        self.sample_tsteps = tf.placeholder(tf.int32, [])
        self.num_samples = tf.placeholder(tf.int32, [])
        self.prime = tf.placeholder(tf.bool, [])
        self.x_prime = tf.placeholder(tf.float32, [None, None, 3])
        self.x_prime_len = tf.placeholder(tf.int32, [None])
        self.bias = tf.placeholder_with_default(
            tf.zeros([self.num_samples], dtype=tf.float32), [None])

        cell = LSTMAttentionCell(
            lstm_size=self.lstm_size,
            num_attn_mixture_components=self.attention_mixture_components,
            # attend over a one-hot encoding of the character sequence
            attention_values=tf.one_hot(self.c, len(drawing.alphabet)),
            attention_values_lengths=self.c_len,
            num_output_mixture_components=self.output_mixture_components,
            bias=self.bias
        )
        self.initial_state = cell.zero_state(tf.shape(self.x)[0], dtype=tf.float32)
        outputs, self.final_state = tf.nn.dynamic_rnn(
            inputs=self.x,
            cell=cell,
            sequence_length=self.x_len,
            dtype=tf.float32,
            initial_state=self.initial_state,
            scope='rnn'
        )

        """
        use time distrubuted layer for store time singly
        """
        params = time_distributed_dense_layer(outputs, self.output_units, scope='rnn/gmm')
        pis, mus, sigmas, rhos, es = self.parse_parameters(params)
        sequence_loss, self.loss = self.NLL(self.y, self.x_len, pis, mus, sigmas, rhos, es)

        self.sampled_sequence = tf.cond(
            self.prime,
            lambda: self.primed_sample(cell),
            lambda: self.sample(cell)
        )
        return self.loss
Example 18
    def encode(self, x, features):
        x = tf.concat([x, features], axis=2)

        h = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-init',
        )
        c = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='c-init',
        )

        conv_inputs = [h]
        for i, (dilation, filter_width) in enumerate(
                list(zip(self.dilations, self.filter_widths))[:-1]):
            dilated_conv = temporal_convolution_layer(
                inputs=h,
                output_units=4 * self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-encode-{}'.format(i),
            )
            input_gate, conv_filter, conv_gate, emit_gate = tf.split(
                dilated_conv, 4, axis=2)

            c = tf.nn.sigmoid(input_gate) * c + tf.nn.tanh(
                conv_filter) * tf.nn.sigmoid(conv_gate)
            h = tf.nn.sigmoid(emit_gate) * tf.nn.tanh(c)
            conv_inputs.append(h)

        return conv_inputs
Example 19
    def calculate_loss(self):
        self.x = tf.placeholder(tf.float32, [None, None, 3])
        self.y = tf.placeholder(tf.float32, [None, None, 3])
        self.x_len = tf.placeholder(tf.int32, [None])
        self.c = tf.placeholder(tf.int32, [None, None])
        self.c_len = tf.placeholder(tf.int32, [None])

        self.sample_tsteps = tf.placeholder(tf.int32, [])
        self.num_samples = tf.placeholder(tf.int32, [])
        self.prime = tf.placeholder(tf.bool, [])
        self.x_prime = tf.placeholder(tf.float32, [None, None, 3])
        self.x_prime_len = tf.placeholder(tf.int32, [None])
        self.bias = tf.placeholder_with_default(
            tf.zeros([self.num_samples], dtype=tf.float32), [None])

        cell = LSTMAttentionCell(
            lstm_size=self.lstm_size,
            num_attn_mixture_components=self.attention_mixture_components,
            attention_values=tf.one_hot(self.c, len(drawing.alphabet)),
            attention_values_lengths=self.c_len,
            num_output_mixture_components=self.output_mixture_components,
            bias=self.bias
        )
        self.initial_state = cell.zero_state(tf.shape(self.x)[0], dtype=tf.float32)
        outputs, self.final_state = tf.nn.dynamic_rnn(
            inputs=self.x,
            cell=cell,
            sequence_length=self.x_len,
            dtype=tf.float32,
            initial_state=self.initial_state,
            scope='rnn'
        )
        params = time_distributed_dense_layer(outputs, self.output_units, scope='rnn/gmm')
        pis, mus, sigmas, rhos, es = self.parse_parameters(params)
        sequence_loss, self.loss = self.NLL(self.y, self.x_len, pis, mus, sigmas, rhos, es)

        self.sampled_sequence = tf.cond(
            self.prime,
            lambda: self.primed_sample(cell),
            lambda: self.sample(cell)
        )
        return self.loss
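
parse_parameters and self.NLL are not shown here, but the names pis, mus, sigmas, rhos, es match the bivariate Gaussian mixture with an end-of-stroke Bernoulli from Graves-style handwriting models. For reference, the bivariate component density such parameters would feed into can be sketched as follows (function name and signature are illustrative, not the repo's API):

    import numpy as np
    import tensorflow as tf

    def bivariate_normal_density(x1, x2, mu1, mu2, s1, s2, rho):
        # 2-D Gaussian density with per-dimension stddevs s1, s2 and correlation rho
        z = (tf.square((x1 - mu1) / s1) + tf.square((x2 - mu2) / s2)
             - 2.0 * rho * (x1 - mu1) * (x2 - mu2) / (s1 * s2))
        t = 1.0 - tf.square(rho)
        return tf.exp(-z / (2.0 * t)) / (2.0 * np.pi * s1 * s2 * tf.sqrt(t))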
Example 20
    def encode(self, x, features):
        """
        Parameters
        ----------
        x: sequence input
            shape = [batch_size, seq_len]
        features: features
            shape = [batch_size, seq_len, num_features]

        Returns
        -------
        y_hat: Tensor
            projected skip outputs (thought vector)
            shape = [batch_size, seq_len, 1]
        conv_inputs: [Tensor]
            outputs of convolution
            length = len(dilations)
            each element has shape [batch_size, seq_len, residual_channels]
        """

        # x.shape = [batch_size, seq_len, num_features + 1]
        x = tf.concat([x, features], axis=2)
        """
        Pass initial inputs through dense layer so that they'll have
        the same shape as the residuals. We're expanding the number
        of "channels".

        inputs.shape = [batch_size, seq_len, residual_channels]
        """
        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-encode')

        skip_outputs = []
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(
                zip(self.dilations, self.filter_widths)):

            # dilated_conv.shape = [batch_size, seq_len, 2*residual_channels]
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2 * self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-encode-{}'.format(i))

            # dilated_conv.shape = [batch_size, seq_len, residual_channels]
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            # Pass dilated_conv through dense layer to expand the number of channels.
            # outputs.shape = [batch_size, seq_len, skip_channels + residual_channels]
            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-encode-{}'.format(i))

            # skips.shape = [batch_size, seq_len, skip_channels]
            # residuals.shape = [batch_size, seq_len, residual_channels]
            skips, residuals = tf.split(
                outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        # skip_output.shape = [batch_size, seq_len, len(dilations)*skip_channels]
        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))

        # h.shape = [batch_size, seq_len, 128]
        h = time_distributed_dense_layer(skip_outputs,
                                         128,
                                         scope='dense-encode-1',
                                         activation=tf.nn.relu)

        # y_hat.shape = [batch_size, seq_len, 1]
        y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2')

        return y_hat, conv_inputs[:-1]
Example 21
    def wavenet_logits_target(self):

        x = self.x

        inputs, w, b = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='target-x-proj-encode',
            reuse=False)

        self.w_target["wf0"] = w
        self.w_target["bf0"] = b

        skip_outputs = []
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(
                zip(self.dilations, self.filter_widths)):
            dilated_conv, w, b = temporal_convolution_layer(
                inputs=inputs,
                output_units=2 * self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='target-dilated-conv-encode-{}'.format(i),
                reuse=tf.AUTO_REUSE)
            self.w_target["wc{}".format(i)] = w
            self.w_target["wb{}".format(i)] = b

            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            outputs, w, b = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='target-dilated-conv-proj-encode-{}'.format(i),
                reuse=tf.AUTO_REUSE)
            self.w_target["wtf-{}".format(i)] = w
            self.w_target["btf-{}".format(i)] = b

            skips, residuals = tf.split(
                outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))

        h, w, b = time_distributed_dense_layer(skip_outputs,
                                               128,
                                               scope='target-dense-encode-1',
                                               activation=tf.nn.relu,
                                               reuse=tf.AUTO_REUSE)
        self.w_target["wtf1"] = w
        self.w_target["btf1"] = b

        h, w, b = time_distributed_dense_layer(h,
                                               3,
                                               scope='target-dense-encode-2',
                                               activation=tf.nn.relu,
                                               reuse=tf.AUTO_REUSE)
        self.w_target["wtf2"] = w
        self.w_target["btf2"] = b

        s = h.get_shape().as_list()
        # flatten all non-batch dimensions (on Python 3 this needs functools.reduce)
        out_flat = tf.reshape(h, [-1, reduce(lambda x, y: x * y, s[1:])])

        h, w, b = fully_connected_layer(out_flat,
                                        128,
                                        scope_name='target-dense-encode-1',
                                        activation=tf.nn.relu)
        self.w_target["wf1"] = w
        self.w_target["bf1"] = b

        out, w, b = fully_connected_layer(h,
                                          self.n_actions,
                                          scope_name='target-dense-encode-2',
                                          activation=None)

        self.w_target["wout"] = w
        self.w_target["bout"] = b

        self.q_target_out = out
        self.q_target_action = tf.argmax(self.q_target_out, axis=1)