Example #1
    def _setup_functions(self, X_sym, y_sym, X_mask, y_mask, layer_sizes):
        recurrent_sizes = layer_sizes[:-1]
        input_variable, params = stack_forward_layers(
            X_sym, X_mask, recurrent_sizes, build_recurrent_lstm_layer,
            self.random_state)
        sz = recurrent_sizes[-1]

        # Means, one per feature per mixture component
        mu, mu_params = build_linear_layer(
            sz, self.n_mixture_components * self.n_features,
            input_variable, self.random_state)
        params = params + mu_params
        # Variances (passed through a softplus in the cost below)
        var, var_params = build_linear_layer(
            sz, self.n_mixture_components * self.n_features,
            input_variable, self.random_state)
        params = params + var_params
        # Mixture weights (softmaxed below)
        coeff, coeff_params = build_linear_layer(
            sz, self.n_mixture_components, input_variable,
            self.random_state)
        params = params + coeff_params

        mu_shp = mu.shape
        var_shp = var.shape
        coeff_shp = coeff.shape
        y_shp = y_sym.shape

        # TODO: Masking!

        # Reshape everything to 2D
        coeff = coeff.reshape([coeff_shp[0] * coeff_shp[1], coeff_shp[2]])
        coeff = T.nnet.softmax(coeff)
        y_r = y_sym.reshape([y_shp[0] * y_shp[1], y_shp[2]])
        mu = mu.reshape([mu_shp[0] * mu_shp[1], mu_shp[2]])
        var = var.reshape([var_shp[0] * var_shp[1], var_shp[2]])

        # Reshape back to 3D: (samples, n_features, n_mixture_components),
        # with targets given a broadcastable component axis
        y_r = y_r.dimshuffle(0, 1, 'x')
        mu = mu.reshape([mu.shape[0],
                        T.cast(mu.shape[1] / coeff.shape[-1], 'int32'),
                        coeff.shape[-1]])
        var = var.reshape([var.shape[0],
                           T.cast(var.shape[1] / coeff.shape[-1], 'int32'),
                           coeff.shape[-1]])

        # GMM negative log-likelihood; the 1E-15 floor keeps the variance log
        # finite
        log_var = T.log(T.nnet.softplus(var) + 1E-15)
        cost = -0.5 * T.sum(T.sqr(y_r - mu) * T.exp(-log_var) + log_var
                            + T.log(2 * np.pi), axis=1)

        cost = -logsumexp(T.log(coeff) + cost, axis=1).sum()
        grads = T.grad(cost, params)
        self.opt_ = self.optimizer(params)
        updates = self.opt_.updates(
            params, grads, self.learning_rate, self.momentum)

        self.fit_function = theano.function(inputs=[X_sym, y_sym, X_mask,
                                                    y_mask],
                                            outputs=cost,
                                            updates=updates,
                                            on_unused_input="ignore")

        self.loss_function = theano.function(inputs=[X_sym, y_sym, X_mask,
                                                     y_mask],
                                             outputs=cost,
                                             on_unused_input="ignore")

        self.generate_function = theano.function(inputs=[X_sym, X_mask],
                                                 outputs=[mu, log_var, coeff],
                                                 on_unused_input="ignore")
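The cost assembled above is the negative log-likelihood of a diagonal-covariance Gaussian mixture, evaluated per flattened (step, batch) row and then summed. A minimal NumPy sketch of the same quantity follows, useful for sanity-checking the Theano graph on concrete arrays; the function name diag_gmm_nll and the toy shapes are illustrative only and not part of this module.

import numpy as np
from scipy.special import logsumexp


def diag_gmm_nll(y, mu, log_var, log_coeff):
    # y: (n_rows, n_features)
    # mu, log_var: (n_rows, n_features, n_components)
    # log_coeff: (n_rows, n_components), rows hold log mixture weights
    y = y[:, :, None]  # broadcast targets across mixture components
    # Per-component diagonal Gaussian log-density, summed over features
    log_prob = -0.5 * np.sum(
        (y - mu) ** 2 * np.exp(-log_var) + log_var + np.log(2 * np.pi),
        axis=1)
    # Mixture log-likelihood via logsumexp over components, negated and summed
    return -logsumexp(log_coeff + log_prob, axis=1).sum()


# Toy shapes only, for a quick numerical check
rng = np.random.RandomState(0)
n_rows, n_features, n_components = 6, 3, 4
y = rng.randn(n_rows, n_features)
mu = rng.randn(n_rows, n_features, n_components)
log_var = rng.randn(n_rows, n_features, n_components)
coeff = rng.dirichlet(np.ones(n_components), size=n_rows)
print(diag_gmm_nll(y, mu, log_var, np.log(coeff)))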
Example #2
    def _setup_functions(self, X_sym, y_sym, X_mask, y_mask, layer_sizes):
        recurrent_sizes = layer_sizes[:-1]
        input_variable, params = stack_forward_layers(
            X_sym, X_mask, recurrent_sizes, build_recurrent_lstm_layer,
            self.random_state)
        sz = recurrent_sizes[-1]

        # Hardcoded: works for 3-dimensional handwriting data *only*!
        # Up/down channel
        binary, binary_params = build_linear_layer(
            sz, 1, input_variable, self.random_state)
        params = params + binary_params

        # Means
        mu, mu_params = build_linear_layer(
            sz, self.n_mixture_components * 2,
            input_variable, self.random_state)
        params = params + mu_params

        # Diagonal
        var, var_params = build_linear_layer(
            sz, self.n_mixture_components * 2, input_variable,
            self.random_state)
        params = params + var_params

        # Off-diagonal
        corr, corr_params = build_linear_layer(
            sz, self.n_mixture_components, input_variable,
            self.random_state)
        params = params + corr_params

        coeff, coeff_params = build_linear_layer(
            sz, self.n_mixture_components, input_variable,
            self.random_state)
        params = params + coeff_params

        mu_shp = mu.shape
        var_shp = var.shape
        corr_shp = corr.shape
        coeff_shp = coeff.shape
        y_shp = y_sym.shape

        # TODO: Masking!
        # Reshape everything to 2D
        coeff = coeff.reshape([coeff_shp[0] * coeff_shp[1], coeff_shp[2]])
        coeff = T.nnet.softmax(coeff)

        y_r = y_sym.reshape([y_shp[0] * y_shp[1], y_shp[2]])
        y_b = y_r[:, 0]
        y_r = y_r[:, 1:]
        mu = mu.reshape([mu_shp[0] * mu_shp[1], mu_shp[2]])
        var = var.reshape([var_shp[0] * var_shp[1], var_shp[2]])
        corr = corr.reshape([corr_shp[0] * corr_shp[1], corr_shp[2]])

        log_var = T.log(T.nnet.softplus(var) + 1E-9)
        # Negative due to sigmoid? AG paper has positive exponential
        binary = T.nnet.sigmoid(-binary)
        corr = T.tanh(corr)
        binary = binary.ravel()

        # Reshape back to 3D: (samples, 2, components) for mu and log_var,
        # (samples, 1, components) for corr; targets get a broadcastable axis
        y_r = y_r.dimshuffle(0, 1, 'x')
        mu = mu.reshape([mu.shape[0],
                        T.cast(mu.shape[1] / coeff.shape[-1], 'int32'),
                        coeff.shape[-1]])
        log_var = log_var.reshape([log_var.shape[0],
                           T.cast(log_var.shape[1] / coeff.shape[-1], 'int32'),
                           coeff.shape[-1]])
        corr = corr.reshape([corr.shape[0],
                            T.cast(corr.shape[1] / coeff.shape[-1], 'int32'),
                            coeff.shape[-1]])
        # Exact AG cost - see the paper "Generating Sequences with Recurrent
        # Neural Networks", Alex Graves
        # http://arxiv.org/pdf/1308.0850v5.pdf
        # Coordinate targets; y_r got a broadcastable component axis from the
        # dimshuffle above, so they broadcast against the mixture components
        x1 = y_r[:, 0]
        x2 = y_r[:, 1]
        mu1 = mu[:, 0, :]
        mu2 = mu[:, 1, :]
        log_var1 = log_var[:, 0, :]
        log_var2 = log_var[:, 1, :]
        # Binary cost
        c_b = -y_b * T.log(binary + 1E-9) - (1 - y_b) * T.log(1 - binary + 1E-9)
        # First part of log Gaussian
        c_g1 = -T.log(2 * np.pi) - log_var1 - log_var2 - .5 * T.log(
            1 - T.sum(corr, axis=1) ** 2 + 1E-9)
        # Multiplier on z
        c_g2 = -.5 * 1. / (1 - T.sum(corr, axis=1) ** 2)
        z = (x1 - mu1) ** 2 / T.exp(log_var1) ** 2
        z += (x2 - mu2) ** 2 / T.exp(log_var2) ** 2
        z -= 2 * T.sum(corr, axis=1) * (x1 - mu1) * (x2 - mu2) / (
            T.exp(log_var1) * T.exp(log_var2))
        cost = c_g1 + c_g2 * z
        cost = T.sum(-logsumexp(T.log(coeff) + cost, axis=1) + c_b)

        grads = T.grad(cost, params)
        self.opt_ = self.optimizer(params)
        updates = self.opt_.updates(
            params, grads, self.learning_rate, self.momentum)

        self.fit_function = theano.function(inputs=[X_sym, y_sym, X_mask,
                                                    y_mask],
                                            outputs=cost,
                                            updates=updates,
                                            on_unused_input="ignore")

        self.loss_function = theano.function(inputs=[X_sym, y_sym, X_mask,
                                                     y_mask],
                                             outputs=cost,
                                             on_unused_input="ignore")

        self.generate_function = theano.function(inputs=[X_sym, X_mask],
                                                 outputs=[binary, mu, log_var,
                                                          corr, coeff],
                                                 on_unused_input="ignore")
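The expression above is the handwriting mixture-density cost from the Graves paper linked in the comments: a mixture of bivariate Gaussians over the two pen offsets plus a Bernoulli cross-entropy for the pen up/down bit. Note that the variable named log_var effectively holds a log standard deviation (it enters c_g1 once per dimension and its exponential is squared inside z), which is what makes the terms line up with the paper. A minimal NumPy sketch of the same per-step cost follows; the function and variable names are illustrative only and not from this module.

import numpy as np
from scipy.special import logsumexp


def graves_mixture_nll(x1, x2, pen, mu1, mu2, log_sigma1, log_sigma2,
                       rho, log_coeff, pen_prob, eps=1e-9):
    # x1, x2, pen: (n_rows,) coordinate and pen-up targets
    # mu1, mu2, log_sigma1, log_sigma2, rho, log_coeff: (n_rows, n_components)
    # pen_prob: (n_rows,) predicted pen-up probability
    d1 = x1[:, None] - mu1
    d2 = x2[:, None] - mu2
    s1 = np.exp(log_sigma1)
    s2 = np.exp(log_sigma2)
    # Mahalanobis-like term of the bivariate Gaussian
    z = d1 ** 2 / s1 ** 2 + d2 ** 2 / s2 ** 2 - 2 * rho * d1 * d2 / (s1 * s2)
    # Log-density of each bivariate component
    log_gauss = (-np.log(2 * np.pi) - log_sigma1 - log_sigma2
                 - 0.5 * np.log(1 - rho ** 2 + eps)
                 - z / (2 * (1 - rho ** 2)))
    # Bernoulli cross-entropy for the pen-up channel
    c_b = (-pen * np.log(pen_prob + eps)
           - (1 - pen) * np.log(1 - pen_prob + eps))
    return np.sum(-logsumexp(log_coeff + log_gauss, axis=1) + c_b)


# Toy shapes only, for a quick numerical check
r = np.random.RandomState(1)
n_rows, n_components = 5, 3
print(graves_mixture_nll(r.randn(n_rows), r.randn(n_rows),
                         r.randint(0, 2, n_rows),
                         r.randn(n_rows, n_components),
                         r.randn(n_rows, n_components),
                         r.randn(n_rows, n_components),
                         r.randn(n_rows, n_components),
                         np.tanh(r.randn(n_rows, n_components)),
                         np.log(r.dirichlet(np.ones(n_components),
                                            size=n_rows)),
                         r.uniform(0.1, 0.9, size=n_rows)))

The eps floor mirrors the 1E-9 constants in the graph above and keeps the logs finite when rho approaches +/-1 or the pen probability saturates.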