Python Linear.initialize Exemples, blocks.bricks.simple.Linear.initialize Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : lstm_gmm_blocks.py Projet : olimastro/ift6266

    def build_theano_functions(self):
        x = T.fmatrix('time_sequence')
        x = x.reshape((self.batch_dim, self.sequence_dim, self.time_dim))

        y = x[:,1:self.sequence_dim,:]
        x = x[:,:self.sequence_dim-1,:]

        # if we try to include the spectrogram features
        spec_dims = 0
        if self.image_size is not None :
            print "Convolution activated"
            self.init_conv()
            spec = T.ftensor4('spectrogram')
            spec_features, spec_dims = self.conv.build_conv_layers(spec)
            print "Conv final dims =", spec_dims
            spec_dims = np.prod(spec_dims)
            spec_features = spec_features.reshape(
                (self.batch_dim, self.sequence_dim-1, spec_dims))
            x = T.concatenate([x, spec_features], axis=2)

        layers_input = [x]
        dims =np.array([self.time_dim + spec_dims])
        for dim in self.lstm_layers_dim :
            dims = np.append(dims, dim)
        print "Dimensions =", dims

        # layer is just an index of the layer
        for layer in range(len(self.lstm_layers_dim)) :

            # before the cell, input, forget and output gates, x needs to
            # be transformed
            linear = Linear(dims[layer],
                            dims[layer+1]*4,
                            weights_init=Orthogonal(self.orth_scale),
                            biases_init=Constant(0),
                            name="linear"+str(layer))
            linear.initialize()
            lstm_input = linear.apply(layers_input[layer])

            # the lstm wants batch X sequence X time
            lstm = LSTM(
                dim=dims[layer+1],
                weights_init=IsotropicGaussian(mean=0.,std=0.5),
                biases_init=Constant(1),
                name="lstm"+str(layer))
            lstm.initialize()
            # hack to use Orthogonal on lstm w_state
            lstm.W_state.set_value(
                self.orth_scale*Orthogonal().generate(np.random, lstm.W_state.get_value().shape))
            h, _dummy = lstm.apply(lstm_input)

            layers_input.append(h)

        # this is where Alex Graves' paper starts
        print "Last linear transform dim :", dims[1:].sum()
        output_transform = Linear(dims[1:].sum(),
                                  self.output_dim,
                                  weights_init=Orthogonal(self.orth_scale),
                                  use_bias=False,
                                  name="output_transform")
        output_transform.initialize()
        if len(self.lstm_layers_dim) == 1 :
            print "hallo there, only one layer speaking"
            y_hat = output_transform.apply(layers_input[-1])
        else :
            y_hat = output_transform.apply(T.concatenate(layers_input[1:], axis=2))

        # transforms to find each gmm params (mu, pi, sig)
        # small hack to softmax a 3D tensor
        pis = T.reshape(
                    T.nnet.softmax(
                        T.reshape(y_hat[:,:,:self.gmm_dim], ((self.sequence_dim-1)*self.batch_dim, self.gmm_dim))),
                    (self.batch_dim, (self.sequence_dim-1), self.gmm_dim))
        sig = T.exp(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+1e-6
        mus = y_hat[:,:,self.gmm_dim*2:]

        pis = pis[:,:,:,np.newaxis]
        mus = mus[:,:,:,np.newaxis]
        sig = sig[:,:,:,np.newaxis]
        y = y[:,:,np.newaxis,:]

        y = T.patternbroadcast(y, (False, False, True, False))
        mus = T.patternbroadcast(mus, (False, False, False, True))
        sig = T.patternbroadcast(sig, (False, False, False, True))

        # sum likelihood with targets
        # see blog for this crazy Pr() = sum log sum prod
        # axes :: (batch, sequence, mixture, time)
        expo_term = -0.5*((y-mus)**2)/sig**2
        coeff = T.log(T.maximum(1./(T.sqrt(2.*np.pi)*sig), EPS))
        #coeff = T.log(1./(T.sqrt(2.*np.pi)*sig))
        sequences = coeff + expo_term
        log_sequences = T.log(pis + EPS) + T.sum(sequences, axis=3, keepdims=True)

        log_sequences_max = T.max(log_sequences, axis=2, keepdims=True)

        LL = -(log_sequences_max + T.log(EPS + T.sum(T.exp(log_sequences - log_sequences_max), axis=2, keepdims=True))).mean()
        LL.name = "summed_likelihood"

        model = Model(LL)
        self.model = model
        parameters = model.parameters

        algorithm = GradientDescent(
            cost=LL,
            parameters=model.parameters,
            step_rule=Adam())

        f = theano.function([x],[pis, sig, mus])

        return algorithm, f

Exemple #2

0

Afficher le fichier

    def build_theano_functions(self):
        x = T.ftensor3('x')  # shape of input : batch X time X value
        y = T.ftensor3('y')
        z = T.ftensor3('z')

        layers_input = [x]
        dims = np.array([self.input_dim])
        for dim in self.lstm_layers_dim:
            dims = np.append(dims, dim)
        print "Dimensions =", dims

        # layer is just an index of the layer
        for layer in range(len(self.lstm_layers_dim)):

            # before the cell, input, forget and output gates, x needs to
            # be transformed
            linear = Linear(
                dims[layer],
                dims[layer + 1] * 4,
                #weights_init=Uniform(mean=data_mean, std=1),
                weights_init=IsotropicGaussian(mean=1., std=1),
                biases_init=Constant(0),
                name="linear" + str(layer))
            linear.initialize()
            lstm_input = linear.apply(layers_input[layer])

            # the lstm wants batch X time X value
            lstm = LSTM(dim=dims[layer + 1],
                        weights_init=IsotropicGaussian(mean=0., std=0.5),
                        biases_init=Constant(1),
                        name="lstm" + str(layer))
            lstm.initialize()
            # hack to use Orthogonal on lstm w_state
            lstm.W_state.set_value(Orthogonal().generate(
                np.random,
                lstm.W_state.get_value().shape))
            h, _dummy = lstm.apply(lstm_input)

            layers_input.append(h)

        # the idea is to have one gaussian parametrize every frequency bin
        print "Last linear transform dim :", dims[1:].sum()
        output_transform = Linear(
            dims[1:].sum(),
            self.output_dim,
            weights_init=IsotropicGaussian(mean=0., std=1),
            biases_init=Constant(0),
            #use_bias=False,
            name="output_transform")
        output_transform.initialize()
        if len(self.lstm_layers_dim) == 1:
            print "hallo there, only one layer speaking"
            y_hat = output_transform.apply(layers_input[-1])
        else:
            y_hat = output_transform.apply(
                T.concatenate(layers_input[1:], axis=2))

        sig = T.nnet.relu(y_hat[:, :, :self.output_dim / 2]) + 0.05
        mus = y_hat[:, :, self.output_dim / 2:]

        # sum likelihood with targets
        # sum inside log accross mixtures, sum outside log accross time
        inside_expo = -0.5 * ((y - mus)**2) / sig**2
        expo = T.exp(inside_expo)
        coeff = 1. / (T.sqrt(2. * np.pi) * sig)
        inside_log = T.log(coeff * expo)
        inside_log_max = T.max(inside_log, axis=2, keepdims=True)
        LL = -(inside_log_max + T.log(
            T.sum(T.exp(inside_log - inside_log_max), axis=2,
                  keepdims=True))).sum()

        #zinside_expo = -0.5*((z-mus)**2)/sig**2
        #zexpo = T.exp(zinside_expo)
        #zcoeff = pis*(1./(T.sqrt(2.*np.pi)*sig))
        #zinside_log = (zcoeff*zexpo).sum(axis=2)
        #zLL = -(T.log(zinside_log)).sum()

        model = Model(LL)
        self.model = model
        parameters = model.parameters

        grads = T.grad(LL, parameters)
        updates = []
        lr = T.scalar('lr')
        for i in range(len(grads)):
            #updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))
            updates.append(
                tuple([parameters[i], parameters[i] - lr * grads[i]]))

        #gradf = theano.function([x, y],[LL],updates=updates, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
        if self.debug:
            gradf = theano.function([x, y, lr], [LL, mus, sig],
                                    updates=updates)
        else:
            #gradf = theano.function([x, y, z],[zLL],updates=updates)
            gradf = theano.function([x, y, lr], [LL], updates=updates)
        f = theano.function([x], [sig, mus])

        return gradf, f

Exemple #3

0

Afficher le fichier

    def build_theano_functions(self):
        x = T.ftensor3('x')  # shape of input : batch X time X value
        y = T.ftensor4('y')

        layers_input = [x]
        dims = np.array([self.time_dim])
        for dim in self.lstm_layers_dim:
            dims = np.append(dims, dim)
        print "Dimensions =", dims

        # layer is just an index of the layer
        for layer in range(len(self.lstm_layers_dim)):

            # before the cell, input, forget and output gates, x needs to
            # be transformed
            linear = Linear(
                dims[layer],
                dims[layer + 1] * 4,
                weights_init=Orthogonal(self.orth_scale),
                #weights_init=IsotropicGaussian(mean=1.,std=1),
                biases_init=Constant(0),
                name="linear" + str(layer))
            linear.initialize()
            lstm_input = linear.apply(layers_input[layer])

            # the lstm wants batch X time X value
            lstm = LSTM(dim=dims[layer + 1],
                        weights_init=IsotropicGaussian(mean=0., std=0.5),
                        biases_init=Constant(1),
                        name="lstm" + str(layer))
            lstm.initialize()
            # hack to use Orthogonal on lstm w_state
            lstm.W_state.set_value(
                self.orth_scale *
                Orthogonal().generate(np.random,
                                      lstm.W_state.get_value().shape))
            h, _dummy = lstm.apply(lstm_input)

            layers_input.append(h)

        # this is where Alex Graves' paper starts
        print "Last linear transform dim :", dims[1:].sum()
        output_transform = Linear(
            dims[1:].sum(),
            self.output_dim,
            weights_init=Orthogonal(self.orth_scale),
            #weights_init=IsotropicGaussian(mean=0., std=1),
            use_bias=False,
            name="output_transform")
        output_transform.initialize()
        if len(self.lstm_layers_dim) == 1:
            print "hallo there, only one layer speaking"
            y_hat = output_transform.apply(layers_input[-1])
        else:
            y_hat = output_transform.apply(
                T.concatenate(layers_input[1:], axis=2))

        # transforms to find each gmm params (mu, pi, sig)
        # small hack to softmax a 3D tensor
        #pis = T.reshape(
        #            T.nnet.softmax(
        #                T.nnet.sigmoid(
        #                    T.reshape(y_hat[:,:,0:self.gmm_dim], (self.time_dim*self.batch_dim, self.gmm_dim)))),
        #            (self.batch_dim, self.time_dim, self.gmm_dim))
        pis = T.reshape(
            T.nnet.softmax(
                T.reshape(y_hat[:, :, :self.gmm_dim],
                          (self.sequence_dim * self.batch_dim, self.gmm_dim))),
            (self.batch_dim, self.sequence_dim, self.gmm_dim))
        sig = T.exp(y_hat[:, :, self.gmm_dim:self.gmm_dim * 2]) + 1e-6
        #sig = T.nnet.relu(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+0.1
        #mus = 2.*T.tanh(y_hat[:,:,self.gmm_dim*2:])
        mus = y_hat[:, :, self.gmm_dim * 2:]

        pis = pis[:, :, :, np.newaxis]
        mus = mus[:, :, :, np.newaxis]
        sig = sig[:, :, :, np.newaxis]
        #y = y[:,:,np.newaxis,:]

        y = T.patternbroadcast(y, (False, False, True, False))
        mus = T.patternbroadcast(mus, (False, False, False, True))
        sig = T.patternbroadcast(sig, (False, False, False, True))

        # sum likelihood with targets
        # see blog for this crazy Pr() = sum log sum prod
        # axes :: (batch, sequence, mixture, time)
        expo_term = -0.5 * ((y - mus)**2) / sig**2
        coeff = T.log(T.maximum(1. / (T.sqrt(2. * np.pi) * sig), EPS))
        #coeff = T.log(1./(T.sqrt(2.*np.pi)*sig))
        sequences = coeff + expo_term
        log_sequences = T.log(pis + EPS) + T.sum(
            sequences, axis=3, keepdims=True)

        log_sequences_max = T.max(log_sequences, axis=2, keepdims=True)

        LL = -(log_sequences_max + T.log(EPS + T.sum(
            T.exp(log_sequences - log_sequences_max), axis=2, keepdims=True))
               ).mean()

        model = Model(LL)
        self.model = model
        parameters = model.parameters

        grads = T.grad(LL, parameters)
        updates = []
        lr = T.scalar('lr')
        for i in range(len(grads)):
            #updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))
            updates.append(
                tuple([parameters[i], parameters[i] - lr * grads[i]]))

        #gradf = theano.function([x, y],[LL],updates=updates, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
        if self.debug:
            gradf = theano.function([x, y, lr], [LL, pis, mus, sig],
                                    updates=updates)
        else:
            #gradf = theano.function([x, y, z],[zLL],updates=updates)
            gradf = theano.function([x, y, lr], [LL], updates=updates)
        f = theano.function([x], [pis, sig, mus])

        return gradf, f

Exemple #4

0

Afficher le fichier

    def build_theano_functions(self):
        x = T.fmatrix('time_sequence')
        x = x.reshape((self.batch_dim, self.sequence_dim, self.time_dim))

        y = x[:, 1:self.sequence_dim, :]
        x = x[:, :self.sequence_dim - 1, :]

        # if we try to include the spectrogram features
        spec_dims = 0
        if self.image_size is not None:
            print "Convolution activated"
            self.init_conv()
            spec = T.ftensor4('spectrogram')
            spec_features, spec_dims = self.conv.build_conv_layers(spec)
            print "Conv final dims =", spec_dims
            spec_dims = np.prod(spec_dims)
            spec_features = spec_features.reshape(
                (self.batch_dim, self.sequence_dim - 1, spec_dims))
            x = T.concatenate([x, spec_features], axis=2)

        layers_input = [x]
        dims = np.array([self.time_dim + spec_dims])
        for dim in self.lstm_layers_dim:
            dims = np.append(dims, dim)
        print "Dimensions =", dims

        # layer is just an index of the layer
        for layer in range(len(self.lstm_layers_dim)):

            # before the cell, input, forget and output gates, x needs to
            # be transformed
            linear = Linear(dims[layer],
                            dims[layer + 1] * 4,
                            weights_init=Orthogonal(self.orth_scale),
                            biases_init=Constant(0),
                            name="linear" + str(layer))
            linear.initialize()
            lstm_input = linear.apply(layers_input[layer])

            # the lstm wants batch X sequence X time
            lstm = LSTM(dim=dims[layer + 1],
                        weights_init=IsotropicGaussian(mean=0., std=0.5),
                        biases_init=Constant(1),
                        name="lstm" + str(layer))
            lstm.initialize()
            # hack to use Orthogonal on lstm w_state
            lstm.W_state.set_value(
                self.orth_scale *
                Orthogonal().generate(np.random,
                                      lstm.W_state.get_value().shape))
            h, _dummy = lstm.apply(lstm_input)

            layers_input.append(h)

        # this is where Alex Graves' paper starts
        print "Last linear transform dim :", dims[1:].sum()
        output_transform = Linear(dims[1:].sum(),
                                  self.output_dim,
                                  weights_init=Orthogonal(self.orth_scale),
                                  use_bias=False,
                                  name="output_transform")
        output_transform.initialize()
        if len(self.lstm_layers_dim) == 1:
            print "hallo there, only one layer speaking"
            y_hat = output_transform.apply(layers_input[-1])
        else:
            y_hat = output_transform.apply(
                T.concatenate(layers_input[1:], axis=2))

        # transforms to find each gmm params (mu, pi, sig)
        # small hack to softmax a 3D tensor
        pis = T.reshape(
            T.nnet.softmax(
                T.reshape(
                    y_hat[:, :, :self.gmm_dim],
                    ((self.sequence_dim - 1) * self.batch_dim, self.gmm_dim))),
            (self.batch_dim, (self.sequence_dim - 1), self.gmm_dim))
        sig = T.exp(y_hat[:, :, self.gmm_dim:self.gmm_dim * 2]) + 1e-6
        mus = y_hat[:, :, self.gmm_dim * 2:]

        pis = pis[:, :, :, np.newaxis]
        mus = mus[:, :, :, np.newaxis]
        sig = sig[:, :, :, np.newaxis]
        y = y[:, :, np.newaxis, :]

        y = T.patternbroadcast(y, (False, False, True, False))
        mus = T.patternbroadcast(mus, (False, False, False, True))
        sig = T.patternbroadcast(sig, (False, False, False, True))

        # sum likelihood with targets
        # see blog for this crazy Pr() = sum log sum prod
        # axes :: (batch, sequence, mixture, time)
        expo_term = -0.5 * ((y - mus)**2) / sig**2
        coeff = T.log(T.maximum(1. / (T.sqrt(2. * np.pi) * sig), EPS))
        #coeff = T.log(1./(T.sqrt(2.*np.pi)*sig))
        sequences = coeff + expo_term
        log_sequences = T.log(pis + EPS) + T.sum(
            sequences, axis=3, keepdims=True)

        log_sequences_max = T.max(log_sequences, axis=2, keepdims=True)

        LL = -(log_sequences_max + T.log(EPS + T.sum(
            T.exp(log_sequences - log_sequences_max), axis=2, keepdims=True))
               ).mean()
        LL.name = "summed_likelihood"

        model = Model(LL)
        self.model = model
        parameters = model.parameters

        algorithm = GradientDescent(cost=LL,
                                    parameters=model.parameters,
                                    step_rule=Adam())

        f = theano.function([x], [pis, sig, mus])

        return algorithm, f

Exemple #5

0

Afficher le fichier

Fichier : __init__.py Projet : EugenePY/tensor-work

    input_dim = 6
    output_dim = 8
    n_classes = 10
    test_data = {x: np.random.normal(size=(n_steps, batch_size, input_dim)
                                     ).astype(np.float32),
                 y: np.random.randint(n_classes, size=(batch_size, )
                                      ).astype(np.int32)}
    inits = {
        'weights_init': IsotropicGaussian(0.1),
        'biases_init': Constant(0.),
    }

    core = CoreNetwork(input_dim=input_dim, dim=output_dim, **inits)
    core.initialize()
    proj = Linear(input_dim=output_dim*2, output_dim=n_classes, **inits)
    proj.initialize()
    out = Softmax()

    state, cell = core.apply(x)

    a = T.concatenate([state, cell], axis=2)
    a = a.reshape((a.shape[0]*a.shape[1], a.shape[2]))

    a = proj.apply(a)
    prop = out.apply(a).reshape((n_steps, batch_size, n_classes))
    pred = prop[-1]
    prop = prop.reshape((n_steps * batch_size, n_classes))

    print prop.eval({x: test_data[x]})
    y_reat = T.repeat(y[None, :], n_steps, axis=0).reshape(
        (n_steps * batch_size, 1))

Exemple #6

0

Afficher le fichier

Fichier : lstm_gmm.py Projet : olimastro/ift6266

    def build_theano_functions(self) :
        x = T.ftensor3('x') # shape of input : batch X time X value
        y = T.ftensor4('y')

        layers_input = [x]
        dims =np.array([self.time_dim])
        for dim in self.lstm_layers_dim :
            dims = np.append(dims, dim)
        print "Dimensions =", dims

        # layer is just an index of the layer
        for layer in range(len(self.lstm_layers_dim)) :

            # before the cell, input, forget and output gates, x needs to
            # be transformed
            linear = Linear(dims[layer],
                            dims[layer+1]*4,
                            weights_init=Orthogonal(self.orth_scale),
                            #weights_init=IsotropicGaussian(mean=1.,std=1),
                            biases_init=Constant(0),
                            name="linear"+str(layer))
            linear.initialize()
            lstm_input = linear.apply(layers_input[layer])

            # the lstm wants batch X time X value
            lstm = LSTM(
                dim=dims[layer+1],
                weights_init=IsotropicGaussian(mean=0.,std=0.5),
                biases_init=Constant(1),
                name="lstm"+str(layer))
            lstm.initialize()
            # hack to use Orthogonal on lstm w_state
            lstm.W_state.set_value(
                self.orth_scale*Orthogonal().generate(np.random, lstm.W_state.get_value().shape))
            h, _dummy = lstm.apply(lstm_input)

            layers_input.append(h)

        # this is where Alex Graves' paper starts
        print "Last linear transform dim :", dims[1:].sum()
        output_transform = Linear(dims[1:].sum(),
                                  self.output_dim,
                                  weights_init=Orthogonal(self.orth_scale),
                                  #weights_init=IsotropicGaussian(mean=0., std=1),
                                  use_bias=False,
                                  name="output_transform")
        output_transform.initialize()
        if len(self.lstm_layers_dim) == 1 :
            print "hallo there, only one layer speaking"
            y_hat = output_transform.apply(layers_input[-1])
        else :
            y_hat = output_transform.apply(T.concatenate(layers_input[1:], axis=2))

        # transforms to find each gmm params (mu, pi, sig)
        # small hack to softmax a 3D tensor
        #pis = T.reshape(
        #            T.nnet.softmax(
        #                T.nnet.sigmoid(
        #                    T.reshape(y_hat[:,:,0:self.gmm_dim], (self.time_dim*self.batch_dim, self.gmm_dim)))),
        #            (self.batch_dim, self.time_dim, self.gmm_dim))
        pis = T.reshape(
                    T.nnet.softmax(
                        T.reshape(y_hat[:,:,:self.gmm_dim], (self.sequence_dim*self.batch_dim, self.gmm_dim))),
                    (self.batch_dim, self.sequence_dim, self.gmm_dim))
        sig = T.exp(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+1e-6
        #sig = T.nnet.relu(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+0.1
        #mus = 2.*T.tanh(y_hat[:,:,self.gmm_dim*2:])
        mus = y_hat[:,:,self.gmm_dim*2:]

        pis = pis[:,:,:,np.newaxis]
        mus = mus[:,:,:,np.newaxis]
        sig = sig[:,:,:,np.newaxis]
        #y = y[:,:,np.newaxis,:]

        y = T.patternbroadcast(y, (False, False, True, False))
        mus = T.patternbroadcast(mus, (False, False, False, True))
        sig = T.patternbroadcast(sig, (False, False, False, True))

        # sum likelihood with targets
        # see blog for this crazy Pr() = sum log sum prod
        # axes :: (batch, sequence, mixture, time)
        expo_term = -0.5*((y-mus)**2)/sig**2
        coeff = T.log(T.maximum(1./(T.sqrt(2.*np.pi)*sig), EPS))
        #coeff = T.log(1./(T.sqrt(2.*np.pi)*sig))
        sequences = coeff + expo_term
        log_sequences = T.log(pis + EPS) + T.sum(sequences, axis=3, keepdims=True)

        log_sequences_max = T.max(log_sequences, axis=2, keepdims=True)

        LL = -(log_sequences_max + T.log(EPS + T.sum(T.exp(log_sequences - log_sequences_max), axis=2, keepdims=True))).mean()

        model = Model(LL)
        self.model = model
        parameters = model.parameters

        grads = T.grad(LL, parameters)
        updates = []
        lr = T.scalar('lr')
        for i in range(len(grads)) :
            #updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))
            updates.append(tuple([parameters[i], parameters[i] - lr*grads[i]]))

        #gradf = theano.function([x, y],[LL],updates=updates, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
        if self.debug :
            gradf = theano.function([x, y, lr],[LL, pis, mus, sig],updates=updates)
        else :
            #gradf = theano.function([x, y, z],[zLL],updates=updates)
            gradf = theano.function([x, y, lr],[LL],updates=updates)
        f = theano.function([x],[pis, sig, mus])

        return gradf, f

Exemple #7

0

Afficher le fichier

Fichier : lstm_fourier.py Projet : olimastro/ift6266

    def build_theano_functions(self) :
        x = T.ftensor3('x') # shape of input : batch X time X value
        y = T.ftensor3('y')
        z = T.ftensor3('z')

        layers_input = [x]
        dims =np.array([self.input_dim])
        for dim in self.lstm_layers_dim :
            dims = np.append(dims, dim)
        print "Dimensions =", dims

        # layer is just an index of the layer
        for layer in range(len(self.lstm_layers_dim)) :

            # before the cell, input, forget and output gates, x needs to
            # be transformed
            linear = Linear(dims[layer],
                            dims[layer+1]*4,
                            #weights_init=Uniform(mean=data_mean, std=1),
                            weights_init=IsotropicGaussian(mean=1.,std=1),
                            biases_init=Constant(0),
                            name="linear"+str(layer))
            linear.initialize()
            lstm_input = linear.apply(layers_input[layer])

            # the lstm wants batch X time X value
            lstm = LSTM(
                dim=dims[layer+1],
                weights_init=IsotropicGaussian(mean=0.,std=0.5),
                biases_init=Constant(1),
                name="lstm"+str(layer))
            lstm.initialize()
            # hack to use Orthogonal on lstm w_state
            lstm.W_state.set_value(Orthogonal().generate(np.random, lstm.W_state.get_value().shape))
            h, _dummy = lstm.apply(lstm_input)

            layers_input.append(h)

        # the idea is to have one gaussian parametrize every frequency bin
        print "Last linear transform dim :", dims[1:].sum()
        output_transform = Linear(dims[1:].sum(),
                                  self.output_dim,
                                  weights_init=IsotropicGaussian(mean=0., std=1),
                                  biases_init=Constant(0),
                                  #use_bias=False,
                                  name="output_transform")
        output_transform.initialize()
        if len(self.lstm_layers_dim) == 1 :
            print "hallo there, only one layer speaking"
            y_hat = output_transform.apply(layers_input[-1])
        else :
            y_hat = output_transform.apply(T.concatenate(layers_input[1:], axis=2))

        sig = T.nnet.relu(y_hat[:,:,:self.output_dim/2])+0.05
        mus = y_hat[:,:,self.output_dim/2:]

        # sum likelihood with targets
        # sum inside log accross mixtures, sum outside log accross time
        inside_expo = -0.5*((y-mus)**2)/sig**2
        expo = T.exp(inside_expo)
        coeff = 1./(T.sqrt(2.*np.pi)*sig)
        inside_log = T.log(coeff*expo)
        inside_log_max = T.max(inside_log, axis=2, keepdims=True)
        LL = -(inside_log_max + T.log(T.sum(T.exp(inside_log - inside_log_max), axis=2, keepdims=True))).sum()

        #zinside_expo = -0.5*((z-mus)**2)/sig**2
        #zexpo = T.exp(zinside_expo)
        #zcoeff = pis*(1./(T.sqrt(2.*np.pi)*sig))
        #zinside_log = (zcoeff*zexpo).sum(axis=2)
        #zLL = -(T.log(zinside_log)).sum()

        model = Model(LL)
        self.model = model
        parameters = model.parameters

        grads = T.grad(LL, parameters)
        updates = []
        lr = T.scalar('lr')
        for i in range(len(grads)) :
            #updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))
            updates.append(tuple([parameters[i], parameters[i] - lr*grads[i]]))

        #gradf = theano.function([x, y],[LL],updates=updates, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
        if self.debug :
            gradf = theano.function([x, y, lr],[LL, mus, sig],updates=updates)
        else :
            #gradf = theano.function([x, y, z],[zLL],updates=updates)
            gradf = theano.function([x, y, lr],[LL],updates=updates)
        f = theano.function([x],[sig, mus])

        return gradf, f

Exemple #8

0

Afficher le fichier

Fichier : test_lstm_gmm_theano.py Projet : olimastro/ift6266

    def build_theano_functions(self, data_mean, data_std) :
        x = T.ftensor3('x') # shape of input : batch X time X value
        y = T.ftensor3('y')

        # before the cell, input, forget and output gates, x needs to
        # be transformed
        linear_transforms = []
        for transform in ['c','i','f','o'] :
            linear_transforms.append(
                Linear(self.input_dim,
                       self.lstm_dim,
                       weights_init=Uniform(mean=data_mean, std=data_std),
                       #weights_init=IsotropicGaussian(mean=1.,std=1),
                       biases_init=Constant(data_mean),
                       name=transform+"_transform")
            )

        for transform in linear_transforms :
            transform.initialize()

        linear_applications = []
        for transform in linear_transforms :
            linear_applications.append(
                transform.apply(x))

        lstm_input = T.concatenate(linear_applications, axis=2)

        # the lstm wants batch X time X value
        lstm = LSTM(
            dim=self.lstm_dim,
            weights_init=IsotropicGaussian(mean=0.5,std=1),
            biases_init=Constant(1))
        lstm.initialize()
        h, _dummy = lstm.apply(lstm_input)

        # this is where Alex Graves' paper starts
        output_transform = Linear(self.lstm_dim,
                                  self.output_dim,
                                  #weights_init=Uniform(mean=data_mean, std=data_std),
                                  weights_init=IsotropicGaussian(mean=0., std=1),
                                  biases_init=Constant(1),
                                  name="output_transform")
        output_transform.initialize()
        y_hat = output_transform.apply(h)

        # transforms to find each gmm params (mu, pi, sig)
        #pis = NDimensionalSoftmax.apply(y_hat[:,:,0:self.gmm_dim])
        # small hack to softmax a 3D tensor
        pis = T.reshape(
                    T.nnet.softmax(
                        T.reshape(y_hat[:,:,0:self.gmm_dim], (self.time_dim*self.batch_dim, self.gmm_dim)))
                    , (self.batch_dim, self.time_dim, self.gmm_dim))
        #sig = T.exp(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])
        sig = T.nnet.relu(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+0.1
        mus = y_hat[:,:,self.gmm_dim*2:]

        pis = pis[:,:,:,np.newaxis]
        mus = mus[:,:,:,np.newaxis]
        sig = sig[:,:,:,np.newaxis]
        y = y[:,:,np.newaxis,:]

        #sig=theano.printing.Print()(sig)

        # sum likelihood with targets
        # sum inside log accross mixtures, sum outside log accross time
        #LL = -T.log((pis*(1./(T.sqrt(2.*np.pi)*sig))*T.exp(-0.5*((y-mus)**2)/sig**2)).sum(axis=2)).sum()
        expo = T.exp(-0.5*((y-mus)**2)/sig**2)
        test_expo = theano.function([x,y],[expo, mus, sig])
        return test_expo

        coeff = pis*(1./(T.sqrt(2.*np.pi)*sig))
        inside_log = (coeff*expo).sum(axis=2)
        LL = -(T.log(inside_log)).sum()


        model = Model(LL)
        self.model = model
        parameters = model.parameters

        grads = T.grad(LL, parameters)
        updates = []
        for i in range(len(grads)) :
            updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))

        #gradf = theano.function([x, y],[LL],updates=updates, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
        gradf = theano.function([x, y],[LL],updates=updates)
        f = theano.function([x],[pis, sig, mus])

        return gradf, f

Exemple #9

0

Afficher le fichier

    def build_theano_functions(self, data_mean, data_std):
        x = T.ftensor3('x')  # shape of input : batch X time X value
        y = T.ftensor3('y')

        # before the cell, input, forget and output gates, x needs to
        # be transformed
        linear_transforms = []
        for transform in ['c', 'i', 'f', 'o']:
            linear_transforms.append(
                Linear(
                    self.input_dim,
                    self.lstm_dim,
                    weights_init=Uniform(mean=data_mean, std=data_std),
                    #weights_init=IsotropicGaussian(mean=1.,std=1),
                    biases_init=Constant(data_mean),
                    name=transform + "_transform"))

        for transform in linear_transforms:
            transform.initialize()

        linear_applications = []
        for transform in linear_transforms:
            linear_applications.append(transform.apply(x))

        lstm_input = T.concatenate(linear_applications, axis=2)

        # the lstm wants batch X time X value
        lstm = LSTM(dim=self.lstm_dim,
                    weights_init=IsotropicGaussian(mean=0.5, std=1),
                    biases_init=Constant(1))
        lstm.initialize()
        h, _dummy = lstm.apply(lstm_input)

        # this is where Alex Graves' paper starts
        output_transform = Linear(
            self.lstm_dim,
            self.output_dim,
            #weights_init=Uniform(mean=data_mean, std=data_std),
            weights_init=IsotropicGaussian(mean=0., std=1),
            biases_init=Constant(1),
            name="output_transform")
        output_transform.initialize()
        y_hat = output_transform.apply(h)

        # transforms to find each gmm params (mu, pi, sig)
        #pis = NDimensionalSoftmax.apply(y_hat[:,:,0:self.gmm_dim])
        # small hack to softmax a 3D tensor
        pis = T.reshape(
            T.nnet.softmax(
                T.reshape(y_hat[:, :, 0:self.gmm_dim],
                          (self.time_dim * self.batch_dim, self.gmm_dim))),
            (self.batch_dim, self.time_dim, self.gmm_dim))
        #sig = T.exp(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])
        sig = T.nnet.relu(y_hat[:, :, self.gmm_dim:self.gmm_dim * 2]) + 0.1
        mus = y_hat[:, :, self.gmm_dim * 2:]

        pis = pis[:, :, :, np.newaxis]
        mus = mus[:, :, :, np.newaxis]
        sig = sig[:, :, :, np.newaxis]
        y = y[:, :, np.newaxis, :]

        #sig=theano.printing.Print()(sig)

        # sum likelihood with targets
        # sum inside log accross mixtures, sum outside log accross time
        #LL = -T.log((pis*(1./(T.sqrt(2.*np.pi)*sig))*T.exp(-0.5*((y-mus)**2)/sig**2)).sum(axis=2)).sum()
        expo = T.exp(-0.5 * ((y - mus)**2) / sig**2)
        test_expo = theano.function([x, y], [expo, mus, sig])
        return test_expo

        coeff = pis * (1. / (T.sqrt(2. * np.pi) * sig))
        inside_log = (coeff * expo).sum(axis=2)
        LL = -(T.log(inside_log)).sum()

        model = Model(LL)
        self.model = model
        parameters = model.parameters

        grads = T.grad(LL, parameters)
        updates = []
        for i in range(len(grads)):
            updates.append(
                tuple([parameters[i], parameters[i] - self.lr * grads[i]]))

        #gradf = theano.function([x, y],[LL],updates=updates, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
        gradf = theano.function([x, y], [LL], updates=updates)
        f = theano.function([x], [pis, sig, mus])

        return gradf, f

Exemple #10

0

Afficher le fichier

    def build_theano_functions(self) :
        # shape of theano inpu is time+1 X features
        x = T.fmatrix('frequency_sequence')
        x = x.reshape((self.batch_dim, self.time_dim+1, self.input_dim))

        y = x[:,1:self.time_dim+1,:]
        x = x[:,:self.time_dim,:]

        layers_input = [x]
        dims =np.array([self.input_dim])
        for dim in self.lstm_layers_dim :
            dims = np.append(dims, dim)
        print "Dimensions =", dims

        # layer is just an index of the layer
        for layer in range(len(self.lstm_layers_dim)) :

            # before the cell, input, forget and output gates, x needs to
            # be transformed
            linear = Linear(dims[layer],
                            dims[layer+1]*4,
                            weights_init=Orthogonal(self.orth_scale),
                            #weights_init=IsotropicGaussian(mean=1.,std=1),
                            biases_init=Constant(0),
                            name="linear"+str(layer))
            linear.initialize()
            lstm_input = linear.apply(layers_input[layer])

            # the lstm wants batch X time X value
            lstm = LSTM(
                dim=dims[layer+1],
                weights_init=IsotropicGaussian(mean=0.,std=0.5),
                biases_init=Constant(1),
                name="lstm"+str(layer))
            lstm.initialize()
            # hack to use Orthogonal on lstm w_state
            lstm.W_state.set_value(
                self.orth_scale*Orthogonal().generate(np.random, lstm.W_state.get_value().shape))
            h, _dummy = lstm.apply(lstm_input)

            layers_input.append(h)

        # the idea is to have one gaussian parametrize every frequency bin
        print "Last linear transform dim :", dims[1:].sum()
        output_transform = Linear(dims[1:].sum(),
                                  self.output_dim,
                                  #weights_init=IsotropicGaussian(mean=0., std=1),
                                  weights_init=Orthogonal(self.orth_scale),
                                  biases_init=Constant(0),
                                  #use_bias=False,
                                  name="output_transform")
        output_transform.initialize()
        if len(self.lstm_layers_dim) == 1 :
            print "hallo there, only one layer speaking"
            y_hat = output_transform.apply(layers_input[-1])
        else :
            y_hat = output_transform.apply(T.concatenate(layers_input[1:], axis=2))

        sig = T.nnet.relu(y_hat[:,:,:self.output_dim/2])+0.05
        mus = y_hat[:,:,self.output_dim/2:]

        # sum likelihood with targets
        # sum inside log accross mixtures, sum outside log accross time
        inside_expo = -0.5*((y-mus)**2)/sig**2
        expo = T.exp(inside_expo)
        coeff = 1./(T.sqrt(2.*np.pi)*sig)
        inside_log = T.log(coeff*expo)
        inside_log_max = T.max(inside_log, axis=2, keepdims=True)
        LL = -(inside_log_max + T.log(T.sum(T.exp(inside_log - inside_log_max), axis=2, keepdims=True))).sum()
        LL.name = "summed_likelihood"

        model = Model(LL)
        self.model = model

        algorithm = GradientDescent(
            cost=LL,
            parameters=model.parameters,
            step_rule=AdaGrad())

        f = theano.function([x],[sig, mus])

        return algorithm, f