Esempi in Python per param, esempi in Python per lib.param

Esempio n. 1

0

Mostra file

def Conv1D(name, input_dim, output_dim, filter_size, inputs, biases=True):
    """
    1D convolution implemented as a 2D convolution over a width-1 axis.

    inputs.shape: (batch size, input_dim, height)
    output.shape: (batch size, output_dim, height)
    * performs valid convs

    Registers the parameter `name.Filters` and, when `biases` is true,
    `name.Biases` (initialised to zeros) through `lib.param`.
    """
    # A uniform distribution on [-limit, limit] has stdev limit / sqrt(3).
    target_stdev = 1. / np.sqrt(input_dim * filter_size)
    limit = target_stdev * np.sqrt(3)
    filter_values = np.random.uniform(
        low=-limit,
        high=limit,
        size=(output_dim, input_dim, filter_size, 1),
    ).astype(theano.config.floatX)
    filters = lib.param(name + '.Filters', filter_values)

    # Append a broadcastable trailing axis so conv2d can be applied.
    inputs_4d = inputs.dimshuffle(0, 1, 2, 'x')
    conv = T.nnet.conv2d(inputs_4d, filters, border_mode='valid')
    # Mark the trailing axis broadcastable again so dimshuffle may drop it.
    result = T.addbroadcast(conv, 3).dimshuffle(0, 1, 2)

    if not biases:
        return result

    bias_values = np.zeros(output_dim, dtype=theano.config.floatX)
    biases_ = lib.param(name + '.Biases', bias_values)
    return result + biases_[None, :, None]

Esempio n. 2

0

Mostra file

File: conv1d.py Progetto: igul222/image_generation

def Conv1D(name, input_dim, output_dim, filter_size, inputs, biases=True):
    """
    Valid-mode 1D convolution built on top of T.nnet.conv2d.

    inputs.shape: (batch size, input_dim, height)
    output.shape: (batch size, output_dim, height)
    * performs valid convs

    The filters are stored under `name.Filters`; an optional zero-initialised
    bias vector is stored under `name.Biases`.
    """
    def draw_uniform(stdev, shape):
        # Bounds chosen so the uniform samples have the requested stdev.
        bound = stdev * np.sqrt(3)
        samples = np.random.uniform(low=-bound, high=bound, size=shape)
        return samples.astype(theano.config.floatX)

    filter_shape = (output_dim, input_dim, filter_size, 1)
    filter_stdev = 1. / np.sqrt(input_dim * filter_size)
    filters = lib.param(
        name + '.Filters',
        draw_uniform(filter_stdev, filter_shape)
    )

    # conv2d needs 4D inputs: add a trailing broadcastable axis.
    conv_out = T.nnet.conv2d(
        inputs.dimshuffle(0, 1, 2, 'x'),
        filters,
        border_mode='valid'
    )
    # Restore the broadcast flag on axis 3, then drop that axis.
    conv_out = T.addbroadcast(conv_out, 3)
    result = conv_out.dimshuffle(0, 1, 2)

    if biases:
        zero_biases = np.zeros(output_dim, dtype=theano.config.floatX)
        biases_ = lib.param(name + '.Biases', zero_biases)
        result = result + biases_[None, :, None]

    return result

Esempio n. 3

0

Mostra file

File: ops.py Progetto: zhang-jian/WaveNet

def myGRU(name, input_dim, hidden_dim, inputs, h0=None):
    """
    Run a GRU over `inputs`, delegating each time step to `recurrent_fn`.

    inputs.shape = (batch_size, N_FRAMES, FRAME_SIZE)

    Registers `name.Gates.W`, `name.Gates.b` (ones), `name.Candidate.W` and
    `name.Candidate.b` (zeros) through `lib.param`. The result is the scan
    output with its first two axes swapped back, named `name.output`.

    NOTE(review): `h0` is passed to scan as `outputs_info=[h0]`; confirm
    that callers always provide it, since the default is None.
    """
    float_type = theano.config.floatX

    # scan iterates over the leading axis, so swap the first two axes.
    time_major = inputs.transpose(1, 0, 2)

    gates_w = lib.param(
        name + '.Gates.W',
        lasagne.init.GlorotUniform().sample(
            (input_dim + hidden_dim, 2 * hidden_dim)))
    gates_b = lib.param(name + '.Gates.b',
                        np.ones(2 * hidden_dim).astype(float_type))

    candidate_w = lib.param(
        name + '.Candidate.W',
        lasagne.init.GlorotUniform().sample(
            (input_dim + hidden_dim, hidden_dim)))
    candidate_b = lib.param(name + '.Candidate.b',
                            np.zeros(hidden_dim).astype(float_type))

    hidden_states, _ = theano.scan(
        lambda x_t, h_tm1: recurrent_fn(x_t, h_tm1, name, input_dim,
                                        hidden_dim, gates_w, gates_b,
                                        candidate_w, candidate_b),
        sequences=[time_major],
        outputs_info=[h0],
    )

    out = hidden_states.dimshuffle(1, 0, 2)
    out.name = name + '.output'
    return out

Esempio n. 4

0

Mostra file

File: ops.py Progetto: zhang-jian/WaveNet

def conv1d(name,
           input,
           kernel,
           stride,
           n_filters,
           depth,
           bias=False,
           batchnorm=False,
           pad='valid',
           filter_dilation=(1, 1),
           run_mode=0):
    """
    Convolve `input` with a learned (n_filters, depth, kernel, 1) filter bank.

    The filters are He-normal initialised and registered as `name.W`. The
    convolution is strided by `stride` along the first spatial axis and uses
    `pad` as border mode. When `bias` is true a zero-initialised `name.b` is
    added per filter; when `batchnorm` is true the result is passed through
    `BatchNorm` with `mode=1` and the given `run_mode`.
    """
    filter_shape = (n_filters, depth, kernel, 1)
    W = lib.param(
        name + '.W',
        lasagne.init.HeNormal().sample(filter_shape).astype('float32'))

    out = T.nnet.conv2d(input,
                        W,
                        border_mode=pad,
                        subsample=(stride, 1),
                        filter_dilation=filter_dilation)

    if bias:
        bias_init = np.zeros(n_filters).astype('float32')
        b = lib.param(name + '.b', bias_init)
        # Broadcast the per-filter bias over batch and spatial axes.
        out += b[None, :, None, None]

    if not batchnorm:
        return out

    return BatchNorm(name, out, n_filters, mode=1, run_mode=run_mode)

Esempio n. 5

0

Mostra file

def Deconv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True):
    """
    Stride-2 transposed convolution with a learned bias.

    inputs: tensor of shape (batch size, num channels, height, width)
    returns: tensor of shape (batch size, num channels, 2*height, 2*width)

    he_init: when true, the filter stdev is scaled by an extra sqrt(2).

    Registers `name.Filters` with shape
    (input_dim, output_dim, filter_size, filter_size) and `name.Biases`
    (zeros of length output_dim) through `lib.param`.
    """
    def uniform(stdev, size):
        # Uniform on [-stdev*sqrt(3), stdev*sqrt(3)] has the requested stdev.
        return np.random.uniform(low=-stdev * np.sqrt(3),
                                 high=stdev * np.sqrt(3),
                                 size=size).astype(theano.config.floatX)

    filters_stdev = np.sqrt(1. / (input_dim * filter_size**2))
    if he_init:
        filters_stdev *= np.sqrt(2.)

    filters = lib.param(
        name + '.Filters',
        uniform(filters_stdev,
                (input_dim, output_dim, filter_size, filter_size)))

    biases = lib.param(name + '.Biases',
                       np.zeros(output_dim, dtype=theano.config.floatX))

    # Floor division keeps the padding an integer on Python 3 as well;
    # true division would hand a float to border_mode.
    pad = (filter_size - 1) // 2
    result = _deconv2d(
        inputs,
        filters,
        subsample=(2, 2),
        border_mode=(pad, pad),
    )
    # Broadcast the per-channel bias over batch and spatial axes.
    result = result + biases[None, :, None, None]
    return result

Esempio n. 6

0

Mostra file

def conv1d(name,
           input,
           input_dim,
           output_dim,
           filter_size,
           init='glorot',
           non_linearity='relu',
           bias=True):
    """
    Valid-mode 1D convolution over the second axis of `input`.

    `input` is shuffled with the pattern (0, 2, 1, 'x') before the
    convolution and the result is shuffled back with (0, 2, 1), so the
    channel axis is last on both sides.

    init: 'glorot' or 'he'; any other value raises NotImplementedError.
    non_linearity: 'gated', 'relu', 'elu' or 'identity'; any other value
        raises NotImplementedError. 'gated' doubles the number of filters.
    bias: when true, a zero-initialised `name.b` is added per filter.

    :author: Kundan Kumar (http://github.com/kundan2510)
    """
    import lasagne

    inp = input.dimshuffle(0, 2, 1, 'x')

    if init == 'glorot':
        initializer = lasagne.init.GlorotUniform()
    elif init == 'he':
        initializer = lasagne.init.HeUniform()
    else:
        # Previously an unknown value surfaced later as an UnboundLocalError.
        raise NotImplementedError(
            "{} initialization not implemented!".format(init))

    if non_linearity == 'gated':
        num_filters = 2 * output_dim
    else:
        num_filters = output_dim

    W_shape = (num_filters, input_dim, filter_size, 1)
    W = lib.param(name + ".W", initializer.sample(W_shape))

    if bias:
        bias_shape = (num_filters, )
        b = lib.param(name + ".b",
                      lasagne.init.Constant(0.).sample(bias_shape))

    conv_out = T.nnet.conv2d(inp, W, filter_flip=False, border_mode='valid')

    if bias:
        conv_out = conv_out + b[None, :, None, None]

    # NOTE(review): the activation is selected (which also validates
    # `non_linearity`) but it is never applied to `conv_out`; the function
    # returns the linear convolution output. Confirm whether that is intended
    # before changing it, since callers may depend on the current behavior.
    if non_linearity == 'gated':
        activation = gated_non_linerity
    elif non_linearity == 'relu':
        activation = T.nnet.relu
    elif non_linearity == 'elu':
        activation = lambda x: T.switch(x >= 0., x, T.exp(x) - floatX(1.))
    elif non_linearity == 'identity':
        activation = lambda x: x
    else:
        raise NotImplementedError(
            "{} non-linearity not implemented!".format(non_linearity))

    output = conv_out

    # Drop the trailing width-1 axis, then move channels back to the end.
    output = output.reshape(
        (output.shape[0], output.shape[1], output.shape[2]))
    output = output.dimshuffle(0, 2, 1)

    return output

Esempio n. 7

0

Mostra file

File: deconv2d.py Progetto: igul222/nn

def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True,
    weightnorm=None,
    ):
    """
    Stride-2 transposed convolution with optional weight normalization.

    inputs: tensor of shape (batch size, num channels, height, width)
    returns: tensor of shape (batch size, num channels, 2*height, 2*width)

    he_init: when true, the filter stdev is scaled by an extra sqrt(2).
    weightnorm: None defers to the module-level `_default_weightnorm`. When
        the resulting flag is truthy, the filters are rescaled so that the
        norm over axes (0, 2, 3) equals a learned parameter `name.g`, which
        is initialised to the norms of the initial filter values.
    """
    def uniform(stdev, size):
        # Uniform on [-stdev*sqrt(3), stdev*sqrt(3)] has the requested stdev.
        return np.random.uniform(
            low=-stdev * np.sqrt(3),
            high=stdev * np.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    filters_stdev = np.sqrt(1./(input_dim * filter_size**2))
    filters_stdev *= 2. # Because of the stride
    if he_init:
        filters_stdev *= np.sqrt(2.)

    filter_values = uniform(
        filters_stdev,
        (input_dim, output_dim, filter_size, filter_size)
    )

    filters = lib.param(
        name+'.Filters',
        filter_values
    )

    # Identity comparison is the correct test for the "not specified" case.
    if weightnorm is None:
        weightnorm = _default_weightnorm
    if weightnorm:
        norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,2,3)))
        norms = lib.param(
            name + '.g',
            norm_values
        )
        filters = filters * (norms / T.sqrt(T.sum(T.sqr(filters), axis=(0,2,3)))).dimshuffle('x',0,'x','x')

    biases = lib.param(
        name+'.Biases',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )

    # Floor division keeps the padding an integer on Python 3 as well;
    # true division would hand a float to border_mode.
    pad = (filter_size-1)//2
    result = _deconv2d(
        inputs,
        filters,
        subsample=(2,2),
        border_mode=(pad,pad),
    )
    # Broadcast the per-channel bias over batch and spatial axes.
    result = result + biases[None, :, None, None]
    return result

Esempio n. 8

0

Mostra file

def Conv2D(name,
           input_dim,
           output_dim,
           filter_size,
           inputs,
           mask_type=None,
           he_init=False):
    """
    Channels-last 2D convolution with optional masking of the filters.

    inputs.shape: (batch size, height, width, input_dim)
    mask_type: None, 'a', 'b'
    output.shape: (batch size, height, width, output_dim)

    Registers `name.Filters` and `name.Biases` through `lib.param`. The
    initial filter values are scaled by sqrt(2) once for `he_init` and once
    more when a mask is used.
    """
    # output dim, input dim, height, width
    filter_shape = (output_dim, input_dim, filter_size, filter_size)
    masked = mask_type is not None

    # Uniform samples whose stdev is 1 / sqrt(fan in).
    init_stdev = 1. / numpy.sqrt(input_dim * filter_size * filter_size)
    filters_init = numpy.random.uniform(
        low=-init_stdev * numpy.sqrt(3),
        high=init_stdev * numpy.sqrt(3),
        size=filter_shape).astype(theano.config.floatX)

    for rescale in (he_init, masked):
        if rescale:
            filters_init *= lib.floatX(numpy.sqrt(2.))

    filters = lib.param(name + '.Filters', filters_init)

    if masked:
        mask = numpy.ones(filter_shape, dtype=theano.config.floatX)
        center = filter_size // 2

        # Zero every row below the center row, and the part of the center
        # row that lies to the right of the center column.
        mask[:, :, center + 1:, :] = 0.
        mask[:, :, center:center + 1, center + 1:] = 0.

        # At the center position, zero connections between channel groups
        # (indices taken modulo N_CHANNELS) according to the mask type.
        for src in xrange(N_CHANNELS):
            for dst in xrange(N_CHANNELS):
                if mask_type == 'a':
                    blocked = src >= dst
                else:
                    blocked = mask_type == 'b' and src > dst
                if blocked:
                    mask[dst::N_CHANNELS, src::N_CHANNELS, center,
                         center] = 0.

        filters = filters * mask

    # conv2d takes inputs as (batch size, input channels, height, width)
    channels_first = inputs.dimshuffle(0, 3, 1, 2)
    result = T.nnet.conv2d(channels_first,
                           filters,
                           border_mode='half',
                           filter_flip=False)

    bias_init = numpy.zeros(output_dim, dtype=theano.config.floatX)
    biases = lib.param(name + '.Biases', bias_init)
    result = result + biases[None, :, None, None]

    # Back to channels-last.
    return result.dimshuffle(0, 2, 3, 1)

Esempio n. 9

0

Mostra file

def Batchnorm(name,
              input_dim,
              inputs,
              stepwise=False,
              axes=None,
              wrt=None,
              i_gamma=None,
              i_beta=None):
    """
    Batch-normalize `inputs` with learned scale and shift.

    From Ishaan's repo

    Statistics are computed from `wrt` (defaults to `inputs`): over `axes`
    when given, otherwise over the rows of `wrt` reshaped to
    (-1, input_dim). `name.gamma` defaults to 0.1 * ones(input_dim) and
    `name.beta` to zeros(input_dim) unless `i_gamma` / `i_beta` are supplied.
    `stepwise` is accepted but not used by this implementation.
    """
    per_axes = axes is not None

    if wrt is None:
        wrt = inputs

    if per_axes:
        means = wrt.mean(axis=axes, keepdims=True)
        variances = wrt.var(axis=axes, keepdims=True)
    else:
        flattened = wrt.reshape((-1, input_dim))
        means = flattened.mean(axis=0)
        variances = flattened.var(axis=0)

    if i_gamma is None:
        i_gamma = lib.floatX(0.1) * numpy.ones(input_dim,
                                               dtype=theano.config.floatX)
    if i_beta is None:
        i_beta = numpy.zeros(input_dim, dtype=theano.config.floatX)

    gamma = lib.param(name + '.gamma', i_gamma)
    beta = lib.param(name + '.beta', i_beta)

    # Small constant keeps the square root away from zero.
    stdevs = T.sqrt(variances + lib.floatX(1e-6))
    stdevs.name = name + '.stdevs'
    means.name = name + '.means'

    if per_axes:
        # The statistics already kept their dims; only gamma/beta need to be
        # broadcast along the reduced axes.
        pattern = ['x' if axis in axes else 0 for axis in xrange(inputs.ndim)]
        scale = gamma.dimshuffle(*pattern)
        shift = beta.dimshuffle(*pattern)
        mean_term = means
        std_term = stdevs
    else:
        scale = gamma.dimshuffle('x', 0)
        shift = beta.dimshuffle('x', 0)
        mean_term = means.dimshuffle('x', 0)
        std_term = stdevs.dimshuffle('x', 0)

    return T.nnet.bn.batch_normalization(inputs,
                                         scale,
                                         shift,
                                         mean_term,
                                         std_term,
                                         mode='low_mem')

Esempio n. 10

0

Mostra file

def DilatedConv1D(name,
                  input_dim,
                  output_dim,
                  filter_size,
                  inputs,
                  dilation,
                  mask_type=None,
                  apply_biases=True):
    """
    Dilated 1D convolution over the length axis, with optional masking.

    inputs.shape: (batch size, length, input_dim)
    mask_type: None, 'a', 'b'
    output.shape: (batch size, length, output_dim)

    Any non-None `mask_type` zeroes the filter taps after the center tap and
    scales the initial filter values by sqrt(2); the code does not
    distinguish 'a' from 'b'.
    """
    # output dim, input dim, height, width
    filter_shape = (output_dim, input_dim, filter_size, 1)
    masked = mask_type is not None

    # Uniform samples whose stdev is 1 / sqrt(fan in).
    init_stdev = 1. / numpy.sqrt(input_dim * filter_size)
    filters_init = numpy.random.uniform(
        low=-init_stdev * numpy.sqrt(3),
        high=init_stdev * numpy.sqrt(3),
        size=filter_shape).astype(theano.config.floatX)

    if masked:
        filters_init *= lib.floatX(numpy.sqrt(2.))

    filters = lib.param(name + '.Filters', filters_init)

    if masked:
        mask = numpy.ones(filter_shape, dtype=theano.config.floatX)
        center = filter_size // 2
        # Zero every tap strictly after the center tap.
        mask[:, :, center + 1:, :] = 0.
        filters = filters * mask

    # Insert a singleton axis, then reorder to
    # (batch size, input_dim, length, 1) for conv2d.
    batch, length, channels = (inputs.shape[0], inputs.shape[1],
                               inputs.shape[2])
    inputs_4d = inputs.reshape((batch, length, 1, channels))
    inputs_4d = inputs_4d.dimshuffle(0, 3, 1, 2)

    result = T.nnet.conv2d(inputs_4d,
                           filters,
                           border_mode='half',
                           filter_flip=False,
                           filter_dilation=(dilation, 1))

    if apply_biases:
        bias_init = numpy.zeros(output_dim, dtype=theano.config.floatX)
        biases = lib.param(name + '.Biases', bias_init)
        result = result + biases[None, :, None, None]

    # Channels last again, then drop the singleton axis.
    result = result.dimshuffle(0, 2, 3, 1)
    out_shape = (result.shape[0], result.shape[1], result.shape[3])
    return result.reshape(out_shape)

Esempio n. 11

0

Mostra file

def DiagonalLSTM(name, input_dim, inputs):
    """
    LSTM scanned over the columns of the skewed input.

    inputs.shape: (batch size, height, width, input_dim)
    outputs.shape: (batch size, height, width, DIM)

    The inputs are passed through `Skew`, projected to 4 * DIM gate
    pre-activations by a masked 1x1 `Conv2D`, scanned over axis 2, and the
    hidden states are passed through `Unskew`. Initial cell and hidden
    states are learned parameters `name.c0` and `name.h0`.
    """
    inputs = Skew(inputs)
    batch_size = inputs.shape[0]

    input_to_state = Conv2D(name + '.InputToState',
                            input_dim,
                            4 * DIM,
                            1,
                            inputs,
                            mask_type='b')

    def learned_initial_state(suffix):
        # One learned (HEIGHT, DIM) state, tiled across the batch.
        unbatched = lib.param(
            name + suffix,
            numpy.zeros((HEIGHT, DIM), dtype=theano.config.floatX))
        return T.alloc(unbatched, batch_size, HEIGHT, DIM)

    c0 = learned_initial_state('.c0')
    h0 = learned_initial_state('.h0')

    def step_fn(current_input_to_state, prev_c, prev_h):
        # all args have shape (batch size, height, DIM)

        # TODO consider learning this padding
        zero_row = T.zeros((batch_size, 1, DIM), theano.config.floatX)
        padded_h = T.concatenate([zero_row, prev_h], axis=1)
        state_to_state = Conv1D(name + '.StateToState',
                                DIM,
                                4 * DIM,
                                2,
                                padded_h,
                                apply_biases=False)

        gates = current_input_to_state + state_to_state

        # The first 3 * DIM channels are the sigmoid gates (o, f, i); the
        # last DIM channels are the tanh candidate g.
        sigmoid_gates = T.nnet.sigmoid(gates[:, :, :3 * DIM])
        o = sigmoid_gates[:, :, :DIM]
        f = sigmoid_gates[:, :, DIM:2 * DIM]
        i = sigmoid_gates[:, :, 2 * DIM:3 * DIM]
        g = T.tanh(gates[:, :, 3 * DIM:4 * DIM])

        new_c = (f * prev_c) + (i * g)
        return (new_c, o * T.tanh(new_c))

    # Move the scanned axis (axis 2 of the projection) to the front.
    scan_input = input_to_state.dimshuffle(2, 0, 1, 3)
    outputs, _ = theano.scan(step_fn,
                             sequences=scan_input,
                             outputs_info=[c0, h0])

    # Cell states are brought back to the input layout but not returned.
    all_cs = outputs[0].dimshuffle(1, 2, 0, 3)
    all_hs = outputs[1].dimshuffle(1, 2, 0, 3)

    return Unskew(all_hs)

Esempio n. 12

0

Mostra file

def conv1d(name, input, kernel, stride, n_filters, depth, bias=False):
    """
    Convolve `input` with a learned (n_filters, depth, 1, kernel) filter bank.

    The filters are Glorot-uniform initialised, registered as `name.W`, and
    applied without filter flipping, strided by `stride` along the last
    axis. When `bias` is true, a zero-initialised per-filter bias `name.b`
    is added to the result.
    """
    W = lib.param(
        name + '.W',
        glorot_uniform((n_filters, depth, 1, kernel)).astype('float32'))

    out = T.nnet.conv2d(input, W, filter_flip=False, subsample=(1, stride))

    # Only touch the bias when it exists: previously `b` was referenced
    # unconditionally, which failed for the default bias=False.
    if bias:
        b = lib.param(name + '.b', np.zeros(n_filters).astype('float32'))
        out = out + b[None, :, None, None]

    return out

Esempio n. 13

0

Mostra file

File: pixel_rnn.py Progetto: igul222/pixel_rnn

def DiagonalLSTM(name, input_dim, inputs):
    """
    Diagonal LSTM: skew the input, scan an LSTM over axis 2, unskew.

    inputs.shape: (batch size, height, width, input_dim)
    outputs.shape: (batch size, height, width, DIM)

    Learned parameters: `name.c0` and `name.h0` (initial states, zeros of
    shape (HEIGHT, DIM)), plus those created by the `Conv2D` and `Conv1D`
    calls below.
    """
    skewed = Skew(inputs)
    batch_size = skewed.shape[0]
    state_shape = (HEIGHT, DIM)

    input_to_state = Conv2D(
        name+'.InputToState',
        input_dim,
        4*DIM,
        1,
        skewed,
        mask_type='b'
    )

    # Learned initial states, tiled across the batch.
    c0_unbatched = lib.param(
        name + '.c0',
        numpy.zeros(state_shape, dtype=theano.config.floatX)
    )
    c0 = T.alloc(c0_unbatched, batch_size, HEIGHT, DIM)

    h0_unbatched = lib.param(
        name + '.h0',
        numpy.zeros(state_shape, dtype=theano.config.floatX)
    )
    h0 = T.alloc(h0_unbatched, batch_size, HEIGHT, DIM)

    def step_fn(current_input_to_state, prev_c, prev_h):
        # all args have shape (batch size, height, DIM)

        # TODO consider learning this padding
        padding = T.zeros((batch_size, 1, DIM), theano.config.floatX)
        prev_h = T.concatenate([padding, prev_h], axis=1)
        state_to_state = Conv1D(
            name+'.StateToState',
            DIM,
            4*DIM,
            2,
            prev_h,
            apply_biases=False
        )

        gates = current_input_to_state + state_to_state

        # Channel layout of `gates`: [o | f | i | g], each DIM wide.
        squashed = T.nnet.sigmoid(gates[:, :, :3*DIM])
        output_gate = squashed[:, :, 0*DIM:1*DIM]
        forget_gate = squashed[:, :, 1*DIM:2*DIM]
        input_gate = squashed[:, :, 2*DIM:3*DIM]
        candidate = T.tanh(gates[:, :, 3*DIM:4*DIM])

        new_c = (forget_gate * prev_c) + (input_gate * candidate)
        new_h = output_gate * T.tanh(new_c)

        return (new_c, new_h)

    outputs, _ = theano.scan(
        step_fn,
        sequences=input_to_state.dimshuffle(2, 0, 1, 3),
        outputs_info=[c0, h0]
    )

    # Undo the scan-axis shuffle; only the hidden states are returned.
    all_cs, all_hs = [states.dimshuffle(1, 2, 0, 3)
                      for states in (outputs[0], outputs[1])]

    return Unskew(all_hs)

Esempio n. 14

0

Mostra file

def DilatedConv2D(name,
                  input_shape,
                  output_dim,
                  filter_size,
                  inputs,
                  he_init=True,
                  dilation=(1, 1)):
    """
    Dilated 2D convolution backed by lasagne's DilatedConv2DLayer.

    input_shape: shape tuple of `inputs`; index 1 is used as the number of
        input channels.
    he_init: when true the filter stdev is sqrt(2 / fan_in); otherwise the
        normalized (Glorot & Bengio) value sqrt(2 / (fan_in + fan_out)).
    dilation: forwarded to DilatedConv2DLayer.

    Registers `name.W` with shape
    (input_dim, output_dim, filter_size, filter_size) and `name.b` (zeros of
    length output_dim) through `lib.param`. The inputs are padded manually
    and the layer is built with pad=0 and no nonlinearity.
    """
    input_dim = input_shape[1]

    def uniform(stdev, size):
        # Uniform on [-stdev*sqrt(3), stdev*sqrt(3)] has the requested stdev.
        return np.random.uniform(low=-stdev * np.sqrt(3),
                                 high=stdev * np.sqrt(3),
                                 size=size).astype(theano.config.floatX)

    fan_in = input_dim * filter_size**2
    fan_out = output_dim * filter_size**2

    if he_init:
        filters_stdev = np.sqrt(2. / fan_in)
    else:  # Normalized init (Glorot & Bengio)
        filters_stdev = np.sqrt(2. / (fan_in + fan_out))

    W = lib.param(
        name + '.W',
        uniform(filters_stdev,
                (input_dim, output_dim, filter_size, filter_size)))

    b = lib.param(name + '.b', np.zeros(output_dim,
                                        dtype=theano.config.floatX))

    # Manually apply 'same' padding beforehand.
    # Floor division keeps the width an integer on Python 3 as well; true
    # division would pass a float padding width.
    pad = (filter_size - 1) // 2

    # NOTE(review): the declared shape grows by `pad` per spatial axis while
    # the padding call below is given width=pad; if that pads both sides the
    # growth would be 2 * pad. Left unchanged; confirm against the layer's
    # use of input_shape.
    input_shape = (input_shape[0], input_shape[1], input_shape[2] + pad,
                   input_shape[3] + pad)

    inputs = lasagne.theano_extensions.padding.pad(inputs,
                                                   width=pad,
                                                   batch_ndim=2)

    layer = lasagne.layers.DilatedConv2DLayer(
        input_shape,
        output_dim,
        filter_size,
        dilation=dilation,
        pad=0,
        untie_biases=False,
        W=W,
        b=b,
        nonlinearity=None,
        flip_filters=False,
    )

    return layer(inputs)

Esempio n. 15

0

Mostra file

File: conv1d.py Progetto: yobajnin/nn

def Conv1D(name, input_dim, output_dim, filter_size, inputs, he_init=True, biases=True, stride=1, border_mode='half'):
    """
    Strided 1D convolution implemented with T.nnet.conv2d.

    inputs.shape: (batch size, input_dim, height)
    output.shape: (batch size, output_dim, height)

    he_init: when true the filter stdev is sqrt(4 / (fan_in + fan_out));
        otherwise the normalized (Glorot & Bengio) sqrt(2 / (fan_in + fan_out)).
        fan_out is divided by `stride`.
    biases: when true, a zero-initialised `name.Biases` is added per channel.
    """
    fan_in = input_dim * filter_size
    fan_out = output_dim * filter_size
    fan_out = fan_out / stride

    numerator = 4. if he_init else 2.
    filters_stdev = np.sqrt(numerator / (fan_in + fan_out))

    # Uniform on [-bound, bound] has stdev bound / sqrt(3).
    bound = filters_stdev * np.sqrt(3)
    filter_values = np.random.uniform(
        low=-bound,
        high=bound,
        size=(output_dim, input_dim, filter_size, 1)
    ).astype(theano.config.floatX)
    filters = lib.param(name + '.Filters', filter_values)

    # conv2d needs 4D inputs: add a trailing broadcastable axis.
    conv_out = T.nnet.conv2d(
        inputs.dimshuffle(0, 1, 2, 'x'),
        filters,
        border_mode=border_mode,
        subsample=(stride, 1)
    )
    # Restore the broadcast flag on axis 3, then drop that axis.
    result = T.addbroadcast(conv_out, 3).dimshuffle(0, 1, 2)

    if not biases:
        return result

    biases_ = lib.param(
        name + '.Biases',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )
    return result + biases_[None, :, None]

Esempio n. 16

0

Mostra file

File: ops.py Progetto: ritheshkumar95/TTS

def conv2d(
    name,
    input,
    kernel,
    stride,
    depth,
    n_filters,
    init=None,
    bias=True,
    batchnorm=False,
    train_bn=True,
    weightnorm=True,
    pad='valid',
    filter_dilation=(1,1),
    mode='train',
    **kwargs
    ):
    """
    2D convolution with optional weight norm, bias and batch norm.

    kernel: an int (square kernel) or a (height, width) pair.
    init, **kwargs: forwarded to `initializer` to draw the initial filters
        of shape (n_filters, depth, kernel_h, kernel_w).
    weightnorm: when true, each filter is rescaled so its L2 norm equals a
        learned parameter `name.g`, initialised to the initial filter norms.
    bias: when true, a zero-initialised per-filter `name.b` is added.
    batchnorm: when true, the result is passed through `Batchnorm` with
        axes='spatial', the given `mode`, and trainable_weights=train_bn.
    """
    kernel_h, kernel_w = (kernel, kernel) if isinstance(kernel, int) else kernel

    filter_shape = (n_filters, depth, kernel_h, kernel_w)
    filter_values = initializer(init, filter_shape, **kwargs)
    W = lib.param(name + '.W', filter_values)

    if weightnorm:
        # One norm per filter, computed over all remaining axes.
        flat_init = filter_values.reshape((filter_values.shape[0], -1))
        norms = lib.param(name + '.g', np.linalg.norm(flat_init, axis=1))
        current_norms = W.reshape((W.shape[0], -1)).norm(2, axis=1)
        W = W * (norms / current_norms).dimshuffle(0, 'x', 'x', 'x')

    out = T.nnet.conv2d(
        input,
        W,
        subsample=(stride, stride),
        border_mode=pad,
        filter_dilation=filter_dilation
    )

    if bias:
        b = lib.param(name + '.b', np.zeros(n_filters).astype('float32'))
        # Broadcast the per-filter bias over batch and spatial axes.
        out += b[None, :, None, None]

    if not batchnorm:
        return out

    return Batchnorm(name, out, n_filters, axes='spatial', mode=mode, trainable_weights=train_bn)

Esempio n. 17

0

Mostra file

File: ops.py Progetto: ritheshkumar95/TTS

def Batchnorm(
    name,
    inputs,
    input_dim,
    axes=None,
    mode='train',
    trainable_weights=True
):
    """
    Batch normalization with running statistics.

    mode: 'train' normalizes with batch statistics and records updates for
        the running mean/variance in `lib._updates`; 'test' normalizes with
        the stored running statistics. Any other value raises ValueError
        (previously the function silently returned None).
    axes: forwarded to theano's batch normalization functions.
    trainable_weights: passed as `is_param` for `name.gamma` and `name.beta`.

    Registers `name.gamma`, `name.beta`, `name.running_mean` and
    `name.running_variance` through `lib.param`; the running statistics are
    always registered with is_param=False.
    """
    if mode not in ('train', 'test'):
        raise ValueError(
            "Batchnorm mode must be 'train' or 'test', got {0!r}".format(mode))

    gamma = lib.param(
        name+'.gamma',
        initializer('Normal',(input_dim,),mean=1.0,std=0.02),
        is_param=trainable_weights
    )
    beta = lib.param(
        name+'.beta',
        np.zeros(input_dim).astype(theano.config.floatX),
        is_param=trainable_weights
    )
    running_mean = lib.param(
        name+'.running_mean',
        np.zeros(input_dim).astype(theano.config.floatX),
        is_param=False
    )
    running_var = lib.param(
        name+'.running_variance',
        np.zeros(input_dim).astype(theano.config.floatX),
        is_param=False
    )

    if mode=='train':
        out,_,_,new_mean,new_var = T.nnet.bn.batch_normalization_train(
            inputs,
            axes=axes,
            gamma=gamma,
            beta=beta,
            running_mean=running_mean,
            running_var=running_var
        )
        # Record the running-statistics updates for the training function.
        lib._updates[running_mean] = new_mean
        lib._updates[running_var] = new_var
        return out

    # mode == 'test'
    return T.nnet.bn.batch_normalization_test(
        inputs,
        axes=axes,
        gamma=gamma,
        beta=beta,
        mean=running_mean,
        var=running_var
    )

Esempio n. 18

0

Mostra file

File: ops.py Progetto: ritheshkumar95/speech

def conv1d(name,input,kernel,stride,n_filters,depth,bias=False):
    """
    Convolve `input` with a learned (n_filters, depth, 1, kernel) filter bank.

    The filters are Glorot-uniform initialised, registered as `name.W`, and
    applied without filter flipping, strided by `stride` along the last
    axis. When `bias` is true, a zero-initialised per-filter bias `name.b`
    is added to the result.
    """
    W = lib.param(
        name+'.W',
        glorot_uniform((n_filters,depth,1,kernel)).astype('float32')
        )

    out = T.nnet.conv2d(input,W,filter_flip=False,subsample=(1,stride))

    # Only touch the bias when it exists: previously `b` was referenced
    # unconditionally, which failed for the default bias=False.
    if bias:
        b = lib.param(
            name + '.b',
            np.zeros(n_filters).astype('float32')
            )
        out = out + b[None,:,None,None]

    return out

Esempio n. 19

0

Mostra file

File: deconv2d.py Progetto: kundan2510/vae_celeba

def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True,
    weightnorm=None,
):
    """
    Stride-2 transposed convolution with optional weight normalization.

    inputs: tensor of shape (batch size, num channels, height, width)
    returns: tensor of shape (batch size, num channels, 2*height, 2*width)

    he_init: when true, the filter stdev is scaled by an extra sqrt(2).
    weightnorm: None defers to the module-level `_default_weightnorm`. When
        the resulting flag is truthy, the filters are rescaled so that the
        norm over axes (0, 2, 3) equals a learned parameter `name.g`, which
        is initialised to the norms of the initial filter values.
    """
    def uniform(stdev, size):
        # Uniform on [-stdev*sqrt(3), stdev*sqrt(3)] has the requested stdev.
        return np.random.uniform(low=-stdev * np.sqrt(3),
                                 high=stdev * np.sqrt(3),
                                 size=size).astype(theano.config.floatX)

    filters_stdev = np.sqrt(1. / (input_dim * filter_size**2))
    filters_stdev *= 2.  # Because of the stride
    if he_init:
        filters_stdev *= np.sqrt(2.)

    filter_values = uniform(filters_stdev,
                            (input_dim, output_dim, filter_size, filter_size))

    filters = lib.param(name + '.Filters', filter_values)

    # Identity comparison is the correct test for the "not specified" case.
    if weightnorm is None:
        weightnorm = _default_weightnorm
    if weightnorm:
        norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0, 2, 3)))
        norms = lib.param(name + '.g', norm_values)
        filters = filters * (
            norms / T.sqrt(T.sum(T.sqr(filters), axis=(0, 2, 3)))).dimshuffle(
                'x', 0, 'x', 'x')

    biases = lib.param(name + '.Biases',
                       np.zeros(output_dim, dtype=theano.config.floatX))

    # Floor division keeps the padding an integer on Python 3 as well;
    # true division would hand a float to border_mode.
    pad = (filter_size - 1) // 2
    result = _deconv2d(
        inputs,
        filters,
        subsample=(2, 2),
        border_mode=(pad, pad),
    )
    # Broadcast the per-channel bias over batch and spatial axes.
    result = result + biases[None, :, None, None]
    return result

Esempio n. 20

0

Mostra file

File: ops.py Progetto: shubham1310/speech

def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    """
    Scan `step_fn` over `inputs`, supplying learned initial states if needed.

    `inputs` and `hidden_dims` may be single values or lists; single values
    are wrapped into one-element lists. For every entry of `h0s` that is
    None, a learned parameter `name.h0_<i>` (zeros of length
    hidden_dims[i]) is created and tiled to (inputs[0].shape[1],
    hidden_dims[i]). Every initial state has its broadcast pattern cleared.

    Note: a caller-supplied `h0s` list is updated in place.
    `non_sequences` is forwarded to theano.scan and not modified here.
    """
    inputs = inputs if isinstance(inputs, list) else [inputs]
    hidden_dims = hidden_dims if isinstance(hidden_dims, list) else [hidden_dims]

    if h0s is None:
        h0s = [None]*len(hidden_dims)

    for idx, dim in enumerate(hidden_dims):
        state = h0s[idx]
        if state is None:
            learned = lib.param(
                name + '.h0_' + str(idx),
                numpy.zeros((dim,), dtype=theano.config.floatX)
            )
            # Axis 1 of the first input is used as the batch count.
            state = T.alloc(learned, inputs[0].shape[1], dim)

        # Make no axis broadcastable so scan sees consistent state types.
        h0s[idx] = T.patternbroadcast(state, [False] * state.ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs

Esempio n. 21

0

Mostra file

File: wavent.py Progetto: johndpope/sampleRNN_ICLR2017

def create_wavenet_block(inp,
                         num_dilation_layer,
                         input_dim,
                         output_dim,
                         name=None):
    """
    Stack `num_dilation_layer` gated dilated 1D convolutions.

    Layer i (0-based) uses dilation 2**i and is named
    `name.dilation_<i+1>`. Each layer's skip output is scaled by a learned
    scalar from `name.parametrized_weights` (initialised to ones). The skip
    outputs are then summed, with each earlier layer's contribution cropped
    along axis 1 by a running offset.

    Returns (output of the last layer, summed skip output).
    """
    assert name is not None

    skip_weights = lib.param(name + ".parametrized_weights",
                             lib.floatX(numpy.ones((num_dilation_layer, ))))

    layer_out = inp
    skip_contrib = []
    for layer_idx in range(num_dilation_layer):
        # Only the first layer sees the block's input width.
        in_channels = input_dim if layer_idx == 0 else output_dim
        layer_out, skip_c = lib.ops.dil_conv_1D(
            layer_out,
            output_dim,
            in_channels,
            2,
            dilation=2**layer_idx,
            non_linearity='gated',
            name=name + ".dilation_{}".format(layer_idx + 1))
        skip_contrib.append(skip_c * skip_weights[layer_idx])

    skip_out = skip_contrib[-1]

    # Walk from the second-to-last layer down to the first; the crop offset
    # accumulates 2**(layer index + 1) at each step.
    offset = 0
    for earlier in range(num_dilation_layer - 2, -1, -1):
        offset += 2**(earlier + 1)
        skip_out = skip_out + skip_contrib[earlier][:, offset:]

    return layer_out, skip_out

Esempio n. 22

0

Mostra file

File: wavent.py Progetto: adrianEVI/sampleRNN_ICLR2017

def create_wavenet_block(inp, num_dilation_layer, input_dim, output_dim, name =None):
    """
    Build a block of gated dilated 1D convolutions with weighted skip sums.

    The dilation doubles with each layer (1, 2, 4, ...). Every skip output
    is multiplied by its own learned scalar, stored together in
    `name.parametrized_weights` (initialised to ones). Earlier skip outputs
    are cropped along axis 1 by a growing offset before being added to the
    last layer's skip output.

    Returns the pair (last layer output, combined skip output).
    """
    assert name is not None

    initial_weights = lib.floatX(numpy.ones((num_dilation_layer,)))
    skip_weights = lib.param(name+".parametrized_weights", initial_weights)

    skip_contrib = []
    layer_out = inp
    for i in range(num_dilation_layer):
        layer_name = name+".dilation_{}".format(i+1)
        layer_out, skip_c = lib.ops.dil_conv_1D(
            layer_out,
            output_dim,
            input_dim if i == 0 else output_dim,
            2,
            dilation = 2**i,
            non_linearity = 'gated',
            name = layer_name
        )
        weighted_skip = skip_c*skip_weights[i]
        skip_contrib.append(weighted_skip)

    skip_out = skip_contrib[-1]

    # Add the remaining skip outputs from the latest to the earliest layer,
    # cropping each one by the accumulated offset.
    cumulative = 0
    for i in range(num_dilation_layer-1):
        source = skip_contrib[num_dilation_layer-2 - i]
        cumulative = cumulative + 2**(num_dilation_layer-i-1)
        skip_out = skip_out + source[:,cumulative:]

    return layer_out, skip_out

Esempio n. 23

0

Mostra file

File: ops.py Progetto: kylemcdonald/speech

def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    """
    Generic recurrence: run theano.scan with `step_fn` over `inputs`.

    Single (non-list) `inputs` / `hidden_dims` values are wrapped in lists.
    Missing initial states (None entries of `h0s`, or all of them when
    `h0s` is None) become learned parameters `name.h0_<i>`, zero-initialised
    and tiled along a leading axis of size inputs[0].shape[1]. All initial
    states are made non-broadcastable on every axis.

    Note: when the caller passes an `h0s` list, its entries are replaced in
    place. `non_sequences` is only forwarded to theano.scan.
    """
    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if not isinstance(inputs, list):
        inputs = [inputs]

    n_states = len(hidden_dims)
    if h0s is None:
        h0s = [None]*n_states

    def make_learned_h0(i):
        # Zero-initialised state vector, repeated for every batch entry.
        unbatched = lib.param(
            name + '.h0_' + str(i),
            numpy.zeros((hidden_dims[i],), dtype=theano.config.floatX)
        )
        batch_count = inputs[0].shape[1]
        return T.alloc(unbatched, batch_count, hidden_dims[i])

    for i in xrange(n_states):
        initial = make_learned_h0(i) if h0s[i] is None else h0s[i]
        # Clear broadcast flags so scan sees consistent state types.
        h0s[i] = T.patternbroadcast(initial, [False] * initial.ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs

Esempio n. 24

0

Mostra file

File: vrnn_ar.py Progetto: fagan2888/speech-1

def Recurrence(processed_frames, h0, reset):
    """Run a stack of N_GRUS LowMemGRU layers over the processed frames.

    Shapes, as stated by the original author:
    processed_frames.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames, DIM)

    Returns (output of the top GRU, last_hidden), where last_hidden stacks
    the final time step of every GRU along axis 1.
    """
    # Debug switch left by the original author (disables the recurrence):
    # print "warning no recurrence"
    # return T.zeros_like(processed_frames), h0

    h0_param = lib.param(
        'Recurrence.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    # Tile the learned state over the batch and use it in place of the
    # incoming h0 whenever `reset` is true.
    batch_h0 = T.alloc(h0_param, h0.shape[0], N_GRUS, DIM)
    batch_h0 = T.patternbroadcast(batch_h0, [False] * batch_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, batch_h0, h0)

    layers = [
        lib.ops.LowMemGRU(
            'Recurrence.GRU0', DIM, DIM, processed_frames, h0=h0[:, 0])
    ]
    for layer_idx in xrange(1, N_GRUS):
        # Each further layer consumes the output of the one below it.
        layers.append(
            lib.ops.LowMemGRU(
                'Recurrence.GRU' + str(layer_idx),
                DIM,
                DIM,
                layers[-1],
                h0=h0[:, layer_idx]))

    final_steps = [layer[:, -1] for layer in layers]
    last_hidden = T.stack(final_steps, axis=1)

    return (layers[-1], last_hidden)

Esempio n. 25

0

Mostra file

File: conv_mimic.py Progetto: ritheshkumar95/speech

def encoder(input_sequences, h0, reset):
    """Embed the input samples, run conv + pooling and three GRUs, then
    project back and apply three 1x1 convolutions.

    Shapes as stated by the original author (not verifiable from this block):
    input_sequences.shape: (batch size, N_FRAMES * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()

    NOTE(review): n_frames is derived from shape[1] - 3, so the input
    presumably carries 3 extra samples -- confirm against the caller.

    Returns (output, last_hidden). `output` is flattened to 2-D by the final
    reshape((-1, output.shape[2])); `last_hidden` stacks the last time step
    of gru1, gru2 and gru3 along axis 1.

    The numeric shape comments below are the original author's and appear
    to assume batch size 128 and Q_LEVELS == 256 -- TODO confirm.
    """
    batch_size = input_sequences.shape[0]
    n_frames = (input_sequences.shape[1]-3)/FRAME_SIZE

    # The inputs are embedded here rather than rescaled to floats; the
    # rescaling alternative is kept commented out below (X1).

    emb = lib.ops.Embedding(
        'Embedding',
        Q_LEVELS,
        Q_LEVELS,
        input_sequences,
    ).transpose(0,2,1)    # swap the last two axes of the embedding output

    #X1 = ((input_sequences.astype(theano.config.floatX)/lib.floatX(Q_LEVELS/2)) - lib.floatX(1))*lib.floatX(2)
    X1 = emb[:,:,None,:] #(128,256,1,259)  -- inserts a new axis at position 2

    X2 = T.nnet.relu(lib.ops.conv1d('conv1',X1,kernel=4,stride=1,n_filters=512,depth=256,bias=True)) #(128,512,1,256)
    #X3 = T.nnet.relu(lib.ops.conv1d('conv2',X2,kernel=1,stride=1,n_filters=512,depth=512,bias=True)) #(128,512,1,256)
    X4 = lib.ops.pool(X2) #(128,2048,1,64) 

    # Learned initial hidden state, one row per GRU.
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )

    # Tile the learned state over the batch; it replaces the incoming h0
    # whenever `reset` is true.
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # GRU input = pooled conv features concatenated (on the last axis) with
    # the embeddings of the first 256 positions regrouped per frame.
    # NOTE(review): 256 and the GRU input size 3072 are hard-coded; they
    # presumably depend on FRAME_SIZE and Q_LEVELS -- confirm before changing.
    gru_inp = T.concatenate((X4[:,:,0,:].dimshuffle(0,2,1),emb.transpose(0,2,1)[:,:256,:].reshape((batch_size,n_frames,FRAME_SIZE*Q_LEVELS))),axis=2)
    gru1 = lib.ops.GRU('FrameLevel.GRU1', 3072, DIM, gru_inp, h0=h0[:, 0])
    gru2 = lib.ops.GRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.GRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2]) ## (128,64,512)

    # Project each GRU step to 2048 values, regroup them as 4 steps of DIM
    # values, and restore the 4-D layout used by the conv layers.
    X9 = lib.ops.Dense(
        'Projection',
        512,
        2048,
        gru3,
        hidden_dim=gru3.shape[1]
        ).reshape((batch_size,4*gru3.shape[1],DIM)).transpose(0,2,1)[:,:,None,:] #(128,64,2048) --> (128,256,512) --> (128,512,256)

    # Residual-style sum of the projected GRU output and the first conv
    # activation.
    X10 = T.nnet.relu(X9+X2)
    X11 = T.nnet.relu(lib.ops.conv1d('deconv1',X10,kernel=1,stride=1,n_filters=512,depth=512,bias=True)) #(128,512,1,256)
    X12 = T.nnet.relu(lib.ops.conv1d('deconv2',X11,kernel=1,stride=1,n_filters=512,depth=512,bias=True)) #(128,512,1,256)
    X13 = lib.ops.conv1d('deconv3',X12,kernel=1,stride=1,n_filters=256,depth=512,bias=True) #(128,256,1,256)

    # Final time step of each GRU, stacked along axis 1.
    last_hidden = T.stack([gru1[:,-1],gru2[:,-1],gru3[:,-1]],axis=1)

    # Drop the singleton axis and swap the last two axes.
    output = X13[:,:,0,:].transpose(0,2,1)


    # Collapse the first two axes so each row is one position's output.
    return (output.reshape((-1,output.shape[2])),last_hidden)

Esempio n. 26

0

Mostra file

def Embedding(name, n_symbols, output_dim, indices):
    """Look up a learned `output_dim`-sized vector for every entry of
    `indices`.

    The table is a parameter called `name` with shape
    (n_symbols, output_dim), initialised from a standard normal
    distribution. The result has the shape of `indices` with `output_dim`
    appended as a trailing axis.
    """
    initial_table = numpy.random.randn(n_symbols, output_dim)
    table = lib.param(name, initial_table.astype(theano.config.floatX))

    # Gather on the flattened indices, then restore the original layout.
    gathered = table[indices.flatten()]
    target_shape = tuple(indices.shape) + (output_dim,)
    return gathered.reshape(target_shape)

Esempio n. 27

0

Mostra file

File: my_two_tier.py Progetto: ritheshkumar95/speech

def frame_level_rnn(input_sequences, h0, reset):
    """Frame-level tier: embed the samples, group them into frames, run
    three stacked GRUs and expand each frame back to FRAME_SIZE steps.

    Shapes, as stated by the original author:
    input_sequences.shape: (batch size, N_FRAMES * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, N_FRAMES * FRAME_SIZE, DIM)

    Returns (output, last_hidden); last_hidden stacks the final time step
    of the three GRUs along axis 1.
    """
    batch_size = input_sequences.shape[0]
    n_frames = input_sequences.shape[1] / FRAME_SIZE

    emb = lib.ops.Embedding(
        'SampleLevel.Embedding', Q_LEVELS, Q_LEVELS, input_sequences)

    h0_param = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    # The learned state, tiled over the batch, replaces the incoming h0
    # whenever `reset` is true.
    h0 = theano.ifelse.ifelse(
        reset, T.alloc(h0_param, h0.shape[0], N_GRUS, DIM), h0)

    # Each frame is the concatenation of the embeddings of its FRAME_SIZE
    # samples. (The original author also kept a disabled variant that fed
    # raw samples rescaled from ints in [0, Q_LEVELS) to floats in [-2, 2].)
    frames = emb.reshape((batch_size, n_frames, FRAME_SIZE * Q_LEVELS))

    layer_input = frames
    layer_in_dim = FRAME_SIZE * Q_LEVELS
    gru_outputs = []
    gru_names = ('FrameLevel.GRU1', 'FrameLevel.GRU2', 'FrameLevel.GRU3')
    for idx, gru_name in enumerate(gru_names):
        layer_input = lib.ops.GRU(
            gru_name, layer_in_dim, DIM, layer_input, h0=h0[:, idx])
        gru_outputs.append(layer_input)
        layer_in_dim = DIM

    # top.shape = (batch_size, N_FRAMES, DIM) per the original author
    top = gru_outputs[-1]
    flat_top = top.reshape((top.shape[0] * top.shape[1], top.shape[2]))
    output = lib.ops.Dense(
        'FrameLevel.Output', DIM, FRAME_SIZE * DIM, flat_top, init='he')
    output = output.reshape((batch_size, n_frames * FRAME_SIZE, DIM))

    last_hidden = T.stack([g[:, -1] for g in gru_outputs], axis=1)

    return (output, last_hidden)

Esempio n. 28

0

Mostra file

File: deconv2d.py Progetto: igul222/image_generation

def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True
    ):
    """Stride-2 transposed convolution with learned filters and biases.

    name:        prefix for the '.Filters' and '.Biases' parameters.
    input_dim:   number of input channels.
    output_dim:  number of output channels.
    filter_size: side length of the square filters.
    inputs:      tensor of shape (batch size, num channels, height, width).
    he_init:     if True, scale the initial filter stdev by sqrt(2).

    Returns a tensor of shape
    (batch size, num channels, 2*height, 2*width), per the original author.
    """
    def uniform(stdev, size):
        # Uniform on [-b, b] has stdev b / sqrt(3), hence the sqrt(3) factor.
        return np.random.uniform(
            low=-stdev * np.sqrt(3),
            high=stdev * np.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    filters_stdev = np.sqrt(1. / (input_dim * filter_size ** 2))
    if he_init:
        filters_stdev *= np.sqrt(2.)

    filters = lib.param(
        name + '.Filters',
        uniform(
            filters_stdev,
            (input_dim, output_dim, filter_size, filter_size)
        )
    )

    biases = lib.param(
        name + '.Biases',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )

    # Floor division keeps the padding an integer even when `/` means true
    # division (Python 3 or `from __future__ import division`).
    pad = (filter_size - 1) // 2
    result = _deconv2d(
        inputs,
        filters,
        subsample=(2, 2),
        border_mode=(pad, pad),
    )
    # Broadcast one bias per output channel over batch, height and width.
    result = result + biases[None, :, None, None]
    return result

Esempio n. 29

0

Mostra file

def Embedding(name, n_symbols, output_dim, inputs):
    """Map every entry of `inputs` to a row of a learned table.

    The table is a parameter called `name` with shape
    (n_symbols, output_dim), drawn from a standard normal distribution.
    The result has the shape of `inputs` with `output_dim` appended.
    """
    initial_table = np.random.randn(n_symbols, output_dim)
    table = lib.param(name, initial_table.astype(theano.config.floatX))

    # Target shape: one entry per input axis, then the embedding width.
    target_shape = []
    for axis in xrange(inputs.ndim):
        target_shape.append(inputs.shape[axis])
    target_shape.append(output_dim)

    rows = table[inputs.flatten()]
    return rows.reshape(target_shape)

Esempio n. 30

0

Mostra file

File: ops.py Progetto: ritheshkumar95/TTS

def Embedding(name, n_symbols, output_dim, indices):
    """Look up a learned vector for every entry of `indices`.

    The table is a parameter called `name`, built by
    initializer('Normal', ...) with shape (n_symbols, output_dim) and
    std 1/sqrt(output_dim). The result has the shape of `indices` with
    `output_dim` appended as a trailing axis.
    """
    initial_table = initializer(
        'Normal',
        (n_symbols, output_dim),
        std=1 / np.sqrt(output_dim)
    )
    table = lib.param(name, initial_table.astype(theano.config.floatX))

    # Gather on the flattened indices, then restore the original layout.
    rows = table[indices.flatten()]
    target_shape = tuple(indices.shape) + (output_dim,)
    return rows.reshape(target_shape)

Esempio n. 31

0

Mostra file

File: gru.py Progetto: Faruk-Ahmed/nn

def Recurrent(
    name,
    hidden_dims,
    step_fn,
    inputs,
    non_sequences=[],
    h0s=None,
    reset=None
    ):
    """Run `step_fn` over `inputs` with theano.scan, optionally carrying the
    hidden state across calls.

    name:          prefix for the learned initial states
                   (name + '.h0_' + str(i)) and the carried-state shared
                   variables (name + '.last_hidden_' + str(i)).
    hidden_dims:   int or list of ints, one per recurrent state.
    step_fn:       step function handed to theano.scan.
    inputs:        a sequence or list of sequences; a lone value is wrapped
                   in a list. The batch size is read from inputs[0].shape[1].
    non_sequences: forwarded to theano.scan; this function never mutates it.
    h0s:           optional list of initial states; None entries (or
                   h0s=None) become learned zero-initialised vectors tiled
                   over the batch.
    reset:         optional condition. When given, each initial state is
                   ifelse(reset, h0, last_hidden), where last_hidden is a
                   shared variable whose default update is the final step
                   of the matching scan output.

    Returns the first element of theano.scan's result.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if h0s is None:
        h0s = [None] * len(hidden_dims)
    else:
        # Work on a private copy: entries are overwritten below, and the
        # caller's list must not be modified behind its back.
        h0s = list(h0s)

    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                np.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])

        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    if reset is not None:
        last_hiddens = []
        for i in xrange(len(h0s)):
            # The shape of last_hidden doesn't matter right now; we assume
            # it won't be used until we put something proper in it.
            last_hidden = theano.shared(
                np.zeros([1] * h0s[i].ndim, dtype=h0s[i].dtype),
                name=name + '.last_hidden_' + str(i)
            )
            last_hiddens.append(last_hidden)
            h0s[i] = theano.ifelse.ifelse(reset, h0s[i], last_hidden)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    if reset is not None:
        # With one state `outputs` is indexed directly; with several it is
        # indexed per state first.
        if len(last_hiddens) == 1:
            last_hiddens[0].default_update = outputs[-1]
        else:
            for i in xrange(len(last_hiddens)):
                last_hiddens[i].default_update = outputs[i][-1]

    return outputs

Esempio n. 32

0

Mostra file

File: three_tier.py Progetto: fagan2888/speech-1

def big_frame_level_rnn(input_sequences, h0, reset):
    """Big-frame tier: rescale the samples, run N_BIG_GRUS stacked GRUs over
    big frames, and emit conditioning vectors plus independent predictions.

    Shapes, as stated by the original author:
    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape:              (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape:           ()
    output[0].shape:       (batch size, n frames, DIM)
    output[1].shape:       same as h0.shape
    output[2].shape:       (batch size, seq len, Q_LEVELS)
    """
    h0_param = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX))
    # The learned state, tiled over the batch, replaces the incoming h0
    # whenever `reset` is true.
    batch_h0 = T.alloc(h0_param, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    batch_h0 = T.patternbroadcast(batch_h0, [False] * batch_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, batch_h0, h0)

    n_rows = input_sequences.shape[0]
    n_big_frames = input_sequences.shape[1] / BIG_FRAME_SIZE
    raw_frames = input_sequences.reshape(
        (n_rows, n_big_frames, BIG_FRAME_SIZE))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    centered = (raw_frames.astype('float32') /
                lib.floatX(Q_LEVELS / 2)) - lib.floatX(1)
    frames = centered * lib.floatX(2)

    layers = [
        lib.ops.LowMemGRU(
            'BigFrameLevel.GRU0', BIG_FRAME_SIZE, BIG_DIM, frames,
            h0=h0[:, 0])
    ]
    for layer_idx in xrange(1, N_BIG_GRUS):
        # Each further layer consumes the output of the one below it.
        layers.append(
            lib.ops.LowMemGRU(
                'BigFrameLevel.GRU' + str(layer_idx),
                BIG_DIM,
                BIG_DIM,
                layers[-1],
                h0=h0[:, layer_idx]))
    top = layers[-1]

    # Expand every big-frame step into BIG_FRAME_SIZE / FRAME_SIZE steps.
    output = lib.ops.Linear(
        'BigFrameLevel.Output', BIG_DIM,
        DIM * BIG_FRAME_SIZE / FRAME_SIZE, top)
    output = output.reshape(
        (output.shape[0], output.shape[1] * BIG_FRAME_SIZE / FRAME_SIZE, DIM))

    last_hidden = T.stack([layer[:, -1] for layer in layers], axis=1)

    # One Q_LEVELS-sized prediction per sample, taken from the top GRU.
    independent_preds = lib.ops.Linear(
        'BigFrameLevel.IndependentPreds', BIG_DIM,
        Q_LEVELS * BIG_FRAME_SIZE, top)
    independent_preds = independent_preds.reshape(
        (independent_preds.shape[0],
         independent_preds.shape[1] * BIG_FRAME_SIZE,
         Q_LEVELS))

    return (output, last_hidden, independent_preds)

Esempio n. 33

0

Mostra file

File: gan_2.py Progetto: kundan2510/parrot

def Decoder(latent_var, text_features, name=""):
    """Decode text features conditioned on a latent vector.

    The latent vector is repeated along axis 1 to match
    text_features.shape[1], concatenated with the text features on the last
    axis, and fed to a stacked LSTM (when RNN_TYPE == 'LSTM') or a stacked
    GRU otherwise. The RNN output then goes through
    ReLU -> Linear -> ReLU -> Linear.

    name: suffix appended to "Decoder." to form the parameter prefix.

    Returns the output of the final Linear layer (DIM -> OUTPUT_DIM).
    """
    dec_name = "Decoder.{}".format(name)
    h0_width = H0_MULT * DIM

    h0_param = lib.param(
        '{}.h0'.format(dec_name),
        numpy.zeros((N_RNN, h0_width), dtype=theano.config.floatX))
    # Handling LEARN_H0
    h0_param.param = True
    # Tile the learned state over the batch and mark all three axes as
    # non-broadcastable.
    h0 = T.unbroadcast(
        T.alloc(h0_param, latent_var.shape[0], N_RNN, h0_width),
        0, 1, 2)

    tiled_latent = T.extra_ops.repeat(
        latent_var[:, None, :], text_features.shape[1], axis=1)
    features = T.concatenate([text_features, tiled_latent], axis=2)
    rnn_input_dim = INPUT_DIM + LATENT_DIM

    # Keyword arguments shared by both RNN flavours.
    common_kwargs = dict(h0=h0, weightnorm=WEIGHT_NORM, skip_conn=SKIP_CONN)
    if RNN_TYPE == 'LSTM':
        rnns_out, _ = lib.ops.stackedLSTM(
            '{}.LSTM'.format(dec_name),
            N_RNN,
            rnn_input_dim,
            DIM,
            features,
            **common_kwargs)
    else:
        rnns_out, _ = lib.ops.stackedGRU(
            '{}.GRU'.format(dec_name),
            N_RNN,
            rnn_input_dim,
            DIM,
            features,
            use_input_every_layer=True,
            **common_kwargs)

    hidden = lib.ops.Linear(
        '{}.Output1'.format(dec_name),
        DIM,
        DIM,
        T.nnet.relu(rnns_out),
        weightnorm=WEIGHT_NORM)

    return lib.ops.Linear(
        '{}.Output2'.format(dec_name),
        DIM,
        OUTPUT_DIM,
        T.nnet.relu(hidden),
        initialization='he',
        weightnorm=WEIGHT_NORM)

Esempio n. 34

0

Mostra file

File: three_tier.py Progetto: fagan2888/speech-1

def frame_level_rnn(input_sequences, other_input, h0, reset):
    """Frame-level tier: rescale the samples, add the conditioning from the
    tier above, run N_GRUS stacked GRUs and expand each frame to FRAME_SIZE
    steps.

    Shapes, as stated by the original author:
    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape:     (batch size, n frames, DIM)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns (output, last_hidden); last_hidden stacks the final time step
    of every GRU along axis 1.
    """
    h0_param = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX))
    # The learned state, tiled over the batch, replaces the incoming h0
    # whenever `reset` is true.
    batch_h0 = T.alloc(h0_param, h0.shape[0], N_GRUS, DIM)
    batch_h0 = T.patternbroadcast(batch_h0, [False] * batch_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, batch_h0, h0)

    n_rows = input_sequences.shape[0]
    n_frames = input_sequences.shape[1] / FRAME_SIZE
    raw_frames = input_sequences.reshape((n_rows, n_frames, FRAME_SIZE))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    centered = (raw_frames.astype('float32') /
                lib.floatX(Q_LEVELS / 2)) - lib.floatX(1)
    frames = centered * lib.floatX(2)

    expanded = lib.ops.Linear(
        'FrameLevel.InputExpand', FRAME_SIZE, DIM, frames)
    gru_input = expanded + other_input

    layers = [
        lib.ops.LowMemGRU('FrameLevel.GRU0', DIM, DIM, gru_input, h0=h0[:, 0])
    ]
    for layer_idx in xrange(1, N_GRUS):
        # Each further layer consumes the output of the one below it.
        layers.append(
            lib.ops.LowMemGRU(
                'FrameLevel.GRU' + str(layer_idx),
                DIM,
                DIM,
                layers[-1],
                h0=h0[:, layer_idx]))

    # Produce FRAME_SIZE vectors of width DIM per frame, then unroll them
    # along the time axis.
    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        layers[-1],
        initialization='he')
    output = output.reshape(
        (output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([layer[:, -1] for layer in layers], axis=1)

    return (output, last_hidden)

Esempio n. 35

0

Mostra file

File: three_tier.py Progetto: igul222/speech

def big_frame_level_rnn(input_sequences, h0, reset):
    """Top tier: run N_BIG_GRUS stacked GRUs over big frames of rescaled
    samples and return conditioning vectors, final states and independent
    per-sample predictions.

    Shapes, as stated by the original author:
    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape:              (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape:           ()
    output[0].shape:       (batch size, n frames, DIM)
    output[1].shape:       same as h0.shape
    output[2].shape:       (batch size, seq len, Q_LEVELS)
    """
    initial_state = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX)
    )
    # Broadcast the learned state over the batch; it is used in place of the
    # incoming h0 whenever `reset` is true.
    tiled_state = T.alloc(initial_state, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    tiled_state = T.patternbroadcast(tiled_state, [False] * tiled_state.ndim)
    h0 = theano.ifelse.ifelse(reset, tiled_state, h0)

    frame_shape = (
        input_sequences.shape[0],
        input_sequences.shape[1] / BIG_FRAME_SIZE,
        BIG_FRAME_SIZE,
    )
    frames = input_sequences.reshape(frame_shape)

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames = frames * lib.floatX(2)

    grus = [
        lib.ops.LowMemGRU(
            'BigFrameLevel.GRU0',
            BIG_FRAME_SIZE,
            BIG_DIM,
            frames,
            h0=h0[:, 0]
        )
    ]
    for depth in xrange(1, N_BIG_GRUS):
        below = grus[-1]
        grus.append(
            lib.ops.LowMemGRU(
                'BigFrameLevel.GRU'+str(depth),
                BIG_DIM,
                BIG_DIM,
                below,
                h0=h0[:, depth]
            )
        )
    top_gru = grus[-1]

    # Each big-frame step becomes BIG_FRAME_SIZE / FRAME_SIZE steps of
    # width DIM.
    output = lib.ops.Linear(
        'BigFrameLevel.Output',
        BIG_DIM,
        DIM * BIG_FRAME_SIZE / FRAME_SIZE,
        top_gru
    )
    output_shape = (
        output.shape[0],
        output.shape[1] * BIG_FRAME_SIZE / FRAME_SIZE,
        DIM,
    )
    output = output.reshape(output_shape)

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    # Each big-frame step also yields BIG_FRAME_SIZE predictions of width
    # Q_LEVELS.
    independent_preds = lib.ops.Linear(
        'BigFrameLevel.IndependentPreds',
        BIG_DIM,
        Q_LEVELS * BIG_FRAME_SIZE,
        top_gru
    )
    preds_shape = (
        independent_preds.shape[0],
        independent_preds.shape[1] * BIG_FRAME_SIZE,
        Q_LEVELS,
    )
    independent_preds = independent_preds.reshape(preds_shape)

    return (output, last_hidden, independent_preds)

Esempio n. 36

0

Mostra file

File: baseline.py Progetto: shubham1310/speech

def sample_level_rnn(input_sequences, h0, reset):
    """Sample-level baseline: embed each sample, run three stacked GRUs and
    project to Q_LEVELS outputs per step (softmax is applied by the caller).

    Shapes, as stated by the original author:
    input_sequences.shape: (batch size, seq len)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()

    Returns (output, last_hidden). `output` comes from a DIM -> Q_LEVELS
    Linear layer, so its last axis is presumably Q_LEVELS rather than DIM
    (TODO confirm against lib.ops.Linear). `last_hidden` stacks the final
    time step of the three GRUs along axis 1.

    Raises Exception if N_GRUS is not 3.
    """
    if N_GRUS != 3:
        raise Exception('N_GRUS must be 3, at least for now')

    h0_param = lib.param(
        'SampleLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    # The learned state, tiled over the batch, replaces the incoming h0
    # whenever `reset` is true.
    h0 = theano.ifelse.ifelse(
        reset, T.alloc(h0_param, h0.shape[0], N_GRUS, DIM), h0)

    # Embedded inputs: every sample becomes a Q_LEVELS-wide vector, so the
    # first GRU's input width is Q_LEVELS. (The original author also kept a
    # disabled "real-valued inputs" variant: frames of size 1, rescaled from
    # ints in [0, Q_LEVELS) to floats in [-2, 2].)
    frames = lib.ops.Embedding(
        'SampleLevel.Embedding', Q_LEVELS, Q_LEVELS, input_sequences)

    layer_input = frames
    layer_in_dim = Q_LEVELS
    gru_outputs = []
    gru_names = ('SampleLevel.GRU1', 'SampleLevel.GRU2', 'SampleLevel.GRU3')
    for idx, gru_name in enumerate(gru_names):
        layer_input = lib.ops.LowMemGRU(
            gru_name, layer_in_dim, DIM, layer_input, h0=h0[:, idx])
        gru_outputs.append(layer_input)
        layer_in_dim = DIM

    # We apply the softmax later
    output = lib.ops.Linear('Output', DIM, Q_LEVELS, gru_outputs[-1])

    last_hidden = T.stack([g[:, -1] for g in gru_outputs], axis=1)

    return (output, last_hidden)

Esempio n. 37

0

Mostra file

File: ops.py Progetto: ritheshkumar95/speech

def Embedding(name, n_symbols, output_dim, indices):
    """Replace every entry of `indices` by a learned `output_dim`-sized row.

    The table is a parameter called `name` with shape
    (n_symbols, output_dim), initialised from a standard normal
    distribution. The result has the shape of `indices` plus one trailing
    axis of size `output_dim`.
    """
    initial_table = numpy.random.randn(n_symbols, output_dim)
    vectors = lib.param(name, initial_table.astype(theano.config.floatX))

    # Index with the flattened indices, then fold back to the input layout.
    flat_rows = vectors[indices.flatten()]
    return flat_rows.reshape(tuple(indices.shape) + (output_dim,))

Esempio n. 38

0

Mostra file

def myGRU(name, input_dim, hidden_dim, inputs, h0=None):
    """Weight-normalised GRU driven by `recurrent_fn` through theano.scan.

    name:       prefix for the parameter names.
    input_dim:  width of each input step.
    hidden_dim: width of the hidden state.
    inputs:     per the original author, shape
                (batch_size, N_FRAMES, FRAME_SIZE); the first two axes are
                swapped before scanning.
    h0:         initial state handed to scan as its only outputs_info entry.

    Returns scan's output with its first two axes swapped back, named
    name + '.output'.
    """
    # Swap the first two axes so scan iterates over the second input axis.
    time_major = inputs.transpose(1, 0, 2)

    def weight_normed(w_suffix, g_suffix, n_out):
        # Create a weight matrix and its per-column gain (initialised to the
        # column norms), and return the matrix rescaled column-wise so each
        # column's L2 norm equals its gain.
        values = init_weights(input_dim + hidden_dim, n_out)
        weight = lib.param(name + w_suffix, values)
        gain = lib.param(name + g_suffix, numpy.linalg.norm(values, axis=0))
        return weight * (gain / weight.norm(2, axis=0)).dimshuffle('x', 0)

    # NOTE: the gain names have no dot after `name`; kept byte-for-byte so
    # parameter names stay unchanged.
    gates_w = weight_normed('.Gates.W', 'Gates.W.g', 2 * hidden_dim)
    gates_b = lib.param(
        name + '.Gates.b',
        np.ones(2 * hidden_dim).astype(theano.config.floatX))

    candidate_w = weight_normed('.Candidate.W', 'Candidate.W.g', hidden_dim)
    candidate_b = lib.param(
        name + '.Candidate.b',
        np.zeros(hidden_dim).astype(theano.config.floatX))

    scanned, _ = theano.scan(
        recurrent_fn,
        sequences=[time_major],
        outputs_info=[h0],
        non_sequences=[hidden_dim, gates_w, gates_b, candidate_w,
                       candidate_b])

    out = scanned.dimshuffle(1, 0, 2)
    out.name = name + '.output'
    return out

Esempio n. 39

0

Mostra file

def Conv1D(name,
           input_dim,
           output_dim,
           filter_size,
           inputs,
           apply_biases=True):
    """1-D 'valid' convolution implemented with theano's conv2d.

    Shapes, as stated by the original author:
    inputs.shape: (batch size, height, input_dim)
    output.shape: (batch size, height, output_dim)

    name:         prefix for the '.Filters' and '.Biases' parameters.
    filter_size:  filter length along the height axis.
    apply_biases: if True, add a learned per-output-channel bias.
    """
    # Uniform on [-b, b] has standard deviation b / sqrt(3); this bound
    # gives the filters a stdev of 1 / sqrt(input_dim * filter_size).
    filter_stdev = 1. / numpy.sqrt(input_dim * filter_size)
    bound = filter_stdev * numpy.sqrt(3)
    initial_filters = numpy.random.uniform(
        low=-bound,
        high=bound,
        # output dim, input dim, height, width
        size=(output_dim, input_dim, filter_size, 1)
    ).astype(theano.config.floatX)
    filters = lib.param(name + '.Filters', initial_filters)

    # conv2d wants (batch size, input channels, height, width): insert a
    # singleton width axis, then move the channels to axis 1.
    batch, height, channels = (
        inputs.shape[0], inputs.shape[1], inputs.shape[2])
    as_image = inputs.reshape((batch, height, 1, channels))
    as_image = as_image.dimshuffle(0, 3, 1, 2)

    conv_out = T.nnet.conv2d(
        as_image, filters, border_mode='valid', filter_flip=False)

    if apply_biases:
        biases = lib.param(
            name + '.Biases',
            numpy.zeros(output_dim, dtype=theano.config.floatX))
        conv_out = conv_out + biases[None, :, None, None]

    # Move the channels back to the last axis and drop the singleton width.
    channels_last = conv_out.dimshuffle(0, 2, 3, 1)
    return channels_last.reshape(
        (channels_last.shape[0],
         channels_last.shape[1],
         channels_last.shape[3]))

Esempio n. 40

0

Mostra file

File: ops.py Progetto: ritheshkumar95/speech

def Dense(name, input_dim, output_dim, inputs, bias=True, init=None, weightnorm=True,hidden_dim=None):
    """Affine layer `inputs . W (+ b)` with optional weight normalisation.

    name:       prefix for the '.W', '.g' and '.b' parameters.
    input_dim:  size of the last axis of `inputs`.
    output_dim: size of the last axis of the result.
    inputs:     2-D or 3-D tensor. A 3-D input is flattened to
                (-1, input_dim) for the matrix product and reshaped back to
                (batch, hidden_dim, output_dim) afterwards.
    bias:       if True, add a learned bias initialised to zero.
    init:       forwarded to init_weights.
    weightnorm: if True, rescale each weight column so its L2 norm equals a
                learned gain (initialised to the column's initial norm).
    hidden_dim: middle axis used when reshaping a 3-D result; defaults to
                inputs.shape[1].

    Returns the layer output, named name + ".output" before any final
    reshape.
    """
    weight_values = init_weights(input_dim, output_dim, init)

    weight = lib.param(
        name + '.W',
        weight_values
    )

    batch_size = None
    if inputs.ndim == 3:
        batch_size = inputs.shape[0]
        if hidden_dim is None:
            # Without this default the final reshape received None and
            # failed; the middle axis of the input is the size that restores
            # the original layout.
            hidden_dim = inputs.shape[1]
        inputs = inputs.reshape((-1, input_dim))

    if weightnorm:
        norm_values = numpy.linalg.norm(weight_values, axis=0)
        norms = lib.param(
            name + '.g',
            norm_values
        )

        normed_weight = weight * (norms / weight.norm(2, axis=0)).dimshuffle('x', 0)
        result = T.dot(inputs, normed_weight)

    else:
        result = T.dot(inputs, weight)

    if bias:
        b = lib.param(
            name + '.b',
            numpy.zeros((output_dim,), dtype=theano.config.floatX)
        )
        result += b

    result.name = name + ".output"
    # Identity test: batch_size may be a symbolic variable, so avoid `!=`.
    if batch_size is not None:
        return result.reshape((batch_size, hidden_dim, output_dim))
    else:
        return result

Esempio n. 41

0

Mostra file

def Dense(name,
          input_dim,
          output_dim,
          inputs,
          bias=True,
          init=None,
          weightnorm=True,
          hidden_dim=None):
    """Affine layer `inputs . W (+ b)` with optional weight normalisation.

    name:       prefix for the '.W', '.g' and '.b' parameters.
    input_dim:  size of the last axis of `inputs`.
    output_dim: size of the last axis of the result.
    inputs:     2-D or 3-D tensor. A 3-D input is flattened to
                (-1, input_dim) for the matrix product and reshaped back to
                (batch, hidden_dim, output_dim) afterwards.
    bias:       if True, add a learned bias initialised to zero.
    init:       forwarded to init_weights.
    weightnorm: if True, rescale each weight column so its L2 norm equals a
                learned gain (initialised to the column's initial norm).
    hidden_dim: middle axis used when reshaping a 3-D result; defaults to
                inputs.shape[1].

    Returns the layer output, named name + ".output" before any final
    reshape.
    """
    weight_values = init_weights(input_dim, output_dim, init)

    weight = lib.param(name + '.W', weight_values)

    batch_size = None
    if inputs.ndim == 3:
        batch_size = inputs.shape[0]
        if hidden_dim is None:
            # Without this default the final reshape received None and
            # failed; the middle axis of the input is the size that restores
            # the original layout.
            hidden_dim = inputs.shape[1]
        inputs = inputs.reshape((-1, input_dim))

    if weightnorm:
        norm_values = numpy.linalg.norm(weight_values, axis=0)
        norms = lib.param(name + '.g', norm_values)

        normed_weight = weight * (norms / weight.norm(2, axis=0)).dimshuffle(
            'x', 0)
        result = T.dot(inputs, normed_weight)

    else:
        result = T.dot(inputs, weight)

    if bias:
        b = lib.param(name + '.b',
                      numpy.zeros((output_dim, ), dtype=theano.config.floatX))
        result += b

    result.name = name + ".output"
    # Identity test: batch_size may be a symbolic variable, so avoid `!=`.
    if batch_size is not None:
        return result.reshape((batch_size, hidden_dim, output_dim))
    else:
        return result

Esempio n. 42

0

Mostra file

File: gru.py Progetto: yobajnin/nn

def Recurrent(name,
              hidden_dims,
              step_fn,
              inputs,
              non_sequences=[],
              h0s=None,
              reset=None):
    """Run `step_fn` over `inputs` with theano.scan, optionally carrying the
    hidden state across calls.

    name:          prefix for the learned initial states
                   (name + '.h0_' + str(i)) and the carried-state shared
                   variables (name + '.last_hidden_' + str(i)).
    hidden_dims:   int or list of ints, one per recurrent state.
    step_fn:       step function handed to theano.scan.
    inputs:        a sequence or list of sequences; a lone value is wrapped
                   in a list. The batch size is read from inputs[0].shape[1].
    non_sequences: forwarded to theano.scan; this function never mutates it.
    h0s:           optional list of initial states; None entries (or
                   h0s=None) become learned zero-initialised vectors tiled
                   over the batch.
    reset:         optional condition. When given, each initial state is
                   ifelse(reset, h0, last_hidden), where last_hidden is a
                   shared variable whose default update is the final step
                   of the matching scan output.

    Returns the first element of theano.scan's result.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if h0s is None:
        h0s = [None] * len(hidden_dims)
    else:
        # Work on a private copy: entries are overwritten below, and the
        # caller's list must not be modified behind its back.
        h0s = list(h0s)

    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                np.zeros((hidden_dims[i], ), dtype=theano.config.floatX))
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])

        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    if reset is not None:
        last_hiddens = []
        for i in xrange(len(h0s)):
            # The shape of last_hidden doesn't matter right now; we assume
            # it won't be used until we put something proper in it.
            last_hidden = theano.shared(np.zeros([1] * h0s[i].ndim,
                                                 dtype=h0s[i].dtype),
                                        name=name + '.last_hidden_' + str(i))
            last_hiddens.append(last_hidden)
            h0s[i] = theano.ifelse.ifelse(reset, h0s[i], last_hidden)

    outputs, _ = theano.scan(step_fn,
                             sequences=inputs,
                             outputs_info=h0s,
                             non_sequences=non_sequences)

    if reset is not None:
        # With one state `outputs` is indexed directly; with several it is
        # indexed per state first.
        if len(last_hiddens) == 1:
            last_hiddens[0].default_update = outputs[-1]
        else:
            for i in xrange(len(last_hiddens)):
                last_hiddens[i].default_update = outputs[i][-1]

    return outputs

Esempio n. 43

0

Mostra file

File: pixel_rnn.py Progetto: igul222/pixel_rnn

def Conv1D(name, input_dim, output_dim, filter_size, inputs, apply_biases=True):
    """Apply a 'valid' 1-D convolution along the height axis via conv2d.

    Shapes, as stated by the original author:
    inputs.shape: (batch size, height, input_dim)
    output.shape: (batch size, height, output_dim)

    name:         prefix for the '.Filters' and '.Biases' parameters.
    filter_size:  filter length along the height axis.
    apply_biases: if True, add a learned per-output-channel bias.
    """
    # Filters are drawn uniformly from [-limit, limit]; that interval has
    # standard deviation limit / sqrt(3) = 1 / sqrt(input_dim * filter_size).
    target_stdev = 1./numpy.sqrt(input_dim * filter_size)
    limit = target_stdev * numpy.sqrt(3)
    # output dim, input dim, height, width
    filter_shape = (output_dim, input_dim, filter_size, 1)
    filters = lib.param(
        name+'.Filters',
        numpy.random.uniform(
            low=-limit, high=limit, size=filter_shape
        ).astype(theano.config.floatX)
    )

    # conv2d takes (batch size, input channels, height, width): add a
    # singleton axis before the channels, then bring the channels to axis 1.
    four_d = inputs.reshape(
        (inputs.shape[0], inputs.shape[1], 1, inputs.shape[2])
    ).dimshuffle(0, 3, 1, 2)
    result = T.nnet.conv2d(
        four_d,
        filters,
        border_mode='valid',
        filter_flip=False
    )

    if apply_biases:
        bias_init = numpy.zeros(output_dim, dtype=theano.config.floatX)
        biases = lib.param(name+'.Biases', bias_init)
        result = result + biases[None, :, None, None]

    # Channels go back to the last axis; the singleton axis is dropped.
    result = result.dimshuffle(0, 2, 3, 1)
    out_shape = (result.shape[0], result.shape[1], result.shape[3])
    return result.reshape(out_shape)

Esempio n. 44

0

Mostra file

File: baseline.py Progetto: kastnerkyle/speech

def sample_level_rnn(input_sequences, h0, reset):
    """Sample-level baseline: embed each sample, run three stacked GRUs and
    project to Q_LEVELS outputs per step (softmax is applied by the caller).

    Shapes, as stated by the original author:
    input_sequences.shape: (batch size, seq len)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()

    Returns (output, last_hidden). `output` comes from a DIM -> Q_LEVELS
    Linear layer, so its last axis is presumably Q_LEVELS rather than DIM
    (TODO confirm against lib.ops.Linear). `last_hidden` stacks the final
    time step of the three GRUs along axis 1.

    Raises Exception if N_GRUS is not 3.
    """
    if N_GRUS != 3:
        raise Exception('N_GRUS must be 3, at least for now')

    # NOTE(review): the parameter is named 'FrameLevel.h0' although this is
    # the sample-level model; kept as-is so parameter names stay unchanged.
    initial_state = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    # Broadcast the learned state over the batch; it is used in place of the
    # incoming h0 whenever `reset` is true.
    tiled_state = T.alloc(initial_state, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, tiled_state, h0)

    # Embedded inputs: every sample becomes a Q_LEVELS-wide vector, which
    # fixes the first GRU's input width. (The original author also kept a
    # disabled "real-valued inputs" variant: frames of size 1, rescaled from
    # ints in [0, Q_LEVELS) to floats in [-2, 2].)
    first_in_dim = Q_LEVELS
    frames = lib.ops.Embedding(
        'SampleLevel.Embedding', Q_LEVELS, Q_LEVELS, input_sequences)

    bottom = lib.ops.LowMemGRU(
        'SampleLevel.GRU1', first_in_dim, DIM, frames, h0=h0[:, 0])
    middle = lib.ops.LowMemGRU(
        'SampleLevel.GRU2', DIM, DIM, bottom, h0=h0[:, 1])
    top = lib.ops.LowMemGRU(
        'SampleLevel.GRU3', DIM, DIM, middle, h0=h0[:, 2])

    # We apply the softmax later
    output = lib.ops.Linear('Output', DIM, Q_LEVELS, top)

    final_steps = [bottom[:, -1], middle[:, -1], top[:, -1]]
    last_hidden = T.stack(final_steps, axis=1)

    return (output, last_hidden)

Esempio n. 45

0

Mostra file

File: embedding.py Progetto: Faruk-Ahmed/nn

def Embedding(name, n_symbols, output_dim, inputs):
    """
    Look up a learned vector for every integer entry of `inputs`.

    A (n_symbols, output_dim) table is registered under `name`, initialised
    from a standard normal distribution. The result has the shape of
    `inputs` with one extra trailing axis of size `output_dim`.
    """
    initial_table = np.random.randn(n_symbols, output_dim)
    vectors = lib.param(name, initial_table.astype(theano.config.floatX))

    # Symbolic shape of the result: inputs.shape + (output_dim,)
    output_shape = []
    for axis in xrange(inputs.ndim):
        output_shape.append(inputs.shape[axis])
    output_shape.append(output_dim)

    # Index with the flattened ids, then restore the original layout.
    flat_lookup = vectors[inputs.flatten()]
    return flat_lookup.reshape(output_shape)

Esempio n. 46

0

Mostra file

File: three_tier.py Progetto: igul222/speech

def frame_level_rnn(input_sequences, other_input, h0, reset):
    """
    Frame-level tier: a stack of N_GRUS GRUs over frames of FRAME_SIZE samples.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape:     (batch size, n frames, DIM)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns a tuple (output, last_hidden) where last_hidden stacks the
    `[:, -1]` slice of every GRU output along axis 1.
    """

    # Learned initial state, tiled over the batch. It replaces the supplied
    # h0 whenever `reset` is true.
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    # Force a fully non-broadcastable pattern on the tiled state before it
    # goes into ifelse alongside h0.
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # `//` makes the integer division explicit; `/` between integer tensors
    # relies on Theano's deprecated implicit int-division behaviour.
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    # Project each frame to DIM and add the conditioning input.
    gru_input = lib.ops.Linear('FrameLevel.InputExpand', FRAME_SIZE, DIM, frames) + other_input

    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', DIM, DIM, gru_input, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    # Expand every frame-level vector into FRAME_SIZE vectors of size DIM,
    # then unfold them along the time axis.
    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        grus[-1],
        initialization='he'
    )
    output = output.reshape((output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru[:,-1] for gru in grus], axis=1)

    return (output, last_hidden)

Esempio n. 47

0

Mostra file

File: two_tier.py Progetto: kastnerkyle/speech

def frame_level_rnn(input_sequences, h0, reset):
    """
    Frame-level tier: three stacked GRUs over frames of FRAME_SIZE samples.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns a tuple (output, last_hidden) where last_hidden stacks the
    `[:, -1]` slice of every GRU output along axis 1.

    Raises Exception if N_GRUS is not 3.
    """

    if N_GRUS != 3:
        raise Exception('N_GRUS must be 3, at least for now')

    # Learned initial state, tiled over the batch. It replaces the supplied
    # h0 whenever `reset` is true.
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # `//` makes the integer division explicit; `/` between integer tensors
    # relies on Theano's deprecated implicit int-division behaviour.
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru1 = lib.ops.LowMemGRU('FrameLevel.GRU1', FRAME_SIZE, DIM, frames, h0=h0[:, 0])
    gru2 = lib.ops.LowMemGRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.LowMemGRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])

    # Expand every frame-level vector into FRAME_SIZE vectors of size DIM,
    # then unfold them along the time axis.
    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        gru3,
        initialization='he'
    )
    output = output.reshape((output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)

    return (output, last_hidden)

Esempio n. 48

0

Mostra file

File: ops.py Progetto: ritheshkumar95/speech

def myGRU(name, input_dim, hidden_dim, inputs, h0=None):
    """
    Run `recurrent_fn` over `inputs` with theano.scan, using weight-normalised
    gate and candidate matrices.

    inputs is expected as (batch_size, N_FRAMES, FRAME_SIZE); it is moved to
    (N_FRAMES, batch_size, FRAME_SIZE) for the scan and the scan result is
    moved back with the first two axes swapped before being returned.
    """
    # scan iterates over the leading axis
    inputs = inputs.transpose(1,0,2)

    def weightnormed_matrix(weight_name, gain_name, n_columns):
        # Registers the raw matrix and its per-column gain (initialised to the
        # column norms of the initial matrix), and returns the matrix with
        # every column rescaled to that gain.
        initial = init_weights(input_dim+hidden_dim, n_columns)
        raw = lib.param(weight_name, initial)
        gain = lib.param(gain_name, numpy.linalg.norm(initial, axis=0))
        return raw * (gain / raw.norm(2, axis=0)).dimshuffle('x', 0)

    # NOTE(review): the gain names below have no '.' after `name`, unlike the
    # weight and bias names -- kept as-is so existing parameter names match.
    n_W1 = weightnormed_matrix(name+'.Gates.W', name + 'Gates.W.g', 2*hidden_dim)
    b1 = lib.param(
        name+'.Gates.b',
        np.ones(2*hidden_dim).astype(theano.config.floatX)
    )

    n_W2 = weightnormed_matrix(name+'.Candidate.W', name + 'Candidate.W.g', hidden_dim)
    b2 = lib.param(
        name+'.Candidate.b',
        np.zeros(hidden_dim).astype(theano.config.floatX)
    )

    scan_out, _ = theano.scan(
        recurrent_fn,
        sequences=[inputs],
        outputs_info=[h0],
        non_sequences=[hidden_dim,n_W1,b1,n_W2,b2]
    )

    result = scan_out.dimshuffle(1,0,2)
    result.name = name+'.output'
    return result

Esempio n. 49

0

Mostra file

File: conv2d.py Progetto: Faruk-Ahmed/nn

def Conv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True):
    """
    2D convolution ('half' padding, no filter flip) with optional masking,
    weight normalisation and biases.

    inputs: tensor of shape (batch size, num channels, height, width)
    mask_type: None, or a pair (kind, n_channels) where kind is 'a' or 'b'
        and n_channels is the number of channel groups used by the mask
    stride: subsampling factor applied on both spatial axes
    weightnorm: True/False, or None to use the module default
        (`_default_weightnorm`)
    biases: whether to add a per-output-channel bias

    returns: the convolution result, with output_dim channels
    """
    if mask_type is not None:
        mask_type, mask_n_channels = mask_type

    def uniform(stdev, size):
        # Uniform samples whose standard deviation is `stdev`.
        return np.random.uniform(
            low=-stdev * np.sqrt(3),
            high=stdev * np.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    fan_in = input_dim * filter_size**2
    fan_out = output_dim * filter_size**2
    # TODO: shouldn't fan_out be divided by stride

    if mask_type is not None: # only approximately correct
        fan_in /= 2.
        fan_out /= 2.

    if he_init:
        filters_stdev = np.sqrt(4./(fan_in+fan_out))
    else: # Normalized init (Glorot & Bengio)
        filters_stdev = np.sqrt(2./(fan_in+fan_out))

    filter_values = uniform(
        filters_stdev,
        (output_dim, input_dim, filter_size, filter_size)
    )

    filters = lib.param(name+'.Filters', filter_values)

    if weightnorm is None:
        weightnorm = _default_weightnorm
    if weightnorm:
        # One learned gain per output channel, initialised to the norm of
        # that channel's initial filter; filters are rescaled to that gain.
        norm_values = np.linalg.norm(filter_values.reshape((filter_values.shape[0], -1)), axis=1)
        norms = lib.param(
            name + '.g',
            norm_values
        )
        filters = filters * (norms / filters.reshape((filters.shape[0],-1)).norm(2, axis=1)).dimshuffle(0,'x','x','x')

    if mask_type is not None:
        mask = np.ones(
            (output_dim, input_dim, filter_size, filter_size),
            dtype=theano.config.floatX
        )
        center = filter_size // 2

        # Mask out future locations
        # filter shape is (out_channels, in_channels, height, width)
        mask[:, :, center+1:, :] = 0.
        mask[:, :, center, center+1:] = 0.

        # Mask out future channels: at the centre tap, output channel group j
        # does not see input channel group i when i >= j (type 'a') or
        # i > j (type 'b').
        for i in xrange(mask_n_channels):
            for j in xrange(mask_n_channels):
                if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j):
                    mask[
                        j::mask_n_channels,
                        i::mask_n_channels,
                        center,
                        center
                    ] = 0.

        filters = filters * mask

    if biases:
        _biases = lib.param(
            name+'.Biases',
            np.zeros(output_dim, dtype=theano.config.floatX)
        )

    result = T.nnet.conv2d(
        inputs,
        filters,
        border_mode='half',
        filter_flip=False,
        subsample=(stride,stride)
    )

    if biases:
        result = result + _biases[None, :, None, None]
    # result = lib.debug.print_stats(name, result)
    return result

Esempio n. 50

0

Mostra file

File: ops.py Progetto: sotelo/parrot

def Batchnorm(
    name,
    input_dim,
    inputs,
    stepwise=False,
    axes=None,
    wrt=None,
    i_gamma=None,
    i_beta=None):
    """
    Batch-normalise `inputs` with learned scale (`name`.gamma) and shift
    (`name`.beta). From Ishaan's repo.

    Statistics are taken from `wrt` (defaults to `inputs`): over `axes` with
    kept dimensions when `axes` is given, otherwise over all rows after
    reshaping to (-1, input_dim).

    i_gamma / i_beta: optional initial values; they default to 0.1 * ones
    and zeros of length input_dim.
    `stepwise` is accepted but not used by this implementation.
    """
    stats_source = inputs if wrt is None else wrt
    reduce_over_axes = axes is not None

    if reduce_over_axes:
        means = stats_source.mean(axis=axes, keepdims=True)
        variances = stats_source.var(axis=axes, keepdims=True)
    else:
        flattened = stats_source.reshape((-1, input_dim))
        means = flattened.mean(axis=0)
        variances = flattened.var(axis=0)

    gamma_init = i_gamma
    if gamma_init is None:
        gamma_init = lib.floatX(0.1) * numpy.ones(input_dim, dtype=theano.config.floatX)

    beta_init = i_beta
    if beta_init is None:
        beta_init = numpy.zeros(input_dim, dtype=theano.config.floatX)

    gamma = lib.param(name + '.gamma', gamma_init)
    beta = lib.param(name + '.beta', beta_init)

    # Small constant keeps the square root away from zero.
    stdevs = T.sqrt(variances + lib.floatX(1e-6))

    stdevs.name = name+'.stdevs'
    means.name = name+'.means'

    # Equivalent to (((inputs - means) / stdevs) * gamma) + beta
    if not reduce_over_axes:
        return T.nnet.bn.batch_normalization(
            inputs,
            gamma.dimshuffle('x',0),
            beta.dimshuffle('x',0),
            means.dimshuffle('x',0),
            stdevs.dimshuffle('x',0),
            mode='low_mem'
        )

    # Broadcast gamma/beta along every reduced axis; the remaining axis
    # carries the per-feature values.
    pattern = []
    for axis in xrange(inputs.ndim):
        pattern.append('x' if axis in axes else 0)

    return T.nnet.bn.batch_normalization(
        inputs,
        gamma.dimshuffle(*pattern),
        beta.dimshuffle(*pattern),
        means,
        stdevs,
        mode='low_mem'
    )

Esempio n. 51

0

Mostra file

File: diagonal_bilstm.py Progetto: igul222/image_generation

def DiagonalLSTM(name, input_dim, output_dim, input_shape, inputs):
    """
    LSTM that scans over the last axis of the skewed input.

    input_shape: (n_channels, height, width)
    inputs.shape: (batch size, input_dim, height, width)
    outputs.shape: (batch size, output_dim, height, width)
    """
    n_channels, height, width = input_shape

    inputs = _skew(height, width, inputs)

    # TODO benchmark running skew after input_to_state, might be faster
    # 1x1 masked convolution producing the input contribution to all four
    # gates at once (hence 4*output_dim channels).
    input_to_state = lib.ops.conv2d.Conv2D(
        name+'.InputToState',
        input_dim,
        4*output_dim,
        1, inputs,
        mask_type=('b', n_channels),
        he_init=False
    )

    batch_size = inputs.shape[0]

    # Learned initial cell and hidden states, tiled over the batch.
    c0_unbatched = lib.param(
        name + '.c0',
        np.zeros((output_dim, height), dtype=theano.config.floatX)
    )
    c0 = T.alloc(c0_unbatched, batch_size, output_dim, height)

    h0_unbatched = lib.param(
        name + '.h0',
        np.zeros((output_dim, height), dtype=theano.config.floatX)
    )
    h0 = T.alloc(h0_unbatched, batch_size, output_dim, height)

    def step_fn(current_input_to_state, prev_c, prev_h):
        # all args have shape (batch size, output_dim, height)

        # TODO consider learning this padding
        # Prepend one zero along the height axis so the size-2 'valid'
        # convolution below returns `height` outputs.
        prev_h_padded = T.zeros((batch_size, output_dim, 1+height), dtype=theano.config.floatX)
        prev_h_padded = T.inc_subtensor(prev_h_padded[:,:,1:], prev_h)

        state_to_state = lib.ops.conv1d.Conv1D(
            name+'.StateToState',
            output_dim,
            4*output_dim,
            2,
            prev_h_padded,
            biases=False
        )

        gates = current_input_to_state + state_to_state

        # Channel layout of `gates`: [output | forget | input | candidate]
        o_f_i = T.nnet.sigmoid(gates[:,:3*output_dim,:])
        o = o_f_i[:,0*output_dim:1*output_dim,:]
        f = o_f_i[:,1*output_dim:2*output_dim,:]
        i = o_f_i[:,2*output_dim:3*output_dim,:]
        g = T.tanh(gates[:,3*output_dim:4*output_dim,:])

        new_c = (f * prev_c) + (i * g)
        new_h = o * T.tanh(new_c)

        return (new_c, new_h)

    # Move the last axis to the front so scan iterates over it.
    outputs, _ = theano.scan(
        step_fn,
        sequences=input_to_state.dimshuffle(3,0,1,2),
        outputs_info=[c0, h0]
    )
    # Only the hidden states are returned; put the scanned axis back last.
    all_hs = outputs[1].dimshuffle(1,2,3,0)

    return _unskew(height, width, all_hs)

Esempio n. 52

0

Mostra file

File: ops.py Progetto: sotelo/parrot

def Linear(
        name,
        input_dims,
        output_dim,
        inputs,
        biases=True,
        initialization=None,
        weightnorm=True,
        just_params=False):
    """
    Compute a linear transform of one or more inputs, optionally with a bias.

    :parameters:
        input_dims: list of ints, or int (if single input); the dimensionality of
                    the input(s).
        output_dim: the dimensionality of the output.
        biases:     whether or not to include a bias term.
        inputs:     a theano variable, or list of variables (if multiple inputs);
                    the inputs to which to apply the transform.
        initialization: one of None, `lecun`, `glorot`, `he`, `glorot_he`,
                    `orthogonal`, or a numpy array of shape
                    (input_dim, output_dim) used as the initial weights.
                    None selects `orthogonal` for square matrices and
                    `lecun` otherwise.
        weightnorm: whether to reparameterise each weight matrix with a
                    learned per-column gain (`name`.g<i>).
        just_params: if true, return the list of created parameters instead
                    of the output expression.

    :returns:
        The sum of all `inputs[i] . W<i>` terms (plus the bias), named
        `name`.output -- or the parameter list when `just_params` is set.

    :todo:
        - get arbitrary numpy array as initialization. Check the dims as well.
    """
    def orthogonal(shape):
        # From lasagne
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are supported.")
        flat_shape = (shape[0], numpy.prod(shape[1:]))
        # TODO: why normal and not uniform?
        a = numpy.random.normal(0.0, 1.0, flat_shape)
        u, _, v = numpy.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)
        return q.astype(theano.config.floatX)

    if not isinstance(input_dims, list):
        input_dims = [input_dims]
        inputs = [inputs]

    terms = []
    params = []

    for i, (inp, inp_dim) in enumerate(zip(inputs, input_dims)):
        if isinstance(initialization, numpy.ndarray):
            weight_values = initialization
            assert weight_values.shape == (inp_dim, output_dim),\
                'Expecting an ndarray with shape ({}, {}) but got {}'.\
                format(inp_dim, output_dim, initialization.shape)
        elif initialization == 'lecun' or (initialization is None and inp_dim != output_dim):
            weight_values = uniform(numpy.sqrt(1. / inp_dim), (inp_dim, output_dim))
        elif initialization == 'glorot':
            weight_values = uniform(numpy.sqrt(2./(inp_dim+output_dim)), (inp_dim, output_dim))
        elif initialization == 'he':
            weight_values = uniform(numpy.sqrt(2. / inp_dim), (inp_dim, output_dim))
        elif initialization == 'glorot_he':
            weight_values = uniform(numpy.sqrt(4./(inp_dim+output_dim)), (inp_dim, output_dim))
        elif initialization == 'orthogonal' or (initialization is None and inp_dim == output_dim):
            weight_values = orthogonal((inp_dim, output_dim))
        else:
            raise Exception("Invalid initialization ({})!"\
                    .format(repr(initialization)))

        weight = lib.param(
            name + '.W'+str(i),
            weight_values
        )
        params.append(weight)

        if weightnorm:
            # Gain starts at the column norms of the initial matrix, so the
            # normalised weight initially equals the raw weight.
            norm_values = numpy.linalg.norm(weight_values, axis=0)
            norms = lib.param(
                name + '.g'+str(i),
                norm_values
            )
            params.append(norms)

            prepared_weight = weight * (norms / weight.norm(2, axis=0)).dimshuffle('x', 0)
        else:
            prepared_weight = weight
        terms.append(T.dot(inp, prepared_weight))

    if biases:
        layer_biases = lib.param(
            name + '.b',
            numpy.zeros((output_dim,), dtype=theano.config.floatX)
        )
        params.append(layer_biases)
        terms.append(layer_biases)

    if just_params:
        return params
    # otherwise, complete/add to the computation graph
    # (left-to-right sum; written as a loop so it does not depend on the
    # Python-2-only builtin `reduce`)
    out = terms[0]
    for term in terms[1:]:
        out = out + term
    out.name = name + '.output'
    return out

Esempio n. 53

0

Mostra file

File: ops.py Progetto: sotelo/parrot

def __LSTMStep(
        name,
        input_dim,
        hidden_dim,
        current_input,
        last_hidden,
        weightnorm=True,
        inp_bias_init=0.,
        forget_bias_init=3.,
        out_bias_init=0.,
        g_bias_init=0.):
    """
    One LSTM time step.

    CAUTION:
        Not for stand-alone usage. It lives at module level (instead of
        inside the LSTM function) only to keep that function readable.

    Gates:
        i = sigm(X_t*U^i + S_{t-1}*W^i + b^i)
        f = sigm(X_t*U^f + S_{t-1}*W^f + b^f)
        o = sigm(X_t*U^o + S_{t-1}*W^o + b^o)
    Candidate / internal memory (cell state) and hidden state:
        g = tanh(X_t*U^g + S_{t-1}*W^g + b^g)
        c_t = c_{t-1}.f + g.i
    State:
        S_t = tanh(c_t).o
    last_hidden:
        last axis has size 2*hidden_dim
        S_{t-1} = last_hidden[:, :hidden_dim]
        c_{t-1} = last_hidden[:, hidden_dim:]

    Returns:
        [S_t ; c_t] concatenated along the last axis (size 2*hidden_dim).

    Note:
        Initialising the forget-gate bias with a large positive value
        (1. to 5.) has been shown to help learning and/or modelling
        long-term dependencies.
        sigmoid([0., 1., 2., 3., 5.]) = [.5, .73, .88, .95, .99]
    See:
        http://www.felixgers.de/papers/phd.pdf
        http://jmlr.org/proceedings/papers/v37/jozefowicz15.pdf

    :todo:
        - Better initializations, especially for the weight matrices.
        - Fix the 'concatenation' to use instead of T.concatenate
    """
    n_gates = 4

    # X_t*(U^i, U^f, U^o, U^g)
    input_term = lib.ops.Linear(
        name+'.Input',
        input_dim,
        n_gates * hidden_dim,
        current_input,
        biases=False,
        weightnorm=weightnorm
    )

    # last_hidden is [batch size, S_{t-1};c_{t-1}]
    prev_state = last_hidden[:, :hidden_dim]
    prev_cell = last_hidden[:, hidden_dim:]

    # S_{t-1}*(W^i, W^f, W^o, W^g)
    recurrent_term = lib.ops.Linear(
        name+'.Recurrent_Gates',
        hidden_dim,
        n_gates * hidden_dim,
        prev_state,
        biases=False,
        weightnorm=weightnorm
    )

    # Bias vector laid out as [b^i | b^f | b^o | b^g], each block filled with
    # its own initial value.
    bias_values = numpy.zeros((n_gates*hidden_dim,), dtype=theano.config.floatX)
    block_inits = (inp_bias_init, forget_bias_init, out_bias_init, g_bias_init)
    for block, init_value in enumerate(block_inits):
        bias_values[block*hidden_dim:(block+1)*hidden_dim] = init_value
    biases = lib.param(name + '.b', bias_values)

    pre_activations = input_term + recurrent_term + biases  # 4*dim

    # The three sigmoid gates share one call: [i | f | o]
    sigmoid_gates = T.nnet.sigmoid(pre_activations[:, :3*hidden_dim])  # 3*dim
    input_gate = sigmoid_gates[:, :hidden_dim]  # dim
    forget_gate = sigmoid_gates[:, hidden_dim:2*hidden_dim]  # dim
    output_gate = sigmoid_gates[:, 2*hidden_dim:]  # dim

    candidate = T.tanh(pre_activations[:, 3*hidden_dim:])  # dim

    # internal memory/cell state
    new_cell = prev_cell * forget_gate + candidate * input_gate  # dim
    # hidden state
    new_state = T.tanh(new_cell) * output_gate  # dim

    # TODO: Again, problem with concatenating tensors with (False, False)
    # broadcast pattern. If slow down as a result of transferring to CPU for
    # concatenation is not high, keep it this way.
    return T.concatenate([new_state, new_cell], axis=-1)  # 2*dim, axis=1

Esempio n. 54

0

Mostra file

File: linear.py Progetto: Faruk-Ahmed/nn

def Linear(
        name, 
        input_dim, 
        output_dim, 
        inputs,
        biases=True,
        initialization=None,
        weightnorm=None
        ):
    """
    Linear transform `inputs . W (+ b)` with optional weight normalisation.

    initialization: None, `lecun`, `glorot`, `he`, `glorot_he`, `orthogonal`,
        or `("uniform", range)`. None selects `orthogonal` when
        input_dim == output_dim and `lecun` otherwise.
    weightnorm: True/False, or None to use the module default
        (`_default_weightnorm`).
    biases: whether to add a learned bias of size output_dim.

    Raises Exception("Invalid initialization!") for any other value of
    `initialization`.
    """

    def uniform(stdev, size):
        # Uniform samples whose standard deviation is `stdev`.
        return np.random.uniform(
            low=-stdev * np.sqrt(3),
            high=stdev * np.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    def orthogonal(shape):
        # From lasagne
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are "
                               "supported.")
        flat_shape = (shape[0], np.prod(shape[1:]))
        # TODO: why normal and not uniform?
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)
        return q.astype(theano.config.floatX)

    # Only index into `initialization` once it is known to be a sequence;
    # otherwise values such as '' or an int would fail with IndexError /
    # TypeError instead of reaching the "Invalid initialization!" error.
    is_uniform_spec = (
        isinstance(initialization, (tuple, list))
        and len(initialization) >= 2
        and initialization[0] == 'uniform'
    )

    if initialization == 'lecun' or \
        (initialization is None and input_dim != output_dim):

        weight_values = uniform(np.sqrt(1./input_dim), (input_dim, output_dim))

    elif initialization == 'glorot':

        weight_values = uniform(np.sqrt(2./(input_dim+output_dim)), (input_dim, output_dim))

    elif initialization == 'he':

        weight_values = uniform(np.sqrt(2./input_dim), (input_dim, output_dim))

    elif initialization == 'glorot_he':

        weight_values = uniform(np.sqrt(4./(input_dim+output_dim)), (input_dim, output_dim))

    elif initialization == 'orthogonal' or \
        (initialization is None and input_dim == output_dim):

        weight_values = orthogonal((input_dim, output_dim))

    elif is_uniform_spec:

        weight_values = np.random.uniform(
            low=-initialization[1],
            high=initialization[1],
            size=(input_dim, output_dim)
        ).astype(theano.config.floatX)

    else:
        raise Exception("Invalid initialization!")

    weight = lib.param(
        name + '.W',
        weight_values
    )

    if weightnorm is None:
        weightnorm = _default_weightnorm
    if weightnorm:
        # Gain starts at the column norms of the initial matrix, so the
        # normalised weight initially equals the raw weight.
        norm_values = np.linalg.norm(weight_values, axis=0)
        norms = lib.param(
            name + '.g',
            norm_values
        )

        weight = weight * (norms / weight.norm(2, axis=0)).dimshuffle('x', 0)

    result = T.dot(inputs, weight)

    if biases:
        result = result + lib.param(
            name + '.b',
            np.zeros((output_dim,), dtype=theano.config.floatX)
        )

    # result = lib.debug.print_stats(name, result)
    return result

Esempio n. 55

0

Mostra file

File: my_two_tier.py Progetto: ritheshkumar95/speech

def frame_level_rnn(input_sequences, h0, reset):
    """
    Frame-level tier over embedded samples: three stacked GRUs followed by a
    Dense layer that expands each frame into FRAME_SIZE vectors.

    input_sequences.shape: (batch size, N_FRAMES * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, N_FRAMES * FRAME_SIZE, DIM)

    Returns a tuple (output, last_hidden) where last_hidden stacks the
    `[:, -1]` slice of every GRU output along axis 1.
    """
    batch_size = input_sequences.shape[0]
    # `//` makes the integer division explicit; `/` between integer tensors
    # relies on Theano's deprecated implicit int-division behaviour.
    n_frames = input_sequences.shape[1] // FRAME_SIZE

    # Every integer sample is looked up in a Q_LEVELS x Q_LEVELS table.
    emb = lib.ops.Embedding(
        'SampleLevel.Embedding',
        Q_LEVELS,
        Q_LEVELS,
        input_sequences
    )

    # Learned initial state, tiled over the batch. It replaces the supplied
    # h0 whenever `reset` is true.
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )

    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Group the embedded samples into frames: each frame is the
    # concatenation of FRAME_SIZE embeddings.
    frames = emb.reshape((
        batch_size,
        n_frames,
        FRAME_SIZE*Q_LEVELS
    ))

    gru1 = lib.ops.GRU('FrameLevel.GRU1', FRAME_SIZE*Q_LEVELS, DIM, frames, h0=h0[:, 0])
    gru2 = lib.ops.GRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.GRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])

    # gru3.shape = (batch_size,N_FRAMES,DIM)
    # Dense is applied to a 2D view (batch and frame axes merged); the
    # result is reshaped back below.
    output = lib.ops.Dense(
        'FrameLevel.Output', 
        DIM,
        FRAME_SIZE * DIM,
        gru3.reshape((gru3.shape[0]*gru3.shape[1],gru3.shape[2])),
        init='he'
    )
    output = output.reshape((batch_size, n_frames * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)

    return (output, last_hidden)

Esempio n. 56

0

Mostra file

File: one_tier.py Progetto: adrianEVI/sampleRNN_ICLR2017

def sample_level_rnn(input_sequences, h0, reset):
    """
    Sample-level model: embedding, a stacked GRU or LSTM (chosen by
    RNN_TYPE), three ReLU Linear layers and a final Linear readout.

    input_sequences.shape: (batch size, seq len)
    h0.shape:              (batch size, N_RNN, H0_MULT*DIM)
    reset.shape:           ()

    Returns a tuple (out, last_hidden):
      out:         result of the final Linear layer (DIM -> Q_LEVELS); the
                   softmax is applied by the caller.
      last_hidden: second value returned by the stacked RNN op.

    Raises ValueError if RNN_TYPE is neither 'GRU' nor 'LSTM'.
    """

    # Embedded inputs
    # Handling EMB_SIZE
    #################
    FRAME_SIZE = EMB_SIZE
    frames = lib.ops.Embedding(
        'SampleLevel.Embedding',
        Q_LEVELS,
        EMB_SIZE,
        input_sequences)

    # Real-valued inputs
    ####################
    # # 'frames' of size 1
    # FRAME_SIZE = 1
    # frames = input_sequences.reshape((
    #     input_sequences.shape[0],
    #     input_sequences.shape[1],
    #     1
    # ))
    # # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # # (a reasonable range to pass as inputs to the RNN)
    # frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    # frames *= lib.floatX(2)

    # Initial state of RNNs
    learned_h0 = lib.param(
        'SampleLevel.h0',
        numpy.zeros((N_RNN, H0_MULT*DIM), dtype=theano.config.floatX)
    )
    # Handling LEARN_H0
    learned_h0.param = LEARN_H0
    # Tile over the batch and drop any broadcastable flags before the state
    # goes into ifelse alongside h0.
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_RNN, H0_MULT*DIM)
    learned_h0 = T.unbroadcast(learned_h0, 0, 1, 2)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Handling RNN_TYPE
    # Handling SKIP_CONN
    if RNN_TYPE == 'GRU':
        rnns_out, last_hidden = lib.ops.stackedGRU('SampleLevel.GRU',
                                                   N_RNN,
                                                   FRAME_SIZE,
                                                   DIM,
                                                   frames,
                                                   h0=h0,
                                                   weightnorm=WEIGHT_NORM,
                                                   skip_conn=SKIP_CONN)
    elif RNN_TYPE == 'LSTM':
        rnns_out, last_hidden = lib.ops.stackedLSTM('SampleLevel.LSTM',
                                                    N_RNN,
                                                    FRAME_SIZE,
                                                    DIM,
                                                    frames,
                                                    h0=h0,
                                                    weightnorm=WEIGHT_NORM,
                                                    skip_conn=SKIP_CONN)
    else:
        # Without this branch an unknown RNN_TYPE would only surface further
        # down as an unbound `rnns_out`.
        raise ValueError(
            "Unknown RNN_TYPE: {!r} (expected 'GRU' or 'LSTM')".format(RNN_TYPE)
        )

    out = lib.ops.Linear(
        'SampleLevel.L1',
        DIM,
        DIM,
        rnns_out,
        initialization='he',
        weightnorm=WEIGHT_NORM
    )
    out = T.nnet.relu(out)

    out = lib.ops.Linear(
        'SampleLevel.L2',
        DIM,
        DIM,
        out,
        initialization='he',
        weightnorm=WEIGHT_NORM
    )
    out = T.nnet.relu(out)

    out = lib.ops.Linear(
        'SampleLevel.L3',
        DIM,
        DIM,
        out,
        initialization='he',
        weightnorm=WEIGHT_NORM
    )
    out = T.nnet.relu(out)

    # We apply the softmax later
    out = lib.ops.Linear(
        'SampleLevel.Output',
        DIM,
        Q_LEVELS,
        out,
        initialization='he',
        weightnorm=WEIGHT_NORM
    )

    return (out, last_hidden)

Esempio n. 57

0

Mostra file

File: conv.py Progetto: igul222/speech

def MaskedConv1D(name, input_dim, output_dim, filter_size, inputs, mask_type=None, he_init=False):
    """
    1D convolution along the last axis, implemented with conv2d on a
    height-1 image, with an optional mask on the filter taps.

    inputs.shape: (batch size, input_dim, 1, width)
    mask_type: None, 'a', 'b'
    output.shape: (batch size, output_dim, 1, width)
    """
    use_mask = mask_type is not None
    half = filter_size//2
    filter_shape = (output_dim, input_dim, 1, filter_size)

    if use_mask:
        # Zero every tap after the centre; type 'a' also zeroes the centre.
        mask = numpy.ones(filter_shape, dtype=theano.config.floatX)
        first_zeroed_tap = half if mask_type == 'a' else half + 1
        mask[:, :, 0, first_zeroed_tap:] = 0.

    # Number of unmasked taps per input channel decides the fan-in.
    taps_by_mask = {'a': half, 'b': half + 1}
    n_in = taps_by_mask.get(mask_type, filter_size) * input_dim

    init_stdev = numpy.sqrt((2. if he_init else 1.) / n_in)

    # Uniform distribution with standard deviation init_stdev.
    bound = init_stdev * numpy.sqrt(3)
    initial_filters = numpy.random.uniform(
        low=-bound,
        high=bound,
        size=filter_shape
    ).astype(theano.config.floatX)
    filters = lib.param(name+'.Filters', initial_filters)

    if use_mask:
        filters = filters * mask

    # TODO benchmark against the lasagne 'conv1d' implementations
    result = T.nnet.conv2d(inputs, filters, filter_flip=False, border_mode='half')

    if use_mask:
        # Trim the result back to the input width.
        result = result[:, :, :, :inputs.shape[3]]

    biases = lib.param(
        name+'.Biases',
        numpy.zeros(output_dim, dtype=theano.config.floatX)
    )
    result = result + biases[None, :, None, None]

    return result

Esempio n. 58

0

Mostra file

File: conv_two_tier.py Progetto: ritheshkumar95/speech

def encoder_decoder(input_sequences, h0, reset):
    """
    Convolutional encoder -> three stacked GRUs -> convolutional decoder with
    two skip connections.

    input_sequences.shape: (batch size, N_FRAMES * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()

    Returns a tuple (output, last_hidden):
      output:      the decoder result with its first two axes merged
                   (reshaped to (-1, channels)).
      last_hidden: the `[:, -1]` slice of every GRU output, stacked on axis 1.

    NOTE(review): several sizes below (128, 64, 256, 968) are hard-coded and
    presumably tied to one specific input length -- confirm before reuse.
    """
    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    
    X1 = ((input_sequences.astype(theano.config.floatX)/lib.floatX(Q_LEVELS/2)) - lib.floatX(1))*lib.floatX(2)
    # Add singleton channel and height axes: (batch, 1, 1, time)
    X1 = X1[:,None,None,:]

    # Encoder: two conv layers, pool, two conv layers, pool.
    X2 = T.nnet.relu(lib.ops.conv1d('conv1',X1,kernel=4,stride=1,n_filters=128,depth=1))
    X3 = T.nnet.relu(lib.ops.conv1d('conv2',X2,kernel=6,stride=1,n_filters=64,depth=128))
    X4 = lib.ops.pool(X3) #(batch_size,256,1,62)

    X5 = T.nnet.relu(lib.ops.conv1d('conv3',X4,kernel=4,stride=1,n_filters=128,depth=256))
    X6 = T.nnet.relu(lib.ops.conv1d('conv4',X5,kernel=4,stride=1,n_filters=128,depth=128))

    X7 = lib.ops.pool(X6)

    # Learned initial state, tiled over the batch. It replaces the supplied
    # h0 whenever `reset` is true.
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )

    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Drop the height axis and move channels last for the GRUs.
    gru_inp = X7[:,:,0,:].dimshuffle(0,2,1)
    gru1 = lib.ops.myGRU('FrameLevel.GRU1', DIM, DIM, gru_inp, h0=h0[:, 0])
    gru2 = lib.ops.myGRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.myGRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])

    # Back to (batch, channels, 1, time) for the decoder.
    X8 = gru3.transpose(0,2,1)[:,:,None,:]
    X9 = lib.ops.upsample(X8)

    # Skip connection 1: from X6 (encoder) into the decoder
    X10 = X9 + lib.ops.Dense(
        'SkipConnection1',
        128,
        128,
        X6[:,:,0,:].transpose(0,2,1),
        init='he',
        hidden_dim=X6.shape[3]
        ).transpose(0,2,1)[:,:,None,:]

    X11 = T.nnet.relu(lib.ops.conv1d('deconv1',X10,kernel=4,stride=1,n_filters=128,depth=128))
    X12 = T.nnet.relu(lib.ops.conv1d('deconv2',X11,kernel=4,stride=1,n_filters=128,depth=128))
    X13 = lib.ops.upsample(X12)

    #x3.shape (212,64)
    # Skip connection 2: from X3 (encoder), truncated to the first 968 steps
    X14 = X13 + lib.ops.Dense(
        'SkipConnection2',
        64,
        32,
        X3[:,:,0,:].transpose(0,2,1)[:,:968],
        hidden_dim=968
        ).transpose(0,2,1)[:,:,None,:]

    X15 = T.nnet.relu(lib.ops.conv1d('deconv3',X14,kernel=4,stride=1,n_filters=128,depth=32))
    X16 = T.nnet.relu(lib.ops.conv1d('deconv4',X15,kernel=4,stride=1,n_filters=256,depth=128))

    ##194

    output = X16[:,:,0,:].transpose(0,2,1)
    last_hidden = T.stack([gru1[:,-1],gru2[:,-1],gru3[:,-1]],axis=1)

    return (output.reshape((-1,output.shape[2])), last_hidden)

Esempio n. 59

0

Mostra file

File: ops.py Progetto: kylemcdonald/speech

def Linear(
        name,
        input_dims,
        output_dim,
        inputs,
        biases=True,
        initialization='lecun',
        weightnorm=True
        ):
    """
    Compute a linear transform of one or more inputs, optionally with a bias.

    Every input gets its own weight matrix of shape (input_dim, output_dim);
    the per-input products are summed, and a single bias vector is added on
    top when `biases` is true.

    name:       prefix for the parameter names created through `lib.param`
                (`name.W<i>`, `name.g<i>` when weightnorm is on, `name.b`)
                and for the name given to the output variable.
    input_dims: list of ints, or int (if single input); the dimensionality of
                the input(s).
    output_dim: the dimensionality of the output.
    inputs:     a theano variable, or list of variables (if multiple inputs);
                the inputs to which to apply the transform. When `input_dims`
                is not a list, `inputs` is treated as a single variable.
    biases:     whether or not to include a bias term.
    initialization: one of `lecun`, `he`. `None` is treated like `lecun`, but
                only for inputs whose dimensionality differs from
                `output_dim`; otherwise it is rejected.
    weightnorm: whether to use Weight Normalization (Salimans, Kingma 2016)

    Returns the sum of all terms, with its `name` attribute set to
    `name + '.output'`.

    Raises ValueError if `initialization` is not one of the accepted values.
    """
    # `reduce` is a builtin only on Python 2; importing it from functools
    # works on both Python 2.6+ and Python 3.
    import functools
    import operator

    def uniform(stdev, size):
        """uniform distribution with the given stdev and size"""
        # A uniform distribution on [-a, a] has standard deviation a / sqrt(3),
        # hence the sqrt(3) scaling of the bounds.
        return numpy.random.uniform(
            low=-stdev * numpy.sqrt(3),
            high=stdev * numpy.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    # Normalise the single-input call form to the list form.
    if not isinstance(input_dims, list):
        input_dims = [input_dims]
        inputs = [inputs]

    terms = []

    for i, (inp, inp_dim) in enumerate(zip(inputs, input_dims)):

        if initialization == 'lecun' or (initialization is None and inp_dim != output_dim):
            weight_values = uniform(numpy.sqrt(1. / inp_dim), (inp_dim, output_dim))
        elif initialization == 'he':
            weight_values = uniform(numpy.sqrt(2. / inp_dim), (inp_dim, output_dim))
        else:
            raise ValueError("Invalid initialization!")

        weight = lib.param(
            name + '.W'+str(i),
            weight_values
        )

        if weightnorm:
            # One gain per output unit, initialised to the norm of the
            # corresponding initial weight column, so that the normalised
            # weight starts out equal to the raw initial weight.
            norm_values = numpy.linalg.norm(weight_values, axis=0)
            norms = lib.param(
                name + '.g'+str(i),
                norm_values
            )
            # Rescale every column of `weight` to have norm `norms`.
            normed_weight = weight * (norms / weight.norm(2, axis=0)).dimshuffle('x', 0)
            terms.append(T.dot(inp, normed_weight))
        else:
            terms.append(T.dot(inp, weight))

    if biases:
        terms.append(lib.param(
            name + '.b',
            numpy.zeros((output_dim,), dtype=theano.config.floatX)
        ))

    # Left-to-right sum of all terms (raises TypeError if there are none,
    # exactly as the original builtin `reduce` call did).
    out = functools.reduce(operator.add, terms)
    out.name = name + '.output'
    return out