def Conv1D(name, input_dim, output_dim, filter_size, inputs, biases=True):
    """
    1-D convolution along the last axis, built on top of T.nnet.conv2d.

    inputs.shape: (batch size, input_dim, height)
    output.shape: (batch size, output_dim, height)

    * performs valid convs

    name: prefix of the parameters created through lib.param
        ('<name>.Filters' and, if `biases` is true, '<name>.Biases').
    filter_size: filter length along the convolved axis.
    biases: when true, add a zero-initialised bias per output channel.
    """
    # A uniform distribution on [-a, a] has standard deviation a / sqrt(3),
    # so scaling the bound by sqrt(3) yields the requested stdev.
    stdev = 1. / np.sqrt(input_dim * filter_size)
    bound = stdev * np.sqrt(3)
    filter_shape = (output_dim, input_dim, filter_size, 1)
    initial_filters = np.random.uniform(
        low=-bound,
        high=bound,
        size=filter_shape
    ).astype(theano.config.floatX)
    filters = lib.param(name + '.Filters', initial_filters)

    # Add a trailing broadcastable axis so the 2-D op can be used, then
    # remove it again after the convolution.
    inputs_4d = inputs.dimshuffle(0, 1, 2, 'x')
    conv_out = T.nnet.conv2d(inputs_4d, filters, border_mode='valid')
    conv_out = T.addbroadcast(conv_out, 3)
    result = conv_out.dimshuffle(0, 1, 2)

    if not biases:
        return result

    bias_values = lib.param(
        name + '.Biases',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )
    return result + bias_values[None, :, None]
def Conv1D(name, input_dim, output_dim, filter_size, inputs, biases=True):
    """
    Valid 1-D convolution implemented with a 2-D convolution op.

    inputs.shape: (batch size, input_dim, height)
    output.shape: (batch size, output_dim, height)

    * performs valid convs

    Parameters are created through lib.param under '<name>.Filters' and,
    when `biases` is true, '<name>.Biases' (initialised to zeros).
    """
    def _scaled_uniform(stdev, shape):
        # Bounds of +/- stdev*sqrt(3) give a uniform law with that stdev.
        limit = stdev * np.sqrt(3)
        samples = np.random.uniform(low=-limit, high=limit, size=shape)
        return samples.astype(theano.config.floatX)

    fan_in = input_dim * filter_size
    filters = lib.param(
        name + '.Filters',
        _scaled_uniform(
            1. / np.sqrt(fan_in),
            (output_dim, input_dim, filter_size, 1)
        )
    )

    # conv2d needs 4-D input: append a dummy axis, convolve, drop it again.
    result = T.nnet.conv2d(
        inputs.dimshuffle(0, 1, 2, 'x'),
        filters,
        border_mode='valid'
    )
    result = T.addbroadcast(result, 3).dimshuffle(0, 1, 2)

    if biases:
        bias_param = lib.param(
            name + '.Biases',
            np.zeros(output_dim, dtype=theano.config.floatX)
        )
        result = result + bias_param[None, :, None]

    return result
def myGRU(name, input_dim, hidden_dim, inputs, h0=None):
    """
    Run `recurrent_fn` over the time axis of `inputs` with theano.scan.

    inputs.shape: (batch_size, N_FRAMES, FRAME_SIZE)

    Creates four parameters through lib.param: '<name>.Gates.W' /
    '<name>.Gates.b' (bias initialised to ones) and '<name>.Candidate.W' /
    '<name>.Candidate.b' (bias initialised to zeros).

    h0: initial state handed to scan as `outputs_info=[h0]`.
    NOTE(review): `h0` defaults to None and is passed to scan unchanged;
    the step function always expects a previous state - confirm callers
    always supply one.

    Returns the scan outputs with the first two axes swapped back, named
    '<name>.output'.
    """
    def _glorot(shape):
        return lasagne.init.GlorotUniform().sample(shape)

    # scan iterates over the leading axis, so move time to the front
    time_major = inputs.transpose(1, 0, 2)

    joint_dim = input_dim + hidden_dim
    W1 = lib.param(name + '.Gates.W', _glorot((joint_dim, 2 * hidden_dim)))
    b1 = lib.param(
        name + '.Gates.b',
        np.ones(2 * hidden_dim).astype(theano.config.floatX)
    )
    W2 = lib.param(name + '.Candidate.W', _glorot((joint_dim, hidden_dim)))
    b2 = lib.param(
        name + '.Candidate.b',
        np.zeros(hidden_dim).astype(theano.config.floatX)
    )

    def step(x_t, h_tm1):
        return recurrent_fn(
            x_t, h_tm1, name, input_dim, hidden_dim, W1, b1, W2, b2
        )

    hidden_states, _ = theano.scan(
        step,
        sequences=[time_major],
        outputs_info=[h0],
    )

    # back to batch-major layout
    out = hidden_states.dimshuffle(1, 0, 2)
    out.name = name + '.output'
    return out
def conv1d(name, input, kernel, stride, n_filters, depth, bias=False,
           batchnorm=False, pad='valid', filter_dilation=(1, 1), run_mode=0):
    """
    Convolution along the third axis of a 4-D input via T.nnet.conv2d.

    The filter bank has shape (n_filters, depth, kernel, 1), is drawn from
    lasagne's HeNormal initialiser and is created through lib.param as
    '<name>.W'.

    stride: subsampling factor along the convolved axis (the other axis
        uses 1).
    bias: when true, add a zero-initialised '<name>.b' per filter.
    batchnorm: when true, pass the result through BatchNorm with mode=1.
    pad: forwarded to conv2d as `border_mode`.
    filter_dilation: forwarded to conv2d unchanged.
    run_mode: forwarded to BatchNorm.
    """
    filter_shape = (n_filters, depth, kernel, 1)
    initial_W = lasagne.init.HeNormal().sample(filter_shape).astype('float32')
    W = lib.param(name + '.W', initial_W)

    out = T.nnet.conv2d(
        input,
        W,
        subsample=(stride, 1),
        border_mode=pad,
        filter_dilation=filter_dilation
    )

    if bias:
        b = lib.param(name + '.b', np.zeros(n_filters).astype('float32'))
        out = out + b[None, :, None, None]

    if batchnorm:
        out = BatchNorm(name, out, n_filters, mode=1, run_mode=run_mode)

    return out
def Deconv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True):
    """
    Stride-2 deconvolution (via `_deconv2d`) followed by a per-channel bias.

    inputs: tensor of shape (batch size, num channels, height, width)
    returns: tensor of shape (batch size, num channels, 2*height, 2*width)

    name: prefix of the parameters created through lib.param
        ('<name>.Filters' and '<name>.Biases').
    filter_size: side length of the square filters.
    he_init: when true, the filter stdev is multiplied by sqrt(2).
    """
    def uniform(stdev, size):
        # Uniform on [-stdev*sqrt(3), stdev*sqrt(3)] has standard deviation
        # `stdev`.
        return np.random.uniform(
            low=-stdev * np.sqrt(3),
            high=stdev * np.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    filters_stdev = np.sqrt(1. / (input_dim * filter_size**2))
    if he_init:
        filters_stdev *= np.sqrt(2.)

    filters = lib.param(
        name + '.Filters',
        uniform(
            filters_stdev,
            (input_dim, output_dim, filter_size, filter_size)
        )
    )

    biases = lib.param(
        name + '.Biases',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )

    # Floor division keeps the padding an int under Python 3 too; plain `/`
    # would yield a float there.
    pad = (filter_size - 1) // 2

    result = _deconv2d(
        inputs,
        filters,
        subsample=(2, 2),
        border_mode=(pad, pad),
    )

    return result + biases[None, :, None, None]
def conv1d(name, input, input_dim, output_dim, filter_size, init='glorot',
           non_linearity='relu', bias=True):
    """
    Valid 1-D convolution over the second axis of a 3-D input.

    The input is dimshuffled to (0, 2, 1, 'x') before T.nnet.conv2d and the
    result is dimshuffled back, so the channel axis is last on the way in
    and on the way out.

    init: 'glorot' (GlorotUniform) or 'he' (HeUniform); any other value
        raises NotImplementedError.
    non_linearity: 'gated', 'relu', 'elu' or 'identity'; any other value
        raises NotImplementedError. 'gated' doubles the number of filters.
    bias: when true, add a zero-initialised '<name>.b' per filter.

    NOTE(review): the activation selected from `non_linearity` is never
    applied - the raw convolution output is returned. That behaviour is
    preserved here; confirm whether callers rely on it.

    :author: Kundan Kumar (http://github.com/kundan2510)
    """
    import lasagne

    inp = input.dimshuffle(0, 2, 1, 'x')

    if init == 'glorot':
        initializer = lasagne.init.GlorotUniform()
    elif init == 'he':
        initializer = lasagne.init.HeUniform()
    else:
        # Previously an unknown `init` surfaced later as a NameError on
        # `initializer`; fail with an explicit message instead.
        raise NotImplementedError(
            "{} initialization not implemented!".format(init))

    if non_linearity == 'gated':
        num_filters = 2 * output_dim
    else:
        num_filters = output_dim

    W_shape = (num_filters, input_dim, filter_size, 1)
    W = lib.param(name + ".W", initializer.sample(W_shape))

    if bias:
        bias_shape = (num_filters, )
        b = lib.param(name + ".b",
                      lasagne.init.Constant(0.).sample(bias_shape))

    conv_out = T.nnet.conv2d(inp, W, filter_flip=False, border_mode='valid')

    if bias:
        conv_out = conv_out + b[None, :, None, None]

    # Kept for its validation of `non_linearity`; see NOTE(review) above.
    if non_linearity == 'gated':
        activation = gated_non_linerity
    elif non_linearity == 'relu':
        activation = T.nnet.relu
    elif non_linearity == 'elu':
        activation = lambda x: T.switch(x >= 0., x, T.exp(x) - floatX(1.))
    elif non_linearity == 'identity':
        activation = lambda x: x
    else:
        raise NotImplementedError(
            "{} non-linearity not implemented!".format(non_linearity))

    # Drop the trailing dummy axis and restore the channel-last layout.
    output = conv_out
    output = output.reshape(
        (output.shape[0], output.shape[1], output.shape[2]))
    output = output.dimshuffle(0, 2, 1)

    return output
def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True,
    weightnorm=None,
):
    """
    Stride-2 deconvolution (via `_deconv2d`) with optional weight norm.

    inputs: tensor of shape (batch size, num channels, height, width)
    returns: tensor of shape (batch size, num channels, 2*height, 2*width)

    he_init: when true, the filter stdev is multiplied by sqrt(2).
    weightnorm: None falls back to the module-level `_default_weightnorm`;
        when truthy, the filters are rescaled so the norm over axes
        (0, 2, 3) equals the learned parameter '<name>.g'.
    """
    def uniform(stdev, size):
        return np.random.uniform(
            low=-stdev * np.sqrt(3),
            high=stdev * np.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    filters_stdev = np.sqrt(1. / (input_dim * filter_size**2))
    filters_stdev *= 2.  # Because of the stride
    if he_init:
        filters_stdev *= np.sqrt(2.)

    filter_values = uniform(
        filters_stdev,
        (input_dim, output_dim, filter_size, filter_size)
    )

    filters = lib.param(
        name + '.Filters',
        filter_values
    )

    # Identity test instead of `== None`.
    if weightnorm is None:
        weightnorm = _default_weightnorm
    if weightnorm:
        # '<name>.g' starts at the norm of the initial filters, so the
        # rescaling below is initially a no-op.
        norm_values = np.sqrt(
            np.sum(np.square(filter_values), axis=(0, 2, 3)))
        norms = lib.param(
            name + '.g',
            norm_values
        )
        current_norms = T.sqrt(T.sum(T.sqr(filters), axis=(0, 2, 3)))
        filters = filters * (norms / current_norms).dimshuffle(
            'x', 0, 'x', 'x')

    biases = lib.param(
        name + '.Biases',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )

    # Floor division keeps the padding an int under Python 3 too.
    pad = (filter_size - 1) // 2

    result = _deconv2d(
        inputs,
        filters,
        subsample=(2, 2),
        border_mode=(pad, pad),
    )

    result = result + biases[None, :, None, None]
    # result = lib.debug.print_stats(name, result)
    return result
def Conv2D(name, input_dim, output_dim, filter_size, inputs, mask_type=None,
           he_init=False):
    """
    'half'-padded 2-D convolution with optional masking of the filters.

    inputs.shape: (batch size, height, width, input_dim)
    mask_type: None, 'a', 'b'
    output.shape: (batch size, height, width, output_dim)

    he_init: when true, the initial filters are scaled by sqrt(2).
    When `mask_type` is given the initial filters are scaled by sqrt(2)
    as well, and the filters are multiplied by a constant 0/1 mask that
    depends on the module-level N_CHANNELS.
    """
    def uniform(stdev, size):
        """uniform distribution with the given stdev and size"""
        return numpy.random.uniform(
            low=-stdev * numpy.sqrt(3),
            high=stdev * numpy.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    filters_init = uniform(
        1. / numpy.sqrt(input_dim * filter_size * filter_size),
        # output dim, input dim, height, width
        (output_dim, input_dim, filter_size, filter_size)
    )

    if he_init:
        filters_init *= lib.floatX(numpy.sqrt(2.))

    if mask_type is not None:
        filters_init *= lib.floatX(numpy.sqrt(2.))

    filters = lib.param(name + '.Filters', filters_init)

    if mask_type is not None:
        mask = numpy.ones(
            (output_dim, input_dim, filter_size, filter_size),
            dtype=theano.config.floatX
        )
        center = filter_size // 2

        # Zero every row below the centre row, and everything right of the
        # centre in the centre row (slice form of the original loops).
        mask[:, :, center + 1:, :] = 0.
        mask[:, :, center, center + 1:] = 0.

        # Channel ordering at the centre tap: type 'a' also blocks the
        # connection from a channel to itself, type 'b' keeps it.
        for i in range(N_CHANNELS):
            for j in range(N_CHANNELS):
                if ((mask_type == 'a' and i >= j) or
                        (mask_type == 'b' and i > j)):
                    mask[j::N_CHANNELS, i::N_CHANNELS, center, center] = 0.

        filters = filters * mask

    # conv2d takes inputs as (batch size, input channels, height, width)
    inputs = inputs.dimshuffle(0, 3, 1, 2)
    result = T.nnet.conv2d(inputs, filters, border_mode='half',
                           filter_flip=False)

    biases = lib.param(
        name + '.Biases',
        numpy.zeros(output_dim, dtype=theano.config.floatX)
    )
    result = result + biases[None, :, None, None]

    # back to channels-last
    return result.dimshuffle(0, 2, 3, 1)
def Batchnorm(name, input_dim, inputs, stepwise=False, axes=None, wrt=None,
              i_gamma=None, i_beta=None):
    """
    Batch normalisation using statistics of the current batch only.

    From Ishaan's repo

    input_dim: size of the normalised feature axis; gamma and beta are
        vectors of this length created through lib.param.
    stepwise: currently unused (the code that read it is disabled).
    axes: axes to take the mean/variance over. When None, `wrt` is
        reshaped to (-1, input_dim) and statistics are taken over axis 0.
    wrt: tensor to compute the statistics from; defaults to `inputs`.
    i_gamma / i_beta: initial values for gamma / beta; default to 0.1 and
        0 for every feature.
    """
    if wrt is None:
        wrt = inputs

    if axes is not None:
        means = wrt.mean(axis=axes, keepdims=True)
        variances = wrt.var(axis=axes, keepdims=True)
    else:
        flat = wrt.reshape((-1, input_dim))
        means = flat.mean(axis=0)
        variances = flat.var(axis=0)

    if i_gamma is None:
        i_gamma = lib.floatX(0.1) * numpy.ones(
            input_dim, dtype=theano.config.floatX)
    if i_beta is None:
        i_beta = numpy.zeros(input_dim, dtype=theano.config.floatX)

    gamma = lib.param(name + '.gamma', i_gamma)
    beta = lib.param(name + '.beta', i_beta)

    # small constant keeps the square root away from zero
    stdevs = T.sqrt(variances + lib.floatX(1e-6))

    stdevs.name = name + '.stdevs'
    means.name = name + '.means'

    if axes is not None:
        # Broadcast gamma/beta over every reduced axis. `range` behaves
        # like the former `xrange` here and also exists on Python 3.
        dimshuffle_pattern = [
            'x' if i in axes else 0 for i in range(inputs.ndim)
        ]
        return T.nnet.bn.batch_normalization(
            inputs,
            gamma.dimshuffle(*dimshuffle_pattern),
            beta.dimshuffle(*dimshuffle_pattern),
            means,
            stdevs,
            mode='low_mem'
        )

    return T.nnet.bn.batch_normalization(
        inputs,
        gamma.dimshuffle('x', 0),
        beta.dimshuffle('x', 0),
        means.dimshuffle('x', 0),
        stdevs.dimshuffle('x', 0),
        mode='low_mem'
    )
def DilatedConv1D(name, input_dim, output_dim, filter_size, inputs, dilation,
                  mask_type=None, apply_biases=True):
    """
    Dilated, 'half'-padded 1-D convolution with optional masking.

    inputs.shape: (batch size, length, input_dim)
    mask_type: None, 'a', 'b'
    output.shape: (batch size, length, output_dim)

    dilation: dilation factor along the length axis.
    apply_biases: when true, add a zero-initialised '<name>.Biases'.

    When `mask_type` is not None, the initial filters are scaled by sqrt(2)
    and all taps after the centre tap are zeroed. 'a' and 'b' are treated
    identically here (the 'a'-specific masking is disabled).
    """
    def uniform(stdev, size):
        """uniform distribution with the given stdev and size"""
        return numpy.random.uniform(
            low=-stdev * numpy.sqrt(3),
            high=stdev * numpy.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    filters_init = uniform(
        1. / numpy.sqrt(input_dim * filter_size),
        # output dim, input dim, height, width
        (output_dim, input_dim, filter_size, 1)
    )

    if mask_type is not None:
        filters_init *= lib.floatX(numpy.sqrt(2.))

    filters = lib.param(name + '.Filters', filters_init)

    if mask_type is not None:
        mask = numpy.ones(
            (output_dim, input_dim, filter_size, 1),
            dtype=theano.config.floatX
        )
        center = filter_size // 2
        # Slice form of the former xrange loop over taps i > center.
        mask[:, :, center + 1:, :] = 0.
        filters = filters * mask

    inputs = inputs.reshape(
        (inputs.shape[0], inputs.shape[1], 1, inputs.shape[2]))
    # conv2d takes inputs as (batch size, input channels, height[?], width[?])
    inputs = inputs.dimshuffle(0, 3, 1, 2)
    result = T.nnet.conv2d(
        inputs,
        filters,
        border_mode='half',
        filter_flip=False,
        filter_dilation=(dilation, 1)
    )

    if apply_biases:
        biases = lib.param(
            name + '.Biases',
            numpy.zeros(output_dim, dtype=theano.config.floatX)
        )
        result = result + biases[None, :, None, None]

    # back to channels-last, then drop the dummy width axis
    result = result.dimshuffle(0, 2, 3, 1)
    return result.reshape(
        (result.shape[0], result.shape[1], result.shape[3]))
def DiagonalLSTM(name, input_dim, inputs):
    """
    LSTM scanned over the third axis of the skewed input.

    inputs.shape: (batch size, height, width, input_dim)
    outputs.shape: (batch size, height, width, DIM)

    The input is passed through `Skew`, projected to 4*DIM gate
    pre-activations with a 1x1 'b'-masked Conv2D, scanned with the step
    function below, and the hidden states are passed through `Unskew`.
    Initial cell and hidden states are learned parameters '<name>.c0' and
    '<name>.h0' of shape (HEIGHT, DIM).
    """
    inputs = Skew(inputs)

    input_to_state = Conv2D(name + '.InputToState', input_dim, 4 * DIM, 1,
                            inputs, mask_type='b')

    batch_size = inputs.shape[0]

    c0_unbatched = lib.param(
        name + '.c0',
        numpy.zeros((HEIGHT, DIM), dtype=theano.config.floatX)
    )
    c0 = T.alloc(c0_unbatched, batch_size, HEIGHT, DIM)

    h0_unbatched = lib.param(
        name + '.h0',
        numpy.zeros((HEIGHT, DIM), dtype=theano.config.floatX)
    )
    h0 = T.alloc(h0_unbatched, batch_size, HEIGHT, DIM)

    def step_fn(current_input_to_state, prev_c, prev_h):
        # all args have shape (batch size, height, DIM)

        # Prepend one zero row along the height axis before the size-2
        # state-to-state convolution.
        # TODO consider learning this padding
        prev_h = T.concatenate(
            [T.zeros((batch_size, 1, DIM), theano.config.floatX), prev_h],
            axis=1
        )
        state_to_state = Conv1D(name + '.StateToState', DIM, 4 * DIM, 2,
                                prev_h, apply_biases=False)

        gates = current_input_to_state + state_to_state

        # Gate layout along the last axis: [o | f | i | g]
        o_f_i = T.nnet.sigmoid(gates[:, :, :3 * DIM])
        o = o_f_i[:, :, 0 * DIM:1 * DIM]
        f = o_f_i[:, :, 1 * DIM:2 * DIM]
        i = o_f_i[:, :, 2 * DIM:3 * DIM]
        g = T.tanh(gates[:, :, 3 * DIM:4 * DIM])

        new_c = (f * prev_c) + (i * g)
        new_h = o * T.tanh(new_c)

        return (new_c, new_h)

    outputs, _ = theano.scan(
        step_fn,
        sequences=input_to_state.dimshuffle(2, 0, 1, 3),
        outputs_info=[c0, h0]
    )

    # Only the hidden states are returned; the unused cell-state tensor
    # that used to be built here has been dropped.
    all_hs = outputs[1].dimshuffle(1, 2, 0, 3)

    return Unskew(all_hs)
def conv1d(name, input, kernel, stride, n_filters, depth, bias=False):
    """
    Convolution along the last axis of a 4-D input via T.nnet.conv2d.

    The filter bank has shape (n_filters, depth, 1, kernel), is initialised
    with `glorot_uniform` and created through lib.param as '<name>.W'.
    Filters are not flipped (filter_flip=False).

    stride: subsampling factor along the last axis.
    bias: when true, add a zero-initialised '<name>.b' per filter. When
        false the plain convolution is returned (previously this path
        raised a NameError because `b` was never defined).
    """
    W = lib.param(
        name + '.W',
        glorot_uniform((n_filters, depth, 1, kernel)).astype('float32')
    )

    out = T.nnet.conv2d(input, W, filter_flip=False, subsample=(1, stride))

    if bias:
        b = lib.param(name + '.b', np.zeros(n_filters).astype('float32'))
        out = out + b[None, :, None, None]

    return out
def DiagonalLSTM(name, input_dim, inputs):
    """
    LSTM scanned over the third axis of the skewed input.

    inputs.shape: (batch size, height, width, input_dim)
    outputs.shape: (batch size, height, width, DIM)

    Pipeline: Skew -> 1x1 'b'-masked Conv2D to 4*DIM gate pre-activations
    -> theano.scan over the step function -> Unskew of the hidden states.
    The initial states '<name>.c0' and '<name>.h0' are learned parameters
    of shape (HEIGHT, DIM), tiled over the batch.
    """
    inputs = Skew(inputs)

    input_to_state = Conv2D(
        name + '.InputToState', input_dim, 4 * DIM, 1, inputs, mask_type='b'
    )

    batch_size = inputs.shape[0]

    def _initial_state(suffix):
        # learned (HEIGHT, DIM) state, tiled over the batch
        unbatched = lib.param(
            name + suffix,
            numpy.zeros((HEIGHT, DIM), dtype=theano.config.floatX)
        )
        return T.alloc(unbatched, batch_size, HEIGHT, DIM)

    c0 = _initial_state('.c0')
    h0 = _initial_state('.h0')

    def step_fn(current_input_to_state, prev_c, prev_h):
        # all args have shape (batch size, height, DIM)

        # TODO consider learning this padding
        zero_row = T.zeros((batch_size, 1, DIM), theano.config.floatX)
        prev_h = T.concatenate([zero_row, prev_h], axis=1)
        state_to_state = Conv1D(
            name + '.StateToState', DIM, 4 * DIM, 2, prev_h,
            apply_biases=False
        )

        gates = current_input_to_state + state_to_state

        # Gate layout along the last axis: [o | f | i | g]
        o_f_i = T.nnet.sigmoid(gates[:, :, :3 * DIM])
        o = o_f_i[:, :, 0 * DIM:1 * DIM]
        f = o_f_i[:, :, 1 * DIM:2 * DIM]
        i = o_f_i[:, :, 2 * DIM:3 * DIM]
        g = T.tanh(gates[:, :, 3 * DIM:4 * DIM])

        new_c = (f * prev_c) + (i * g)
        new_h = o * T.tanh(new_c)

        return (new_c, new_h)

    outputs, _ = theano.scan(
        step_fn,
        sequences=input_to_state.dimshuffle(2, 0, 1, 3),
        outputs_info=[c0, h0]
    )

    # The cell states (outputs[0]) are not needed by the caller, so the
    # unused `all_cs` tensor is no longer constructed.
    all_hs = outputs[1].dimshuffle(1, 2, 0, 3)

    return Unskew(all_hs)
def DilatedConv2D(name, input_shape, output_dim, filter_size, inputs,
                  he_init=True, dilation=(1, 1)):
    """
    Dilated 2-D convolution built on lasagne's DilatedConv2DLayer.

    input_shape: 4-tuple; index 1 is taken as the number of input channels
        and indices 2 and 3 are the spatial sizes.
    output_dim: number of filters.
    filter_size: side length of the square filters.
    he_init: when true use stdev sqrt(2 / fan_in); otherwise the
        normalized init of Glorot & Bengio, sqrt(2 / (fan_in + fan_out)).
    dilation: forwarded to DilatedConv2DLayer.

    Parameters '<name>.W' and '<name>.b' are created through lib.param and
    handed to the layer.
    """
    input_dim = input_shape[1]

    def uniform(stdev, size):
        return np.random.uniform(
            low=-stdev * np.sqrt(3),
            high=stdev * np.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    fan_in = input_dim * filter_size**2
    fan_out = output_dim * filter_size**2

    if he_init:
        filters_stdev = np.sqrt(2. / fan_in)
    else:  # Normalized init (Glorot & Bengio)
        filters_stdev = np.sqrt(2. / (fan_in + fan_out))

    W = lib.param(
        name + '.W',
        uniform(
            filters_stdev,
            (input_dim, output_dim, filter_size, filter_size)
        )
    )

    b = lib.param(
        name + '.b',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )

    # Manually apply 'same' padding beforehand.
    # Floor division keeps `pad` an int under Python 3 as well.
    pad = (filter_size - 1) // 2
    # NOTE(review): the padding call below receives a single int width; if
    # that pads both ends of each spatial axis, the declared spatial sizes
    # should grow by 2 * pad rather than pad - confirm against lasagne's
    # padding docs. Left unchanged.
    input_shape = (input_shape[0], input_shape[1],
                   input_shape[2] + pad, input_shape[3] + pad)
    inputs = lasagne.theano_extensions.padding.pad(
        inputs, width=pad, batch_ndim=2)

    layer = lasagne.layers.DilatedConv2DLayer(
        input_shape,
        output_dim,
        filter_size,
        dilation=dilation,
        pad=0,
        untie_biases=False,
        W=W,
        b=b,
        nonlinearity=None,
        flip_filters=False,
    )

    # NOTE(review): this relies on the layer object being callable -
    # verify against the installed lasagne version.
    return layer(inputs)
def Conv1D(name, input_dim, output_dim, filter_size, inputs, he_init=True,
           biases=True, stride=1, border_mode='half'):
    """
    Strided 1-D convolution implemented with T.nnet.conv2d.

    inputs.shape: (batch size, input_dim, height)
    output.shape: (batch size, output_dim, height)

    he_init: selects stdev sqrt(4 / (fan_in + fan_out)); otherwise the
        normalized init sqrt(2 / (fan_in + fan_out)) is used. fan_out is
        divided by `stride` first.
    biases: when true, add a zero-initialised '<name>.Biases'.
    stride: subsampling factor along the convolved axis.
    border_mode: forwarded to conv2d.
    """
    fan_in = input_dim * filter_size
    fan_out = output_dim * filter_size
    fan_out /= stride

    # Normalized init (Glorot & Bengio) uses a numerator of 2; the he_init
    # variant doubles it.
    numerator = 4. if he_init else 2.
    filters_stdev = np.sqrt(numerator / (fan_in + fan_out))

    # Uniform on [-limit, limit] has standard deviation limit / sqrt(3).
    limit = filters_stdev * np.sqrt(3)
    initial_filters = np.random.uniform(
        low=-limit,
        high=limit,
        size=(output_dim, input_dim, filter_size, 1)
    ).astype(theano.config.floatX)
    filters = lib.param(name + '.Filters', initial_filters)

    # Append a dummy axis for conv2d and remove it afterwards.
    conv_out = T.nnet.conv2d(
        inputs.dimshuffle(0, 1, 2, 'x'),
        filters,
        border_mode=border_mode,
        subsample=(stride, 1)
    )
    conv_out = T.addbroadcast(conv_out, 3)
    result = conv_out.dimshuffle(0, 1, 2)

    if not biases:
        return result

    bias_param = lib.param(
        name + '.Biases',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )
    return result + bias_param[None, :, None]
def conv2d(
    name,
    input,
    kernel,
    stride,
    depth,
    n_filters,
    init=None,
    bias=True,
    batchnorm=False,
    train_bn=True,
    weightnorm=True,
    pad='valid',
    filter_dilation=(1, 1),
    mode='train',
    **kwargs
):
    """
    2-D convolution with optional weight norm, bias and batch norm.

    kernel: an int (square kernel) or a (height, width) pair.
    stride: subsampling factor used on both spatial axes.
    depth / n_filters: input channels / number of filters.
    init, **kwargs: forwarded to `initializer` to draw the filter values.
    weightnorm: when true, each filter is rescaled so its L2 norm equals
        the learned parameter '<name>.g'.
    bias: when true, add a zero-initialised '<name>.b' per filter.
    batchnorm, train_bn, mode: when `batchnorm` is true the output goes
        through Batchnorm with axes='spatial'.
    pad: forwarded to conv2d as `border_mode`.
    """
    if isinstance(kernel, int):
        kernel_h, kernel_w = kernel, kernel
    else:
        kernel_h, kernel_w = kernel

    filter_shape = (n_filters, depth, kernel_h, kernel_w)
    filter_values = initializer(init, filter_shape, **kwargs)
    W = lib.param(name + '.W', filter_values)

    if weightnorm:
        # '<name>.g' starts at the per-filter norm of the initial values.
        flat_init = filter_values.reshape((filter_values.shape[0], -1))
        norms = lib.param(name + '.g', np.linalg.norm(flat_init, axis=1))
        current_norms = W.reshape((W.shape[0], -1)).norm(2, axis=1)
        scale = norms / current_norms
        W = W * scale.dimshuffle(0, 'x', 'x', 'x')

    out = T.nnet.conv2d(
        input,
        W,
        subsample=(stride, stride),
        border_mode=pad,
        filter_dilation=filter_dilation
    )

    if bias:
        b = lib.param(name + '.b', np.zeros(n_filters).astype('float32'))
        out = out + b[None, :, None, None]

    if batchnorm:
        out = Batchnorm(
            name, out, n_filters,
            axes='spatial', mode=mode, trainable_weights=train_bn
        )

    return out
def Batchnorm(
    name,
    inputs,
    input_dim,
    axes=None,
    mode='train',
    trainable_weights=True
):
    """
    Batch normalisation with running statistics.

    input_dim: length of gamma, beta and the running mean/variance.
    axes: forwarded to the Theano batch-normalisation ops.
    mode: 'train' normalises with batch statistics and records the new
        running mean/variance in lib._updates; 'test' normalises with the
        stored running statistics. Any other value raises ValueError
        (previously the function silently returned None).
    trainable_weights: passed to lib.param as `is_param` for gamma/beta.
    """
    if mode not in ('train', 'test'):
        raise ValueError(
            "mode must be 'train' or 'test', got {!r}".format(mode))

    gamma = lib.param(
        name + '.gamma',
        initializer('Normal', (input_dim,), mean=1.0, std=0.02),
        is_param=trainable_weights
    )

    beta = lib.param(
        name + '.beta',
        np.zeros(input_dim).astype(theano.config.floatX),
        is_param=trainable_weights
    )

    running_mean = lib.param(
        name + '.running_mean',
        np.zeros(input_dim).astype(theano.config.floatX),
        is_param=False
    )

    running_var = lib.param(
        name + '.running_variance',
        np.zeros(input_dim).astype(theano.config.floatX),
        is_param=False
    )

    if mode == 'train':
        out, _, _, new_mean, new_var = T.nnet.bn.batch_normalization_train(
            inputs,
            axes=axes,
            gamma=gamma,
            beta=beta,
            running_mean=running_mean,
            running_var=running_var
        )
        # record the running-statistic updates
        lib._updates[running_mean] = new_mean
        lib._updates[running_var] = new_var
        return out

    # mode == 'test'
    return T.nnet.bn.batch_normalization_test(
        inputs,
        axes=axes,
        gamma=gamma,
        beta=beta,
        mean=running_mean,
        var=running_var
    )
def conv1d(name, input, kernel, stride, n_filters, depth, bias=False):
    """
    Convolution along the last axis of a 4-D input via T.nnet.conv2d.

    Filters of shape (n_filters, depth, 1, kernel) are initialised with
    `glorot_uniform` and created through lib.param as '<name>.W'; they are
    applied without flipping and with subsample=(1, stride).

    bias: when true, add a zero-initialised '<name>.b' per filter.
        With the default (False) the bias term is skipped; the original
        code referenced an undefined `b` on that path and raised NameError.
    """
    W = lib.param(
        name + '.W',
        glorot_uniform((n_filters, depth, 1, kernel)).astype('float32')
    )

    conv_out = T.nnet.conv2d(
        input, W, filter_flip=False, subsample=(1, stride)
    )

    if not bias:
        return conv_out

    b = lib.param(
        name + '.b',
        np.zeros(n_filters).astype('float32')
    )
    return conv_out + b[None, :, None, None]
def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True,
    weightnorm=None,
):
    """
    Stride-2 deconvolution (via `_deconv2d`) with optional weight norm.

    inputs: tensor of shape (batch size, num channels, height, width)
    returns: tensor of shape (batch size, num channels, 2*height, 2*width)

    he_init: when true, the filter stdev is multiplied by sqrt(2).
    weightnorm: None means "use the module-level `_default_weightnorm`";
        when truthy, filters are rescaled so their norm over axes (0, 2, 3)
        equals the learned parameter '<name>.g'.
    """
    def uniform(stdev, size):
        bound = stdev * np.sqrt(3)
        return np.random.uniform(
            low=-bound, high=bound, size=size
        ).astype(theano.config.floatX)

    filters_stdev = np.sqrt(1. / (input_dim * filter_size**2))
    filters_stdev *= 2.  # Because of the stride
    if he_init:
        filters_stdev *= np.sqrt(2.)

    filter_values = uniform(
        filters_stdev,
        (input_dim, output_dim, filter_size, filter_size)
    )
    filters = lib.param(name + '.Filters', filter_values)

    # Identity test instead of `== None`.
    if weightnorm is None:
        weightnorm = _default_weightnorm

    if weightnorm:
        # '<name>.g' is initialised to the norms of the initial filters.
        norm_values = np.sqrt(
            np.sum(np.square(filter_values), axis=(0, 2, 3)))
        norms = lib.param(name + '.g', norm_values)
        filter_norms = T.sqrt(T.sum(T.sqr(filters), axis=(0, 2, 3)))
        filters = filters * (norms / filter_norms).dimshuffle(
            'x', 0, 'x', 'x')

    biases = lib.param(
        name + '.Biases',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )

    # `//` keeps the padding an int on Python 3; `/` would give a float.
    pad = (filter_size - 1) // 2

    result = _deconv2d(
        inputs,
        filters,
        subsample=(2, 2),
        border_mode=(pad, pad),
    )

    result = result + biases[None, :, None, None]
    # result = lib.debug.print_stats(name, result)
    return result
def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    """
    Scan `step_fn` over `inputs` with learned default initial states.

    hidden_dims: int or list of ints, one per recurrent state.
    step_fn: step function handed to theano.scan.
    inputs: a tensor or list of tensors used as scan sequences; axis 1 of
        the first one is used as the batch size.
    non_sequences: forwarded to theano.scan (never modified here).
    h0s: optional list of initial states; a None entry is replaced by a
        learned vector '<name>.h0_<i>' tiled over the batch. The caller's
        list is copied, not modified.

    Returns the outputs of theano.scan.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if h0s is None:
        h0s = [None] * len(hidden_dims)
    elif isinstance(h0s, (list, tuple)):
        # Work on a copy so the entries filled in below do not leak back
        # into the caller's list.
        h0s = list(h0s)

    for i, hidden_dim in enumerate(hidden_dims):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dim,), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dim)

        # mark every axis as non-broadcastable
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs
def create_wavenet_block(inp, num_dilation_layer, input_dim, output_dim,
                         name=None):
    """
    Stack `num_dilation_layer` gated dilated convolutions.

    Layer i (0-based) uses filter size 2 and dilation 2**i; the first layer
    maps from `input_dim`, the rest from `output_dim`. Each layer's skip
    output is scaled by one entry of the learned vector
    '<name>.parametrized_weights' (initialised to ones).

    The skip outputs are summed after slicing off leading positions along
    axis 1 so earlier layers line up with the last one.

    name: required parameter-name prefix (asserted to be non-None).

    Returns (output of the last layer, summed skip output).
    """
    assert name is not None

    skip_weights = lib.param(
        name + ".parametrized_weights",
        lib.floatX(numpy.ones((num_dilation_layer, )))
    )

    layer_out = inp
    skip_contrib = []
    for layer_idx in range(num_dilation_layer):
        in_channels = input_dim if layer_idx == 0 else output_dim
        layer_out, skip_c = lib.ops.dil_conv_1D(
            layer_out,
            output_dim,
            in_channels,
            2,
            dilation=2**layer_idx,
            non_linearity='gated',
            name=name + ".dilation_{}".format(layer_idx + 1)
        )
        skip_contrib.append(skip_c * skip_weights[layer_idx])

    # Walk from the second-to-last layer back to the first, trimming a
    # growing prefix from each earlier skip output before adding it.
    skip_out = skip_contrib[-1]
    offset = 0
    for layer_idx in reversed(range(num_dilation_layer - 1)):
        offset += 2**(layer_idx + 1)
        skip_out = skip_out + skip_contrib[layer_idx][:, offset:]

    return layer_out, skip_out
def create_wavenet_block(inp, num_dilation_layer, input_dim, output_dim,
                         name=None):
    """
    Build a block of gated dilated 1-D convolutions with skip outputs.

    The i-th layer (0-based) has filter size 2 and dilation 2**i. Its skip
    output is multiplied by entry i of '<name>.parametrized_weights', a
    learned vector initialised to ones. Skip outputs of earlier layers are
    trimmed at the front of axis 1 before being added to the last one.

    name: required parameter-name prefix (asserted to be non-None).

    Returns a pair: (final layer output, accumulated skip output).
    """
    assert name is not None

    n_layers = num_dilation_layer
    current = inp
    weighted_skips = []

    skip_weights = lib.param(
        name + ".parametrized_weights",
        lib.floatX(numpy.ones((n_layers,)))
    )

    for depth in range(n_layers):
        # only the first layer sees the block's input width
        if depth == 0:
            layer_input_dim = input_dim
        else:
            layer_input_dim = output_dim

        current, skip = lib.ops.dil_conv_1D(
            current,
            output_dim,
            layer_input_dim,
            2,
            dilation=2**depth,
            non_linearity='gated',
            name=name + ".dilation_{}".format(depth + 1)
        )
        weighted_skips.append(skip * skip_weights[depth])

    total_skip = weighted_skips[-1]
    trim = 0
    for step in range(n_layers - 1):
        # each step moves one layer further back and trims more
        trim += 2**(n_layers - step - 1)
        earlier = weighted_skips[n_layers - 2 - step]
        total_skip = total_skip + earlier[:, trim:]

    return current, total_skip
def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    """
    Run `step_fn` through theano.scan, supplying learned initial states.

    hidden_dims: int or list of ints - the size of each recurrent state.
    step_fn: the scan step function.
    inputs: tensor or list of tensors scanned over; the batch size is
        read from axis 1 of the first one.
    non_sequences: passed to theano.scan unchanged.
    h0s: optional list of initial states. Entries that are None are
        replaced by a learned '<name>.h0_<i>' vector tiled over the batch.
        A copy of the list is used so the caller's list is left untouched.

    Returns whatever theano.scan returns as outputs.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if h0s is None:
        h0s = [None] * len(hidden_dims)
    elif isinstance(h0s, (list, tuple)):
        # Avoid writing the filled-in states into the caller's list.
        h0s = list(h0s)

    for i in range(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])

        # no axis of the initial state may be broadcastable
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs
def Recurrence(processed_frames, h0, reset):
    """
    Stack of N_GRUS LowMemGRU layers over the processed frames.

    processed_frames.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames, DIM)

    When `reset` is true the learned initial state 'Recurrence.h0' (tiled
    over the batch) is used instead of the supplied `h0`.

    Returns (output of the last GRU, last hidden state of every GRU
    stacked along axis 1).
    """
    learned_h0 = lib.param(
        'Recurrence.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    gru0 = lib.ops.LowMemGRU('Recurrence.GRU0', DIM, DIM, processed_frames,
                             h0=h0[:, 0])
    grus = [gru0]
    # `range` replaces the Python-2-only `xrange`; each layer feeds on the
    # output of the previous one.
    for i in range(1, N_GRUS):
        gru = lib.ops.LowMemGRU('Recurrence.GRU' + str(i), DIM, DIM,
                                grus[-1], h0=h0[:, i])
        grus.append(gru)

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (grus[-1], last_hidden)
def encoder(input_sequences, h0, reset):
    """
    Embed the input, run conv + 3 GRUs, and project back to Q_LEVELS-way
    outputs.

    input_sequences.shape: (batch size, N_FRAMES * FRAME_SIZE)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()

    Returns (output, last_hidden). `output` is flattened to 2-D with
    reshape((-1, channels)) just before returning; `last_hidden` stacks
    the final state of the three GRUs along axis 1.

    The shapes in the trailing comments are the example shapes noted by
    the original author.
    """
    batch_size = input_sequences.shape[0]
    # Floor division: plain `/` would be a true division under Python 3.
    n_frames = (input_sequences.shape[1] - 3) // FRAME_SIZE

    emb = lib.ops.Embedding(
        'Embedding',
        Q_LEVELS,
        Q_LEVELS,
        input_sequences,
    ).transpose(0, 2, 1)

    X1 = emb[:, :, None, :]  # (128,256,1,259)
    X2 = T.nnet.relu(lib.ops.conv1d(
        'conv1', X1, kernel=4, stride=1, n_filters=512, depth=256,
        bias=True))  # (128,512,1,256)
    X4 = lib.ops.pool(X2)  # (128,2048,1,64)

    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # GRU input: pooled conv features concatenated with the embeddings of
    # the first 256 samples regrouped into frames.
    pooled_features = X4[:, :, 0, :].dimshuffle(0, 2, 1)
    framed_embeddings = emb.transpose(0, 2, 1)[:, :256, :].reshape(
        (batch_size, n_frames, FRAME_SIZE * Q_LEVELS))
    gru_inp = T.concatenate((pooled_features, framed_embeddings), axis=2)

    gru1 = lib.ops.GRU('FrameLevel.GRU1', 3072, DIM, gru_inp, h0=h0[:, 0])
    gru2 = lib.ops.GRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.GRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])
    # (128,64,512)

    # (128,64,2048) --> (128,256,512) --> (128,512,256)
    X9 = lib.ops.Dense(
        'Projection',
        512,
        2048,
        gru3,
        hidden_dim=gru3.shape[1]
    ).reshape(
        (batch_size, 4 * gru3.shape[1], DIM)
    ).transpose(0, 2, 1)[:, :, None, :]

    # skip connection from the first conv layer
    X10 = T.nnet.relu(X9 + X2)

    X11 = T.nnet.relu(lib.ops.conv1d(
        'deconv1', X10, kernel=1, stride=1, n_filters=512, depth=512,
        bias=True))  # (128,512,1,256)
    X12 = T.nnet.relu(lib.ops.conv1d(
        'deconv2', X11, kernel=1, stride=1, n_filters=512, depth=512,
        bias=True))  # (128,512,1,256)
    X13 = lib.ops.conv1d(
        'deconv3', X12, kernel=1, stride=1, n_filters=256, depth=512,
        bias=True)  # (128,256,1,256)

    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)

    output = X13[:, :, 0, :].transpose(0, 2, 1)
    return (output.reshape((-1, output.shape[2])), last_hidden)
def Embedding(name, n_symbols, output_dim, indices):
    """
    Look up one embedding vector per index.

    A table of shape (n_symbols, output_dim), initialised from a standard
    normal distribution, is created through lib.param under `name`.

    indices: integer tensor of any rank.
    Returns a tensor shaped like `indices` with a trailing axis of size
    `output_dim`.
    """
    initial_table = numpy.random.randn(n_symbols, output_dim)
    vectors = lib.param(name, initial_table.astype(theano.config.floatX))

    # Gather on the flattened indices, then restore the original leading
    # shape with the embedding axis appended.
    flat_lookup = vectors[indices.flatten()]
    output_shape = tuple(list(indices.shape) + [output_dim])
    return flat_lookup.reshape(output_shape)
def frame_level_rnn(input_sequences, h0, reset):
    """
    Frame-level tier: embed samples, group them into frames, run 3 GRUs.

    input_sequences.shape: (batch size, N_FRAMES * FRAME_SIZE)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, N_FRAMES * FRAME_SIZE, DIM)

    When `reset` is true the learned 'FrameLevel.h0' (tiled over the
    batch) replaces the supplied `h0`.

    Returns (output, last_hidden) where last_hidden stacks the final state
    of the three GRUs along axis 1.
    """
    batch_size = input_sequences.shape[0]
    # Floor division so the frame count stays an integer on Python 3 too.
    n_frames = input_sequences.shape[1] // FRAME_SIZE

    emb = lib.ops.Embedding('SampleLevel.Embedding', Q_LEVELS, Q_LEVELS,
                            input_sequences)

    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Each frame is the concatenation of the embeddings of its samples.
    frames = emb.reshape((batch_size, n_frames, FRAME_SIZE * Q_LEVELS))

    gru1 = lib.ops.GRU('FrameLevel.GRU1', FRAME_SIZE * Q_LEVELS, DIM, frames,
                       h0=h0[:, 0])
    gru2 = lib.ops.GRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.GRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])
    # gru3.shape = (batch_size,N_FRAMES,DIM)

    # Dense is applied to a 2-D view, then every frame is expanded into
    # FRAME_SIZE vectors of size DIM.
    gru3_flat = gru3.reshape((gru3.shape[0] * gru3.shape[1], gru3.shape[2]))
    output = lib.ops.Dense('FrameLevel.Output', DIM, FRAME_SIZE * DIM,
                           gru3_flat, init='he')
    output = output.reshape((batch_size, n_frames * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)

    return (output, last_hidden)
def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True
):
    """
    Stride-2 deconvolution (via `_deconv2d`) plus a per-channel bias.

    inputs: tensor of shape (batch size, num channels, height, width)
    returns: tensor of shape (batch size, num channels, 2*height, 2*width)

    Parameters '<name>.Filters' (shape (input_dim, output_dim, filter_size,
    filter_size)) and '<name>.Biases' (zeros) are created through
    lib.param. With `he_init` the filter stdev is multiplied by sqrt(2).
    """
    def uniform(stdev, size):
        bound = stdev * np.sqrt(3)
        return np.random.uniform(
            low=-bound,
            high=bound,
            size=size
        ).astype(theano.config.floatX)

    filters_stdev = np.sqrt(1. / (input_dim * filter_size**2))
    if he_init:
        filters_stdev *= np.sqrt(2.)

    filter_shape = (input_dim, output_dim, filter_size, filter_size)
    filters = lib.param(
        name + '.Filters',
        uniform(filters_stdev, filter_shape)
    )

    biases = lib.param(
        name + '.Biases',
        np.zeros(output_dim, dtype=theano.config.floatX)
    )

    # Integer padding: `/` would produce a float under Python 3.
    pad = (filter_size - 1) // 2

    result = _deconv2d(
        inputs,
        filters,
        subsample=(2, 2),
        border_mode=(pad, pad),
    )

    result = result + biases[None, :, None, None]
    return result
def Embedding(name, n_symbols, output_dim, inputs):
    """
    Look up one embedding vector per entry of `inputs`.

    A table of shape (n_symbols, output_dim), drawn from a standard normal
    distribution, is created through lib.param under `name`.

    inputs: integer tensor of any rank.
    Returns a tensor shaped like `inputs` with a trailing axis of size
    `output_dim`.
    """
    vectors = lib.param(
        name,
        np.random.randn(n_symbols, output_dim).astype(theano.config.floatX)
    )

    # `range` replaces the Python-2-only `xrange`; the result is the same.
    output_shape = [inputs.shape[i] for i in range(inputs.ndim)]
    output_shape = output_shape + [output_dim]

    return vectors[inputs.flatten()].reshape(output_shape)
def Embedding(name, n_symbols, output_dim, indices):
    """
    Look up one embedding vector per index.

    The (n_symbols, output_dim) table is drawn through `initializer` with
    the 'Normal' scheme and std = 1/sqrt(output_dim), and is created via
    lib.param under `name`.

    indices: integer tensor of any rank.
    Returns a tensor shaped like `indices` with a trailing axis of size
    `output_dim`.
    """
    table_init = initializer(
        'Normal',
        (n_symbols, output_dim),
        std=1/np.sqrt(output_dim)
    )
    vectors = lib.param(name, table_init.astype(theano.config.floatX))

    # Gather on flattened indices, then restore the index shape with the
    # embedding axis appended.
    gathered = vectors[indices.flatten()]
    output_shape = tuple(list(indices.shape) + [output_dim])
    return gathered.reshape(output_shape)
def Recurrent(
    name,
    hidden_dims,
    step_fn,
    inputs,
    non_sequences=[],
    h0s=None,
    reset=None
):
    """
    Scan `step_fn` over `inputs`, optionally carrying state across calls.

    hidden_dims: int or list of ints, one per recurrent state.
    step_fn: step function handed to theano.scan.
    inputs: tensor or list of tensors used as scan sequences; axis 1 of
        the first one is the batch size.
    non_sequences: forwarded to theano.scan (never modified here).
    h0s: optional list of initial states; None entries are replaced by a
        learned '<name>.h0_<i>' vector tiled over the batch. The caller's
        list is copied, not modified.
    reset: optional scalar condition. When given, each state is backed by
        a shared variable '<name>.last_hidden_<i>' whose default update is
        the final scan output; the scan starts from h0 when `reset` is
        true and from that shared variable otherwise.

    Returns the outputs of theano.scan.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if h0s is None:
        h0s = [None] * len(hidden_dims)
    elif isinstance(h0s, (list, tuple)):
        # Copy so the assignments below do not alter the caller's list.
        h0s = list(h0s)

    for i in range(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                np.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])

        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    if reset is not None:
        last_hiddens = []
        for i in range(len(h0s)):
            # The shape of last_hidden doesn't matter right now; we assume
            # it won't be used until we put something proper in it.
            last_hidden = theano.shared(
                np.zeros([1] * h0s[i].ndim, dtype=h0s[i].dtype),
                name=name + '.last_hidden_' + str(i)
            )
            last_hiddens.append(last_hidden)
            h0s[i] = theano.ifelse.ifelse(reset, h0s[i], last_hidden)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    if reset is not None:
        # With a single state `outputs` is the state sequence itself;
        # otherwise it is a list with one sequence per state.
        if len(last_hiddens) == 1:
            last_hiddens[0].default_update = outputs[-1]
        else:
            for i in range(len(last_hiddens)):
                last_hiddens[i].default_update = outputs[i][-1]

    return outputs
def big_frame_level_rnn(input_sequences, h0, reset):
    """
    Big-frame tier: group samples into big frames and run N_BIG_GRUS GRUs.

    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape: (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape: ()
    output[0].shape: (batch size, n frames, DIM)
    output[1].shape: same as h0.shape
    output[2].shape: (batch size, seq len, Q_LEVELS)

    When `reset` is true the learned 'BigFrameLevel.h0' (tiled over the
    batch) replaces the supplied `h0`.

    All integer dimension arithmetic uses `//` so the sizes stay integers
    under Python 3 as well.
    """
    learned_h0 = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // BIG_FRAME_SIZE,
        BIG_FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') /
              lib.floatX(Q_LEVELS // 2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru0 = lib.ops.LowMemGRU('BigFrameLevel.GRU0', BIG_FRAME_SIZE, BIG_DIM,
                             frames, h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_BIG_GRUS):
        gru = lib.ops.LowMemGRU('BigFrameLevel.GRU' + str(i), BIG_DIM,
                                BIG_DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    # Every big frame is expanded into BIG_FRAME_SIZE // FRAME_SIZE vectors
    # of size DIM.
    output = lib.ops.Linear(
        'BigFrameLevel.Output',
        BIG_DIM,
        DIM * BIG_FRAME_SIZE // FRAME_SIZE,
        grus[-1]
    )
    output = output.reshape((
        output.shape[0],
        output.shape[1] * BIG_FRAME_SIZE // FRAME_SIZE,
        DIM
    ))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    # One Q_LEVELS-way prediction per sample of each big frame.
    independent_preds = lib.ops.Linear(
        'BigFrameLevel.IndependentPreds',
        BIG_DIM,
        Q_LEVELS * BIG_FRAME_SIZE,
        grus[-1]
    )
    independent_preds = independent_preds.reshape((
        independent_preds.shape[0],
        independent_preds.shape[1] * BIG_FRAME_SIZE,
        Q_LEVELS
    ))

    return (output, last_hidden, independent_preds)
def Decoder(latent_var, text_features, name=""):
    """Decode text features conditioned on a latent vector.

    The latent vector is repeated along axis 1 of `text_features`,
    concatenated to it on axis 2, passed through a stacked LSTM/GRU
    (selected by RNN_TYPE) and two Linear layers with ReLUs in between.

    NOTE(review): assumes latent_var is (batch, LATENT_DIM) and
    text_features is (batch, time, INPUT_DIM) -- confirm with callers.
    """
    dec_name = "Decoder.{}".format(name)

    # Learned initial state, broadcast over the batch.
    h0_param = lib.param(
        '{}.h0'.format(dec_name),
        numpy.zeros((N_RNN, H0_MULT * DIM), dtype=theano.config.floatX)
    )
    # Handling LEARN_H0
    h0_param.param = True
    h0 = T.alloc(h0_param, latent_var.shape[0], N_RNN, H0_MULT * DIM)
    h0 = T.unbroadcast(h0, 0, 1, 2)

    n_steps = text_features.shape[1]
    latent_var_repeated = T.extra_ops.repeat(
        latent_var[:, None, :], n_steps, axis=1)
    features = T.concatenate([text_features, latent_var_repeated], axis=2)

    RNN_INPUT_DIM = INPUT_DIM + LATENT_DIM
    if RNN_TYPE == 'LSTM':
        rnns_out, last_hidden = lib.ops.stackedLSTM(
            '{}.LSTM'.format(dec_name),
            N_RNN,
            RNN_INPUT_DIM,
            DIM,
            features,
            h0=h0,
            weightnorm=WEIGHT_NORM,
            skip_conn=SKIP_CONN
        )
    else:
        rnns_out, last_hidden = lib.ops.stackedGRU(
            '{}.GRU'.format(dec_name),
            N_RNN,
            RNN_INPUT_DIM,
            DIM,
            features,
            h0=h0,
            weightnorm=WEIGHT_NORM,
            skip_conn=SKIP_CONN,
            use_input_every_layer=True
        )

    hidden = lib.ops.Linear(
        '{}.Output1'.format(dec_name), DIM, DIM,
        T.nnet.relu(rnns_out),
        weightnorm=WEIGHT_NORM
    )
    return lib.ops.Linear(
        '{}.Output2'.format(dec_name), DIM, OUTPUT_DIM,
        T.nnet.relu(hidden),
        initialization='he',
        weightnorm=WEIGHT_NORM
    )
def frame_level_rnn(input_sequences, other_input, h0, reset):
    """Frame-level tier: a GRU stack over frames, conditioned on `other_input`.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape:     (batch size, n frames, DIM)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns `(output, last_hidden)`; `last_hidden` stacks the final time
    step of every GRU along axis 1. When `reset` is true the learned
    initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Explicit floor division: `/` on integers only floors under Python 2
    # (and Theano's deprecated int_division='int' setting).
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS // 2)) - lib.floatX(1)
    frames = frames * lib.floatX(2)

    gru_input = lib.ops.Linear(
        'FrameLevel.InputExpand', FRAME_SIZE, DIM, frames) + other_input

    grus = [lib.ops.LowMemGRU('FrameLevel.GRU0', DIM, DIM, gru_input,
                              h0=h0[:, 0])]
    for i in range(1, N_GRUS):
        grus.append(lib.ops.LowMemGRU('FrameLevel.GRU' + str(i), DIM, DIM,
                                      grus[-1], h0=h0[:, i]))

    # Each frame is expanded into FRAME_SIZE per-sample vectors of size DIM.
    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        grus[-1],
        initialization='he'
    )
    output = output.reshape(
        (output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
def big_frame_level_rnn(input_sequences, h0, reset):
    """Top (slowest) tier: a GRU stack over big frames.

    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape:              (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape:           ()
    output[0].shape:       (batch size, n frames, DIM)
    output[1].shape:       same as h0.shape
    output[2].shape:       (batch size, seq len, Q_LEVELS)

    When `reset` is true the learned initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Explicit floor division: `/` on integers only floors under Python 2
    # (and Theano's deprecated int_division='int' setting).
    n_big_frames = input_sequences.shape[1] // BIG_FRAME_SIZE
    frames = input_sequences.reshape(
        (input_sequences.shape[0], n_big_frames, BIG_FRAME_SIZE))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS // 2)) - lib.floatX(1)
    frames = frames * lib.floatX(2)

    gru0 = lib.ops.LowMemGRU('BigFrameLevel.GRU0', BIG_FRAME_SIZE, BIG_DIM,
                             frames, h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_BIG_GRUS):
        gru = lib.ops.LowMemGRU('BigFrameLevel.GRU' + str(i), BIG_DIM,
                                BIG_DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    # One big frame is expanded into BIG_FRAME_SIZE / FRAME_SIZE frame-level
    # conditioning vectors of size DIM.
    output = lib.ops.Linear(
        'BigFrameLevel.Output',
        BIG_DIM,
        DIM * BIG_FRAME_SIZE // FRAME_SIZE,
        grus[-1]
    )
    output = output.reshape((
        output.shape[0],
        output.shape[1] * BIG_FRAME_SIZE // FRAME_SIZE,
        DIM
    ))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    independent_preds = lib.ops.Linear(
        'BigFrameLevel.IndependentPreds',
        BIG_DIM,
        Q_LEVELS * BIG_FRAME_SIZE,
        grus[-1]
    )
    independent_preds = independent_preds.reshape((
        independent_preds.shape[0],
        independent_preds.shape[1] * BIG_FRAME_SIZE,
        Q_LEVELS
    ))

    return (output, last_hidden, independent_preds)
def sample_level_rnn(input_sequences, h0, reset):
    """Sample-level tier: embed each sample, then run three stacked GRUs.

    input_sequences.shape: (batch size, seq len)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, seq len, DIM)
    NOTE(review): the final Linear maps DIM -> Q_LEVELS, so the last output
    axis looks like Q_LEVELS rather than DIM -- confirm.

    Returns `(output, last_hidden)`; softmax is applied by the caller.
    """
    if N_GRUS != 3:
        raise Exception('N_GRUS must be 3, at least for now')

    h0_param = lib.param(
        'SampleLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    batched_h0 = T.alloc(h0_param, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, batched_h0, h0)

    # Embedded inputs: every quantisation level gets a Q_LEVELS-dim vector.
    # (A real-valued alternative would feed size-1 frames rescaled from
    # [0, Q_LEVELS) to [-2, 2] instead.)
    embedding_dim = Q_LEVELS
    frames = lib.ops.Embedding(
        'SampleLevel.Embedding', Q_LEVELS, Q_LEVELS, input_sequences)

    layer_names = ('SampleLevel.GRU1', 'SampleLevel.GRU2', 'SampleLevel.GRU3')
    layer_outputs = []
    layer_input, layer_input_dim = frames, embedding_dim
    for layer_idx, layer_name in enumerate(layer_names):
        layer_out = lib.ops.LowMemGRU(
            layer_name, layer_input_dim, DIM, layer_input,
            h0=h0[:, layer_idx])
        layer_outputs.append(layer_out)
        layer_input, layer_input_dim = layer_out, DIM

    # We apply the softmax later
    output = lib.ops.Linear('Output', DIM, Q_LEVELS, layer_outputs[-1])

    last_hidden = T.stack(
        [layer_out[:, -1] for layer_out in layer_outputs], axis=1)

    return (output, last_hidden)
def Embedding(name, n_symbols, output_dim, indices):
    """Look up a learned `output_dim`-sized vector for every entry of `indices`.

    The table (parameter `name`) has shape (n_symbols, output_dim) and is
    initialised from a standard normal. The result has the shape of
    `indices` with one trailing axis of size `output_dim`.
    """
    initial_table = numpy.random.randn(n_symbols, output_dim)
    vectors = lib.param(name, initial_table.astype(theano.config.floatX))

    # Gather on the flattened indices, then restore the original layout.
    gathered = vectors[indices.flatten()]
    output_shape = tuple(list(indices.shape) + [output_dim])
    return gathered.reshape(output_shape)
def myGRU(name, input_dim, hidden_dim, inputs, h0=None):
    """Weight-normalised GRU scanned over the time axis of `inputs`.

    inputs.shape = (batch_size, N_FRAMES, FRAME_SIZE); the result is
    returned batch-major again and named `<name>.output`.

    NOTE(review): the gain parameters are registered as
    `name + 'Gates.W.g'` / `name + 'Candidate.W.g'` (no dot after `name`),
    unlike the other parameters. Kept as-is because renaming would change
    stored parameter names -- confirm whether this is intentional.
    """
    def _weightnormed(weight_name, gain_name, n_out):
        # Weight matrix rescaled column-wise to a learned norm.
        values = init_weights(input_dim + hidden_dim, n_out)
        raw = lib.param(weight_name, values)
        gain = lib.param(gain_name, numpy.linalg.norm(values, axis=0))
        return raw * (gain / raw.norm(2, axis=0)).dimshuffle('x', 0)

    # scan iterates over the leading axis, so go time-major.
    time_major = inputs.transpose(1, 0, 2)

    gates_w = _weightnormed(
        name + '.Gates.W', name + 'Gates.W.g', 2 * hidden_dim)
    gates_b = lib.param(
        name + '.Gates.b',
        np.ones(2 * hidden_dim).astype(theano.config.floatX)
    )

    candidate_w = _weightnormed(
        name + '.Candidate.W', name + 'Candidate.W.g', hidden_dim)
    candidate_b = lib.param(
        name + '.Candidate.b',
        np.zeros(hidden_dim).astype(theano.config.floatX)
    )

    outputs, _ = theano.scan(
        recurrent_fn,
        sequences=[time_major],
        outputs_info=[h0],
        non_sequences=[hidden_dim, gates_w, gates_b, candidate_w, candidate_b]
    )

    out = outputs.dimshuffle(1, 0, 2)
    out.name = name + '.output'
    return out
def Conv1D(name, input_dim, output_dim, filter_size, inputs, apply_biases=True):
    """1-D convolution over axis 1, implemented with `T.nnet.conv2d`.

    inputs.shape: (batch size, height, input_dim)
    output.shape: (batch size, height, output_dim)
    * performs valid convs
    NOTE(review): with border_mode='valid' the output height is presumably
    shorter than the input height when filter_size > 1 -- confirm.
    """
    def uniform(stdev, size):
        """uniform distribution with the given stdev and size"""
        bound = stdev * numpy.sqrt(3)
        samples = numpy.random.uniform(low=-bound, high=bound, size=size)
        return samples.astype(theano.config.floatX)

    # output dim, input dim, height, width
    filter_shape = (output_dim, input_dim, filter_size, 1)
    filters = lib.param(
        name + '.Filters',
        uniform(1. / numpy.sqrt(input_dim * filter_size), filter_shape)
    )

    # conv2d takes inputs as (batch size, input channels, height, width):
    # add a width-1 axis, then move the channel axis to position 1.
    batch, height, channels = inputs.shape[0], inputs.shape[1], inputs.shape[2]
    as_image = inputs.reshape((batch, height, 1, channels))
    as_image = as_image.dimshuffle(0, 3, 1, 2)

    result = T.nnet.conv2d(
        as_image, filters, border_mode='valid', filter_flip=False)

    if apply_biases:
        biases = lib.param(
            name + '.Biases',
            numpy.zeros(output_dim, dtype=theano.config.floatX)
        )
        result = result + biases[None, :, None, None]

    # Back to channels-last and drop the width-1 axis.
    result = result.dimshuffle(0, 2, 3, 1)
    return result.reshape(
        (result.shape[0], result.shape[1], result.shape[3]))
def Dense(name, input_dim, output_dim, inputs, bias=True, init=None,
          weightnorm=True, hidden_dim=None):
    """Fully connected layer, optionally weight-normalised.

    :parameters:
        name: prefix for the `.W`, `.g` and `.b` parameters.
        input_dim, output_dim: sizes of the last input / output axis.
        inputs: 2-D or 3-D theano variable. 3-D inputs are flattened to
            (-1, input_dim) for the matrix product and reshaped back.
        bias: whether to add a learned bias.
        init: forwarded to `init_weights`.
        weightnorm: whether to rescale weight columns to a learned norm.
        hidden_dim: size of the middle axis of the reshaped 3-D result.
            When omitted, the middle axis of `inputs` is used (previously
            `None` was passed straight into `reshape`).

    :returns: (batch, middle, output_dim) for 3-D inputs, otherwise the 2-D
        product.
    """
    weight_values = init_weights(input_dim, output_dim, init)
    weight = lib.param(name + '.W', weight_values)

    restored_shape = None
    if inputs.ndim == 3:
        # Capture the target shape before flattening.
        middle = hidden_dim if hidden_dim is not None else inputs.shape[1]
        restored_shape = (inputs.shape[0], middle, output_dim)
        inputs = inputs.reshape((-1, input_dim))

    if weightnorm:
        norm_values = numpy.linalg.norm(weight_values, axis=0)
        norms = lib.param(name + '.g', norm_values)
        normed_weight = weight * (
            norms / weight.norm(2, axis=0)).dimshuffle('x', 0)
        result = T.dot(inputs, normed_weight)
    else:
        result = T.dot(inputs, weight)

    if bias:
        b = lib.param(
            name + '.b',
            numpy.zeros((output_dim,), dtype=theano.config.floatX)
        )
        result = result + b

    result.name = name + ".output"

    if restored_shape is not None:
        return result.reshape(restored_shape)
    return result
def Dense(name, input_dim, output_dim, inputs, bias=True, init=None,
          weightnorm=True, hidden_dim=None):
    """Fully connected layer, optionally weight-normalised.

    :parameters:
        name: prefix for the `.W`, `.g` and `.b` parameters.
        input_dim, output_dim: sizes of the last input / output axis.
        inputs: 2-D or 3-D theano variable. 3-D inputs are flattened to
            (-1, input_dim) for the matrix product and reshaped back.
        bias: whether to add a learned bias.
        init: forwarded to `init_weights`.
        weightnorm: whether to rescale weight columns to a learned norm.
        hidden_dim: size of the middle axis of the reshaped 3-D result.
            When omitted, the middle axis of `inputs` is used (previously
            `None` was passed straight into `reshape`).

    :returns: (batch, middle, output_dim) for 3-D inputs, otherwise the 2-D
        product.
    """
    weight_values = init_weights(input_dim, output_dim, init)
    weight = lib.param(name + '.W', weight_values)

    is_batched_sequence = inputs.ndim == 3
    if is_batched_sequence:
        # Capture the target shape before flattening.
        batch_size = inputs.shape[0]
        middle = inputs.shape[1] if hidden_dim is None else hidden_dim
        inputs = inputs.reshape((-1, input_dim))

    effective_weight = weight
    if weightnorm:
        norm_values = numpy.linalg.norm(weight_values, axis=0)
        norms = lib.param(name + '.g', norm_values)
        effective_weight = weight * (
            norms / weight.norm(2, axis=0)).dimshuffle('x', 0)
    result = T.dot(inputs, effective_weight)

    if bias:
        b = lib.param(
            name + '.b',
            numpy.zeros((output_dim,), dtype=theano.config.floatX)
        )
        result = result + b

    result.name = name + ".output"

    if is_batched_sequence:
        return result.reshape((batch_size, middle, output_dim))
    return result
def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=None,
              h0s=None, reset=None):
    """Scan `step_fn` over `inputs`, creating/learning initial states as needed.

    :parameters:
        name: prefix for the parameters / shared variables created here.
        hidden_dims: int or list of ints, one per recurrent state.
        step_fn: step function handed to `theano.scan`.
        inputs: a theano variable or list of variables used as scan
            sequences. `inputs[0].shape[1]` is used as the batch size, so
            sequences are presumably time-major -- TODO confirm with callers.
        non_sequences: optional list forwarded to `theano.scan`
            (defaults to an empty list).
        h0s: optional list of initial states; `None` entries are replaced by
            a learned `<name>.h0_<i>` parameter broadcast over the batch.
        reset: optional scalar condition. When given, each state starts from
            its h0 if `reset` is true, otherwise from the last hidden state
            of the previous call (stored in a shared variable updated via
            `default_update`).

    :returns: whatever `theano.scan` returns as outputs (a single variable
        for one state, a list for several).
    """
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]
    if non_sequences is None:
        # Avoid a shared mutable default argument.
        non_sequences = []

    # Work on a private copy so the caller's list is never modified.
    if h0s is None:
        h0s = [None] * len(hidden_dims)
    else:
        h0s = list(h0s)

    for i, hidden_dim in enumerate(hidden_dims):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                np.zeros((hidden_dim,), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dim)
        # Make every dimension non-broadcastable so the type matches the
        # scan output / the ifelse branch below.
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    use_carry_over = reset is not None
    last_hiddens = []
    if use_carry_over:
        for i in range(len(h0s)):
            # The shape of last_hidden doesn't matter right now; we assume
            # it won't be used until we put something proper in it.
            last_hidden = theano.shared(
                np.zeros([1] * h0s[i].ndim, dtype=h0s[i].dtype),
                name=name + '.last_hidden_' + str(i)
            )
            last_hiddens.append(last_hidden)
            h0s[i] = theano.ifelse.ifelse(reset, h0s[i], last_hidden)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    if use_carry_over:
        if len(last_hiddens) == 1:
            # With a single state, scan returns the variable itself.
            last_hiddens[0].default_update = outputs[-1]
        else:
            for i, last_hidden in enumerate(last_hiddens):
                last_hidden.default_update = outputs[i][-1]

    return outputs
def Conv1D(name, input_dim, output_dim, filter_size, inputs, apply_biases=True):
    """1-D convolution over axis 1, implemented with `T.nnet.conv2d`.

    inputs.shape: (batch size, height, input_dim)
    output.shape: (batch size, height, output_dim)
    * performs valid convs
    NOTE(review): with border_mode='valid' the output height is presumably
    shorter than the input height when filter_size > 1 -- confirm.
    """
    def uniform(stdev, size):
        """uniform distribution with the given stdev and size"""
        half_width = stdev * numpy.sqrt(3)
        drawn = numpy.random.uniform(
            low=-half_width, high=half_width, size=size)
        return drawn.astype(theano.config.floatX)

    filter_stdev = 1. / numpy.sqrt(input_dim * filter_size)
    filters = lib.param(
        name + '.Filters',
        # output dim, input dim, height, width
        uniform(filter_stdev, (output_dim, input_dim, filter_size, 1))
    )

    # conv2d takes inputs as (batch size, input channels, height, width):
    # insert a width-1 axis, then bring the channel axis forward.
    expanded = inputs.reshape(
        (inputs.shape[0], inputs.shape[1], 1, inputs.shape[2]))
    channels_first = expanded.dimshuffle(0, 3, 1, 2)

    result = T.nnet.conv2d(
        channels_first, filters, border_mode='valid', filter_flip=False)

    if apply_biases:
        biases = lib.param(
            name + '.Biases',
            numpy.zeros(output_dim, dtype=theano.config.floatX)
        )
        result = result + biases[None, :, None, None]

    # Channels-last again, then squeeze out the width-1 axis.
    channels_last = result.dimshuffle(0, 2, 3, 1)
    squeezed_shape = (
        channels_last.shape[0], channels_last.shape[1], channels_last.shape[3])
    return channels_last.reshape(squeezed_shape)
def sample_level_rnn(input_sequences, h0, reset):
    """Sample-level tier: embed each sample, then run three stacked GRUs.

    input_sequences.shape: (batch size, seq len)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, seq len, DIM)
    NOTE(review): the final Linear maps DIM -> Q_LEVELS, so the last output
    axis looks like Q_LEVELS rather than DIM -- confirm.

    NOTE(review): the learned initial state is registered as
    'FrameLevel.h0' even though this is the sample-level tier; kept
    unchanged because renaming would change stored parameter names --
    confirm whether sharing that name is intentional.

    Returns `(output, last_hidden)`; softmax is applied by the caller.
    """
    if N_GRUS != 3:
        raise Exception('N_GRUS must be 3, at least for now')

    h0_param = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    batched_h0 = T.alloc(h0_param, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, batched_h0, h0)

    # Embedded inputs: every quantisation level gets a Q_LEVELS-dim vector.
    # (A real-valued alternative would feed size-1 frames rescaled from
    # [0, Q_LEVELS) to [-2, 2] instead.)
    embedding_dim = Q_LEVELS
    frames = lib.ops.Embedding(
        'SampleLevel.Embedding', Q_LEVELS, Q_LEVELS, input_sequences)

    first = lib.ops.LowMemGRU(
        'SampleLevel.GRU1', embedding_dim, DIM, frames, h0=h0[:, 0])
    second = lib.ops.LowMemGRU(
        'SampleLevel.GRU2', DIM, DIM, first, h0=h0[:, 1])
    third = lib.ops.LowMemGRU(
        'SampleLevel.GRU3', DIM, DIM, second, h0=h0[:, 2])

    # We apply the softmax later
    output = lib.ops.Linear('Output', DIM, Q_LEVELS, third)

    final_steps = [layer[:, -1] for layer in (first, second, third)]
    last_hidden = T.stack(final_steps, axis=1)

    return (output, last_hidden)
def Embedding(name, n_symbols, output_dim, inputs):
    """Look up a learned `output_dim`-sized vector for every entry of `inputs`.

    The table (parameter `name`) has shape (n_symbols, output_dim) and is
    initialised from a standard normal. The result has the shape of
    `inputs` with one trailing axis of size `output_dim`.
    """
    initial_table = np.random.randn(n_symbols, output_dim)
    vectors = lib.param(name, initial_table.astype(theano.config.floatX))

    # Original axes of `inputs`, followed by the embedding axis.
    output_shape = [inputs.shape[axis] for axis in xrange(inputs.ndim)]
    output_shape = output_shape + [output_dim]

    # Gather on the flattened indices, then restore the original layout.
    gathered = vectors[inputs.flatten()]
    return gathered.reshape(output_shape)
def frame_level_rnn(input_sequences, other_input, h0, reset):
    """Frame-level tier: a GRU stack over frames, conditioned on `other_input`.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape:     (batch size, n frames, DIM)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns `(output, last_hidden)`; `last_hidden` stacks the final time
    step of every GRU along axis 1. When `reset` is true the learned
    initial state replaces `h0`.
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Explicit floor division: `/` on integers only floors under Python 2
    # (and Theano's deprecated int_division='int' setting).
    n_frames = input_sequences.shape[1] // FRAME_SIZE
    frames = input_sequences.reshape(
        (input_sequences.shape[0], n_frames, FRAME_SIZE))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS // 2)) - lib.floatX(1)
    frames = frames * lib.floatX(2)

    gru_input = lib.ops.Linear(
        'FrameLevel.InputExpand', FRAME_SIZE, DIM, frames) + other_input

    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', DIM, DIM, gru_input,
                             h0=h0[:, 0])
    grus = [gru0]
    for i in range(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU' + str(i), DIM, DIM,
                                grus[-1], h0=h0[:, i])
        grus.append(gru)

    # Each frame is expanded into FRAME_SIZE per-sample vectors of size DIM.
    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        grus[-1],
        initialization='he'
    )
    output = output.reshape(
        (output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
def frame_level_rnn(input_sequences, h0, reset):
    """Frame-level tier: three stacked GRUs over rescaled frames.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns `(output, last_hidden)`. When `reset` is true the learned
    initial state replaces `h0`.

    Raises:
        Exception: if N_GRUS is not 3.
    """
    if N_GRUS != 3:
        raise Exception('N_GRUS must be 3, at least for now')

    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Explicit floor division: `/` on integers only floors under Python 2
    # (and Theano's deprecated int_division='int' setting).
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS // 2)) - lib.floatX(1)
    frames = frames * lib.floatX(2)

    gru1 = lib.ops.LowMemGRU('FrameLevel.GRU1', FRAME_SIZE, DIM, frames,
                             h0=h0[:, 0])
    gru2 = lib.ops.LowMemGRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.LowMemGRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])

    # Each frame is expanded into FRAME_SIZE per-sample vectors of size DIM.
    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        gru3,
        initialization='he'
    )
    output = output.reshape(
        (output.shape[0], output.shape[1] * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)

    return (output, last_hidden)
def myGRU(name, input_dim, hidden_dim, inputs, h0=None):
    """Weight-normalised GRU scanned over the time axis of `inputs`.

    inputs.shape = (batch_size, N_FRAMES, FRAME_SIZE); the result is
    returned batch-major again and named `<name>.output`.

    NOTE(review): the gain parameters are registered as
    `name + 'Gates.W.g'` / `name + 'Candidate.W.g'` (no dot after `name`),
    unlike the other parameters. Kept as-is because renaming would change
    stored parameter names -- confirm whether this is intentional.
    """
    fan_in = input_dim + hidden_dim

    # scan iterates over the leading axis, so go time-major.
    sequence = inputs.transpose(1, 0, 2)

    # Gates: weight matrix rescaled column-wise to a learned norm.
    gate_values = init_weights(fan_in, 2 * hidden_dim)
    gate_raw = lib.param(name + '.Gates.W', gate_values)
    gate_gain = lib.param(
        name + 'Gates.W.g', numpy.linalg.norm(gate_values, axis=0))
    gate_weight = gate_raw * (
        gate_gain / gate_raw.norm(2, axis=0)).dimshuffle('x', 0)
    gate_bias = lib.param(
        name + '.Gates.b',
        np.ones(2 * hidden_dim).astype(theano.config.floatX)
    )

    # Candidate state: same weight-norm construction.
    cand_values = init_weights(fan_in, hidden_dim)
    cand_raw = lib.param(name + '.Candidate.W', cand_values)
    cand_gain = lib.param(
        name + 'Candidate.W.g', numpy.linalg.norm(cand_values, axis=0))
    cand_weight = cand_raw * (
        cand_gain / cand_raw.norm(2, axis=0)).dimshuffle('x', 0)
    cand_bias = lib.param(
        name + '.Candidate.b',
        np.zeros(hidden_dim).astype(theano.config.floatX)
    )

    outputs, _ = theano.scan(
        recurrent_fn,
        sequences=[sequence],
        outputs_info=[h0],
        non_sequences=[hidden_dim, gate_weight, gate_bias,
                       cand_weight, cand_bias]
    )

    out = outputs.dimshuffle(1, 0, 2)
    out.name = name + '.output'
    return out
def Conv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True,
           mask_type=None, stride=1, weightnorm=None, biases=True):
    """2-D convolution ('half' padding) with optional masking and weight norm.

    inputs: tensor of shape (batch size, num channels, height, width)
    mask_type: None, or a pair `(kind, n_channels)` with kind 'a' or 'b'
        (the argument is unpacked into these two values below)
    weightnorm: None falls back to `_default_weightnorm`
    returns: tensor of shape (batch size, num channels, height, width)
    """
    masked = mask_type is not None
    if masked:
        mask_kind, mask_n_channels = mask_type

    def uniform(stdev, size):
        bound = stdev * np.sqrt(3)
        return np.random.uniform(
            low=-bound, high=bound, size=size
        ).astype(theano.config.floatX)

    fan_in = input_dim * filter_size**2
    fan_out = output_dim * filter_size**2
    # TODO: shouldn't fan_out be divided by stride

    if masked:
        # only approximately correct
        fan_in /= 2.
        fan_out /= 2.

    # He-style init uses 4/(fan_in+fan_out); otherwise normalized init
    # (Glorot & Bengio) with 2/(fan_in+fan_out).
    numerator = 4. if he_init else 2.
    filters_stdev = np.sqrt(numerator / (fan_in + fan_out))

    filter_shape = (output_dim, input_dim, filter_size, filter_size)
    filter_values = uniform(filters_stdev, filter_shape)
    filters = lib.param(name + '.Filters', filter_values)

    if weightnorm is None:
        weightnorm = _default_weightnorm
    if weightnorm:
        # One learned norm per output channel.
        flat_values = filter_values.reshape((filter_values.shape[0], -1))
        norms = lib.param(name + '.g', np.linalg.norm(flat_values, axis=1))
        current_norms = filters.reshape(
            (filters.shape[0], -1)).norm(2, axis=1)
        filters = filters * (
            norms / current_norms).dimshuffle(0, 'x', 'x', 'x')

    if masked:
        mask = np.ones(filter_shape, dtype=theano.config.floatX)
        center = filter_size // 2

        # Mask out future locations
        # filter shape is (out_channels, in_channels, height, width)
        mask[:, :, center+1:, :] = 0.
        mask[:, :, center, center+1:] = 0.

        # Mask out future channels: type 'a' also hides the current
        # channel (i >= j), type 'b' only later ones (i > j).
        for i in xrange(mask_n_channels):
            for j in xrange(mask_n_channels):
                if mask_kind == 'a':
                    hide = i >= j
                else:
                    hide = mask_kind == 'b' and i > j
                if hide:
                    mask[
                        j::mask_n_channels,
                        i::mask_n_channels,
                        center,
                        center
                    ] = 0.

        filters = filters * mask

    if biases:
        _biases = lib.param(
            name + '.Biases',
            np.zeros(output_dim, dtype=theano.config.floatX)
        )

    result = T.nnet.conv2d(
        inputs,
        filters,
        border_mode='half',
        filter_flip=False,
        subsample=(stride, stride)
    )

    if biases:
        result = result + _biases[None, :, None, None]

    return result
def Batchnorm(
        name,
        input_dim,
        inputs,
        stepwise=False,
        axes=None,
        wrt=None,
        i_gamma=None,
        i_beta=None):
    """Batch-normalise `inputs` with learned `.gamma` / `.beta` parameters.

    Statistics are computed on `wrt` (defaults to `inputs`): over `axes`
    when given, otherwise over all rows after reshaping to (-1, input_dim).
    `i_gamma` / `i_beta` override the initial values (0.1 and 0 by default).
    `stepwise` is accepted but not used by this implementation.

    From Ishaan's repo
    """
    stats_source = inputs if wrt is None else wrt

    if axes is not None:
        means = stats_source.mean(axis=axes, keepdims=True)
        variances = stats_source.var(axis=axes, keepdims=True)
    else:
        flattened = stats_source.reshape((-1, input_dim))
        means = flattened.mean(axis=0)
        variances = flattened.var(axis=0)

    if i_gamma is None:
        i_gamma = lib.floatX(0.1) * numpy.ones(
            input_dim, dtype=theano.config.floatX)
    if i_beta is None:
        i_beta = numpy.zeros(input_dim, dtype=theano.config.floatX)

    gamma = lib.param(name + '.gamma', i_gamma)
    beta = lib.param(name + '.beta', i_beta)

    # Small constant keeps the square root away from zero.
    stdevs = T.sqrt(variances + lib.floatX(1e-6))
    stdevs.name = name + '.stdevs'
    means.name = name + '.means'

    if axes is None:
        # Statistics are per-feature vectors; broadcast them over rows.
        return T.nnet.bn.batch_normalization(
            inputs,
            gamma.dimshuffle('x', 0),
            beta.dimshuffle('x', 0),
            means.dimshuffle('x', 0),
            stdevs.dimshuffle('x', 0),
            mode='low_mem'
        )

    # Statistics already keep their dims; broadcast gamma/beta over the
    # reduced axes.
    dimshuffle_pattern = []
    for axis in xrange(inputs.ndim):
        dimshuffle_pattern.append('x' if axis in axes else 0)
    return T.nnet.bn.batch_normalization(
        inputs,
        gamma.dimshuffle(*dimshuffle_pattern),
        beta.dimshuffle(*dimshuffle_pattern),
        means,
        stdevs,
        mode='low_mem'
    )
def DiagonalLSTM(name, input_dim, output_dim, input_shape, inputs):
    """LSTM scanned over the columns of a skewed feature map.

    inputs_shape: (n_channels, height, width)
    inputs.shape: (batch size, input_dim, height, width)
    outputs.shape: (batch size, output_dim, height, width)
    """
    n_channels, height, width = input_shape
    n_gate_units = 4 * output_dim

    skewed = _skew(height, width, inputs)

    # TODO benchmark running skew after input_to_state, might be faster
    input_to_state = lib.ops.conv2d.Conv2D(
        name + '.InputToState',
        input_dim,
        n_gate_units,
        1,
        skewed,
        mask_type=('b', n_channels),
        he_init=False
    )

    batch_size = skewed.shape[0]
    state_shape = (output_dim, height)

    # Learned initial cell / hidden states, broadcast over the batch.
    c0_unbatched = lib.param(
        name + '.c0',
        np.zeros(state_shape, dtype=theano.config.floatX)
    )
    c0 = T.alloc(c0_unbatched, batch_size, output_dim, height)

    h0_unbatched = lib.param(
        name + '.h0',
        np.zeros(state_shape, dtype=theano.config.floatX)
    )
    h0 = T.alloc(h0_unbatched, batch_size, output_dim, height)

    def step_fn(current_input_to_state, prev_c, prev_h):
        # all args have shape (batch size, output_dim, height)

        # TODO consider learning this padding
        padded = T.zeros(
            (batch_size, output_dim, 1 + height),
            dtype=theano.config.floatX
        )
        padded = T.inc_subtensor(padded[:, :, 1:], prev_h)

        state_to_state = lib.ops.conv1d.Conv1D(
            name + '.StateToState',
            output_dim,
            n_gate_units,
            2,
            padded,
            biases=False
        )

        gates = current_input_to_state + state_to_state

        # First three chunks are sigmoid gates (output, forget, input);
        # the last chunk is the tanh candidate.
        sigmoid_gates = T.nnet.sigmoid(gates[:, :3*output_dim, :])
        out_gate = sigmoid_gates[:, 0*output_dim:1*output_dim, :]
        forget_gate = sigmoid_gates[:, 1*output_dim:2*output_dim, :]
        in_gate = sigmoid_gates[:, 2*output_dim:3*output_dim, :]
        candidate = T.tanh(gates[:, 3*output_dim:4*output_dim, :])

        new_c = (forget_gate * prev_c) + (in_gate * candidate)
        new_h = out_gate * T.tanh(new_c)

        return (new_c, new_h)

    # Scan over the width axis (moved to the front).
    outputs, _ = theano.scan(
        step_fn,
        sequences=input_to_state.dimshuffle(3, 0, 1, 2),
        outputs_info=[c0, h0]
    )

    # Only the hidden states are returned; put the scanned axis back last.
    all_hs = outputs[1].dimshuffle(1, 2, 3, 0)

    return _unskew(height, width, all_hs)
def Linear(
        name,
        input_dims,
        output_dim,
        inputs,
        biases=True,
        initialization=None,
        weightnorm=True,
        just_params=False):
    """
    Compute a linear transform of one or more inputs, optionally with a bias.

    :parameters:
        input_dims: list of ints, or int (if single input); the
                    dimensionality of the input(s).
        output_dim: the dimensionality of the output.
        biases:     whether or not to include a bias term.
        inputs:     a theano variable, or list of variables (if multiple
                    inputs); the inputs to which to apply the transform.
        initialization: one of None, `lecun`, `glorot`, `he`, `glorot_he`,
                    `orthogonal`, or a numpy array of shape
                    (input_dim, output_dim). None picks `orthogonal` for
                    square weights and `lecun` otherwise.
        weightnorm: whether to rescale weight columns to a learned norm.
        just_params: if true, return the list of created parameters instead
                    of the output variable.
    """
    def _orthogonal(shape):
        # From lasagne
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are supported.")
        flat_shape = (shape[0], numpy.prod(shape[1:]))
        # TODO: why normal and not uniform?
        a = numpy.random.normal(0.0, 1.0, flat_shape)
        u, _, v = numpy.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)
        return q.astype(theano.config.floatX)

    if not isinstance(input_dims, list):
        input_dims = [input_dims]
        inputs = [inputs]

    terms = []
    params = []

    for i, (inp, inp_dim) in enumerate(zip(inputs, input_dims)):
        weight_shape = (inp_dim, output_dim)
        is_square = inp_dim == output_dim

        if isinstance(initialization, numpy.ndarray):
            weight_values = initialization
            assert weight_values.shape == (inp_dim, output_dim),\
                'Expecting an ndarray with shape ({}, {}) but got {}'.\
                format(inp_dim, output_dim, initialization.shape)
        elif initialization == 'lecun' or \
                (initialization is None and not is_square):
            weight_values = uniform(numpy.sqrt(1. / inp_dim), weight_shape)
        elif initialization == 'glorot':
            weight_values = uniform(
                numpy.sqrt(2. / (inp_dim + output_dim)), weight_shape)
        elif initialization == 'he':
            weight_values = uniform(numpy.sqrt(2. / inp_dim), weight_shape)
        elif initialization == 'glorot_he':
            weight_values = uniform(
                numpy.sqrt(4. / (inp_dim + output_dim)), weight_shape)
        elif initialization == 'orthogonal' or \
                (initialization is None and is_square):
            weight_values = _orthogonal(weight_shape)
        else:
            raise Exception("Invalid initialization ({})!"\
                    .format(repr(initialization)))

        weight = lib.param(name + '.W' + str(i), weight_values)
        params.append(weight)

        prepared_weight = weight
        if weightnorm:
            norm_values = numpy.linalg.norm(weight_values, axis=0)
            norms = lib.param(name + '.g' + str(i), norm_values)
            params.append(norms)
            prepared_weight = weight * (
                norms / weight.norm(2, axis=0)).dimshuffle('x', 0)

        terms.append(T.dot(inp, prepared_weight))

    if biases:
        layer_biases = lib.param(
            name + '.b',
            numpy.zeros((output_dim,), dtype=theano.config.floatX)
        )
        params.append(layer_biases)
        terms.append(layer_biases)

    if just_params:
        return params

    # otherwise, complete/add to the computation graph
    out = reduce(lambda total, term: total + term, terms)
    out.name = name + '.output'
    return out
def __LSTMStep(
        name,
        input_dim,
        hidden_dim,
        current_input,
        last_hidden,
        weightnorm=True,
        inp_bias_init=0.,
        forget_bias_init=3.,
        out_bias_init=0.,
        g_bias_init=0.):
    """
    CAUTION:
        Not for stand-alone usage. It is defined here (instead of
        inside LSTM function) to not clutter the code.

    Gates:
        i = sigm(X_t*U^i + S_{t-1}*W^i + b^i)
        f = sigm(X_t*U^f + S_{t-1}*W^f + b^f)
        o = sigm(X_t*U^o + S_{t-1}*W^o + b^o)

    Candidate / internal memory (cell state) and hidden state:
        g = tanh(X_t*U^g + S_{t-1}*W^g + b^g)
        c_t = c_{t-1}.f + g.i

    State:
        S_t = tanh(c_t).o

    last_hidden:
        dim: (2*hidden_dim)
        S_{t-1} = last_hidden[:hidden_dim]
        c_{t-1} = last_hidden[hidden_dim:]

    Returns the new state in the same layout, [S_t ; c_t].

    Note:
        Initialising the forget-gate bias with a large positive value
        (1. to 5.) is reported to help learning long-term dependencies.
        See:
            http://www.felixgers.de/papers/phd.pdf
            http://jmlr.org/proceedings/papers/v37/jozefowicz15.pdf

    :todo:
        - Better initializations, especially for the weight matrices.
        - Fix the 'concatenation' to use instead of T.concatenate
    """
    n_gate_units = 4 * hidden_dim

    # X_t*(U^i, U^f, U^o, U^g)
    from_input = lib.ops.Linear(
        name + '.Input',
        input_dim,
        n_gate_units,
        current_input,
        biases=False,
        weightnorm=weightnorm
    )

    # last_hidden is [batch size, S_{t-1};c_{t-1}]
    prev_state = last_hidden[:, :hidden_dim]
    prev_cell = last_hidden[:, hidden_dim:]

    # S_{t-1}*(W^i, W^f, W^o, W^g)
    from_state = lib.ops.Linear(
        name + '.Recurrent_Gates',
        hidden_dim,
        n_gate_units,
        prev_state,
        biases=False,
        weightnorm=weightnorm
    )

    # Per-gate bias initialisation, laid out as b^i, b^f, b^o, b^g.
    gate_bias_inits = numpy.zeros((n_gate_units,), dtype=theano.config.floatX)
    per_gate_inits = (inp_bias_init, forget_bias_init,
                      out_bias_init, g_bias_init)
    for slot, init_value in enumerate(per_gate_inits):
        gate_bias_inits[slot*hidden_dim:(slot+1)*hidden_dim] = init_value
    gate_biases = lib.param(name + '.b', gate_bias_inits)

    pre_gates = from_input + from_state  # 4*dim
    pre_gates = pre_gates + gate_biases  # 4*dim

    sigmoid_gates = T.nnet.sigmoid(pre_gates[:, :3*hidden_dim])  # 3*dim
    in_gate = sigmoid_gates[:, :hidden_dim]  # dim
    forget_gate = sigmoid_gates[:, hidden_dim:2*hidden_dim]  # dim
    out_gate = sigmoid_gates[:, 2*hidden_dim:]  # dim
    candidate = T.tanh(pre_gates[:, 3*hidden_dim:])  # dim

    # internal memory/cell state
    new_cell = prev_cell * forget_gate + candidate * in_gate  # dim
    # hidden state
    new_state = T.tanh(new_cell) * out_gate  # dim

    # TODO: Again, problem with concatenating tensors with (False, False)
    # broadcast pattern. If slow down as a result of transferring to CPU for
    # concatenation is not high, keep it this way.
    return T.concatenate([new_state, new_cell], axis=-1)  # 2*dim, axis=1
def Linear(
        name,
        input_dim,
        output_dim,
        inputs,
        biases=True,
        initialization=None,
        weightnorm=None
        ):
    """Linear transform `inputs . W (+ b)`, optionally weight-normalised.

    :parameters:
        name: prefix for the `.W`, `.g` and `.b` parameters.
        input_dim, output_dim: shape of the weight matrix.
        inputs: theano variable whose last axis has size `input_dim`.
        biases: whether to add a learned bias.
        initialization: None, `lecun`, `glorot`, `he`, `glorot_he`,
            `orthogonal`, or `("uniform", range)`. None picks `orthogonal`
            for square weights and `lecun` otherwise.
        weightnorm: None falls back to `_default_weightnorm`.

    :raises Exception: for any unrecognised `initialization` value
        (including non-sequence values, which previously failed with a
        TypeError/IndexError while being indexed).
    """
    def uniform(stdev, size):
        return np.random.uniform(
            low=-stdev * np.sqrt(3),
            high=stdev * np.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    def orthogonal(shape):
        # From lasagne
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are "
                               "supported.")
        flat_shape = (shape[0], np.prod(shape[1:]))
        # TODO: why normal and not uniform?
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)
        return q.astype(theano.config.floatX)

    weight_shape = (input_dim, output_dim)
    is_uniform_spec = (
        isinstance(initialization, (tuple, list))
        and len(initialization) == 2
        and initialization[0] == 'uniform'
    )

    if initialization == 'lecun' or \
            (initialization is None and input_dim != output_dim):
        weight_values = uniform(np.sqrt(1. / input_dim), weight_shape)
    elif initialization == 'glorot':
        weight_values = uniform(
            np.sqrt(2. / (input_dim + output_dim)), weight_shape)
    elif initialization == 'he':
        weight_values = uniform(np.sqrt(2. / input_dim), weight_shape)
    elif initialization == 'glorot_he':
        weight_values = uniform(
            np.sqrt(4. / (input_dim + output_dim)), weight_shape)
    elif initialization == 'orthogonal' or \
            (initialization is None and input_dim == output_dim):
        weight_values = orthogonal(weight_shape)
    elif is_uniform_spec:
        weight_values = np.random.uniform(
            low=-initialization[1],
            high=initialization[1],
            size=weight_shape
        ).astype(theano.config.floatX)
    else:
        raise Exception("Invalid initialization!")

    weight = lib.param(name + '.W', weight_values)

    if weightnorm is None:
        weightnorm = _default_weightnorm
    if weightnorm:
        # Rescale each weight column to a learned norm.
        norm_values = np.linalg.norm(weight_values, axis=0)
        norms = lib.param(name + '.g', norm_values)
        weight = weight * (norms / weight.norm(2, axis=0)).dimshuffle('x', 0)

    result = T.dot(inputs, weight)

    if biases:
        result = result + lib.param(
            name + '.b',
            np.zeros((output_dim,), dtype=theano.config.floatX)
        )

    return result
def frame_level_rnn(input_sequences, h0, reset):
    """Frame-level tier over embedded samples: three stacked GRUs + Dense.

    input_sequences.shape: (batch size, N_FRAMES * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, N_FRAMES * FRAME_SIZE, DIM)

    Every sample is embedded into Q_LEVELS dimensions and the embeddings
    of one frame are concatenated into a FRAME_SIZE*Q_LEVELS input vector.
    Returns `(output, last_hidden)`. When `reset` is true the learned
    initial state replaces `h0`.
    """
    batch_size = input_sequences.shape[0]
    # Explicit floor division: `/` on integers only floors under Python 2
    # (and Theano's deprecated int_division='int' setting).
    n_frames = input_sequences.shape[1] // FRAME_SIZE

    emb = lib.ops.Embedding(
        'SampleLevel.Embedding',
        Q_LEVELS,
        Q_LEVELS,
        input_sequences
    )

    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    frames = emb.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] // FRAME_SIZE,
        FRAME_SIZE * Q_LEVELS
    ))

    gru1 = lib.ops.GRU('FrameLevel.GRU1', FRAME_SIZE * Q_LEVELS, DIM, frames,
                       h0=h0[:, 0])
    gru2 = lib.ops.GRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.GRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])

    # gru3.shape = (batch_size, N_FRAMES, DIM); flatten to 2-D for Dense.
    output = lib.ops.Dense(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        gru3.reshape((gru3.shape[0] * gru3.shape[1], gru3.shape[2])),
        init='he'
    )
    output = output.reshape((batch_size, n_frames * FRAME_SIZE, DIM))

    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)

    return (output, last_hidden)
def sample_level_rnn(input_sequences, h0, reset):
    """
    Sample-level model: embed each sample, run a stack of N_RNN recurrent
    layers (GRU or LSTM, chosen by RNN_TYPE) and apply three ReLU-activated
    linear layers followed by a linear output layer over Q_LEVELS.

    input_sequences.shape: (batch size, seq len)
    h0.shape:              (batch size, N_RNN, DIM)
    reset.shape:           ()
    output.shape:          (batch size, seq len, DIM)

    Note: the learned initial state built below has shape
    (batch size, N_RNN, H0_MULT*DIM), and the final Linear layer maps
    DIM -> Q_LEVELS; the softmax is applied by the caller.

    reset is the condition handed to `ifelse`: when it is true the learned
    initial state ('SampleLevel.h0') replaces the supplied h0.

    Returns a tuple (out, last_hidden).
    Raises ValueError when RNN_TYPE is neither 'GRU' nor 'LSTM'.
    """
    # Embedded inputs: every sample becomes a vector of size EMB_SIZE, which
    # is therefore the input width of the first recurrent layer.
    rnn_input_dim = EMB_SIZE
    frames = lib.ops.Embedding(
        'SampleLevel.Embedding',
        Q_LEVELS,
        EMB_SIZE,
        input_sequences
    )

    # Initial state of RNNs
    learned_h0 = lib.param(
        'SampleLevel.h0',
        numpy.zeros((N_RNN, H0_MULT*DIM), dtype=theano.config.floatX)
    )
    # Handling LEARN_H0: the flag is stored on the parameter object.
    # NOTE(review): presumably read elsewhere to decide whether h0 is
    # trained -- confirm against the code that collects parameters.
    learned_h0.param = LEARN_H0
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_RNN, H0_MULT*DIM)
    learned_h0 = T.unbroadcast(learned_h0, 0, 1, 2)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Handling RNN_TYPE and SKIP_CONN
    if RNN_TYPE == 'GRU':
        rnns_out, last_hidden = lib.ops.stackedGRU(
            'SampleLevel.GRU',
            N_RNN,
            rnn_input_dim,
            DIM,
            frames,
            h0=h0,
            weightnorm=WEIGHT_NORM,
            skip_conn=SKIP_CONN
        )
    elif RNN_TYPE == 'LSTM':
        rnns_out, last_hidden = lib.ops.stackedLSTM(
            'SampleLevel.LSTM',
            N_RNN,
            rnn_input_dim,
            DIM,
            frames,
            h0=h0,
            weightnorm=WEIGHT_NORM,
            skip_conn=SKIP_CONN
        )
    else:
        # Fail with a clear message instead of an UnboundLocalError on
        # `rnns_out` further down.
        raise ValueError(
            "Invalid RNN_TYPE: expected 'GRU' or 'LSTM', got %r" % (RNN_TYPE,)
        )

    out = lib.ops.Linear(
        'SampleLevel.L1',
        DIM,
        DIM,
        rnns_out,
        initialization='he',
        weightnorm=WEIGHT_NORM
    )
    out = T.nnet.relu(out)

    out = lib.ops.Linear(
        'SampleLevel.L2',
        DIM,
        DIM,
        out,
        initialization='he',
        weightnorm=WEIGHT_NORM
    )
    out = T.nnet.relu(out)

    out = lib.ops.Linear(
        'SampleLevel.L3',
        DIM,
        DIM,
        out,
        initialization='he',
        weightnorm=WEIGHT_NORM
    )
    out = T.nnet.relu(out)

    # We apply the softmax later
    out = lib.ops.Linear(
        'SampleLevel.Output',
        DIM,
        Q_LEVELS,
        out,
        initialization='he',
        weightnorm=WEIGHT_NORM
    )
    return (out, last_hidden)
def MaskedConv1D(name, input_dim, output_dim, filter_size, inputs, mask_type=None, he_init=False):
    """
    1-D convolution with an optional mask on the filter taps, computed with
    `T.nnet.conv2d` on inputs whose third axis has size 1.

    inputs.shape: (batch size, input_dim, 1, width)
    mask_type: None, 'a', 'b'
    output.shape: (batch size, output_dim, 1, width)

    With any non-None mask_type every tap after the centre one is zeroed;
    mask_type 'a' zeroes the centre tap as well. he_init selects a variance
    numerator of 2 instead of 1 for the uniform filter initialisation.
    """
    filter_shape = (output_dim, input_dim, 1, filter_size)
    use_mask = mask_type is not None
    middle = filter_size//2

    # Count the taps per input channel that the mask leaves untouched; this
    # is the fan-in used to scale the initial weights.
    if mask_type == 'a':
        live_taps = middle
    elif mask_type == 'b':
        live_taps = middle + 1
    else:
        live_taps = filter_size
    fan_in = live_taps * input_dim

    variance_numerator = 2. if he_init else 1.
    init_stdev = numpy.sqrt(variance_numerator/fan_in)

    # Uniform distribution on [-stdev*sqrt(3), stdev*sqrt(3)], i.e. with the
    # requested standard deviation.
    bound = init_stdev * numpy.sqrt(3)
    initial_filters = numpy.random.uniform(
        low=-bound,
        high=bound,
        size=filter_shape
    ).astype(theano.config.floatX)

    filters = lib.param(name+'.Filters', initial_filters)

    if use_mask:
        tap_mask = numpy.ones(filter_shape, dtype=theano.config.floatX)
        tap_mask[:, :, 0, middle+1:] = 0.
        if mask_type == 'a':
            tap_mask[:, :, 0, middle] = 0.
        filters = filters * tap_mask

    # TODO benchmark against the lasagne 'conv1d' implementations
    result = T.nnet.conv2d(inputs, filters, filter_flip=False, border_mode='half')

    if use_mask:
        # Crop the last axis back to the input width.
        result = result[:, :, :, :inputs.shape[3]]

    biases = lib.param(
        name+'.Biases',
        numpy.zeros(output_dim, dtype=theano.config.floatX)
    )
    result += biases[None, :, None, None]
    return result
def encoder_decoder(input_sequences, h0, reset):
    """
    Convolutional encoder -> three stacked GRUs -> convolutional decoder with
    two dense skip connections.

    input_sequences.shape: (batch size, N_FRAMES * FRAME_SIZE)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()

    reset is the condition handed to `ifelse`: when it is true the learned
    initial state ('FrameLevel.h0') replaces the supplied h0.

    Returns a tuple (output, last_hidden):
      output       the decoder result with all leading axes flattened,
                   i.e. reshaped to (-1, channels) -- a 2-D tensor.
      last_hidden  the last-timestep output of each of the three GRUs,
                   stacked along axis 1.

    NOTE(review): layer widths and the 968 crop below are hard-coded, so this
    presumably only works for one specific input length and DIM -- confirm
    against the caller.
    """
    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    X1 = ((input_sequences.astype(theano.config.floatX)/lib.floatX(Q_LEVELS/2)) - lib.floatX(1))*lib.floatX(2)
    # Insert two singleton axes: (batch, 1, 1, time).
    X1 = X1[:, None, None, :]

    # Encoder
    X2 = T.nnet.relu(lib.ops.conv1d('conv1', X1, kernel=4, stride=1, n_filters=128, depth=1))
    X3 = T.nnet.relu(lib.ops.conv1d('conv2', X2, kernel=6, stride=1, n_filters=64, depth=128))
    X4 = lib.ops.pool(X3)
    # Original author's shape note for X4: (batch_size,256,1,62)
    X5 = T.nnet.relu(lib.ops.conv1d('conv3', X4, kernel=4, stride=1, n_filters=128, depth=256))
    X6 = T.nnet.relu(lib.ops.conv1d('conv4', X5, kernel=4, stride=1, n_filters=128, depth=128))
    X7 = lib.ops.pool(X6)

    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    # Expand the learned state over the batch dimension of the supplied h0.
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # Drop the singleton axis and swap the last two axes before the GRUs.
    gru_inp = X7[:, :, 0, :].dimshuffle(0, 2, 1)
    gru1 = lib.ops.myGRU('FrameLevel.GRU1', DIM, DIM, gru_inp, h0=h0[:, 0])
    gru2 = lib.ops.myGRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.myGRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])

    # Back to the 4-D layout used by the convolutions.
    X8 = gru3.transpose(0, 2, 1)[:, :, None, :]
    X9 = lib.ops.upsample(X8)

    # Skip connection 1: dense transform of the encoder activation X6.
    X10 = X9 + lib.ops.Dense(
        'SkipConnection1',
        128,
        128,
        X6[:, :, 0, :].transpose(0, 2, 1),
        init='he',
        hidden_dim=X6.shape[3]
    ).transpose(0, 2, 1)[:, :, None, :]

    X11 = T.nnet.relu(lib.ops.conv1d('deconv1', X10, kernel=4, stride=1, n_filters=128, depth=128))
    X12 = T.nnet.relu(lib.ops.conv1d('deconv2', X11, kernel=4, stride=1, n_filters=128, depth=128))
    X13 = lib.ops.upsample(X12)

    # Original author's note: x3.shape (212,64)
    # Skip connection 2: dense transform of the encoder activation X3,
    # cropped to its first 968 positions.
    X14 = X13 + lib.ops.Dense(
        'SkipConnection2',
        64,
        32,
        X3[:, :, 0, :].transpose(0, 2, 1)[:, :968],
        hidden_dim=968
    ).transpose(0, 2, 1)[:, :, None, :]

    X15 = T.nnet.relu(lib.ops.conv1d('deconv3', X14, kernel=4, stride=1, n_filters=128, depth=32))
    X16 = T.nnet.relu(lib.ops.conv1d('deconv4', X15, kernel=4, stride=1, n_filters=256, depth=128))
    # Original author's note: 194

    output = X16[:, :, 0, :].transpose(0, 2, 1)
    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)
    return (output.reshape((-1, output.shape[2])), last_hidden)
def Linear(
        name,
        input_dims,
        output_dim,
        inputs,
        biases=True,
        initialization='lecun',
        weightnorm=True
        ):
    """
    Compute a linear transform of one or more inputs, optionally with a bias.

    name: prefix for the created parameters (name + '.W<i>', '.g<i>', '.b').
    input_dims: list of ints, or int (if single input); the dimensionality of
                the input(s).
    output_dim: the dimensionality of the output.
    biases: whether or not to include a bias term.
    inputs: a theano variable, or list of variables (if multiple inputs); the
            inputs to which to apply the transform.
    initialization: one of `lecun`, `he`. `None` is treated like `lecun` when
                    an input's dimension differs from output_dim and is
                    rejected otherwise.
    weightnorm: whether to use Weight Normalization (Salimans, Kingma 2016)

    Returns the sum of the per-input products (plus the bias if requested),
    named name + '.output'.
    Raises Exception("Invalid initialization!") for an unsupported
    initialization.
    """
    # Imported here so the function does not rely on the Python 2 builtin
    # `reduce`, which is not a builtin on Python 3.
    from functools import reduce
    import operator

    def uniform(stdev, size):
        """uniform distribution with the given stdev and size"""
        return numpy.random.uniform(
            low=-stdev * numpy.sqrt(3),
            high=stdev * numpy.sqrt(3),
            size=size
        ).astype(theano.config.floatX)

    # Normalise the single-input form to the list form.
    if not isinstance(input_dims, list):
        input_dims = [input_dims]
        inputs = [inputs]

    terms = []

    for i, (inp, inp_dim) in enumerate(zip(inputs, input_dims)):
        if initialization == 'lecun' or (initialization is None and inp_dim != output_dim):
            weight_values = uniform(numpy.sqrt(1. / inp_dim), (inp_dim, output_dim))
        elif initialization == 'he':
            weight_values = uniform(numpy.sqrt(2. / inp_dim), (inp_dim, output_dim))
        else:
            raise Exception("Invalid initialization!")

        weight = lib.param(
            name + '.W'+str(i),
            weight_values
        )

        if weightnorm:
            # Per-output-column gain, initialised to the column norms of the
            # initial weights so the normalised weight starts equal to them.
            norm_values = numpy.linalg.norm(weight_values, axis=0)
            norms = lib.param(
                name + '.g'+str(i),
                norm_values
            )
            normed_weight = weight * (norms / weight.norm(2, axis=0)).dimshuffle('x', 0)
            terms.append(T.dot(inp, normed_weight))
        else:
            terms.append(T.dot(inp, weight))

    if biases:
        terms.append(lib.param(
            name + '.b',
            numpy.zeros((output_dim,), dtype=theano.config.floatX)
        ))

    out = reduce(operator.add, terms)
    out.name = name + '.output'
    return out