def Encoder(inputs):
    """
    Encode `inputs` into Gaussian latent parameters.

    Returns (mu, log_sigma), each of shape (batch, LATENT_DIM), taken from
    the even/odd columns of a single (batch, 2*LATENT_DIM) network output.
    """
    if MODE=='256ary':
        # Rescale 8-bit integer pixels to floats.
        # NOTE(review): x*(2/255) - 0.5 maps [0, 255] to [-0.5, 1.5], not a
        # symmetric range — confirm this offset is intended.
        inputs = inputs.astype(theano.config.floatX) * lib.floatX(2./255)
        inputs -= lib.floatX(0.5)
    if FC:
        # Fully-connected encoder over flattened images.
        mu_and_log_sigma = lib.ops.mlp.MLP(
            'Encoder',
            input_dim=N_CHANNELS*HEIGHT*WIDTH,
            hidden_dim=FC_DIM,
            output_dim=2*LATENT_DIM,
            n_layers=FC_LAYERS,
            inputs=inputs.reshape((-1, N_CHANNELS*HEIGHT*WIDTH))
        )
        # Even columns are mu, odd columns are log_sigma.
        return mu_and_log_sigma[:, ::2], mu_and_log_sigma[:, 1::2]
    else:
        # Convolutional encoder.
        mu_and_log_sigma = lib.ops.conv_2d_encoder.Conv2DEncoder(
            'Encoder',
            input_n_channels=N_CHANNELS,
            input_size=WIDTH,
            n_pools=CONV_N_POOLS,
            base_n_filters=CONV_BASE_N_FILTERS,
            filter_size=CONV_FILTER_SIZE,
            output_dim=2*LATENT_DIM,
            inputs=inputs
        )
        # Even columns are mu, odd columns are log_sigma.
        return mu_and_log_sigma[:, ::2], mu_and_log_sigma[:, 1::2]
def __init__(self, num_input, num_cells, input_layer=None, name=""):
    """
    LSTM Layer
    Takes as input sequence of inputs, returns sequence of outputs
    """
    self.name = name
    self.num_input = num_input
    self.num_cells = num_cells
    # The previous layer's output is this layer's input sequence.
    self.X = input_layer.output()
    # Initial hidden and cell states. Not included in `params`, so they
    # are not updated by the optimizer.
    self.h0 = theano.shared(floatX(np.zeros(num_cells)))
    self.s0 = theano.shared(floatX(np.zeros(num_cells)))
    # Input-to-gate weights: g (candidate), i (input), f (forget), o (output).
    self.W_gx = random_weights((num_input, num_cells))
    self.W_ix = random_weights((num_input, num_cells))
    self.W_fx = random_weights((num_input, num_cells))
    self.W_ox = random_weights((num_input, num_cells))
    # Hidden-to-gate recurrent weights.
    self.W_gh = random_weights((num_cells, num_cells))
    self.W_ih = random_weights((num_cells, num_cells))
    self.W_fh = random_weights((num_cells, num_cells))
    self.W_oh = random_weights((num_cells, num_cells))
    # Gate biases.
    self.b_g = zeros(num_cells)
    self.b_i = zeros(num_cells)
    self.b_f = zeros(num_cells)
    self.b_o = zeros(num_cells)
    # Trainable parameters, consumed by the optimizer.
    self.params = [self.W_gx, self.W_ix, self.W_ox, self.W_fx,
                   self.W_gh, self.W_ih, self.W_oh, self.W_fh,
                   self.b_g, self.b_i, self.b_f, self.b_o,
                   ]
def softmax_and_sample(logits, temperature=1.):
    """
    Symbolically sample from the softmax over the last axis of `logits`,
    sharpened by `temperature`, returning index tensors shaped like
    `logits` minus that axis.

    temperature -> +Inf flattens the distribution towards uniform;
    temperature -> 0+ concentrates it on the argmax. At exactly 0 the
    deterministic argmax is returned instead of a sample.
    """
    temperature = lib.floatX(temperature)
    zero = lib.floatX(0.)
    assert temperature >= zero, "`temperature` should be a non-negative value!"
    original_shape = logits.shape
    last_axis = logits.ndim - 1
    flat = logits.reshape((-1, logits.shape[last_axis]))
    if temperature == zero:
        # Degenerate case: keep the probabilities themselves; the argmax
        # below then picks the mode deterministically (and the
        # probabilities stay available for inspection).
        picked = T.nnet.softmax(flat)
    else:
        picked = T.cast(
            srng.multinomial(pvals=T.nnet.softmax(flat / temperature)),
            theano.config.floatX)
    picked = picked.reshape(original_shape)
    return T.argmax(picked, axis=picked.ndim - 1)
def GMM_nll(x, mus, sigmas, mix_weights):
    """
    Negative log-likelihood of `x` under a Gaussian mixture with diagonal
    covariances, computed with the log-sum-exp trick for stability.

    D is dimension of each observation (e.g. frame_size) for each component
    (multivariate Normal with diagonal covariance matrix)
    See `gaussian_nll`

    x : (batch_size, D)
    mus : (batch_size, D, num_gaussians)
    sigmas : (batch_size, D, num_gaussians)
    mix_weights : (batch_size, num_gaussians)
    """
    # Broadcast x against the per-component parameters.
    x = x.dimshuffle(0, 1, 'x')
    # Similar to `gaussian_nll`: per-dimension, per-component log-density terms.
    ll_component_wise = lib.floatX(numpy.log(2. * numpy.pi))
    ll_component_wise += 2. * T.log(sigmas)
    ll_component_wise += ((x - mus) / sigmas) ** 2.
    ll_component_wise = ll_component_wise.sum(axis=1)  # on FRAME_SIZE
    ll_component_wise *= lib.floatX(-0.5)  # LL not NLL
    # Now ready to take care of weights of each component
    # Simply applying exp could potentially cause inf/NaN.
    # Look up LogSumExp trick, Softmax in theano, or this:
    # hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
    weighted_ll = ll_component_wise + T.log(mix_weights)
    # Stable log-sum-exp over components: subtract the max, exponentiate,
    # sum, then add the max back.
    ll_max = T.max(weighted_ll, axis=1, keepdims=True)
    nll = T.log(T.sum(T.exp(weighted_ll - ll_max), axis=1, keepdims=True))
    nll += ll_max
    # Negate to turn log-likelihood into NLL (sum collapses the kept dim).
    nll = -nll.sum(axis=1)
    return nll
def softmax_and_sample(logits, temperature=1.):
    """
    Draw one multinomial sample per row of the temperature-scaled softmax
    over the last axis of `logits` and return the sampled indices.

    High temperatures make all outcomes nearly equiprobable; low
    temperatures favour the highest logit. temperature == 0 degrades to a
    deterministic argmax.
    """
    temperature = lib.floatX(temperature)
    zerox = lib.floatX(0.)
    assert temperature >= zerox, "`temperature` should be a non-negative value!"
    shape_before = logits.shape
    two_d = logits.reshape((-1, logits.shape[logits.ndim - 1]))
    if temperature != zerox:
        # Strictly positive temperature: sample from the scaled softmax.
        probs = T.nnet.softmax(two_d / temperature)
        one_hot = T.cast(srng.multinomial(pvals=probs), theano.config.floatX)
    else:
        # Zero temperature: keep the probabilities; the trailing argmax
        # yields the mode deterministically.
        one_hot = T.nnet.softmax(two_d)
    one_hot = one_hot.reshape(shape_before)
    return T.argmax(one_hot, axis=one_hot.ndim - 1)
def __init__(self, dnodex,inputdim,dim):
    """
    Build a 3-layer-LSTM user/POI sequence model and compile its
    `train` and `predict_char` theano functions.
    """
    # Symbolic inputs: POI index sequences X (inputs) and Y (targets),
    # user index Z, learning rate eta, softmax temperature.
    X=T.ivector()
    Y=T.ivector()
    Z=T.lscalar()
    eta = T.scalar()
    temperature=T.scalar()
    self.dnodex=dnodex
    num_input = inputdim
    # Shared trainable embeddings: one (inputdim x inputdim) matrix per
    # user, one inputdim-vector per POI.
    dnodex.umatrix=theano.shared(floatX(np.random.randn(*(self.dnodex.nuser,inputdim, inputdim))))
    dnodex.pmatrix=theano.shared(floatX(np.random.randn(*(self.dnodex.npoi,inputdim))))
    # L2 penalties over both embedding tensors.
    dnodex.p_l2_norm=(dnodex.pmatrix**2).sum()
    dnodex.u_l2_norm=(dnodex.umatrix**2).sum()
    num_hidden = dim
    num_output = inputdim
    # Layer stack: input -> 3 LSTMs -> user-conditioned softmax.
    inputs = InputPLayer(dnodex.pmatrix[X,:], dnodex.umatrix[Z,:,:], name="inputs")
    lstm1 = LSTMLayer(num_input, num_hidden, input_layer=inputs, name="lstm1")
    lstm2 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm1, name="lstm2")
    lstm3 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm2, name="lstm3")
    softmax = SoftmaxPLayer(num_hidden, num_output, dnodex.umatrix[Z,:,:], input_layer=lstm3, name="yhat", temperature=temperature)
    Y_hat = softmax.output()
    self.layers = inputs, lstm1,lstm2,lstm3,softmax
    params = get_params(self.layers)
    #caches = make_caches(params)
    # Cross-entropy against the user-transformed target POI embeddings,
    # plus the eta-weighted L2 penalties.
    cost = T.mean(T.nnet.categorical_crossentropy(Y_hat, T.dot(dnodex.pmatrix[Y,:],dnodex.umatrix[Z,:,:])))+eta*dnodex.p_l2_norm+eta*dnodex.u_l2_norm
    updates = PerSGD(cost,params,eta,X,Z,dnodex)#momentum(cost, params, caches, eta)
    self.train = theano.function([X,Y,Z, eta, temperature], cost, updates=updates, allow_input_downcast=True)
    # One-step prediction function (advances the layers' internal state).
    predict_updates = one_step_updates(self.layers)
    self.predict_char = theano.function([X, Z, temperature], Y_hat, updates=predict_updates, allow_input_downcast=True)
def GMM_nll(x, mus, sigmas, mix_weights):
    """
    NLL of `x` under a mixture of diagonal-covariance Gaussians, using the
    log-sum-exp trick to combine components stably.

    D is dimension of each observation (e.g. frame_size) for each component
    (multivariate Normal with diagonal covariance matrix)
    See `gaussian_nll`

    x : (batch_size, D)
    mus : (batch_size, D, num_gaussians)
    sigmas : (batch_size, D, num_gaussians)
    mix_weights : (batch_size, num_gaussians)
    """
    # Add a trailing component axis so x broadcasts against mus/sigmas.
    x = x.dimshuffle(0, 1, 'x')
    # Similar to `gaussian_nll`: per-dim, per-component quadratic terms.
    ll_component_wise = lib.floatX(numpy.log(2. * numpy.pi))
    ll_component_wise += 2. * T.log(sigmas)
    ll_component_wise += ((x - mus) / sigmas)**2.
    ll_component_wise = ll_component_wise.sum(axis=1)  # on FRAME_SIZE
    ll_component_wise *= lib.floatX(-0.5)  # LL not NLL
    # Now ready to take care of weights of each component
    # Simply applying exp could potentially cause inf/NaN.
    # Look up LogSumExp trick, Softmax in theano, or this:
    # hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
    weighted_ll = ll_component_wise + T.log(mix_weights)
    # Stable log-sum-exp across the component axis.
    ll_max = T.max(weighted_ll, axis=1, keepdims=True)
    nll = T.log(T.sum(T.exp(weighted_ll - ll_max), axis=1, keepdims=True))
    nll += ll_max
    # Negate log-likelihood; the sum collapses the kept singleton axis.
    nll = -nll.sum(axis=1)
    return nll
def Conv2D(name, input_dim, output_dim, filter_size, inputs, mask_type=None, he_init=False):
    """
    2-D convolution with 'same' padding and optional PixelCNN-style
    causal masking over space and channels.

    inputs.shape: (batch size, height, width, input_dim)
    mask_type: None, 'a', 'b'
    output.shape: (batch size, height, width, output_dim)
    """
    def uniform(stdev, size):
        """uniform distribution with the given stdev and size"""
        return numpy.random.uniform(low=-stdev * numpy.sqrt(3),
                                    high=stdev * numpy.sqrt(3),
                                    size=size).astype(theano.config.floatX)

    filters_init = uniform(
        1. / numpy.sqrt(input_dim * filter_size * filter_size),
        # output dim, input dim, height, width
        (output_dim, input_dim, filter_size, filter_size))
    # Scale up the init when using ReLU-style (He) gains; masked filters
    # are also scaled to compensate for the zeroed weights.
    if he_init:
        filters_init *= lib.floatX(numpy.sqrt(2.))
    if mask_type is not None:
        filters_init *= lib.floatX(numpy.sqrt(2.))
    filters = lib.param(name + '.Filters', filters_init)
    if mask_type is not None:
        mask = numpy.ones((output_dim, input_dim, filter_size, filter_size),
                          dtype=theano.config.floatX)
        center = filter_size // 2
        # Zero all weights strictly "after" the center pixel in raster order.
        for i in xrange(filter_size):
            for j in xrange(filter_size):
                if (j > center) or (j == center and i > center):
                    mask[:, :, j, i] = 0.
        # At the center pixel, mask channel connections: type 'a' also
        # blocks the same-channel connection, type 'b' allows it.
        for i in xrange(N_CHANNELS):
            for j in xrange(N_CHANNELS):
                if (mask_type == 'a' and i >= j) or (mask_type == 'b' and i > j):
                    mask[j::N_CHANNELS, i::N_CHANNELS, center, center] = 0.
        filters = filters * mask
    # conv2d takes inputs as (batch size, input channels, height, width)
    inputs = inputs.dimshuffle(0, 3, 1, 2)
    result = T.nnet.conv2d(inputs, filters, border_mode='half', filter_flip=False)
    biases = lib.param(name + '.Biases',
                       numpy.zeros(output_dim, dtype=theano.config.floatX))
    result = result + biases[None, :, None, None]
    # Back to channels-last layout.
    return result.dimshuffle(0, 2, 3, 1)
def Batchnorm(name, input_dim, inputs, stepwise=False, axes=None, wrt=None, i_gamma=None, i_beta=None):
    """
    Batch normalization with learned per-feature gamma/beta.

    Statistics are computed over `wrt` (defaults to `inputs`): either over
    the explicit `axes`, or — when axes is None — over all but the last
    dimension after flattening to (-1, input_dim).

    From Ishaan's repo
    """
    if wrt is None:
        wrt = inputs
    if axes is not None:
        means = wrt.mean(axis=axes, keepdims=True)
        variances = wrt.var(axis=axes, keepdims=True)
    # elif stepwise:
    #     means = wrt.mean(axis=1, keepdims=True)
    #     variances = wrt.var(axis=1, keepdims=True)
    else:
        means = wrt.reshape((-1, input_dim)).mean(axis=0)
        variances = wrt.reshape((-1, input_dim)).var(axis=0)
    # Default scale 0.1, default shift 0.
    if i_gamma is None:
        i_gamma = lib.floatX(0.1) * numpy.ones(input_dim, dtype=theano.config.floatX)
    if i_beta is None:
        i_beta = numpy.zeros(input_dim, dtype=theano.config.floatX)
    gamma = lib.param(name + '.gamma', i_gamma)
    beta = lib.param(name + '.beta', i_beta)
    # Epsilon keeps the denominator away from zero.
    stdevs = T.sqrt(variances + lib.floatX(1e-6))
    stdevs.name = name + '.stdevs'
    means.name = name + '.means'
    # return (((inputs - means) / stdevs) * gamma) + beta
    if axes is not None:
        # Broadcast the 1-D gamma/beta across all normalized axes.
        dimshuffle_pattern = [
            'x' if i in axes else 0
            for i in xrange(inputs.ndim)
        ]
        return T.nnet.bn.batch_normalization(
            inputs,
            gamma.dimshuffle(*dimshuffle_pattern),
            beta.dimshuffle(*dimshuffle_pattern),
            means,
            stdevs,
            mode='low_mem')
    else:
        return T.nnet.bn.batch_normalization(inputs,
                                             gamma.dimshuffle('x', 0),
                                             beta.dimshuffle('x', 0),
                                             means.dimshuffle('x', 0),
                                             stdevs.dimshuffle('x', 0),
                                             mode='low_mem')
def __init__(self, num_input, num_cells, input_layer=None, name=""):
    """
    LSTM Layer
    Takes as input sequence of inputs, returns sequence of outputs
    Currently takes only one input layer
    """
    self.name = name
    self.num_input = num_input
    self.num_cells = num_cells
    #Setting the X as the input layer
    self.X = input_layer.output()
    # Initial hidden and cell states, shaped (1, num_cells). Excluded from
    # `params`, so the optimizer does not train them.
    self.h0 = theano.shared(floatX(np.zeros((1, num_cells))))
    self.s0 = theano.shared(floatX(np.zeros((1, num_cells))))
    #Initializing the weights
    # Input-to-gate weights: g (candidate), i (input), f (forget), o (output).
    self.W_gx = random_weights((num_input, num_cells), name=self.name + "W_gx")
    self.W_ix = random_weights((num_input, num_cells), name=self.name + "W_ix")
    self.W_fx = random_weights((num_input, num_cells), name=self.name + "W_fx")
    self.W_ox = random_weights((num_input, num_cells), name=self.name + "W_ox")
    # Hidden-to-gate recurrent weights.
    self.W_gh = random_weights((num_cells, num_cells), name=self.name + "W_gh")
    self.W_ih = random_weights((num_cells, num_cells), name=self.name + "W_ih")
    self.W_fh = random_weights((num_cells, num_cells), name=self.name + "W_fh")
    self.W_oh = random_weights((num_cells, num_cells), name=self.name + "W_oh")
    # Gate biases.
    self.b_g = zeros(num_cells, name=self.name + "b_g")
    self.b_i = zeros(num_cells, name=self.name + "b_i")
    self.b_f = zeros(num_cells, name=self.name + "b_f")
    self.b_o = zeros(num_cells, name=self.name + "b_o")
    # Trainable parameters, consumed by the optimizer.
    self.params = [
        self.W_gx, self.W_ix, self.W_ox, self.W_fx,
        self.W_gh, self.W_ih, self.W_oh, self.W_fh,
        self.b_g, self.b_i, self.b_f, self.b_o,
    ]
    # Build the symbolic output graph eagerly.
    self.output()
def DecoderB(name, latent_dim, hidden_dim, output_dim, latents):
    """
    1-D convolutional decoder ("B" variant): three 1x1 conv+ReLU layers at
    8*hidden_dim channels, one stride-8 transposed-conv upsampling layer,
    then a linear 1x1 output projection.
    """
    # Clip latents to a sane numeric range before decoding.
    latents = T.clip(latents, lib.floatX(-50), lib.floatX(50))
    output = T.nnet.relu(
        lib.ops.conv1d.Conv1D(
            name + '.Input',
            input_dim=latent_dim,
            output_dim=8 * hidden_dim,
            filter_size=1,
            inputs=latents,
        ))
    output = T.nnet.relu(
        lib.ops.conv1d.Conv1D(
            name + '.Conv1',
            input_dim=8 * hidden_dim,
            output_dim=8 * hidden_dim,
            filter_size=1,
            inputs=output,
        ))
    output = T.nnet.relu(
        lib.ops.conv1d.Conv1D(
            name + '.Conv2',
            input_dim=8 * hidden_dim,
            output_dim=8 * hidden_dim,
            filter_size=1,
            inputs=output,
        ))
    # Upsample by 8x with a transposed convolution.
    output = T.nnet.relu(
        lib.ops.deconv1d.Deconv1D(
            # output = T.nnet.relu(lib.ops.conv1d.Conv1D(
            name + '.Conv2U',
            input_dim=8 * hidden_dim,
            output_dim=hidden_dim,
            filter_size=17,
            inputs=output,
            stride=8))
    # output = T.nnet.relu(lib.ops.deconv1d.Deconv1D(
    # # output = T.nnet.relu(lib.ops.conv1d.Conv1D(
    #     name+'.Conv3U',
    #     input_dim=2*hidden_dim,
    #     output_dim=hidden_dim,
    #     filter_size=5,
    #     inputs=output,
    # ))
    # Linear head: no nonlinearity, no He init.
    output = lib.ops.conv1d.Conv1D(name + '.Output',
                                   input_dim=hidden_dim,
                                   output_dim=output_dim,
                                   filter_size=1,
                                   inputs=output,
                                   he_init=False)
    return output
def big_frame_level_rnn(input_sequences, h0, reset):
    """
    Top (big-frame) tier of the hierarchical RNN.

    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape: (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape: ()
    output[0].shape: (batch size, n frames, DIM)
    output[1].shape: same as h0.shape
    output[2].shape: (batch size, seq len, Q_LEVELS)
    """
    learned_h0 = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX))
    # Tile the learned initial state across the batch; strip broadcastable
    # flags so ifelse sees matching tensor types.
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    # On sequence reset, start from the learned h0 instead of the carry-in.
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)
    frames = input_sequences.reshape(
        (input_sequences.shape[0], input_sequences.shape[1] / BIG_FRAME_SIZE,
         BIG_FRAME_SIZE))
    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS / 2)) - lib.floatX(1)
    frames *= lib.floatX(2)
    # Stack of N_BIG_GRUS GRU layers, each seeded with its own h0 slice.
    gru0 = lib.ops.LowMemGRU('BigFrameLevel.GRU0', BIG_FRAME_SIZE, BIG_DIM,
                             frames, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_BIG_GRUS):
        gru = lib.ops.LowMemGRU('BigFrameLevel.GRU' + str(i), BIG_DIM, BIG_DIM,
                                grus[-1], h0=h0[:, i])
        grus.append(gru)
    # Project each big-frame state to conditioning vectors for the
    # lower (small-frame) tier.
    output = lib.ops.Linear('BigFrameLevel.Output', BIG_DIM,
                            DIM * BIG_FRAME_SIZE / FRAME_SIZE, grus[-1])
    output = output.reshape(
        (output.shape[0], output.shape[1] * BIG_FRAME_SIZE / FRAME_SIZE, DIM))
    # Final hidden state of every GRU, stacked along a layer axis.
    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)
    # Per-sample logits predicted from this tier alone.
    independent_preds = lib.ops.Linear('BigFrameLevel.IndependentPreds',
                                       BIG_DIM, Q_LEVELS * BIG_FRAME_SIZE,
                                       grus[-1])
    independent_preds = independent_preds.reshape(
        (independent_preds.shape[0],
         independent_preds.shape[1] * BIG_FRAME_SIZE, Q_LEVELS))
    return (output, last_hidden, independent_preds)
def DecoderB(name, latent_dim, hidden_dim, output_dim, latents):
    """
    1-D conv decoder ("B" variant): clip latents, apply three 1x1
    conv+ReLU layers (8*hidden_dim channels), upsample 8x with a
    transposed conv, then project linearly to output_dim channels.
    """
    # Keep latents in a bounded range before decoding.
    latents = T.clip(latents, lib.floatX(-50), lib.floatX(50))
    output = T.nnet.relu(lib.ops.conv1d.Conv1D(
        name+'.Input',
        input_dim=latent_dim,
        output_dim=8*hidden_dim,
        filter_size=1,
        inputs=latents,
    ))
    output = T.nnet.relu(lib.ops.conv1d.Conv1D(
        name+'.Conv1',
        input_dim=8*hidden_dim,
        output_dim=8*hidden_dim,
        filter_size=1,
        inputs=output,
    ))
    output = T.nnet.relu(lib.ops.conv1d.Conv1D(
        name+'.Conv2',
        input_dim=8*hidden_dim,
        output_dim=8*hidden_dim,
        filter_size=1,
        inputs=output,
    ))
    # Stride-8 transposed convolution performs the upsampling.
    output = T.nnet.relu(lib.ops.deconv1d.Deconv1D(
    # output = T.nnet.relu(lib.ops.conv1d.Conv1D(
        name+'.Conv2U',
        input_dim=8*hidden_dim,
        output_dim=hidden_dim,
        filter_size=17,
        inputs=output,
        stride=8
    ))
    # output = T.nnet.relu(lib.ops.deconv1d.Deconv1D(
    # # output = T.nnet.relu(lib.ops.conv1d.Conv1D(
    #     name+'.Conv3U',
    #     input_dim=2*hidden_dim,
    #     output_dim=hidden_dim,
    #     filter_size=5,
    #     inputs=output,
    # ))
    # Linear output head (no activation, no He init).
    output = lib.ops.conv1d.Conv1D(
        name+'.Output',
        input_dim=hidden_dim,
        output_dim=output_dim,
        filter_size=1,
        inputs=output,
        he_init=False
    )
    return output
def frame_level_rnn(input_sequences, other_input, h0, reset):
    """
    Middle (frame) tier of the hierarchical RNN, conditioned on the upper
    tier via `other_input`.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames * FRAME_SIZE, DIM)
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX))
    # Tile the learned initial state across the batch; strip broadcast
    # flags so ifelse sees matching types.
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    # Use the learned h0 instead of the carried-in state on reset.
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)
    frames = input_sequences.reshape(
        (input_sequences.shape[0], input_sequences.shape[1] / FRAME_SIZE,
         FRAME_SIZE))
    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS / 2)) - lib.floatX(1)
    frames *= lib.floatX(2)
    # Add the upper-tier conditioning to the projected frame input.
    gru_input = lib.ops.Linear('FrameLevel.InputExpand', FRAME_SIZE, DIM,
                               frames) + other_input
    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', DIM, DIM, gru_input,
                             h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU' + str(i), DIM, DIM, grus[-1],
                                h0=h0[:, i])
        grus.append(gru)
    # Expand each frame state into FRAME_SIZE per-sample vectors.
    output = lib.ops.Linear('FrameLevel.Output', DIM, FRAME_SIZE * DIM,
                            grus[-1], initialization='he')
    output = output.reshape(
        (output.shape[0], output.shape[1] * FRAME_SIZE, DIM))
    # Final hidden state of every GRU layer.
    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)
    return (output, last_hidden)
def big_frame_level_rnn(input_sequences, h0, reset):
    """
    Top (big-frame) tier: GRU stack over big frames producing conditioning
    for the lower tier plus tier-local logits.

    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape: (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape: ()
    output[0].shape: (batch size, n frames, DIM)
    output[1].shape: same as h0.shape
    output[2].shape: (batch size, seq len, Q_LEVELS)
    """
    learned_h0 = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX)
    )
    # Broadcast the learned initial state over the batch; remove
    # broadcastable flags so both ifelse branches have the same type.
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] / BIG_FRAME_SIZE,
        BIG_FRAME_SIZE
    ))
    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)
    # GRU stack; each layer gets its own slice of h0.
    gru0 = lib.ops.LowMemGRU('BigFrameLevel.GRU0', BIG_FRAME_SIZE, BIG_DIM, frames, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_BIG_GRUS):
        gru = lib.ops.LowMemGRU('BigFrameLevel.GRU'+str(i), BIG_DIM, BIG_DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)
    # Conditioning vectors for the small-frame tier.
    output = lib.ops.Linear(
        'BigFrameLevel.Output',
        BIG_DIM,
        DIM * BIG_FRAME_SIZE / FRAME_SIZE,
        grus[-1]
    )
    output = output.reshape((output.shape[0], output.shape[1] * BIG_FRAME_SIZE / FRAME_SIZE, DIM))
    # Final hidden state of each layer, stacked: (batch, N_BIG_GRUS, BIG_DIM).
    last_hidden = T.stack([gru[:,-1] for gru in grus], axis=1)
    # Logits predicted from this tier alone (no lower-tier information).
    independent_preds = lib.ops.Linear(
        'BigFrameLevel.IndependentPreds',
        BIG_DIM,
        Q_LEVELS * BIG_FRAME_SIZE,
        grus[-1]
    )
    independent_preds = independent_preds.reshape((independent_preds.shape[0], independent_preds.shape[1] * BIG_FRAME_SIZE, Q_LEVELS))
    return (output, last_hidden, independent_preds)
def D5(latents):
    """
    Level-5 decoder: clip the latents, run them through a 5-layer MLP, and
    reshape the flat output into (batch, 2*LATENT_DIM, 4, 4) feature maps.
    """
    clipped = T.clip(latents, lib.floatX(-50), lib.floatX(50))
    flat_out = lib.ops.mlp.MLP(
        'D5',
        LATENT_DIM,
        L5_FC_DIM,
        4 * 4 * 2 * LATENT_DIM,
        5,
        clipped
    )
    return flat_out.reshape((-1, 2 * LATENT_DIM, 4, 4))
def Enc2(latents):
    """
    Second-stage encoder: map first-stage latent feature maps to the
    concatenated (mu, log-sigma) parameters of the level-2 latents,
    shape (batch, 2*LATENT_DIM_2).
    """
    h = T.clip(latents, lib.floatX(-50), lib.floatX(50))
    # Convolutional trunk; the second layer downsamples by 2.
    h = T.nnet.relu(lib.ops.conv2d.Conv2D(
        'Enc2.1', input_dim=LATENT_DIM_1, output_dim=DIM_3,
        filter_size=3, inputs=h))
    h = T.nnet.relu(lib.ops.conv2d.Conv2D(
        'Enc2.2', input_dim=DIM_3, output_dim=DIM_4,
        filter_size=3, inputs=h, stride=2))
    h = T.nnet.relu(lib.ops.conv2d.Conv2D(
        'Enc2.3', input_dim=DIM_4, output_dim=DIM_4,
        filter_size=3, inputs=h))
    h = T.nnet.relu(lib.ops.conv2d.Conv2D(
        'Enc2.4', input_dim=DIM_4, output_dim=DIM_4,
        filter_size=3, inputs=h))
    # Flatten the 4x4 feature maps and run the fully-connected head.
    h = h.reshape((h.shape[0], 4 * 4 * DIM_4))
    h = T.nnet.relu(lib.ops.linear.Linear(
        'Enc2.5', input_dim=4 * 4 * DIM_4, output_dim=DIM_5,
        initialization='glorot_he', inputs=h))
    h = T.nnet.relu(lib.ops.linear.Linear(
        'Enc2.6', input_dim=DIM_5, output_dim=DIM_5,
        initialization='glorot_he', inputs=h))
    # Linear output layer: mu and log-sigma, concatenated.
    return lib.ops.linear.Linear(
        'Enc2.7', input_dim=DIM_5, output_dim=2 * LATENT_DIM_2, inputs=h)
def __init__(self, dnodex,inputdim,dim):
    """
    Single-LSTM user/POI model with an additional pairwise
    (positive vs. negative POI) embedding objective; compiles the
    `train`, `trainpos`, `trainneg` and `predict_pfp` theano functions.
    """
    # Symbolic inputs: POI sequences X (input) / Y (target), user Z,
    # negative-sample POIs NP, regularization weight lambd, learning
    # rate eta, softmax temperature.
    X=T.ivector()
    Y=T.ivector()
    Z=T.lscalar()
    NP=T.ivector()
    lambd = T.scalar()
    eta = T.scalar()
    temperature=T.scalar()
    num_input = inputdim
    # Shared trainable embeddings: per-user matrices and per-POI vectors.
    self.umatrix=theano.shared(floatX(np.random.rand(dnodex.nuser,inputdim, inputdim)))
    self.pmatrix=theano.shared(floatX(np.random.rand(dnodex.npoi,inputdim)))
    # L2 penalties over both embeddings.
    self.p_l2_norm=(self.pmatrix**2).sum()
    self.u_l2_norm=(self.umatrix**2).sum()
    num_hidden = dim
    num_output = inputdim
    inputs = InputPLayer(self.pmatrix[X,:], self.umatrix[Z,:,:], name="inputs")
    lstm1 = LSTMLayer(num_input, num_hidden, input_layer=inputs, name="lstm1")
    #lstm2 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm1, name="lstm2")
    #lstm3 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm2, name="lstm3")
    softmax = SoftmaxPLayer(num_hidden, num_output, self.umatrix[Z,:,:], input_layer=lstm1, name="yhat", temperature=temperature)
    Y_hat = softmax.output()
    self.layers = inputs, lstm1,softmax
    params = get_params(self.layers)
    #caches = make_caches(params)
    # Pairwise terms: user preference vector tmp_u, its inner products
    # with (positive - negative) POI differences, and the sigmoid-derivative
    # weighting pfp_loss = s*(1-s) used in the manual updates below.
    tmp_u=T.mean(T.dot(self.pmatrix[X,:],self.umatrix[Z,:,:]),axis=0)
    tr=T.dot(tmp_u,(self.pmatrix[X,:]-self.pmatrix[NP,:]).transpose())
    pfp_loss1=sigmoid(tr)
    pfp_loss=pfp_loss1*(T.ones_like(pfp_loss1)-pfp_loss1)
    # Tile tmp_u and pfp_loss to (len(X), inputdim) for row-wise updates.
    tmp_u1=T.reshape(T.repeat(tmp_u,X.shape[0]),(inputdim,X.shape[0])).T
    pfp_lossv=T.reshape(T.repeat(pfp_loss,inputdim),(inputdim,X.shape[0])).T
    # Sequence cross-entropy (scaled) plus L2 regularization.
    cost = lambd*10*T.mean(T.nnet.categorical_crossentropy(Y_hat, T.dot(self.pmatrix[Y,:],self.umatrix[Z,:,:])))+lambd*self.p_l2_norm+lambd*self.u_l2_norm
    # updates = PerSGD(cost,params,eta,X,Z,dnodex)#momentum(cost, params, caches, eta)
    # Manual SGD: sparse row/slice updates for the embeddings
    # (grads[0] and grads[1] — NOTE(review): assumes this ordering of
    # `params`; verify against get_params), dense updates for the rest.
    updates = []
    grads = T.grad(cost=cost, wrt=params)
    updates.append([self.pmatrix,T.set_subtensor(self.pmatrix[X,:],self.pmatrix[X,:]-eta*grads[0])])
    updates.append([self.umatrix,T.set_subtensor(self.umatrix[Z,:,:],self.umatrix[Z,:,:]-eta*grads[1])])
    for p,g in zip(params[2:], grads[2:]):
        updates.append([p, p - eta * g])
    # Ranking of all POIs by affinity to the user vector (descending).
    rlist=T.argsort(T.dot(tmp_u,self.pmatrix.T))[::-1]
    # Negative-sample update: move NP rows against the user direction.
    n_updates=[(self.pmatrix,
                T.set_subtensor(self.pmatrix[NP,:],self.pmatrix[NP,:]-eta*pfp_lossv*tmp_u1-eta*lambd*self.pmatrix[NP,:]))]
    # Positive-sample updates: move X rows and the user matrix towards
    # each other, with weight-decay terms.
    p_updates=[(self.pmatrix, T.set_subtensor(self.pmatrix[X,:],self.pmatrix[X,:]+eta*pfp_lossv*tmp_u1-eta*lambd*self.pmatrix[X,:])),(self.umatrix, T.set_subtensor(self.umatrix[Z,:,:],self.umatrix[Z,:,:]+eta*T.mean(pfp_loss)*(T.reshape(tmp_u,(tmp_u.shape[0],1))*T.mean(self.pmatrix[X,:]-self.pmatrix[NP,:],axis=0)))-eta*lambd*self.umatrix[Z,:,:])]
    self.train = theano.function([X,Y,Z, eta, lambd, temperature], cost, updates=updates, allow_input_downcast=True)
    self.trainpos=theano.function([X,NP,Z,eta, lambd],tmp_u, updates=p_updates,allow_input_downcast=True)
    self.trainneg=theano.function([X,NP,Z,eta, lambd],T.mean(pfp_loss), updates=n_updates,allow_input_downcast=True)
    self.predict_pfp = theano.function([X,Z], rlist, allow_input_downcast=True)
def __init__(self, dnodex,inputdim, name=""):
    """
    Compile pairwise (positive / negative POI) SGD update functions over
    shared user and POI embedding matrices stored on `dnodex`.
    """
    # Symbolic scalars: positive POI id, negative POI id, user id,
    # learning rate, and a precomputed pairwise loss weight.
    pos_p=T.lscalar()
    neg_poi=T.lscalar()
    user=T.lscalar()
    eta=T.scalar()
    pfp_loss=T.scalar()
    # Create the shared embeddings lazily, once per dnodex.
    if dnodex.pmatrix is None:
        dnodex.umatrix=theano.shared(floatX(np.random.randn(*(dnodex.nuser, inputdim))))
        dnodex.pmatrix=theano.shared(floatX(np.random.randn(*(dnodex.npoi,inputdim))))
    # Negative update: step the negative POI row against the user vector,
    # with an eta^2-scaled decay term.
    n_updates=[(dnodex.pmatrix, T.set_subtensor(dnodex.pmatrix[neg_poi,:],dnodex.pmatrix[neg_poi,:]-eta*pfp_loss*dnodex.umatrix[user,:]-eta*eta*dnodex.pmatrix[neg_poi,:]))]
    # Positive update: step the positive POI row towards the user vector
    # and the user row towards (positive - negative) POI difference.
    p_updates=[(dnodex.pmatrix, T.set_subtensor(dnodex.pmatrix[pos_p,:],dnodex.pmatrix[pos_p,:]+eta*pfp_loss*dnodex.umatrix[user,:]-eta*eta*dnodex.pmatrix[pos_p,:])),(dnodex.umatrix, T.set_subtensor(dnodex.umatrix[user,:],dnodex.umatrix[user,:]+eta*pfp_loss*(dnodex.pmatrix[pos_p,:]-dnodex.pmatrix[neg_poi,:])-eta*eta*dnodex.umatrix[user,:]))]
    self.trainpos=theano.function([pos_p,neg_poi,user,eta,pfp_loss],updates=p_updates,allow_input_downcast=True)
    self.trainneg=theano.function([neg_poi,user,eta,pfp_loss],updates=n_updates,allow_input_downcast=True)
def split_output(output, sampling_bias=0.):
    """
    Split a GMM network output along its last axis into the mixture
    parameters (mu, sig, weights).

    The last axis is laid out as [mus | log-sigmas | mixture logits] with
    widths OUTPUT_DIM*K_GMM, OUTPUT_DIM*K_GMM and K_GMM. A positive
    `sampling_bias` shrinks the sigmas and sharpens the mixture weights.
    """
    mus = T.clip(output[:, :, :OUTPUT_DIM * K_GMM],
                 lib.floatX(-6.), lib.floatX(6.))
    log_sigmas = output[:, :, OUTPUT_DIM * K_GMM:2 * OUTPUT_DIM * K_GMM]
    # EPS keeps sigmas strictly positive.
    sigmas = T.exp(log_sigmas - sampling_bias) + lib.floatX(EPS)
    raw_weights = output[:, :, -K_GMM:]
    sharpened = raw_weights.reshape((-1, K_GMM)) * (1. + sampling_bias)
    weights = T.nnet.softmax(sharpened).reshape(raw_weights.shape) + lib.floatX(EPS)
    return mus, sigmas, weights
def frame_level_rnn(input_sequences, other_input, h0, reset):
    """
    Middle (frame) tier: GRU stack over frames, conditioned on the upper
    tier via `other_input`.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames * FRAME_SIZE, DIM)
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    # Tile the learned initial state across the batch; strip broadcast
    # flags so both ifelse branches match in type.
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] / FRAME_SIZE,
        FRAME_SIZE
    ))
    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)
    # Project frames to DIM and add the upper-tier conditioning.
    gru_input = lib.ops.Linear('FrameLevel.InputExpand', FRAME_SIZE, DIM, frames) + other_input
    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', DIM, DIM, gru_input, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)
    # Expand each frame state into FRAME_SIZE per-sample vectors.
    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        grus[-1],
        initialization='he'
    )
    output = output.reshape((output.shape[0], output.shape[1] * FRAME_SIZE, DIM))
    # Final hidden state of every GRU layer, stacked along a layer axis.
    last_hidden = T.stack([gru[:,-1] for gru in grus], axis=1)
    return (output, last_hidden)
def Encoder(speech, h0, mask, name="", ldim=None):
    """
    Infer a single diagonal-Gaussian latent from a vocoder-feature
    sequence: stacked GRU/LSTM over time, masked mean-pool, then two
    Linear layers producing interleaved mu / log_sigma columns.

    Returns (mu, log_sigma, last_hidden) with mu and log_sigma of shape
    (batch, ldim). `ldim` defaults to LATENT_DIM.

    NOTE(review): the original docstring mentioned a bidirectional GRU and
    dilated convolutions; the code builds a stacked (uni-directional API)
    RNN plus linear layers — confirm which description is current.

    Fix: `ldim == None` replaced with the idiomatic identity test
    `ldim is None` (behavior unchanged).
    """
    if ldim is None:
        ldim = LATENT_DIM
    enc_name = "Encoder.{}".format(name)
    if RNN_TYPE == 'GRU':
        rnns_out, last_hidden = lib.ops.stackedGRU('{}.GRU'.format(enc_name),
                                                   N_RNN,
                                                   VOCODER_DIM,
                                                   DIM,
                                                   speech,
                                                   h0=h0,
                                                   weightnorm=WEIGHT_NORM,
                                                   skip_conn=False)
    elif RNN_TYPE == 'LSTM':
        rnns_out, last_hidden = lib.ops.stackedLSTM('{}.LSTM'.format(enc_name),
                                                    N_RNN,
                                                    VOCODER_DIM,
                                                    DIM,
                                                    speech,
                                                    h0=h0,
                                                    weightnorm=WEIGHT_NORM,
                                                    skip_conn=False)
    # Masked mean-pool over the time axis; EPS guards against empty masks.
    rnns_out = rnns_out*mask[:,:,None]
    rnns_out = rnns_out.sum(axis = 1)/(mask.sum(axis = 1)[:,None] + lib.floatX(EPS))
    output1 = T.nnet.relu(rnns_out)
    output2 = lib.ops.Linear(
        '{}.Output2'.format(enc_name),
        DIM,
        DIM,
        output1,
        weightnorm=WEIGHT_NORM
    )
    output3 = T.nnet.relu(output2)
    output4 = lib.ops.Linear(
        '{}.Output4'.format(enc_name),
        DIM,
        2*ldim,
        output3,
        initialization='he',
        weightnorm=WEIGHT_NORM
    )
    # Even columns are mu, odd columns are log_sigma.
    mu = output4[:,::2]
    log_sigma = output4[:,1::2]
    return mu, log_sigma, last_hidden
def GRUStep(name, input_dim, hidden_dim, current_input, last_hidden):
    """
    One GRU time-step: combine `current_input` (batch, input_dim) with
    `last_hidden` (batch, hidden_dim) and return the next hidden state.
    """
    # Single input projection covers all three gate blocks,
    # laid out as [update | reset | candidate].
    x_proj = lib.ops.linear.Linear(name + '.Input', input_dim,
                                   3 * hidden_dim, current_input)
    recurrent_gates = lib.ops.linear.Linear(name + '.Recurrent_Gates',
                                            hidden_dim,
                                            2 * hidden_dim,
                                            last_hidden,
                                            biases=False)
    gates = T.nnet.sigmoid(recurrent_gates + x_proj[:, :2 * hidden_dim])
    update_gate = gates[:, :hidden_dim]
    reset_gate = gates[:, hidden_dim:]
    # Reset gate scales the previous state before the candidate transform.
    gated_hidden = reset_gate * last_hidden
    recurrent_candidate = lib.ops.linear.Linear(name + '.Recurrent_Candidate',
                                                hidden_dim,
                                                hidden_dim,
                                                gated_hidden,
                                                biases=False,
                                                initialization='orthogonal')
    candidate = T.tanh(recurrent_candidate + x_proj[:, 2 * hidden_dim:])
    one = lib.floatX(1.0)
    # Convex combination of candidate and previous hidden state.
    return (update_gate * candidate) + ((one - update_gate) * last_hidden)
def gaussian_nll(x, mu, log_sigma):
    """
    Element-wise negative log-likelihood of `x` under a diagonal Gaussian
    N(mu, exp(log_sigma)^2):

        NLL = 0.5*log(2*pi) + log_sigma + (x - mu)^2 / (2*sigma^2)

    Fix: the log-determinant term was `2*log_sigma` (i.e. log(sigma^2)),
    which double-counts it against the 0.5-scaled constant; the correct
    per-dimension term is `log_sigma`. Compare the multivariate
    `gaussian_nll(x, mus, sigmas)` in this file, which computes
    0.5 * (log(2*pi) + 2*log(sigma) + ...).
    """
    sigma_squared = T.exp(2*log_sigma)
    return (
        lib.floatX(0.5*numpy.log(2*numpy.pi)) +
        log_sigma +
        (
            ((x-mu)**2) / (2*sigma_squared)
        )
    )
def create_wavenet_block(inp, num_dilation_layer, input_dim, output_dim, name=None):
    """
    Stack `num_dilation_layer` gated dilated 1-D convolutions (dilation
    2**i at layer i) and return (layer_out, skip_out), where skip_out is
    the sum of per-layer skip contributions, each scaled by a learned
    weight and cropped so all contributions align in time with the last
    (shortest) layer's output.
    """
    assert name is not None
    layer_out = inp
    skip_contrib = []
    # One learned scalar per layer's skip contribution.
    skip_weights = lib.param(name + ".parametrized_weights",
                             lib.floatX(numpy.ones((num_dilation_layer, ))))
    for i in range(num_dilation_layer):
        layer_out, skip_c = lib.ops.dil_conv_1D(
            layer_out,
            output_dim,
            input_dim if i == 0 else output_dim,
            2,
            dilation=2**i,
            non_linearity='gated',
            name=name + ".dilation_{}".format(i + 1))
        skip_c = skip_c * skip_weights[i]
        skip_contrib.append(skip_c)
    skip_out = skip_contrib[-1]
    j = 0
    # Earlier layers' skip outputs are longer; drop their first j samples
    # (j grows by each deeper layer's receptive-field increment) so the
    # time axes line up before summing.
    for i in range(num_dilation_layer - 1):
        j += 2**(num_dilation_layer - i - 1)
        skip_out = skip_out + skip_contrib[num_dilation_layer - 2 - i][:, j:]
    return layer_out, skip_out
def load_model_params_dumb(self, filename):
    """
    Load pickled parameters from a gzipped checkpoint and copy them into
    this network's layer parameters, after asserting the saved
    architecture matches this one.

    Fixes: the file handle was never closed (now a `with` block), and the
    scalar-vs-sequence probe used a bare `except:` (now `except TypeError:`
    so unrelated errors are no longer swallowed).
    """
    with gzip.open(filename, 'rb') as f:
        to_load = cPickle.load(f)
    assert (to_load['num_input'] == self.num_input)
    assert (to_load['num_output'] == self.num_output)
    saved_nb_hidden = to_load['num_hidden']
    try:
        len(saved_nb_hidden)
    except TypeError:
        # Saved as a single scalar: every hidden layer must have that size.
        assert (np.all([saved_nb_hidden == h for h in self.num_hidden]))
    else:
        # Saved as a sequence: sizes must match layer by layer.
        assert (len(saved_nb_hidden) == len(self.num_hidden))
        assert (np.all([
            h1 == h2 for h1, h2 in zip(saved_nb_hidden, self.num_hidden)
        ]))
    # Older checkpoints may lack these keys; only check when present.
    if 'clip_at' in to_load:
        assert (to_load['clip_at'] == self.clip_at)
    if 'scale_norm' in to_load:
        assert (to_load['scale_norm'] == self.scale_norm)
    # Copy each saved array into the matching shared parameter by name.
    for l in self.layers:
        for p in l.get_params():
            p.set_value(floatX(to_load[p.name]))
def step(current_processed_input, last_hidden):
    # One GRU recurrence step (intended for scan over time).
    # `name` and `hidden_dim` are closed over from the enclosing scope;
    # `current_processed_input` is the precomputed input projection of
    # width 3*hidden_dim, laid out as [update | reset | candidate].
    gates = T.nnet.sigmoid(
        lib.ops.linear.Linear(
            name+'.Recurrent_Gates',
            hidden_dim,
            2 * hidden_dim,
            last_hidden,
            biases=False
        ) + current_processed_input[:, :2*hidden_dim]
    )
    update = gates[:, :hidden_dim]
    reset = gates[:, hidden_dim:]
    # Reset gate scales the previous state before the candidate transform.
    scaled_hidden = reset * last_hidden
    candidate = T.tanh(
        lib.ops.linear.Linear(
            name+'.Recurrent_Candidate',
            hidden_dim,
            hidden_dim,
            scaled_hidden,
            biases=False,
            initialization='orthogonal'
        ) + current_processed_input[:, 2*hidden_dim:]
    )
    one = lib.floatX(1.0)
    # Convex combination of candidate and previous hidden state.
    return (update * candidate) + ((one - update) * last_hidden)
def recurrent_fn(x_t, h_tm1, name, input_dim, hidden_dim, W1, b1, W2, b2): A1 = T.nnet.sigmoid( BatchNorm(name + ".Inp2Hid", T.dot(x_t, W1[:input_dim]), 2 * hidden_dim, layer='recurrent') + BatchNorm(name + ".Hid2Hid", T.dot(h_tm1, W1[input_dim:]), 2 * hidden_dim, layer='recurrent') + b1) #A1 = T.nnet.sigmoid(T.dot(T.concatenate((x_t,h_tm1),axis=1),W1) + b1) z = A1[:, :hidden_dim] r = A1[:, hidden_dim:] scaled_hidden = r * h_tm1 h = T.tanh( BatchNorm(name + ".Candidate", T.dot(T.concatenate((scaled_hidden, x_t), axis=1), W2), hidden_dim, layer='recurrent') + b2) # h = T.tanh(T.dot(T.concatenate((scaled_hidden,x_t),axis=1),W2)+b2) one = lib.floatX(1.0) return ((z * h) + ((one - z) * h_tm1)).astype('float32')
def clamp_logsig(logsig):
    """
    Clamp log-sigma from below with a warm-up schedule: negative values
    are leaky-scaled by beta (which anneals from 0 to 1 over BETA_ITERS
    training iterations) and the result is floored at -3.
    """
    warmup = T.cast(total_iters, theano.config.floatX) / lib.floatX(BETA_ITERS)
    beta = T.minimum(1, warmup)
    leaky = T.nnet.relu(logsig, alpha=beta)
    return T.maximum(-3, leaky)
def create_wavenet_block(inp, num_dilation_layer, input_dim, output_dim, name =None):
    """
    Build a stack of gated dilated 1-D convolutions (dilation doubling per
    layer). Returns (layer_out, skip_out): the last layer's output and the
    sum of learned-weighted skip contributions, time-cropped so every
    contribution aligns with the shortest (deepest) one.
    """
    assert name is not None
    layer_out = inp
    skip_contrib = []
    # One learned scalar weight per layer's skip contribution.
    skip_weights = lib.param(name+".parametrized_weights", lib.floatX(numpy.ones((num_dilation_layer,))))
    for i in range(num_dilation_layer):
        layer_out, skip_c = lib.ops.dil_conv_1D(
            layer_out,
            output_dim,
            input_dim if i == 0 else output_dim,
            2,
            dilation = 2**i,
            non_linearity = 'gated',
            name = name+".dilation_{}".format(i+1)
        )
        skip_c = skip_c*skip_weights[i]
        skip_contrib.append(skip_c)
    skip_out = skip_contrib[-1]
    j = 0
    # Earlier (shallower) skip outputs are longer; drop their first j
    # samples so the time axes line up before summing.
    for i in range(num_dilation_layer-1):
        j += 2**(num_dilation_layer-i-1)
        skip_out = skip_out + skip_contrib[num_dilation_layer-2 - i][:,j:]
    return layer_out, skip_out
def GRUStep(name, input_dim, hidden_dim, x_t, h_tm1):
    """
    Single GRU update: produce h_t from the input x_t (batch, input_dim)
    and the previous state h_tm1 (batch, hidden_dim).
    """
    # Project the input once for all three gate blocks:
    # [update | reset | candidate].
    inp = lib.ops.Dense(name + '.Input', input_dim, 3 * hidden_dim, x_t)
    gate_pre = lib.ops.Dense(name + '.Recurrent_Gates', hidden_dim,
                             2 * hidden_dim, h_tm1, bias=False)
    gates = T.nnet.sigmoid(gate_pre + inp[:, :2 * hidden_dim])
    z = gates[:, :hidden_dim]    # update gate
    r = gates[:, hidden_dim:]    # reset gate
    # Candidate state from the reset-scaled hidden state plus input slice.
    cand_pre = lib.ops.Dense(name + '.Recurrent_Candidate', hidden_dim,
                             hidden_dim, r * h_tm1, bias=False,
                             init='orthogonal')
    h_tilde = T.tanh(cand_pre + inp[:, 2 * hidden_dim:])
    one = lib.floatX(1.0)
    # Interpolate between the candidate and the previous state.
    return (z * h_tilde) + ((one - z) * h_tm1)
def GRUStep(name, input_dim, hidden_dim, current_input, last_hidden):
    # Single GRU step built from lib.ops.Linear layers. The input projection
    # (with biases) covers all three gate/candidate slices; the recurrent
    # projections are bias-free so biases are not double counted.
    processed_input = lib.ops.Linear(
        name+'.Input',
        input_dim,
        3 * hidden_dim,
        current_input
    )
    gates = T.nnet.sigmoid(
        lib.ops.Linear(
            name+'.Recurrent_Gates',
            hidden_dim,
            2 * hidden_dim,
            last_hidden,
            biases=False
        ) + processed_input[:, :2*hidden_dim]
    )
    update = gates[:, :hidden_dim]
    reset = gates[:, hidden_dim:]
    scaled_hidden = reset * last_hidden
    candidate = T.tanh(
        lib.ops.Linear(
            name+'.Recurrent_Candidate',
            hidden_dim,
            hidden_dim,
            scaled_hidden,
            biases=False
        ) + processed_input[:, 2*hidden_dim:]
    )
    one = lib.floatX(1.0)
    # Interpolate between candidate and previous hidden state.
    return (update * candidate) + ((one - update) * last_hidden)
def frame_level_rnn(input_sequences, h0, reset):
    """
    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames * FRAME_SIZE, DIM)
    """
    if N_GRUS != 3:
        raise Exception('N_GRUS must be 3, at least for now')
    # Learned initial hidden state, tiled over the batch; replaces the
    # carried-over h0 whenever `reset` is true.
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX))
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)
    frames = input_sequences.reshape(
        (input_sequences.shape[0],
         input_sequences.shape[1] / FRAME_SIZE,
         FRAME_SIZE))
    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS / 2)) - lib.floatX(1)
    frames *= lib.floatX(2)
    gru1 = lib.ops.LowMemGRU('FrameLevel.GRU1', FRAME_SIZE, DIM, frames,
                             h0=h0[:, 0])
    gru2 = lib.ops.LowMemGRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.LowMemGRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])
    # Expand each per-frame output into FRAME_SIZE per-sample DIM vectors.
    output = lib.ops.Linear('FrameLevel.Output', DIM, FRAME_SIZE * DIM, gru3,
                            initialization='he')
    output = output.reshape(
        (output.shape[0], output.shape[1] * FRAME_SIZE, DIM))
    # Final hidden state of every GRU, carried into the next chunk.
    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)
    return (output, last_hidden)
def gaussian_nll(x, mus, sigmas):
    """Negative log-likelihood of `x` under a diagonal-covariance Gaussian.

    See wikipedia.org/wiki/Multivariate_normal_distribution#Likelihood_function
    with Sigma = diag(s_1^2, ..., s_n^2).

    x, mus and sigmas must share a shape; sigmas must be strictly positive.
    The last dimension is summed out, so the result drops that axis.
    """
    standardized = (x - mus) / sigmas
    per_dim = (lib.floatX(numpy.log(2. * numpy.pi))
               + 2. * T.log(sigmas)
               + standardized ** 2.)
    return lib.floatX(0.5) * per_dim.sum(axis=-1)
def gaussian_nll(x, mus, sigmas):
    """
    NLL for Multivariate Normal with diagonal covariance matrix
    See:
        wikipedia.org/wiki/Multivariate_normal_distribution#Likelihood_function
    where \Sigma = diag(s_1^2,..., s_n^2).

    x, mus, sigmas all should have the same shape.
    sigmas (s_1,..., s_n) should be strictly positive.
    Results in output shape of similar but without the last dimension.
    """
    # Constant term: log(2*pi) per dimension.
    nll = lib.floatX(numpy.log(2. * numpy.pi))
    # log|Sigma| term: 2*log(s_i), element-wise.
    nll += 2. * T.log(sigmas)
    # Mahalanobis term for a diagonal covariance.
    nll += ((x - mus) / sigmas)**2.
    nll = nll.sum(axis=-1)
    nll *= lib.floatX(0.5)
    return nll
def DilatedConv1D(name, input_dim, output_dim, filter_size, inputs, dilation,
                  mask_type=None, apply_biases=True):
    """
    inputs.shape: (batch size, length, input_dim)
    mask_type: None, 'a', 'b'
    output.shape: (batch size, length, output_dim)
    """
    def uniform(stdev, size):
        """uniform distribution with the given stdev and size"""
        return numpy.random.uniform(low=-stdev * numpy.sqrt(3),
                                    high=stdev * numpy.sqrt(3),
                                    size=size).astype(theano.config.floatX)

    filters_init = uniform(
        1. / numpy.sqrt(input_dim * filter_size),
        # output dim, input dim, height, width
        (output_dim, input_dim, filter_size, 1))

    if mask_type is not None:
        # Compensate for the taps that the causal mask below zeroes out.
        filters_init *= lib.floatX(numpy.sqrt(2.))

    filters = lib.param(name + '.Filters', filters_init)

    if mask_type is not None:
        # Causal mask: zero every filter tap past the center position.
        mask = numpy.ones((output_dim, input_dim, filter_size, 1),
                          dtype=theano.config.floatX)
        center = filter_size // 2
        for i in xrange(filter_size):
            if (i > center):
                mask[:, :, i, :] = 0.
                # if (mask_type=='a' and i == center):
                #     mask[:, :, center] = 0.
        filters = filters * mask

    # Treat the temporal axis as conv2d "height": (batch, length, 1, channels)
    inputs = inputs.reshape(
        (inputs.shape[0], inputs.shape[1], 1, inputs.shape[2]))
    # conv2d takes inputs as (batch size, input channels, height[?], width[?])
    inputs = inputs.dimshuffle(0, 3, 1, 2)
    result = T.nnet.conv2d(inputs, filters, border_mode='half',
                           filter_flip=False, filter_dilation=(dilation, 1))

    if apply_biases:
        biases = lib.param(name + '.Biases',
                           numpy.zeros(output_dim, dtype=theano.config.floatX))
        result = result + biases[None, :, None, None]

    # Back to (batch, length, output_dim).
    result = result.dimshuffle(0, 2, 3, 1)
    return result.reshape((result.shape[0], result.shape[1], result.shape[3]))
def __GRUStep(name, input_dim, hidden_dim, current_input, last_hidden, weightnorm=True): """ CAUTION: Not for stand-alone usage. It is defined here (instead of inside VanillaRNN function) to not clutter the code. Note: No 'Output' gate. 'Input' and 'Forget' gates coupled by an update gate z and the reset gate r is applied directly to the previous hidden state. Thus, the responsibility of the reset gate in a LSTM is really split up into both r and z. Gates: z = sigm(X_t*U^z + S_{t-1}*W^z) r = sigm(X_t*U^r + S_{t-1}*W^r) Candidate: h = tanh(X_t*U^h + (S_{t-1}.r)*W^h) S_t = (1 - z).h + z.S_{t-1} """ # x_t*(U^z, U^r, U^h) # Also contains biases processed_input = lib.ops.Linear(name + '.Input', input_dim, 3 * hidden_dim, current_input, weightnorm=weightnorm) gates = T.nnet.sigmoid( lib.ops.Linear(name + '.Recurrent_Gates', hidden_dim, 2 * hidden_dim, last_hidden, biases=False, weightnorm=weightnorm) + processed_input[:, :2 * hidden_dim]) update = gates[:, :hidden_dim] reset = gates[:, hidden_dim:] scaled_hidden = reset * last_hidden candidate = T.tanh( lib.ops.Linear(name + '.Recurrent_Candidate', hidden_dim, hidden_dim, scaled_hidden, biases=False, initialization='orthogonal', weightnorm=weightnorm) + processed_input[:, 2 * hidden_dim:]) one = lib.floatX(1.0) return (update * candidate) + ((one - update) * last_hidden)
def frame_level_rnn(input_sequences, h0, reset):
    """
    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames * FRAME_SIZE, DIM)
    """
    if N_GRUS != 3:
        raise Exception('N_GRUS must be 3, at least for now')

    # Learned initial state, broadcast over the batch; selected instead of the
    # incoming h0 when `reset` is set (start of a new sequence).
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] / FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru1 = lib.ops.LowMemGRU('FrameLevel.GRU1', FRAME_SIZE, DIM, frames,
                             h0=h0[:, 0])
    gru2 = lib.ops.LowMemGRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.LowMemGRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])

    # Each frame-level output is expanded into FRAME_SIZE sample-level vectors.
    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        gru3,
        initialization='he'
    )
    output = output.reshape((output.shape[0], output.shape[1] * FRAME_SIZE,
                             DIM))

    # Last hidden state of each GRU, to carry state across chunks.
    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)

    return (output, last_hidden)
def Enc2(latents):
    """Second-stage encoder: conv tower then an MLP head mapping latent
    feature maps to 2*LATENT_DIM_2 output statistics (no output activation)."""
    # Clip to keep downstream activations numerically sane.
    h = T.clip(latents, lib.floatX(-50), lib.floatX(50))
    h = T.nnet.relu(lib.ops.conv2d.Conv2D('Enc2.1', input_dim=LATENT_DIM_1, output_dim=DIM_3, filter_size=3, inputs=h))
    # Stride-2 conv halves the spatial resolution.
    h = T.nnet.relu(lib.ops.conv2d.Conv2D('Enc2.2', input_dim=DIM_3, output_dim=DIM_4, filter_size=3, inputs=h, stride=2))
    h = T.nnet.relu(lib.ops.conv2d.Conv2D('Enc2.3', input_dim=DIM_4, output_dim=DIM_4, filter_size=3, inputs=h))
    h = T.nnet.relu(lib.ops.conv2d.Conv2D('Enc2.4', input_dim=DIM_4, output_dim=DIM_4, filter_size=3, inputs=h))
    # Flatten the 4x4 spatial grid before the fully-connected head.
    h = h.reshape((h.shape[0], 4*4*DIM_4))
    h = T.nnet.relu(lib.ops.linear.Linear('Enc2.5', input_dim=4*4*DIM_4, output_dim=DIM_5, initialization='glorot_he', inputs=h))
    h = T.nnet.relu(lib.ops.linear.Linear('Enc2.6', input_dim=DIM_5, output_dim=DIM_5, initialization='glorot_he', inputs=h))
    # Raw (un-activated) output statistics.
    return lib.ops.linear.Linear('Enc2.7', input_dim=DIM_5, output_dim=2*LATENT_DIM_2, inputs=h)
def __init__(self, input_layer, num_input, num_cells, batch_size=8, name="",
             go_backwards=False, return_sequences=False):
    """
    LSTM Layer

    Builds the input-to-hidden and hidden-to-hidden weight matrices for the
    g (candidate), i, f and o gates with uniform Glorot-style initialization
    from a fixed-seed RNG, plus bias vectors via random_weights.

    :param input_layer: layer whose output() feeds this LSTM
    :param num_input: input dimensionality at each timestep
    :param num_cells: number of LSTM cells (hidden units)
    :param batch_size: kept for interface compatibility (not used here)
    :param go_backwards: process the sequence in reverse if True
    :param return_sequences: return the full sequence instead of the last step
    """
    self.name = name
    self.num_input = num_input
    self.num_cells = num_cells
    self.return_sequences = return_sequences
    self.X = input_layer.output()
    # Initial hidden and cell states.
    self.h0 = theano.shared(floatX(np.zeros(num_cells,)))
    self.s0 = theano.shared(floatX(np.zeros(num_cells,)))
    self.go_backwards = go_backwards

    rng = np.random.RandomState(23456)

    def _uniform_shared(shape, bound, pname):
        # One uniformly-initialized shared weight matrix named "<name> <pname>".
        # Draw order matches the original code: all x-weights first, then all
        # h-weights, so the seeded values are reproduced exactly.
        values = np.asarray(rng.uniform(low=-bound, high=bound, size=shape),
                            dtype=theano.config.floatX)
        return theano.shared(values, name=name + " " + pname, borrow=True)

    # Glorot-style bounds for input-to-hidden and hidden-to-hidden weights.
    W_bound_sx = np.sqrt(6. / (num_input + num_cells))
    self.W_gx = _uniform_shared((num_input, num_cells), W_bound_sx, "W_gx")
    self.W_ix = _uniform_shared((num_input, num_cells), W_bound_sx, "W_ix")
    self.W_fx = _uniform_shared((num_input, num_cells), W_bound_sx, "W_fx")
    self.W_ox = _uniform_shared((num_input, num_cells), W_bound_sx, "W_ox")

    W_bound_sh = np.sqrt(6. / (num_cells + num_cells))
    self.W_gh = _uniform_shared((num_cells, num_cells), W_bound_sh, "W_gh")
    self.W_ih = _uniform_shared((num_cells, num_cells), W_bound_sh, "W_ih")
    self.W_fh = _uniform_shared((num_cells, num_cells), W_bound_sh, "W_fh")
    self.W_oh = _uniform_shared((num_cells, num_cells), W_bound_sh, "W_oh")

    self.b_g = random_weights((num_cells,), name=self.name + " b_g")
    self.b_i = random_weights((num_cells,), name=self.name + " b_i")
    self.b_f = random_weights((num_cells,), name=self.name + " b_f")
    self.b_o = random_weights((num_cells,), name=self.name + " b_o")

    self.params = [self.W_gx, self.W_ix, self.W_ox, self.W_fx,
                   self.W_gh, self.W_ih, self.W_oh, self.W_fh]
    self.bias = [self.b_g, self.b_i, self.b_f, self.b_o]
def create_model(inp):
    # Scale integer inputs in [0, Q_LEVELS) to floats centred on zero.
    out = (inp.astype(theano.config.floatX)/lib.floatX(Q_LEVELS-1) - lib.floatX(0.5))
    l_out = out.dimshuffle(0,1,'x')  # add a trailing channel axis
    skips = []
    for i in range(args.wavenet_blocks):
        l_out, skip_out = create_wavenet_block(l_out, args.dilation_layers_per_block, 1 if i == 0 else args.dim, args.dim, name = "block_{}".format(i+1))
        skips.append(skip_out)
    out = skips[-1]
    # Earlier blocks' skip outputs are longer; trim their leading timesteps so
    # all skip tensors align with the last block's before summing.
    for i in range(args.wavenet_blocks - 1):
        out = out + skips[args.wavenet_blocks - 2 - i][:,(2**args.dilation_layers_per_block - 1)*(i+1):]
    # Output head: three ReLU 1x1 convs, then a linear projection to
    # per-sample logits over args.q_levels classes.
    for i in range(3):
        out = lib.ops.conv1d("out_{}".format(i+1), out, args.dim, args.dim, 1, non_linearity='relu')
    out = lib.ops.conv1d("final", out, args.dim, args.q_levels, 1, non_linearity='identity')
    return out
def __init__(self, num_input, num_cells, input_layers=None, name="",
             go_backwards=False):
    """
    LSTM Layer
    Takes as input sequence of inputs, returns sequence of outputs
    """
    self.name = name
    self.num_input = num_input
    self.num_cells = num_cells
    # Concatenate multiple input layers feature-wise; a single layer is used
    # directly. NOTE(review): the default input_layers=None would crash on
    # len() — callers appear to always pass a non-empty list.
    if len(input_layers) >= 2:
        self.X = T.concatenate([input_layer.output()
                                for input_layer in input_layers], axis=1)
    else:
        self.X = input_layers[0].output()
    self.h0 = theano.shared(floatX(np.zeros(num_cells)))  # initial hidden state
    self.s0 = theano.shared(floatX(np.zeros(num_cells)))  # initial cell state
    self.go_backwards = go_backwards
    # Input-to-hidden weights for the g (candidate), i, f, o gates.
    self.W_gx = random_weights((num_input, num_cells), name=self.name+"W_gx")
    self.W_ix = random_weights((num_input, num_cells), name=self.name+"W_ix")
    self.W_fx = random_weights((num_input, num_cells), name=self.name+"W_fx")
    self.W_ox = random_weights((num_input, num_cells), name=self.name+"W_ox")
    # Hidden-to-hidden (recurrent) weights.
    self.W_gh = random_weights((num_cells, num_cells), name=self.name+"W_gh")
    self.W_ih = random_weights((num_cells, num_cells), name=self.name+"W_ih")
    self.W_fh = random_weights((num_cells, num_cells), name=self.name+"W_fh")
    self.W_oh = random_weights((num_cells, num_cells), name=self.name+"W_oh")
    # Gate biases.
    self.b_g = zeros(num_cells, name=self.name+"b_g")
    self.b_i = zeros(num_cells, name=self.name+"b_i")
    self.b_f = zeros(num_cells, name=self.name+"b_f")
    self.b_o = zeros(num_cells, name=self.name+"b_o")
    self.params = [self.W_gx, self.W_ix, self.W_ox, self.W_fx,
                   self.W_gh, self.W_ih, self.W_oh, self.W_fh,
                   self.b_g, self.b_i, self.b_f, self.b_o,
                   ]
    # Build the computation graph eagerly.
    self.output()
def Dec1(latents, images):
    """First-stage decoder: maps latent maps to per-pixel 256-way logits.

    Returns shape (batch, N_CHANNELS, HEIGHT, WIDTH, 256). The PixelCNN
    conditioning path over `images` is commented out in this variant, so
    `images` is currently unused.
    """
    latents = T.clip(latents, lib.floatX(-50), lib.floatX(50))
    output = latents
    # output = leakyrelu(lib.ops.deconv2d.Deconv2D('Dec1.A', input_dim=LATENT_DIM_1, output_dim=DIM_2, filter_size=3, inputs=output))
    # output = leakyrelu(lib.ops.deconv2d.Deconv2D('Dec1.B', input_dim=DIM_2, output_dim=DIM_1, filter_size=3, inputs=output))
    output = leakyrelu(lib.ops.conv2d.Conv2D('Dec1.1', input_dim=LATENT_DIM_1, output_dim=DIM_3, filter_size=3, inputs=output))
    # output = leakyrelu(lib.ops.conv2d.Conv2D('Dec1.2', input_dim=DIM_3, output_dim=DIM_3, filter_size=3, inputs=output))
    # Two deconv upsampling stages interleaved with refining convs.
    output = leakyrelu(lib.ops.deconv2d.Deconv2D('Dec1.3', input_dim=DIM_3, output_dim=DIM_2, filter_size=3, inputs=output))
    output = leakyrelu(lib.ops.conv2d.Conv2D(
        'Dec1.4', input_dim=DIM_2, output_dim=DIM_2, filter_size=3,
        inputs=output))
    output = leakyrelu(lib.ops.deconv2d.Deconv2D('Dec1.5', input_dim=DIM_2, output_dim=DIM_1, filter_size=3, inputs=output))
    output = leakyrelu(lib.ops.conv2d.Conv2D(
        'Dec1.6', input_dim=DIM_1, output_dim=DIM_1, filter_size=3,
        inputs=output))
    # 1x1 masked conv to 256 logits per channel; he_init off for output layer.
    output = lib.ops.conv2d.Conv2D('Dec1.Out', input_dim=DIM_1, output_dim=256*N_CHANNELS, filter_size=1, inputs=output, mask_type=('b', N_CHANNELS), he_init=False)
    # images = ((T.cast(images, 'float32') / 128) - 1) * 5
    # masked_images = leakyrelu(lib.ops.conv2d.Conv2D(
    #     'Dec1.Pix1',
    #     input_dim=N_CHANNELS,
    #     output_dim=DIM_1,
    #     filter_size=5,
    #     inputs=images,
    #     mask_type=('a', N_CHANNELS)
    # ))
    # output = T.concatenate([masked_images, output], axis=1)
    # output = leakyrelu(lib.ops.conv2d.Conv2D('Dec1.Pix2', input_dim=2*DIM_1, output_dim=DIM_PIX_1, filter_size=5, inputs=output, mask_type=('b', N_CHANNELS)))
    # output = leakyrelu(lib.ops.conv2d.Conv2D('Dec1.Pix3', input_dim=DIM_PIX_1, output_dim=DIM_PIX_1, filter_size=5, inputs=output, mask_type=('b', N_CHANNELS)))
    # output = leakyrelu(lib.ops.conv2d.Conv2D('Dec1.Pix4', input_dim=DIM_PIX_1, output_dim=DIM_PIX_1, filter_size=5, inputs=output, mask_type=('b', N_CHANNELS)))
    # output = leakyrelu(lib.ops.conv2d.Conv2D('Dec1.Pix5', input_dim=DIM_PIX_1, output_dim=DIM_PIX_1, filter_size=1, inputs=output, mask_type=('b', N_CHANNELS)))
    # output = lib.ops.conv2d.Conv2D('Dec1.Out', input_dim=DIM_PIX_1, output_dim=256*N_CHANNELS, filter_size=1, inputs=output, mask_type=('b', N_CHANNELS), he_init=False)
    # Split the 256*N_CHANNELS axis into (256, N_CHANNELS) and move the
    # 256-way logit axis last.
    return output.reshape((-1, 256, N_CHANNELS, HEIGHT, WIDTH)).dimshuffle(0,2,3,4,1)
def recurrent_fn(x_t, h_tm1, hidden_dim, W1, b1, W2, b2):
    """Plain GRU step: W1/b1 produce the [z, r] gates, W2/b2 the candidate."""
    joint = T.concatenate((x_t, h_tm1), axis=1)
    gate_acts = T.nnet.sigmoid(T.dot(joint, W1) + b1)
    z = gate_acts[:, :hidden_dim]   # update gate
    r = gate_acts[:, hidden_dim:]   # reset gate
    candidate_in = T.concatenate((r * h_tm1, x_t), axis=1)
    h_tilde = T.tanh(T.dot(candidate_in, W2) + b2)
    new_h = z * h_tilde + (lib.floatX(1.0) - z) * h_tm1
    return new_h.astype('float32')
def recurrent_fn(x_t, h_tm1,hidden_dim,W1,b1,W2,b2):
    # GRU step: W1 yields both gates (update z, reset r) from [x_t, h_tm1].
    A1 = T.nnet.sigmoid(T.dot(T.concatenate((x_t,h_tm1),axis=1),W1) + b1)
    z = A1[:,:hidden_dim]   # update gate
    r = A1[:,hidden_dim:]   # reset gate
    scaled_hidden = r*h_tm1
    # Candidate state from reset-scaled history and current input.
    h = T.tanh(T.dot(T.concatenate((scaled_hidden,x_t),axis=1),W2)+b2)
    one = lib.floatX(1.0)
    # Blend candidate and previous state; cast keeps the dtype at float32.
    return ((z * h) + ((one - z) * h_tm1)).astype('float32')
def Prior(latents):
    """Masked-convolution (PixelCNN-style) autoregressive prior over the
    first-stage latents.

    Returns (batch, 2*LATENT_DIM_1, 8, 8); the final reshape/dimshuffle
    interleaves the two output statistics per latent dimension
    (presumably mu/log-sigma — confirm against the caller).
    """
    latents = T.clip(latents, lib.floatX(-50), lib.floatX(50))
    output = latents
    skips = []
    # First layer uses mask 'a' (excludes the current position); all later
    # layers use mask 'b'. Skip outputs collected after each pair of layers.
    output = leakyrelu(lib.ops.conv2d.Conv2D('Prior.Pix1', input_dim=LATENT_DIM_1, output_dim=DIM_PIX_2, filter_size=5, inputs=output, mask_type=('a', LATENT_BLOCKS)))
    output = leakyrelu(lib.ops.conv2d.Conv2D('Prior.Pix2', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=5, inputs=output, mask_type=('b', LATENT_BLOCKS)))
    skips.append(output)
    output = leakyrelu(lib.ops.conv2d.Conv2D('Prior.Pix3', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=5, inputs=output, mask_type=('b', LATENT_BLOCKS)))
    output = leakyrelu(lib.ops.conv2d.Conv2D('Prior.Pix4', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=5, inputs=output, mask_type=('b', LATENT_BLOCKS)))
    skips.append(output)
    output = leakyrelu(lib.ops.conv2d.Conv2D('Prior.Pix5', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=1, inputs=output, mask_type=('b', LATENT_BLOCKS)))
    output = leakyrelu(lib.ops.conv2d.Conv2D('Prior.Pix6', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=1, inputs=output, mask_type=('b', LATENT_BLOCKS)))
    skips.append(output)
    # Concatenate all skip activations channel-wise for the output layer.
    output = T.concatenate(skips, axis=1)
    output = lib.ops.conv2d.Conv2D('Prior.Out', input_dim=len(skips)*DIM_PIX_2, output_dim=2*LATENT_DIM_1, filter_size=1, inputs=output, mask_type=('b', LATENT_BLOCKS), he_init=False)
    return output.reshape((-1, 2, LATENT_DIM_1, 8, 8)).dimshuffle(0,2,1,3,4).reshape((-1, 2*LATENT_DIM_1, 8, 8))
def __init__(self, num_input, num_cells, input_layer=None, name=""):
    """
    LSTM Layer
    Takes as input sequence of inputs, returns sequence of outputs
    Currently takes only one input layer
    """
    self.name = name
    self.num_input = num_input
    self.num_cells = num_cells
    #Setting the X as the input layer
    self.X = input_layer.output()
    # Initial hidden and cell states, shaped (1, num_cells).
    self.h0 = theano.shared(floatX(np.zeros((1, num_cells))))
    self.s0 = theano.shared(floatX(np.zeros((1, num_cells))))
    #Initializing the weights
    # Input-to-hidden weights for the g (candidate), i, f, o gates.
    self.W_gx = random_weights((num_input, num_cells), name=self.name+"W_gx")
    self.W_ix = random_weights((num_input, num_cells), name=self.name+"W_ix")
    self.W_fx = random_weights((num_input, num_cells), name=self.name+"W_fx")
    self.W_ox = random_weights((num_input, num_cells), name=self.name+"W_ox")
    # Hidden-to-hidden (recurrent) weights.
    self.W_gh = random_weights((num_cells, num_cells), name=self.name+"W_gh")
    self.W_ih = random_weights((num_cells, num_cells), name=self.name+"W_ih")
    self.W_fh = random_weights((num_cells, num_cells), name=self.name+"W_fh")
    self.W_oh = random_weights((num_cells, num_cells), name=self.name+"W_oh")
    # Gate biases.
    self.b_g = zeros(num_cells, name=self.name+"b_g")
    self.b_i = zeros(num_cells, name=self.name+"b_i")
    self.b_f = zeros(num_cells, name=self.name+"b_f")
    self.b_o = zeros(num_cells, name=self.name+"b_o")
    self.params = [self.W_gx, self.W_ix, self.W_ox, self.W_fx,
                   self.W_gh, self.W_ih, self.W_oh, self.W_fh,
                   self.b_g, self.b_i, self.b_f, self.b_o,]
    # Build the computation graph eagerly.
    self.output()
def Adam(cost, params, lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8,gradClip=True,value=1.): gparams = [] iter = 1 for param in params: gparam = T.grad(cost,param) if gradClip: gparam = T.clip(gparam,lib.floatX(-value), lib.floatX(value)) gparams.append(gparam) print str(iter) + " completed" iter += 1 updates = [] for p, g in zip(params, gparams): m = theano.shared(p.get_value() * 0.) v = theano.shared(p.get_value() * 0.) m_new = beta1 * m + (1 - beta1) * g v_new = beta2 * v + (1 - beta2) * (g ** 2) gradient_scaling = T.sqrt(v_new + epsilon) updates.append((m, m_new)) updates.append((v, v_new)) updates.append((p, p - lr * m / gradient_scaling)) return updates
def Dec2(latents, targets):
    """Second-stage decoder: expands LATENT_DIM_2 latents through an MLP and
    conv tower, then combines them with a masked-convolution path over
    `targets` to produce 2*LATENT_DIM_1 output statistics per position."""
    latents = T.clip(latents, lib.floatX(-50), lib.floatX(50))
    output = latents
    # MLP head expanding the latent vector up to a 4x4 feature map.
    output = T.nnet.relu(lib.ops.linear.Linear('Dec2.1', input_dim=LATENT_DIM_2, output_dim=DIM_5, initialization='glorot_he', inputs=output))
    output = T.nnet.relu(lib.ops.linear.Linear('Dec2.2', input_dim=DIM_5, output_dim=DIM_5, initialization='glorot_he', inputs=output))
    output = T.nnet.relu(lib.ops.linear.Linear('Dec2.3', input_dim=DIM_5, output_dim=4*4*DIM_4, initialization='glorot_he', inputs=output))
    output = output.reshape((output.shape[0], DIM_4, 4, 4))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.4', input_dim=DIM_4, output_dim=DIM_4, filter_size=3, inputs=output))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.5', input_dim=DIM_4, output_dim=DIM_4, filter_size=3, inputs=output))
    # Deconv upsampling stage followed by a refining conv.
    output = T.nnet.relu(lib.ops.deconv2d.Deconv2D('Dec2.6', input_dim=DIM_4, output_dim=DIM_3, filter_size=3, inputs=output))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D(
        'Dec2.7', input_dim=DIM_3, output_dim=DIM_3, filter_size=3,
        inputs=output))
    # Autoregressive path: mask 'a' excludes the current position of `targets`.
    masked_targets = T.nnet.relu(lib.ops.conv2d.Conv2D(
        'Dec2.Pix1',
        input_dim=LATENT_DIM_1,
        output_dim=DIM_3,
        filter_size=5,
        inputs=targets,
        mask_type=('a', 1)
    ))
    output = T.concatenate([masked_targets, output], axis=1)
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.Pix3', input_dim=2*DIM_3, output_dim=DIM_PIX_2, filter_size=3, inputs=output, mask_type=('b', 1)))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.Pix4', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=3, inputs=output, mask_type=('b', 1)))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.Pix7', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=1, inputs=output, mask_type=('b', 1)))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec2.Pix8', input_dim=DIM_PIX_2, output_dim=DIM_PIX_2, filter_size=1, inputs=output, mask_type=('b', 1)))
    # Output layer: no activation, he_init disabled.
    output = lib.ops.conv2d.Conv2D('Dec2.Out', input_dim=DIM_PIX_2, output_dim=2*LATENT_DIM_1, filter_size=1, inputs=output, mask_type=('b', 1), he_init=False)
    return output
def Dec1(latents, images):
    """First-stage decoder with a PixelCNN conditioning path over `images`.

    Upsamples the latents through deconv/conv stages, fuses them with a
    masked-convolution path over the (rescaled) images, and outputs per-pixel
    256-way logits shaped (batch, N_CHANNELS, HEIGHT, WIDTH, 256).
    """
    latents = T.clip(latents, lib.floatX(-50), lib.floatX(50))
    output = latents
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec1.1', input_dim=LATENT_DIM_1, output_dim=DIM_3, filter_size=3, inputs=output))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec1.2', input_dim=DIM_3, output_dim=DIM_3, filter_size=3, inputs=output))
    # Two deconv upsampling stages interleaved with refining convs.
    output = T.nnet.relu(lib.ops.deconv2d.Deconv2D('Dec1.3', input_dim=DIM_3, output_dim=DIM_2, filter_size=3, inputs=output))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D(
        'Dec1.4', input_dim=DIM_2, output_dim=DIM_2, filter_size=3,
        inputs=output))
    output = T.nnet.relu(lib.ops.deconv2d.Deconv2D('Dec1.5', input_dim=DIM_2, output_dim=DIM_1, filter_size=3, inputs=output))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D(
        'Dec1.6', input_dim=DIM_1, output_dim=DIM_1, filter_size=3,
        inputs=output))
    # Rescale uint8-style pixel values to roughly [-5, 5].
    images = ((T.cast(images, 'float32') / 128) - 1) * 5
    # Mask 'a' excludes the current pixel so the model stays autoregressive.
    masked_images = T.nnet.relu(lib.ops.conv2d.Conv2D(
        'Dec1.Pix1',
        input_dim=N_CHANNELS,
        output_dim=DIM_1,
        filter_size=7,
        inputs=images,
        mask_type=('a', N_CHANNELS)
    ))
    # Warning! Because of the masked convolutions it's very important that
    # masked_images comes first in this concat
    output = T.concatenate([masked_images, output], axis=1)
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec1.Pix3', input_dim=2*DIM_1, output_dim=DIM_PIX_1, filter_size=3, inputs=output, mask_type=('b', N_CHANNELS)))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec1.Pix4', input_dim=DIM_PIX_1, output_dim=DIM_PIX_1, filter_size=3, inputs=output, mask_type=('b', N_CHANNELS)))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec1.Pix5', input_dim=DIM_PIX_1, output_dim=DIM_PIX_1, filter_size=1, inputs=output, mask_type=('b', N_CHANNELS)))
    output = T.nnet.relu(lib.ops.conv2d.Conv2D('Dec1.Pix6', input_dim=DIM_PIX_1, output_dim=DIM_PIX_1, filter_size=1, inputs=output, mask_type=('b', N_CHANNELS)))
    # 1x1 masked conv to 256 logits per channel; he_init off for output layer.
    output = lib.ops.conv2d.Conv2D('Dec1.Out', input_dim=DIM_PIX_1, output_dim=256*N_CHANNELS, filter_size=1, inputs=output, mask_type=('b', N_CHANNELS), he_init=False)
    # Split the 256*N_CHANNELS axis into (256, N_CHANNELS); logit axis last.
    return output.reshape((-1, 256, N_CHANNELS, HEIGHT, WIDTH)).dimshuffle(0,2,3,4,1)
def frame_level_rnn(input_sequences, h0):
    """
    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    h0.shape: (batch size, N_GRUS, DIM)
    output.shape: (batch size, n frames * FRAME_SIZE, DIM)
    """
    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] / FRAME_SIZE,
        FRAME_SIZE
    ))

    # Rescale prev_frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2.)) - lib.floatX(1)
    frames *= lib.floatX(2)

    if N_GRUS != 3:
        raise Exception('N_GRUS must be 3, at least for now')

    gru1 = lib.ops.LowMemGRU('FrameLevel.GRU1', FRAME_SIZE, DIM, frames,
                             h0=h0[:, 0])
    gru2 = lib.ops.LowMemGRU('FrameLevel.GRU2', DIM, DIM, gru1, h0=h0[:, 1])
    gru3 = lib.ops.LowMemGRU('FrameLevel.GRU3', DIM, DIM, gru2, h0=h0[:, 2])

    # Expand each per-frame output into FRAME_SIZE sample-level DIM vectors.
    output = lib.ops.Linear(
        'FrameLevel.Output',
        DIM,
        FRAME_SIZE * DIM,
        gru3,
        initialization='he'
    )
    output = output.reshape((output.shape[0], output.shape[1] * FRAME_SIZE,
                             DIM))

    # Final hidden state of each GRU, to carry state across chunks.
    last_hidden = T.stack([gru1[:, -1], gru2[:, -1], gru3[:, -1]], axis=1)

    return (output, last_hidden)
def create_model(inp):
    """Build the full WaveNet graph: input scaling, stacked wavenet blocks
    with aligned skip connections, and a 1x1-conv head producing logits over
    args.q_levels classes."""
    # Map integer inputs in [0, Q_LEVELS) to floats centred on zero.
    scaled = inp.astype(theano.config.floatX) / lib.floatX(Q_LEVELS - 1)
    scaled = scaled - lib.floatX(0.5)
    hidden = scaled.dimshuffle(0, 1, 'x')

    skips = []
    for block_idx in range(args.wavenet_blocks):
        in_dim = 1 if block_idx == 0 else args.dim
        hidden, skip = create_wavenet_block(
            hidden, args.dilation_layers_per_block, in_dim, args.dim,
            name="block_{}".format(block_idx + 1))
        skips.append(skip)

    trim = 2**args.dilation_layers_per_block - 1
    out = skips[-1]
    for k in range(args.wavenet_blocks - 1):
        # Earlier skips are longer; drop their leading samples to align.
        out = out + skips[args.wavenet_blocks - 2 - k][:, trim * (k + 1):]

    for k in range(3):
        out = lib.ops.conv1d("out_{}".format(k + 1), out, args.dim, args.dim,
                             1, non_linearity='relu')
    return lib.ops.conv1d("final", out, args.dim, args.q_levels, 1,
                          non_linearity='identity')
def rmsprop(loss_or_grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6,
            sign='-', clip=False, clip_val=1.):
    """RMSProp updates with optional clipping of the updated parameters.

    `loss_or_grads` is treated as an already-computed list of gradients.
    `sign` selects descent ('-') or ascent (anything else).
    """
    grads = loss_or_grads
    from collections import OrderedDict
    updates = OrderedDict()
    # Theano constant prevents upcasting of float32.
    one = T.constant(1)
    for param, grad in zip(params, grads):
        current = param.get_value(borrow=True)
        accu = theano.shared(np.zeros(current.shape, dtype=current.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        step = learning_rate * grad / T.sqrt(accu_new + epsilon)
        if sign == '-':
            new_param = param - step
        else:
            new_param = param + step
        if clip:
            new_param = T.clip(new_param, lib.floatX(-clip_val),
                               lib.floatX(clip_val))
        updates[param] = new_param
    return updates
def recurrent_fn_hred(x_t, h_tm1, hidden_dim, W1, b1, W2, b2): global DIM #A1 = T.nnet.sigmoid(lib.ops.BatchNorm(T.dot(T.concatenate((x_t,h_tm1),axis=1),W1),name="FrameLevel.GRU"+str(name)+".Input.",length=2*512) + b1) A1 = T.nnet.sigmoid(T.dot(T.concatenate((x_t, h_tm1), axis=1), W1) + b1) z = A1[:, :hidden_dim] r = A1[:, hidden_dim:] scaled_hidden = r * h_tm1 #h = T.tanh(lib.ops.BatchNorm(T.dot(T.concatenate((scaled_hidden,x_t),axis=1),W2),name="FrameLevel.GRU"+str(name)+".Output.",length=512)+b2) h = T.tanh(T.dot(T.concatenate((scaled_hidden, x_t), axis=1), W2) + b2) one = lib.floatX(1.0) return ((z * h) + ((one - z) * h_tm1)).astype('float32')
def recurrent_fn2(x_t, h1_tm1, h2_tm1, h3_tm1, hidden_dim, W1, b1, W2, b2,
                  W3, b3, W4, b4, W5, b5, W6, b6):
    # Three stacked GRU steps: layer 1 consumes x_t, each later layer consumes
    # the new state of the layer below. (W1,b1)/(W2,b2) are the gate/candidate
    # parameters of layer 1, and so on.
    A1 = T.nnet.sigmoid(T.dot(T.concatenate((x_t, h1_tm1), axis=1), W1) + b1)
    z1 = A1[:, :hidden_dim]   # update gate, layer 1
    r1 = A1[:, hidden_dim:]   # reset gate, layer 1
    scaled_hidden1 = r1 * h1_tm1
    h1 = T.tanh(T.dot(T.concatenate((scaled_hidden1, x_t), axis=1), W2) + b2)
    one = lib.floatX(1.0)
    h1_t = (z1 * h1) + ((one - z1) * h1_tm1)
    ######################################################################
    A2 = T.nnet.sigmoid(T.dot(T.concatenate((h1_t, h2_tm1), axis=1), W3) + b3)
    z2 = A2[:, :hidden_dim]
    r2 = A2[:, hidden_dim:]
    scaled_hidden2 = r2 * h2_tm1
    h2 = T.tanh(T.dot(T.concatenate((scaled_hidden2, h1_t), axis=1), W4) + b4)
    h2_t = (z2 * h2) + ((one - z2) * h2_tm1)
    ########################################################################
    A3 = T.nnet.sigmoid(T.dot(T.concatenate((h2_t, h3_tm1), axis=1), W5) + b5)
    z3 = A3[:, :hidden_dim]
    r3 = A3[:, hidden_dim:]
    scaled_hidden3 = r3 * h3_tm1
    h3 = T.tanh(T.dot(T.concatenate((scaled_hidden3, h2_t), axis=1), W6) + b6)
    h3_t = (z3 * h3) + ((one - z3) * h3_tm1)
    return h1_t, h2_t, h3_t
def load_model_params(self, filename):
    """Load gzip-pickled parameters from `filename` into this model.

    Validates that the saved architecture (input/output sizes and hidden
    layer sizes) matches the current model before assigning values.

    Fixes vs. the previous version: the bare `except:` (which swallowed any
    error, including KeyboardInterrupt) is narrowed to the TypeError that
    len() raises on a scalar, and the file handle is now always closed.
    """
    f = gzip.open(filename, 'rb')
    try:
        to_load = cPickle.load(f)
    finally:
        f.close()
    assert(to_load['num_input'] == self.num_input)
    assert(to_load['num_output'] == self.num_output)
    saved_nb_hidden = to_load['num_hidden']
    try:
        len(saved_nb_hidden)
    except TypeError:
        # Scalar saved size: every current hidden layer must match it.
        assert(np.all([saved_nb_hidden == h for h in self.num_hidden]))
    else:
        # Per-layer sizes: lengths and every entry must match.
        assert(len(saved_nb_hidden) == len(self.num_hidden))
        assert(np.all([h1 == h2 for h1, h2
                       in zip(saved_nb_hidden, self.num_hidden)]))
    # Restore each parameter by its stored name.
    for l in self.layers:
        for p in l.get_params():
            p.set_value(floatX(to_load[p.name]))
def recurrent_fn2(x_t, h1_tm1, h2_tm1, h3_tm1, hidden_dim,
                  W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6):
    """Three stacked GRU steps; layer k feeds its new state to layer k+1."""
    one = lib.floatX(1.0)

    def _gru_step(inp, prev_h, W_gates, b_gates, W_cand, b_cand):
        # Standard GRU cell: gates from [inp, prev_h]; candidate from
        # [reset*prev_h, inp]; output blends candidate and prev_h.
        acts = T.nnet.sigmoid(
            T.dot(T.concatenate((inp, prev_h), axis=1), W_gates) + b_gates)
        z = acts[:, :hidden_dim]
        r = acts[:, hidden_dim:]
        cand = T.tanh(
            T.dot(T.concatenate((r * prev_h, inp), axis=1), W_cand) + b_cand)
        return (z * cand) + ((one - z) * prev_h)

    h1_t = _gru_step(x_t, h1_tm1, W1, b1, W2, b2)
    h2_t = _gru_step(h1_t, h2_tm1, W3, b3, W4, b4)
    h3_t = _gru_step(h2_t, h3_tm1, W5, b5, W6, b6)
    return h1_t, h2_t, h3_t