def get_train_output_for(self, inputX, inputy=None):
    """Single-pass ELM-style training output.

    Draws fresh Glorot-normal hidden weights and normal biases, computes
    hidden activations through the decomposed dot+bias+activation kernel,
    solves the output weights ``beta`` in closed form, and returns the
    resulting predictions.
    """
    # New random hidden-layer parameters on every call.
    self.W = init.GlorotNormal().sample((inputX.shape[1], self.hidden_unit))
    self.b = init.Normal().sample(self.hidden_unit)
    # Hidden representation via the blocked/decomposed kernel.
    hidden = dotbiasact_decomp(inputX, self.W, self.b)
    # Closed-form (regularized) solution for the output weights.
    self.beta = compute_beta(hidden, inputy, self.C)
    return dot_decomp(hidden, self.beta)
def fit(self, inputX, inputy):
    """Fit the random-projection classifier on (inputX, inputy).

    Hidden width is ``n_times`` times the input width; labels are one-hot
    encoded over the classes observed in ``inputy``. Returns ``self`` so
    calls can be chained.
    """
    n_hidden = int(self.n_times * inputX.shape[1])
    # One-hot encode the targets over the observed label set.
    targets = myUtils.load.one_hot(inputy, len(np.unique(inputy)))
    self.W = init.GlorotNormal().sample((inputX.shape[1], n_hidden))
    self.b = init.Normal().sample(n_hidden)
    hidden = relu(np.dot(inputX, self.W) + self.b)
    # Closed-form output weights with regularization constant C.
    self.beta = compute_beta(hidden, targets, self.C)
    return self
def get_train_output_for(self, inputX, inputy=None):
    """PCA-reduce the input, then produce the ELM-style training output.

    Note: ``fit_transform`` refits the PCA on every call, matching the
    fresh random sampling of the hidden weights below.
    """
    inputX = self.pca.fit_transform(inputX)
    n_hidden = int(self.n_times * inputX.shape[1])
    # Fresh random hidden projection sized from the reduced input.
    self.W = init.GlorotNormal().sample((inputX.shape[1], n_hidden))
    self.b = init.Normal().sample(n_hidden)
    hidden = dotbiasact_decomp(inputX, self.W, self.b)
    self.beta = compute_beta(hidden, inputy, self.C)
    return dot_decomp(hidden, self.beta)
def get_train_output(self, inputX, inputy):
    """Random hidden projection + ReLU, then closed-form beta.

    Returns the network output ``H @ beta`` for the training batch.
    """
    self.W = init.GlorotNormal().sample((inputX.shape[1], self.hidden_unit))
    self.b = init.Normal().sample(self.hidden_unit)
    hidden = relu(np.dot(inputX, self.W) + self.b)
    # Regularized least-squares output weights.
    self.beta = compute_beta(hidden, inputy, self.C)
    return np.dot(hidden, self.beta)
def fit_transform(self, X, y):
    """Fit the random-projection layer on (X, y) and return its output.

    Labels are one-hot encoded; the returned array is ``H @ beta`` where H
    is the ReLU-activated random projection of X.
    """
    targets = myUtils.load.one_hot(y, len(np.unique(y)))
    self.W = init.GlorotNormal().sample((X.shape[1], self.n_hidden))
    self.b = init.Normal().sample(self.n_hidden)
    hidden = relu(np.dot(X, self.W) + self.b)
    self.beta = compute_beta(hidden, targets, self.C)
    return np.dot(hidden, self.beta)
def get_train_output_for(self, inputX, inputy=None, n_splits=3):
    """Train-time output with validated output-weight fitting.

    Generalization: the third argument to ``compute_beta_val`` was
    hard-coded to 3; it is now the ``n_splits`` keyword (default 3, so
    existing callers are unchanged).
    # presumably n_splits is the number of validation folds used by
    # compute_beta_val — TODO confirm against its definition.
    """
    n_hidden = int(self.n_times * inputX.shape[1])
    # Fresh random hidden projection on every call.
    self.W = init.GlorotNormal().sample((inputX.shape[1], n_hidden))
    self.b = init.Normal().sample(n_hidden)
    H = relu(np.dot(inputX, self.W) + self.b)
    # Output weights selected with validation rather than a fixed C.
    self.beta = compute_beta_val(H, inputy, n_splits)
    out = np.dot(H, self.beta)
    return out
def __init__(self, incoming, num_units, peepholes=True, mask_input=None, encoder_input=None, encoder_mask_input=None, **kwargs): super(MatchLSTM, self).__init__(incoming, num_units, peepholes=peepholes, precompute_input=False, mask_input=mask_input, only_return_final=True, **kwargs) # encoder mask self.encoder_input_incoming_index = -1 self.encoder_mask_incoming_index = -1 if encoder_mask_input is not None: self.input_layers.append(encoder_mask_input) self.input_shapes.append(encoder_mask_input.output_shape) self.encoder_mask_incoming_index = len(self.input_layers) - 1 if encoder_input is not None: self.input_layers.append(encoder_input) encoder_input_output_shape = encoder_input.output_shape self.input_shapes.append(encoder_input_output_shape) self.encoder_input_incoming_index = len(self.input_layers) - 1 # hidden state length should equal to embedding size assert encoder_input_output_shape[-1] == num_units # input features length should equal to embedding size plus hidden state length assert encoder_input_output_shape[ -1] + num_units == self.input_shapes[0][-1] # initializes attention weights self.W_y_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_y_attend') self.W_h_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_h_attend') # doesn't need transpose self.w_attend = self.add_param(init.Normal(0.1), (num_units, 1), 'w_attend') self.W_m_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_m_attend')
def __init__(
        self,
        n_words,
        dim_emb,
        num_units,
        n_classes,
        w_emb=None,
        dropout=0.2,
        use_final=False,
        lr=0.001,
        pretrain=None,
):
    """LSTM text classifier: embedding -> (dropout) -> LSTM -> pooling ->
    (dropout) -> softmax dense output.

    :param n_words: vocabulary size
    :param dim_emb: embedding dimensionality
    :param num_units: LSTM hidden units
    :param n_classes: number of output classes
    :param w_emb: embedding initializer or pretrained matrix; defaults to
        ``init.Normal()``
    :param dropout: dropout probability after embedding and encoder
        (0/falsy disables)
    :param use_final: if True classify from the final LSTM state; otherwise
        pool all steps with MeanLayer (presumably a mask-aware mean —
        confirm against MeanLayer's definition)
    :param lr: learning rate stored for the optimizer built elsewhere
    :param pretrain: optional pretrained-weights spec passed to
        ``load_pretrain``
    """
    self.n_words = n_words
    self.dim_emb = dim_emb
    self.num_units = num_units
    self.n_classes = n_classes
    self.lr = lr
    if w_emb is None:
        w_emb = init.Normal()
    # Symbolic inputs: token ids and sequence mask, both (batch, time).
    self.l_x = layers.InputLayer((None, None))
    self.l_m = layers.InputLayer((None, None))
    self.l_emb = layers.EmbeddingLayer(self.l_x, n_words, dim_emb, W=w_emb)
    self.l_ebd = self.l_emb  # keep a handle on the raw embedding layer
    if dropout:
        self.l_emb = layers.dropout(self.l_emb, dropout)
    if use_final:
        self.l_enc = layers.LSTMLayer(self.l_emb, num_units,
                                      mask_input=self.l_m,
                                      only_return_final=True,
                                      grad_clipping=10.0,
                                      gradient_steps=400)
        self.l_rnn = self.l_enc
    else:
        self.l_enc = layers.LSTMLayer(self.l_emb, num_units,
                                      mask_input=self.l_m,
                                      only_return_final=False,
                                      grad_clipping=10.0,
                                      gradient_steps=400)
        self.l_rnn = self.l_enc
        # Pool the per-step outputs using the mask.
        self.l_enc = MeanLayer(self.l_enc, self.l_m)
    if dropout:
        self.l_enc = layers.dropout(self.l_enc, dropout)
    self.l_y = layers.DenseLayer(self.l_enc, n_classes,
                                 nonlinearity=nonlinearities.softmax)
    if pretrain:
        self.load_pretrain(pretrain)
def pvFWRF(__mst_data, nf, nv, nt, add_bias=True):
    '''
    Create a symbolic lasagne network for the per voxel candidate case.

    Returns a symbolic output of shape (bn, bv, bt).

    :param __mst_data: symbolic tensor, reshaped here to (-1, nf, nv, nt)
    :param nf: number of features
    :param nv: number of voxels
    :param nt: number of candidates
    :param add_bias: if True, give the prediction layer a constant-zero bias
    '''
    _input = L.InputLayer((None, nf, nv, nt),
                          input_var=__mst_data.reshape((-1, nf, nv, nt)))
    ## try to add a parametrized local nonlinearity layer.
    if add_bias:
        _pred = pvFWRFLayer(_input, W=I.Normal(0.02), b=I.Constant(0.),
                            nonlinearity=None)
    else:
        _pred = pvFWRFLayer(_input, W=I.Normal(0.02), b=None,
                            nonlinearity=None)
    return _pred
def net_lenet5(input_shape, nclass):
    """Build a LeNet-5-style network with variational-dropout (ARD) layers.

    Returns ``(network, input_var, target_var, 1)``.

    Fix: the original bound ``Winit = init.Normal()`` and never used it;
    the dead local is removed (each layer constructs its own initializer,
    exactly as before).
    """
    input_x = T.tensor4("input")
    target_y = T.vector("target", dtype='int32')
    net = ll.InputLayer(input_shape, input_x)
    net = layers.Conv2DVarDropOutARD(net, 20, 5, W=init.Normal())
    net = MaxPool2DLayer(net, 2)
    net = layers.Conv2DVarDropOutARD(net, 50, 5, W=init.Normal())
    net = MaxPool2DLayer(net, 2)
    net = layers.DenseVarDropOutARD(net, 500, W=init.Normal())
    net = layers.DenseVarDropOutARD(net, nclass, W=init.Normal(),
                                    nonlinearity=nl.softmax)
    # Last element mirrors net_vgglike's width multiplier; fixed to 1 here.
    return net, input_x, target_y, 1
def __init__(self, W_in=init.Normal(0.1), W_hid=init.Normal(0.1),
             W_cell=init.Normal(0.1), W_tid=init.Normal(0.1),
             b=init.Constant(0.), nonlinearity=nonlinearities.sigmoid):
    """Container for one gate's parameter specs.

    Holds initializers for the input, hidden, (optional) cell/peephole and
    time-id weights, plus the bias and the gate nonlinearity.
    """
    self.W_in = W_in
    self.W_hid = W_hid
    self.W_tid = W_tid
    # Don't store a cell weight vector when cell is None
    if W_cell is not None:
        self.W_cell = W_cell
    self.b = b
    # None means "no squashing": fall back to the identity function.
    self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                         else nonlinearity)
def train(env, policy, policy_init, num_episodes, episode_cap, horizon,
          **alg_args):
    """Train a linear Gaussian policy on an rllab environment with TRPO.

    :param env: environment name, resolved via ``rllab_env_from_name``
    :param policy: policy type; only 'linear' is implemented
    :param policy_init: 'zeros' or 'normal' initialization for the mean
        network's output weights
    :param num_episodes: episodes per batch (batch_size = horizon * num_episodes)
    :param episode_cap: if truthy, cap episodes per iteration at num_episodes
    :param horizon: maximum path length
    :param alg_args: extra keyword arguments forwarded to TRPO
    """
    # Getting the environment
    env_class = rllab_env_from_name(env)
    env = normalize(env_class())
    # Policy initialization
    if policy_init == 'zeros':
        initializer = LI.Constant(0)
    elif policy_init == 'normal':
        initializer = LI.Normal()
    else:
        raise Exception('Unrecognized policy initialization.')
    # Creating the policy
    if policy == 'linear':
        obs_dim = env.observation_space.flat_dim
        action_dim = env.action_space.flat_dim
        # Empty hidden_sizes: the mean network is a pure linear map.
        mean_network = MLP(
            input_shape=(obs_dim,),
            output_dim=action_dim,
            hidden_sizes=tuple(),
            hidden_nonlinearity=NL.tanh,
            output_nonlinearity=None,
            output_b_init=None,
            output_W_init=initializer,
        )
        policy = GaussianMLPPolicy(
            env_spec=env.spec,
            # The neural network policy should have two hidden layers, each with 32 hidden units.
            hidden_sizes=tuple(),
            mean_network=mean_network,
            log_weights=True,
        )
    else:
        raise Exception('NOT IMPLEMENTED.')
    # Creating baseline
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    # Adding max_episodes constraint. If -1, this is unbounded
    if episode_cap:
        alg_args['max_episodes'] = num_episodes
    # Run algorithm
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=horizon * num_episodes,
        whole_paths=True,
        max_path_length=horizon,
        **alg_args
    )
    algo.train()
def net_vgglike(k, input_shape, nclass):
    """Build a VGG-like convolutional network scaled by width multiplier ``k``.

    Returns ``(network, input_var, target_var, k)``.

    Fix: the original bound ``Winit = init.Normal()`` and never used it;
    the dead local is removed (each dense layer constructs its own
    initializer, exactly as before).
    """
    input_x = T.tensor4("input")
    target_y = T.vector("target", dtype='int32')
    net = ll.InputLayer(input_shape, input_x)
    # Block 1
    net = conv_bn_rectify(net, 64 * k)
    net = ll.DropoutLayer(net, 0.3)
    net = conv_bn_rectify(net, 64 * k)
    net = MaxPool2DLayer(net, 2, 2)
    # Block 2
    net = conv_bn_rectify(net, 128 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 128 * k)
    net = MaxPool2DLayer(net, 2, 2)
    # Block 3
    net = conv_bn_rectify(net, 256 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 256 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 256 * k)
    net = MaxPool2DLayer(net, 2, 2)
    # Block 4
    net = conv_bn_rectify(net, 512 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 512 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 512 * k)
    net = MaxPool2DLayer(net, 2, 2)
    # Block 5
    net = conv_bn_rectify(net, 512 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 512 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 512 * k)
    net = MaxPool2DLayer(net, 2, 2)
    # Classifier head
    net = ll.DenseLayer(net, int(512 * k), W=init.Normal(),
                        nonlinearity=nl.rectify)
    net = BatchNormLayer(net, epsilon=1e-3)
    net = ll.NonlinearityLayer(net)
    net = ll.DropoutLayer(net, 0.5)
    net = ll.DenseLayer(net, nclass, W=init.Normal(), nonlinearity=nl.softmax)
    return net, input_x, target_y, k
def IAF_dense_layer(incoming, num_units=200, L=2, num_hids=1,
                    W=init.Normal(0.0001), r=init.Normal(0.0001),
                    b=init.Constant(0.),
                    nonlinearity=nonlinearities.rectify):
    """Wrap an IAFDenseLayer and split its (output, logdet) pair.

    Returns ``(output_layer, [logdet_layer])``.

    NOTE(review): the ``W``, ``r``, ``b`` and ``nonlinearity`` arguments are
    accepted but never forwarded to IAFDenseLayer — presumably kept for
    signature compatibility with sibling layer factories; confirm whether
    they should be passed through.
    """
    layer_temp = IAFDenseLayer(incoming, num_units, num_hids, L=L,
                               cond_bias=False)
    # IAFDenseLayer yields a pair; index 0 is the transformed output,
    # index 1 the log-determinant term.
    layer = IndexLayer(layer_temp, 0)
    logdets_layers = IndexLayer(layer_temp, 1)
    return layer, [
        logdets_layers,
    ]
def conv_bn_rectify(net, num_filters):
    """3x3 'same' convolution (no activation) -> batch norm -> nonlinearity."""
    out = ConvLayer(net, int(num_filters), 3, W=init.Normal(), pad=1,
                    nonlinearity=None)
    out = BatchNormLayer(out, epsilon=1e-3)
    return ll.NonlinearityLayer(out)
def svFWRF(__mst_data, nf, nv, nt, add_bias=True):
    '''
    Create a symbolic lasagne network for the shared voxel candidate case.

    Returns a symbolic output of shape (bn, bv, bt).

    :param __mst_data: symbolic tensor, reshaped here to (-1, nf, nt)
    :param nf: number of features
    :param nv: number of voxels sharing the candidates
    :param nt: number of candidates
    :param add_bias: if True, give the prediction layer a constant-zero bias
    '''
    _input = L.InputLayer((None, nf, nt),
                          input_var=__mst_data.reshape((-1, nf, nt)))
    if add_bias:
        _pred = svFWRFLayer(_input, nvoxels=nv, W=I.Normal(0.02),
                            b=I.Constant(0.), nonlinearity=None)
    else:
        _pred = svFWRFLayer(_input, nvoxels=nv, W=I.Normal(0.02), b=None,
                            nonlinearity=None)
    return _pred
def conv_bn_rectify(net, num_filters):
    """3x3 'same' variational-dropout (ARD) convolution, linear activation,
    followed by batch norm and a nonlinearity layer."""
    out = layers.Conv2DVarDropOutARD(net, int(num_filters), 3,
                                     W=init.Normal(), pad=1,
                                     nonlinearity=nl.linear)
    out = BatchNormLayer(out, epsilon=1e-3)
    return ll.NonlinearityLayer(out)
def cls_net(_incoming):
    """Small convolutional classifier.

    Returns ``(aux, logits)`` where ``aux`` is a list of tanh-squashed
    intermediate feature layers (dense outputs broadcast back to 4-D) and
    ``logits`` is the final sigmoid dense layer.
    """
    drop_in = L.DropoutLayer(_incoming, p=0.2, rescale=True)
    conv1 = batch_norm(
        conv(drop_in, num_filters=64, filter_size=7, stride=3, pad=0,
             W=I.Normal(0.02), b=None, nonlinearity=NL.rectify))
    drop1 = L.DropoutLayer(conv1, p=0.2, rescale=True)
    conv2 = batch_norm(
        conv(drop1, num_filters=128, filter_size=3, stride=1, pad=0,
             W=I.Normal(0.02), b=None, nonlinearity=NL.rectify))
    pooled = L.MaxPool2DLayer(conv2, pool_size=2)
    fc1 = batch_norm(
        L.DenseLayer(L.FlattenLayer(pooled, outdim=2), 256,
                     W=I.Normal(0.02), b=None, nonlinearity=NL.rectify))
    # ny is a module-level constant (number of outputs) — defined elsewhere.
    fc2 = L.DenseLayer(fc1, ny, W=I.Normal(0.02), b=None,
                       nonlinearity=NL.sigmoid)
    aux = [
        tanh(conv1),
        tanh(conv2),
        tanh(L.DimshuffleLayer(fc1, (0, 1, 'x', 'x'))),
        L.DimshuffleLayer(fc2, (0, 1, 'x', 'x')),
    ]
    return aux, fc2
def __init__(self,
             input_sequence,
             query,
             num_units,
             mask_input=None,
             key_sequence=None,
             nonlinearity=T.tanh,
             probs_nonlinearity=T.nnet.softmax,
             W_enc=init.Normal(),
             W_dec=init.Normal(),
             W_out=init.Normal(),
             **kwargs):
    """Additive attention layer over an encoder sequence for one decoder tick.

    Fix: the original defaulted the keys with
    ``key_sequence = key_sequence or input_sequence`` — a truthiness test,
    which is the wrong idiom for "use a default when the argument is
    missing" and would silently substitute the values for any falsy layer
    object. Replaced with an explicit ``is None`` check.

    :param input_sequence: value sequence, (batch, time, units)
    :param query: decoder state for this tick, (batch, units)
    :param num_units: hidden size of the additive score MLP
    :param mask_input: optional (batch, time) mask layer
    :param key_sequence: optional key sequence; defaults to input_sequence
    """
    assert len(input_sequence.output_shape) == 3, "input_sequence must be a 3-dimensional (batch,time,units)"
    assert len(query.output_shape) == 2, "query must be a 2-dimensional for single tick (batch,units)"
    assert mask_input is None or len(mask_input.output_shape) == 2, "mask_input must be 2-dimensional (batch,time) or None"
    assert key_sequence is None or len(key_sequence.output_shape) == 3, "key_sequence must be 3-dimensional " \
                                                                        "of shape (batch,time,heads,units) or None"
    # if no key sequence is given, use input_sequence as key
    if key_sequence is None:
        key_sequence = input_sequence
    batch_size, seq_len, key_units = key_sequence.output_shape
    value_units = input_sequence.output_shape[-1]
    dec_units = query.output_shape[-1]
    incomings = [input_sequence, key_sequence, query]
    if mask_input is not None:
        incomings.append(mask_input)
    output_shapes = {'attn': (batch_size, value_units),
                     'probs': (batch_size, seq_len)}
    super(AttentionLayer, self).__init__(incomings, output_shapes, **kwargs)
    # Encoder/decoder projections into the score space plus the logit vector.
    self.W_enc = self.add_param(W_enc, (key_units, num_units),
                                name='enc_to_hid')
    self.W_dec = self.add_param(W_dec, (dec_units, num_units),
                                name='dec_to_hid')
    self.W_out = self.add_param(W_out, (num_units, 1), name='hid_to_logit')
    self.nonlinearity = nonlinearity
    self.probs_nonlinearity = probs_nonlinearity
def cond_erg_dec_net(_emb, _cond):
    """Two-stage conditional deconvolutional decoder.

    The condition ``_cond`` is tiled onto the feature maps (plu.concat_tc)
    before each deconvolution; the final stage emits ``npc`` channels with
    no activation.
    """
    hidden = plu.concat_tc(_emb, _cond)
    hidden = deconv(hidden, num_filters=128, filter_size=4, stride=2, crop=1,
                    W=I.Normal(0.02), b=I.Constant(0),
                    nonlinearity=NL.LeakyRectify(0.02))
    hidden = plu.concat_tc(hidden, _cond)
    return deconv(hidden, num_filters=npc, filter_size=4, stride=2, crop=1,
                  W=I.Normal(0.02), b=I.Constant(0), nonlinearity=None)
def __call__(self, layer, spec, shape, **tags):
    """Create a variational weight tensor.

    Registers (rho, mu) parameters on the layer, samples W with the
    reparameterization trick, and adds the resulting variational cost to
    the layer's accumulator.
    """
    # A bare (non-dict) spec is shorthand for the mean initializer.
    if not isinstance(spec, dict):
        spec = {'mu': spec}
    # important!
    # Mark the params added below as variational so downstream code can
    # locate them when fitting the distribution.
    tags['variational'] = True
    # NOTE: rho is registered before mu, preserving the original parameter
    # ordering on the layer.
    rho = layer.add_param(spec.get('rho', init.Normal(1)), shape, **tags)
    mean = layer.add_param(spec.get('mu', init.Normal(1)), shape, **tags)
    noise = layer.acc.srng.normal(shape, std=1)
    # std = softplus(rho) keeps the scale positive.
    W = mean + T.log1p(T.exp(rho)) * noise
    divergence = self.log_posterior_approx(W, mean, rho) - self.log_prior(W)
    layer.acc.add_cost(divergence)
    return W
def __init__(self, incoming, input_size, output_size, W=init.Normal(),
             **kwargs):
    """Embedding lookup layer.

    Stores a trainable (input_size, output_size) table W that integer
    inputs index into.
    """
    super(EmbeddingLayer, self).__init__(incoming, **kwargs)
    self.input_size = input_size
    self.output_size = output_size
    # The embedding table itself.
    self.W = self.add_param(W, (input_size, output_size), name="W")
def __init__(
        self,
        incoming,
        num_units,
        W=init.Normal(1),
        r=init.Normal(0.0001),
        b=init.Constant(0.),
        nonlinearity=nonlinearities.rectify,
        uncoupled_init=0,  # >0 --> downscale identity connection by default
        **kwargs):
    """Weight-normalized coupling dense layer (uncoupled-init variant).

    BUG FIX: the original computed a single
    ``num_inputs = int(np.prod(self.input_shape[1] / 2))`` and used it for
    both halves of the coupling split (``np.prod`` of a scalar is a no-op,
    so this is just the floored half). For odd input widths the second
    half is one element larger, so W21/W22 and the r2x/b2x vectors were
    mis-shaped. The two half-sizes are now computed separately, matching
    the sibling CoupledWNDenseLayer implementation in this file.
    """
    super(CoupledWNDenseLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = (nonlinearities.identity
                         if nonlinearity is None else nonlinearity)
    self.uncoupled_init = uncoupled_init
    self.num_units = num_units
    # Split the input features into two (possibly unequal) halves.
    num_inputs1 = int(self.input_shape[1] // 2)
    num_inputs2 = self.input_shape[1] - num_inputs1
    self.W1 = self.add_param(W, (num_inputs1, num_units), name="cpds_W1")
    self.W21 = self.add_param(W, (num_units, num_inputs2), name="cpds_W21")
    # NOTE(review): "cdds_W22" looks like a typo for "cpds_W22", but the
    # name is kept for parameter/checkpoint compatibility.
    self.W22 = self.add_param(W, (num_units, num_inputs2), name="cdds_W22")
    self.r1 = self.add_param(r, (num_units, ), name='cpds_r1')
    self.r21 = self.add_param(r, (num_inputs2, ), name='cpds_r21')
    self.r22 = self.add_param(r, (num_inputs2, ), name='cpds_r22')
    if b is None:
        self.b1 = None
        self.b21 = None
        self.b22 = None
    else:
        self.b1 = self.add_param(b, (num_units, ), name="cpds_b1",
                                 regularizable=False)
        self.b21 = self.add_param(b, (num_inputs2, ), name="cpds_b21",
                                  regularizable=False)
        self.b22 = self.add_param(b, (num_inputs2, ), name="cpds_b22",
                                  regularizable=False)
def __init__(self, incoming, num_freqs, freqs=init.Normal(std=1),
             log_sigma=init.Constant(0.), **kwargs):
    """Layer holding ``num_freqs`` learnable frequency vectors plus a
    scalar log-bandwidth (used by the smoothed-CF featurization)."""
    super(SmoothedCFLayer, self).__init__(incoming, **kwargs)
    self.num_freqs = num_freqs
    # Expect flat (batch, features) input.
    assert len(self.input_shape) == 2
    in_dim = self.input_shape[1]
    # One frequency vector per output frequency.
    self.freqs = self.add_param(freqs, (num_freqs, in_dim), name='freqs')
    # Scalar smoothing bandwidth, stored in log space.
    self.log_sigma = self.add_param(log_sigma, (), name='log_sigma')
def __init__(self, incoming, vocab_size, word_dimension, word_embedding,
             non_static=True, **kwargs):
    """Word-embedding layer whose table can be frozen.

    When ``non_static`` is False the table is neither trained nor
    regularized (useful for fixed pretrained embeddings).
    """
    super(SentenceEmbeddingLayer, self).__init__(incoming, **kwargs)
    self.vocab_size = vocab_size
    self.word_dimension = word_dimension
    # Fall back to a random normal table when no pretrained matrix is given.
    if word_embedding is None:
        word_embedding = init.Normal()
        # word_embedding = init.GlorotUniform(gain=1.0)
    self.W = self.add_param(word_embedding,
                            (vocab_size, word_dimension),
                            name="Words",
                            trainable=non_static,
                            regularizable=non_static)
def __init__(self, incoming, num_units, max_steps, peepholes=False,
             mask_input=None, **kwargs):
    """
    Answer-pointer layer: an LSTM that attends over the passage encoding
    to emit answer positions.

    :param incoming: bidirectional match-LSTM encoding of the passage
    :param num_units: number of LSTM units
    :param max_steps: max num steps to generate answer words; can be a
        tensor scalar variable
    :param peepholes: whether the underlying LSTM uses peephole connections
    :param mask_input: passage's length mask
    :param kwargs: forwarded to the LSTM base class
    """
    super(AnsPointerLayer, self).__init__(incoming, num_units,
                                          peepholes=peepholes,
                                          precompute_input=False,
                                          mask_input=mask_input,
                                          only_return_final=False, **kwargs)
    self.max_steps = max_steps
    # initializes attention weights
    input_shape = self.input_shapes[0]
    # Feature size per time step of the incoming sequence.
    num_inputs = np.prod(input_shape[2:])
    self.V_pointer = self.add_param(init.Normal(0.1),
                                    (num_inputs, num_units), 'V_pointer')
    # doesn't need transpose
    self.v_pointer = self.add_param(init.Normal(0.1), (num_units, 1),
                                    'v_pointer')
    self.W_a_pointer = self.add_param(init.Normal(0.1),
                                      (num_units, num_units), 'W_a_pointer')
    # Biases kept as (1, ...) so they broadcast over the batch dimension.
    self.b_a_pointer = self.add_param(init.Constant(0.), (1, num_units),
                                      'b_a_pointer')
    self.c_pointer = self.add_param(init.Constant(0.), (1, 1), 'c_pointer')
def __init__(self, incoming, num_units, Wfc=init.Normal(),
             nonlinearity=rectify, mnc=False, b=init.Constant(0.), **kwargs):
    """Dense layer with an optional max-norm constraint on the weights.

    BUG FIX: the original called ``super().__init__(incoming)`` and
    silently dropped ``**kwargs`` (e.g. ``name=...``); they are now
    forwarded to the base class.

    :param Wfc: initializer for the weight matrix
    :param mnc: if truthy, max-norm value applied to W via
        ``updates.norm_constraint`` (note this rebinds ``self.W`` to a
        constrained symbolic expression rather than the shared variable)
    :param b: initializer for the bias vector
    """
    super(DenseLayer, self).__init__(incoming, **kwargs)
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.num_inputs = int(np.prod(self.input_shape[1:]))
    # Private random stream; presumably used by subclasses — confirm.
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.W = self.add_param(Wfc, (self.num_inputs, self.num_units), name="W")
    # max norm constraint
    if mnc:
        self.W = updates.norm_constraint(self.W, mnc)
    self.b = self.add_param(b, (num_units,), name="b", regularizable=False)
def __init__(self, incoming, n_slots, d_slots, M=init.Normal(),
             nonlinearity_final=nonlinearities.identity, **kwargs):
    """Layer owning a trainable memory matrix of ``n_slots`` x ``d_slots``."""
    super(SeparateMemoryLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity_final = nonlinearity_final
    self.n_slots = n_slots
    self.d_slots = d_slots
    # memory slots
    self.M = self.add_param(M, (n_slots, d_slots), name="M")
def __init__(self, incoming, num_units, W=init.Normal(0.0001),
             r=init.Normal(0.0001), b=init.Constant(0.),
             nonlinearity=nonlinearities.rectify, **kwargs):
    """Weight-normalized coupling dense layer.

    The input features are split into two halves (the second half takes the
    extra element when the width is odd): W1 maps the first half to the
    hidden units; W21/W22 map hidden units back to the second half's width.
    Parameter names are kept as-is (including the historical "cdds_W22"
    spelling) for checkpoint compatibility.
    """
    super(CoupledWNDenseLayer, self).__init__(incoming, **kwargs)
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
    self.num_units = num_units
    first_half = int(self.input_shape[1] / 2)
    second_half = self.input_shape[1] - first_half
    self.W1 = self.add_param(W, (first_half, num_units), name="cpds_W1")
    self.W21 = self.add_param(W, (num_units, second_half), name="cpds_W21")
    self.W22 = self.add_param(W, (num_units, second_half), name="cdds_W22")
    self.r1 = self.add_param(r, (num_units, ), name='cpds_r1')
    self.r21 = self.add_param(r, (second_half, ), name='cpds_r21')
    self.r22 = self.add_param(r, (second_half, ), name='cpds_r22')
    if b is None:
        self.b1 = None
        self.b21 = None
        self.b22 = None
    else:
        self.b1 = self.add_param(b, (num_units, ), name="cpds_b1",
                                 regularizable=False)
        self.b21 = self.add_param(b, (second_half, ), name="cpds_b21",
                                  regularizable=False)
        self.b22 = self.add_param(b, (second_half, ), name="cpds_b22",
                                  regularizable=False)
def erg_dec_net(_emb, _cond):
    """Deconvolutional decoder; when ``_cond`` is given it is tiled onto the
    embedding before the first deconvolution.

    Fix: the None check now uses ``is not None`` instead of ``!= None``
    (PEP 8: comparisons to singletons use identity, and ``!=`` would invoke
    rich comparison on objects that override ``__eq__``).
    """
    if _cond is not None:
        _conv3 = plu.concat_tc(_emb, _cond)
    else:
        _conv3 = _emb
    _deconv1 = batch_norm(
        deconv(_conv3, num_filters=128, filter_size=3, stride=1, crop=0,
               W=I.Normal(0.02), b=None,
               nonlinearity=NL.LeakyRectify(0.01)))
    # NOTE(review): conditioning of the later stages appears deliberately
    # disabled in the original; kept commented out rather than re-enabled.
    # if _cond != None: _deconv1 = plu.concat_tc(_deconv1, _cond)
    _deconv2 = deconv(_deconv1, num_filters=64, filter_size=4, stride=2,
                      crop=0, W=I.Normal(0.02), b=None,
                      nonlinearity=NL.LeakyRectify(0.01))
    # if _cond != None: _deconv2 = plu.concat_tc(_deconv2, _cond)
    _deconv3 = deconv(_deconv2, num_filters=npc, filter_size=4, stride=2,
                      crop=1, W=I.Normal(0.02), b=None, nonlinearity=None)
    return _deconv3