def get_estimator(n_features, files, labels, eval_size=0.1):
    layers = [
        (InputLayer, {'shape': (None, n_features)}),
        (DenseLayer, {'num_units': N_HIDDEN_1, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'),
                      'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': N_HIDDEN_2, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'),
                      'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': 1, 'nonlinearity': None}),
    ]
    args = dict(
        layers=layers,
        update=adam,
        update_learning_rate=theano.shared(util.float32(START_LR)),
        batch_iterator_train=ResampleIterator(BATCH_SIZE),
        batch_iterator_test=BatchIterator(BATCH_SIZE),
        objective=nn.get_objective(l1=L1, l2=L2),
        #eval_size=eval_size,
        custom_score=('kappa', util.kappa) if eval_size > 0.0 else None,
        on_epoch_finished=[
            nn.Schedule('update_learning_rate', SCHEDULE),
        ],
        regression=True,
        max_epochs=N_ITER,
        verbose=1,
    )
    net = BlendNet(eval_size=eval_size, **args)
    net.set_split(files, labels)
    return net
def ptb_lstm(input_var, vocabulary_size, hidden_size, seq_len, num_layers,
             dropout, batch_size):
    l_input = L.InputLayer(shape=(batch_size, seq_len), input_var=input_var)
    l_embed = L.EmbeddingLayer(l_input, vocabulary_size, hidden_size,
                               W=init.Uniform(1.0))
    l_lstms = []
    for i in range(num_layers):
        l_lstm = L.LSTMLayer(
            l_embed if i == 0 else l_lstms[-1], hidden_size,
            ingate=L.Gate(W_in=init.GlorotUniform(),
                          W_hid=init.Orthogonal()),
            forgetgate=L.Gate(W_in=init.GlorotUniform(),
                              W_hid=init.Orthogonal(),
                              b=init.Constant(1.0)),
            cell=L.Gate(W_in=init.GlorotUniform(),
                        W_hid=init.Orthogonal(),
                        W_cell=None,
                        nonlinearity=lasagne.nonlinearities.tanh),
            outgate=L.Gate(W_in=init.GlorotUniform(),
                           W_hid=init.Orthogonal()))
        l_lstms.append(l_lstm)
    l_drop = L.DropoutLayer(l_lstms[-1], dropout)
    l_out = L.DenseLayer(l_drop, num_units=vocabulary_size,
                         num_leading_axes=2)
    l_out = L.ReshapeLayer(
        l_out,
        (l_out.output_shape[0] * l_out.output_shape[1],
         l_out.output_shape[2]))
    l_out = L.NonlinearityLayer(l_out,
                                nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
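# Minimal usage sketch for ptb_lstm (my assumption, not from the source; the
# sizes below are hypothetical and the function itself must be in scope).
import theano.tensor as T
import lasagne
import lasagne.layers as L
from lasagne import init

input_var = T.imatrix('input')  # (batch_size, seq_len) of word ids
network = ptb_lstm(input_var, vocabulary_size=10000, hidden_size=200,
                   seq_len=20, num_layers=2, dropout=0.5, batch_size=32)
# softmax output flattened to (batch_size * seq_len, vocabulary_size)
output = L.get_output(network)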
def _forward(self):
    net = {}
    net['input'] = layers.InputLayer(shape=(None, 1, 28, 28),
                                     input_var=self.X)
    net['conv1'] = layers.Conv2DLayer(net['input'], 32, (3, 3),
                                      W=init.Orthogonal(), pad=1)
    net['pool1'] = layers.MaxPool2DLayer(net['conv1'], (2, 2), stride=(2, 2))
    net['conv2'] = layers.Conv2DLayer(net['pool1'], 64, (3, 3),
                                      W=init.Orthogonal(), pad=1)
    net['pool2'] = layers.MaxPool2DLayer(net['conv2'], (2, 2), stride=(2, 2))
    net['conv3'] = layers.Conv2DLayer(net['pool2'], 128, (3, 3),
                                      W=init.Orthogonal(), pad=1)
    net['conv4'] = layers.Conv2DLayer(net['conv3'], 128, (3, 3),
                                      W=init.Orthogonal(), pad=1)
    net['pool3'] = layers.MaxPool2DLayer(net['conv4'], (2, 2), stride=(2, 2))
    net['flatten'] = layers.FlattenLayer(net['pool3'])
    net['out'] = layers.DenseLayer(net['flatten'], 10, b=None,
                                   nonlinearity=nonlinearities.softmax)
    return net
def add_gate_params(gate_name):
    return (self.add_param(spec=init.Orthogonal(0.1),
                           shape=(num_inputs, num_units),
                           name="W_in_to_{}".format(gate_name)),
            self.add_param(spec=init.Orthogonal(0.1),
                           shape=(num_units, num_units),
                           name="W_hid_to_{}".format(gate_name)),
            self.add_param(spec=init.Constant(0.0),
                           shape=(num_units, ),
                           name="b_{}".format(gate_name),
                           regularizable=False))
def add_gate_params(self, gate_name):
    num_prev_units = self.num_proj_units if self.num_proj_units else self.num_units
    return (self.add_param(init.Orthogonal(),
                           (num_prev_units, self.num_units),
                           name="W_h_{}".format(gate_name)),
            self.add_param(init.Orthogonal(),
                           (self.num_inputs, self.num_units),
                           name="W_x_{}".format(gate_name)),
            self.add_param(init.Constant(0.0),
                           (self.num_units, ),
                           name="b_{}".format(gate_name),
                           regularizable=False))
def init_main_lstm_weights(self):
    (self.W_h_ig, self.W_x_ig, self.b_ig) = self.add_gate_params('ig')
    (self.W_h_fg, self.W_x_fg, self.b_fg) = self.add_gate_params('fg')
    (self.W_h_c, self.W_x_c, self.b_c) = self.add_gate_params('c')
    (self.W_h_og, self.W_x_og, self.b_og) = self.add_gate_params('og')

    self.W_h_stacked = T.concatenate(
        [self.W_h_ig, self.W_h_fg, self.W_h_c, self.W_h_og], axis=1)
    self.W_x_stacked = T.concatenate(
        [self.W_x_ig, self.W_x_fg, self.W_x_c, self.W_x_og], axis=1)
    self.b_stacked = T.concatenate(
        [self.b_ig, self.b_fg, self.b_c, self.b_og], axis=0)

    if self.num_proj_units:
        self.W_p = self.add_param(init.Orthogonal(),
                                  (self.num_units, self.num_proj_units),
                                  name="W_p")
    self.init_states()

    if self.use_layer_norm:
        self.W_x_alpha = self.add_param(spec=init.Constant(1.0),
                                        shape=(self.num_units * 4, ),
                                        name="W_x_alpha")
        self.W_h_alpha = self.add_param(spec=init.Constant(1.0),
                                        shape=(self.num_units * 4, ),
                                        name="W_h_alpha")
        self.W_c_alpha = self.add_param(spec=init.Constant(1.0),
                                        shape=(self.num_units, ),
                                        name="W_c_alpha")
        self.W_c_beta = self.add_param(spec=init.Constant(0.0),
                                       shape=(self.num_units, ),
                                       name="W_c_beta",
                                       regularizable=False)
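# Sketch (an assumption about the step function, which is not shown here):
# with the four gate weights stacked along axis 1 as above, a scan step
# usually recovers one gate's pre-activation by slicing the stacked product.
def slice_gate(gates, n, num_units):
    # gates: (batch, 4 * num_units); n = 0/1/2/3 selects ig/fg/c/og
    return gates[:, n * num_units:(n + 1) * num_units]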
def conv_params(
        num_filters,
        filter_size=(3, 3),
        pad=1,
        #border_mode='same',
        nonlinearity=leaky_rectify,
        W=init.Orthogonal(gain=1.0),
        b=init.Constant(0.05),
        untie_biases=True,
        **kwargs):
    args = {
        'num_filters': num_filters,
        'filter_size': filter_size,
        #'border_mode': border_mode,
        'pad': pad,
        'nonlinearity': nonlinearity,
        'W': W,
        'b': b,
        'untie_biases': untie_biases,
    }
    args.update(kwargs)
    if CC:
        args['dimshuffle'] = False
    else:
        args.pop('partial_sum', None)
    return args
def __init__(self, W_in=init.Orthogonal(0.1), W_hid=init.Orthogonal(0.1),
             W_cell=init.Uniform(0.1), b=init.Constant(0.),
             nonlinearity=nonlinearities.sigmoid):
    self.W_in = W_in
    self.W_hid = W_hid
    # Don't store a cell weight vector when cell is None
    if W_cell is not None:
        self.W_cell = W_cell
    self.b = b
    # For the nonlinearity, if None is supplied, use identity
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
def dense_params(num_units, nonlinearity=leaky_rectify, **kwargs):
    args = {
        'num_units': num_units,
        'nonlinearity': nonlinearity,
        'W': init.Orthogonal(1.0),
        'b': init.Constant(0.05),
    }
    args.update(kwargs)
    return args
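# Usage sketch (my assumption about intent): the returned dict is splatted
# into a Lasagne DenseLayer, so per-layer overrides travel through **kwargs.
# Assumes dense_params and its imports (leaky_rectify, init) are in scope.
import lasagne.layers as layers

l_in = layers.InputLayer(shape=(None, 100))
l_hidden = layers.DenseLayer(l_in, **dense_params(256))
l_linear = layers.DenseLayer(l_hidden, **dense_params(10, nonlinearity=None))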
def __init__(self, incoming, num_units, num_hyper_units, num_proj_units,
             ingate=Gate(W_in=init.Orthogonal()),
             forgetgate=Gate(W_in=init.Orthogonal()),
             cell=Gate(W_in=init.Orthogonal(), W_cell=None,
                       nonlinearity=nonlinearities.tanh),
             outgate=Gate(W_in=init.Orthogonal()),
             nonlinearity=nonlinearities.tanh,
             cell_init=init.Constant(0.),
             hid_init=init.Constant(0.),
             backwards=False,
             gradient_steps=-1,
             grad_clipping=0,
             precompute_input=True,
             mask_input=None,
             reparam='relu',
             use_layer_norm=False,
             **kwargs):
    super(HyperLHUCLSTMLayer, self).__init__(
        incoming, num_units, num_hyper_units, num_proj_units,
        ingate, forgetgate, cell, outgate, nonlinearity,
        cell_init, hid_init, backwards, gradient_steps, grad_clipping,
        precompute_input, mask_input, use_layer_norm=use_layer_norm,
        **kwargs)
    self.reparam = to_reparam_fn(reparam)
def conv_params(num_filters, filter_size=(3, 3), border_mode='same',
                nonlinearity=leaky_rectify, W=init.Orthogonal(gain=1.0),
                b=init.Constant(0.05), untie_biases=True, **kwargs):
    args = {
        'num_filters': num_filters,
        'filter_size': filter_size,
        'border_mode': border_mode,
        'nonlinearity': nonlinearity,
        'W': W,
        'b': b,
        'untie_biases': untie_biases,
    }
    args.update(kwargs)
    return args
def _forward(self):
    net = {}
    net['input'] = layers.InputLayer(shape=(None, 1, 28, 28),
                                     input_var=self.X)
    net['conv'] = layers.Conv2DLayer(net['input'], 10, (5, 5),
                                     W=init.Orthogonal())
    net['pool'] = layers.MaxPool2DLayer(net['conv'], (3, 3), stride=(1, 1),
                                        pad=(1, 1))
    net['flatten'] = layers.FlattenLayer(net['pool'])
    net['out'] = layers.DenseLayer(net['flatten'], 10, b=None,
                                   nonlinearity=nonlinearities.softmax)
    return net
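# Sketch of consuming such a dict-based graph (an assumption; the class that
# owns _forward is not shown, so `model` is a hypothetical instance whose
# self.X is the Theano input variable bound in the InputLayer above).
import theano
import lasagne.layers as layers

net = model._forward()  # `model`: hypothetical instance of the owning class
prediction = layers.get_output(net['out'], deterministic=True)
predict_fn = theano.function([model.X], prediction)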
def __init__(
        self,
        incomings,
        num_units,
        nonlinearity=LN.tanh,
        gate_nonlinearity=LN.sigmoid,
        name=None,
        W=LI.Orthogonal(1.0),
        b=LI.Constant(0.),
        h0=LI.Constant(0.),
        c0=LI.Constant(0.),
        grad_clipping=0.,
        # h0_trainable=False,
):
    super().__init__(incomings, name=name)
    input_shape = self.input_shapes[0][1:]
    input_dim = int(np.prod(input_shape))
    self.h0 = self.add_param(h0, (num_units, ), name="h0",
                             trainable=False, regularizable=False)
    self.c0 = self.add_param(c0, (num_units, ), name="c0",
                             trainable=False, regularizable=False)
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.gate_nonlinearity = gate_nonlinearity
    self.grad_clipping = grad_clipping
    # Weights for all gates.
    self.W_x = self.add_param(W, (input_dim, num_units * 4), name="W_x")
    self.W_h = self.add_param(W, (num_units, num_units * 4), name="W_h")
    self.b = self.add_param(b, (num_units * 4, ), name="b",
                            regularizable=False)
def conv_params(num_filters, filter_size=(3, 3), stride=(1, 1),
                border_mode='same', nonlinearity=rectify,
                W=init.Orthogonal(gain=1.0), b=init.Constant(0.05),
                untie_biases=False, **kwargs):
    args = {
        'num_filters': num_filters,
        'filter_size': filter_size,
        'stride': stride,
        'pad': border_mode,  # The new version has 'pad' instead of 'border_mode'
        'nonlinearity': nonlinearity,
        'W': W,
        'b': b,
        'untie_biases': untie_biases,
    }
    args.update(kwargs)
    return args
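# Usage sketch (hypothetical shapes, my assumption): because this variant maps
# border_mode onto Lasagne's newer 'pad' argument, the dict splats straight
# into Conv2DLayer. Assumes conv_params and its imports (rectify, init) are
# in scope.
import lasagne.layers as layers

l_in = layers.InputLayer(shape=(None, 3, 32, 32))
l_conv = layers.Conv2DLayer(l_in, **conv_params(32, stride=(2, 2)))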
def __init__(
        self,
        # input data
        input_data_layer,
        input_mask_layer,
        # model size
        num_units,
        # initialize
        cell_init=init.Constant(0.),
        hid_init=init.Constant(0.),
        learn_init=False,
        # options
        stochastic=False,
        skip_scale=T.ones(shape=(1, ), dtype=floatX),
        backwards=False,
        gradient_steps=-1,
        grad_clipping=0,
        only_return_final=False,
        **kwargs):
    # input
    incomings = [input_data_layer, input_mask_layer]

    # init input
    input_init = init.Constant(0.)
    self.input_init_incoming_index = -1
    if isinstance(input_init, Layer):
        incomings.append(input_init)
        self.input_init_incoming_index = len(incomings) - 1

    # init hidden
    self.hid_init_incoming_index = -1
    if isinstance(hid_init, Layer):
        incomings.append(hid_init)
        self.hid_init_incoming_index = len(incomings) - 1

    # init cell
    self.cell_init_incoming_index = -1
    if isinstance(cell_init, Layer):
        incomings.append(cell_init)
        self.cell_init_incoming_index = len(incomings) - 1

    # init class
    super(DiffSkipLSTMLayer, self).__init__(incomings, **kwargs)

    # set options
    self.stochastic = stochastic
    self.skip_scale = skip_scale
    self.learn_init = learn_init
    self.num_units = num_units
    self.backwards = backwards
    self.gradient_steps = gradient_steps
    self.grad_clipping = grad_clipping
    self.only_return_final = only_return_final

    # set sampler
    self.uniform = RandomStreams(get_rng().randint(1, 2147462579)).uniform

    # get input size
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[2:])

    ###################
    # gate parameters #
    ###################
    def add_gate_params(gate_name):
        return (self.add_param(spec=init.Orthogonal(0.1),
                               shape=(num_inputs, num_units),
                               name="W_in_to_{}".format(gate_name)),
                self.add_param(spec=init.Orthogonal(0.1),
                               shape=(num_units, num_units),
                               name="W_hid_to_{}".format(gate_name)),
                self.add_param(spec=init.Constant(0.0),
                               shape=(num_units, ),
                               name="b_{}".format(gate_name),
                               regularizable=False))

    ##### in gate #####
    (self.W_in_to_ingate,
     self.W_hid_to_ingate,
     self.b_ingate) = add_gate_params('ingate')
    self.W_cell_to_ingate = self.add_param(spec=init.Uniform(0.1),
                                           shape=(num_units, ),
                                           name="W_cell_to_ingate")

    ##### forget gate #####
    (self.W_in_to_forgetgate,
     self.W_hid_to_forgetgate,
     self.b_forgetgate) = add_gate_params('forgetgate')
    self.W_cell_to_forgetgate = self.add_param(spec=init.Uniform(0.1),
                                               shape=(num_units, ),
                                               name="W_cell_to_forgetgate")

    ##### cell #####
    (self.W_in_to_cell,
     self.W_hid_to_cell,
     self.b_cell) = add_gate_params('cell')

    ##### out gate #####
    (self.W_in_to_outgate,
     self.W_hid_to_outgate,
     self.b_outgate) = add_gate_params('outgate')
    self.W_cell_to_outgate = self.add_param(spec=init.Uniform(0.1),
                                            shape=(num_units, ),
                                            name="W_cell_to_outgate")

    ###################
    # skip parameters #
    ###################
    self.W_cell_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                         shape=(num_units, num_units),
                                         name="W_cell_to_skip")
    self.b_cell_to_skip = self.add_param(spec=init.Constant(1.0),
                                         shape=(num_units, ),
                                         name="b_cell_to_skip",
                                         regularizable=False)
    self.W_hid_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                        shape=(num_units, num_units),
                                        name="W_hid_to_skip")
    self.b_hid_to_skip = self.add_param(spec=init.Constant(1.0),
                                        shape=(num_units, ),
                                        name="b_hid_to_skip",
                                        regularizable=False)
    self.W_in_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                       shape=(num_inputs, num_units),
                                       name="W_in_to_skip")
    self.b_in_to_skip = self.add_param(spec=init.Constant(1.0),
                                       shape=(num_units, ),
                                       name="b_in_to_skip",
                                       regularizable=False)
    self.W_skip = self.add_param(spec=init.Orthogonal(0.1),
                                 shape=(num_units, 1),
                                 name="W_skip")
    self.b_skip = self.add_param(spec=init.Constant(0.0),
                                 shape=(1, ),
                                 name="b_skip",
                                 regularizable=False)
    self.W_diff_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                         shape=(num_inputs, num_units),
                                         name="W_diff_to_skip")
    self.b_diff_to_skip = self.add_param(spec=init.Constant(0.0),
                                         shape=(num_units, ),
                                         name="b_diff_to_skip",
                                         regularizable=False)

    if isinstance(input_init, Layer):
        self.input_init = input_init
    else:
        self.input_init = self.add_param(spec=input_init,
                                         shape=(1, num_inputs),
                                         name="input_init",
                                         trainable=learn_init,
                                         regularizable=False)
    if isinstance(cell_init, Layer):
        self.cell_init = cell_init
    else:
        self.cell_init = self.add_param(spec=cell_init,
                                        shape=(1, num_units),
                                        name="cell_init",
                                        trainable=learn_init,
                                        regularizable=False)
    if isinstance(hid_init, Layer):
        self.hid_init = hid_init
    else:
        self.hid_init = self.add_param(spec=hid_init,
                                       shape=(1, num_units),
                                       name="hid_init",
                                       trainable=learn_init,
                                       regularizable=False)
def build_model(
        batch_size,
        num_channels,
        input_length,
        output_dim,
        subsample,
):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='input',
    )
    l_sampling = SubsampleLayer(
        l_in,
        window=(None, None, 10),
        name='l_sampling',
    )
    l_window = WindowNormLayer(
        l_sampling,
        name='l_window',
    )
    l_conv1 = Conv1DLayer(
        l_window,
        name='conv1',
        num_filters=16,
        border_mode='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool1 = MaxPool1DLayer(
        l_conv1,
        name='pool1',
        pool_size=3,
        stride=2,
    )
    l_conv2 = Conv1DLayer(
        l_pool1,
        name='conv2',
        num_filters=32,
        border_mode='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_conv3 = Conv1DLayer(
        l_conv2,
        name='conv3',
        num_filters=64,
        border_mode='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool3 = MaxPool1DLayer(
        l_conv3,
        name='pool3',
        pool_size=3,
        stride=2,
    )
    l_dropout_dense1 = layers.DropoutLayer(
        l_pool3,
        p=0.5,
    )
    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_dropout_dense2 = layers.DropoutLayer(
        l_dense1,
        p=0.5,
    )
    l_dense2 = layers.DenseLayer(
        l_dropout_dense2,
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_out = layers.DenseLayer(
        l_dense2,
        name='output',
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )
    return l_out
def __init__(self, incoming, num_units, num_hyper_units, num_proj_units,
             ingate=Gate(W_in=init.Orthogonal()),
             forgetgate=Gate(W_in=init.Orthogonal()),
             cell=Gate(W_in=init.Orthogonal(), W_cell=None,
                       nonlinearity=nonlinearities.tanh),
             outgate=Gate(W_in=init.Orthogonal()),
             nonlinearity=nonlinearities.tanh,
             cell_init=init.Constant(0.),
             hid_init=init.Constant(0.),
             backwards=False,
             gradient_steps=-1,
             grad_clipping=0,
             precompute_input=True,
             mask_input=None,
             ivector_input=None,
             use_layer_norm=False,
             **kwargs):
    incomings = [incoming]
    self.mask_incoming_index = -1
    if mask_input is not None:
        incomings.append(mask_input)
        self.mask_incoming_index = len(incomings) - 1
    # default to -1 so the attribute exists even without an i-vector input
    self.ivector_incoming_index = -1
    if ivector_input is not None:
        incomings.append(ivector_input)
        self.ivector_incoming_index = len(incomings) - 1

    super(HyperLSTMLayer, self).__init__(incomings, **kwargs)

    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    self.num_units = num_units
    self.num_hyper_units = num_hyper_units
    self.num_proj_units = num_proj_units
    self.backwards = backwards
    self.gradient_steps = gradient_steps
    self.grad_clipping = grad_clipping
    self.precompute_input = precompute_input

    input_shape = self.input_shapes[0]
    self.num_inputs = numpy.prod(input_shape[2:])

    self.ingate = ingate
    self.forgetgate = forgetgate
    self.cell = cell
    self.outgate = outgate
    self.nonlinearity_ingate = ingate.nonlinearity
    self.nonlinearity_forgetgate = forgetgate.nonlinearity
    self.nonlinearity_cell = cell.nonlinearity
    self.nonlinearity_outgate = outgate.nonlinearity
    self.cell_init = cell_init
    self.hid_init = hid_init
    self.use_layer_norm = use_layer_norm
    self.init_weights()
def build_model(
        batch_size,
        num_channels,
        input_length,
        output_dim,
):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='l_in',
    )
    l_conv1 = Conv1DLayer(
        l_in,
        name='conv1',
        num_filters=8,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool1 = MaxPool1DLayer(
        l_conv1,
        name='pool1',
        pool_size=3,
        stride=2,
    )
    l_conv2 = Conv1DLayer(
        l_pool1,
        name='conv2',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool2 = MaxPool1DLayer(
        l_conv2,
        name='pool2',
        pool_size=3,
        stride=2,
    )
    l_dropout_dense1 = layers.DropoutLayer(
        #l_pool4,
        l_pool2,
        p=0.5,
    )
    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        num_units=32,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_out = layers.DenseLayer(
        l_dense1,
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )
    return l_out
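# Training-setup sketch (my assumption; Conv1DLayer/MaxPool1DLayer come from
# the surrounding project, and input_length=4000 is hypothetical). The sigmoid
# output pairs naturally with binary cross-entropy.
import theano
import theano.tensor as T
import lasagne
import lasagne.layers as layers

network = build_model(batch_size=None, num_channels=1,
                      input_length=4000, output_dim=1)
targets = T.matrix('targets')
prediction = layers.get_output(network)
loss = lasagne.objectives.binary_crossentropy(prediction, targets).mean()
params = layers.get_all_params(network, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
input_var = layers.get_all_layers(network)[0].input_var
train_fn = theano.function([input_var, targets], loss, updates=updates)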
def estimator(protocol, classifier, n_features, files, X, labels, run, fold,
              eval_size=0.1):
    final_weights = 'weights/final_%s_%s_fold_%s.pkl' % (classifier, run, fold)
    if classifier == "SVM":
        if os.path.exists(final_weights):
            est = joblib.load(final_weights)
        else:
            svm = SVC(kernel='linear', class_weight='balanced',
                      cache_size=5500, probability=True)
            if protocol != 'protocol3':
                svm_model = svm
                param_grid = {"C": [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]}
                cv = StratifiedShuffleSplit(labels.reshape((labels.shape[0], )),
                                            n_iter=10, test_size=0.1,
                                            random_state=0)
                est = GridSearchCV(svm_model, param_grid=param_grid,
                                   scoring='roc_auc', n_jobs=15, cv=cv,
                                   verbose=2)
                est.fit(X, labels.reshape((labels.shape[0], )))
            else:
                param_grid = {
                    "estimator__C": [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]
                }
                binarized_labels = label_binarize(np.squeeze(labels),
                                                  classes=[0, 1, 2])
                svm_model = OneVsRestClassifier(svm)
                cv = StratifiedShuffleSplit(binarized_labels, n_iter=10,
                                            test_size=0.1, random_state=0)
                est = GridSearchCV(svm_model, param_grid=param_grid,
                                   scoring='roc_auc', n_jobs=15, cv=cv,
                                   verbose=2)
                est.fit(X, binarized_labels)
            est = est.best_estimator_
            print("Best estimator found by grid search for %s: " % (classifier))
            print(est)
            # Persistence
            #joblib.dump(est, final_weights)
    elif classifier == "RF":
        if os.path.exists(final_weights):
            est = joblib.load(final_weights)
        else:
            #for criterion in ["gini","entropy"]:
            #    for n_estimators in [10, 50, 100, 200]:#, 200, 250, 500, 750, 1000]:
            #        for max_features in [None]: #"auto", "sqrt", "log2",
            # We are not using class_weight='auto'. Error in sklearn
            param_grid = {
                'criterion': ['gini', 'entropy'],
                'n_estimators': [50, 100, 200, 300, 10, 250, 500, 750]
            }
            est = GridSearchCV(RandomForestClassifier(max_features="auto"),
                               param_grid=param_grid, n_jobs=-1, verbose=2)
            print(X[:3])
            est.fit(X, labels.reshape((labels.shape[0], )))
            est = est.best_estimator_
            print("Best estimator found by grid search for %s: " % (classifier))
            print(est)
            # Persistence
            joblib.dump(est, final_weights)
    else:
        layers = [
            (InputLayer, {
                'shape': (None, n_features)
            }),
            (DenseLayer, {
                'num_units': N_HIDDEN_1,
                'nonlinearity': rectify,
                'W': init.Orthogonal('relu'),
                'b': init.Constant(0.01)
            }),
            (FeaturePoolLayer, {
                'pool_size': 2
            }),
            (DenseLayer, {
                'num_units': N_HIDDEN_2,
                'nonlinearity': rectify,
                'W': init.Orthogonal('relu'),
                'b': init.Constant(0.01)
            }),
            (FeaturePoolLayer, {
                'pool_size': 2
            }),
            (DenseLayer, {
                'num_units': 2,
                'nonlinearity': softmax
            }),
        ]
        args = dict(
            update=adam,
            update_learning_rate=theano.shared(util.float32(START_LR)),
            batch_iterator_train=ResampleIterator(BATCH_SIZE),
            batch_iterator_test=BatchIterator(BATCH_SIZE),
            objective=nn.get_objective(l1=L1, l2=L2),
            eval_size=eval_size,
            custom_scores=[('kappa', metrics.kappa)] if eval_size > 0.0 else None,
            on_epoch_finished=[
                nn.Schedule('update_learning_rate', SCHEDULE),
            ],
            regression=False,
            max_epochs=N_ITER,
            verbose=1,
        )
        est = BlendNet(layers, **args)
        if os.path.exists(final_weights):
            est.load_params_from(str(final_weights))
            print("loaded weights from {}".format(final_weights))
        else:
            est.set_split(files, labels)
            est.fit(X, labels)
            # Persistence
            #est.save_params_to(final_weights)
    return est
def __init__(self, incoming, num_prj, num_units,
             ingate=Gate(),
             forgetgate=Gate(b=init.Constant(1.)),
             cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
             outgate=Gate(),
             nonlinearity=nonlinearities.tanh,
             cell_init=init.Constant(0.),
             hid_init=init.Constant(0.),
             dropout_ratio=0.2,
             weight_noise=0.0,
             backwards=False,
             learn_init=False,
             peepholes=True,
             gradient_steps=-1,
             grad_clipping=0,
             unroll_scan=False,
             mask_input=None,
             only_return_final=False,
             only_return_hidden=True,
             **kwargs):
    incomings = [incoming]
    self.mask_incoming_index = -1
    if mask_input is not None:
        incomings.append(mask_input)
        self.mask_incoming_index = len(incomings) - 1
    self.hid_init_incoming_index = -1
    if isinstance(hid_init, Layer):
        incomings.append(hid_init)
        self.hid_init_incoming_index = len(incomings) - 1
    self.cell_init_incoming_index = -1
    if isinstance(cell_init, Layer):
        incomings.append(cell_init)
        self.cell_init_incoming_index = len(incomings) - 1

    # Initialize parent layer
    super(LSTMPLayer, self).__init__(incomings, **kwargs)

    # for dropout
    self.binomial = RandomStreams(get_rng().randint(1, 2147462579)).binomial
    self.p = dropout_ratio

    # for weight noise
    self.normal = RandomStreams(get_rng().randint(1, 2147462579)).normal

    # If the provided nonlinearity is None, make it linear
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    self.weight_noise = weight_noise
    self.learn_init = learn_init
    self.num_prj = num_prj
    self.num_units = num_units
    self.backwards = backwards
    self.peepholes = peepholes
    self.gradient_steps = gradient_steps
    self.grad_clipping = grad_clipping
    self.unroll_scan = unroll_scan
    self.only_return_final = only_return_final
    self.only_return_hidden = only_return_hidden

    if unroll_scan and gradient_steps != -1:
        raise ValueError(
            "Gradient steps must be -1 when unroll_scan is true.")

    input_shape = self.input_shapes[0]
    if unroll_scan and input_shape[1] is None:
        raise ValueError("Input sequence length cannot be specified as "
                         "None when unroll_scan is True")

    #### weight init ####
    num_inputs = numpy.prod(input_shape[2:])

    def add_gate_params(gate, gate_name):
        return (self.add_param(spec=gate.W_in,
                               shape=(num_inputs, num_units),
                               name="W_in_to_{}".format(gate_name)),
                self.add_param(spec=gate.W_hid,
                               shape=(num_prj, num_units),
                               name="W_hid_to_{}".format(gate_name)),
                self.add_param(spec=gate.b,
                               shape=(num_units, ),
                               name="b_{}".format(gate_name),
                               regularizable=False),
                gate.nonlinearity)

    #### ingate ####
    (self.W_in_to_ingate, self.W_hid_to_ingate, self.b_ingate,
     self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate')

    #### forgetgate ####
    (self.W_in_to_forgetgate, self.W_hid_to_forgetgate, self.b_forgetgate,
     self.nonlinearity_forgetgate) = add_gate_params(forgetgate, 'forgetgate')

    #### cell ####
    (self.W_in_to_cell, self.W_hid_to_cell, self.b_cell,
     self.nonlinearity_cell) = add_gate_params(cell, 'cell')

    #### outgate ####
    (self.W_in_to_outgate, self.W_hid_to_outgate, self.b_outgate,
     self.nonlinearity_outgate) = add_gate_params(outgate, 'outgate')

    #### peepholes ####
    if self.peepholes:
        self.W_cell_to_ingate = self.add_param(spec=ingate.W_cell,
                                               shape=(num_units, ),
                                               name="W_cell_to_ingate")
        self.W_cell_to_forgetgate = self.add_param(
            spec=forgetgate.W_cell,
            shape=(num_units, ),
            name="W_cell_to_forgetgate")
        self.W_cell_to_outgate = self.add_param(spec=outgate.W_cell,
                                                shape=(num_units, ),
                                                name="W_cell_to_outgate")

    #### hidden projection ####
    # distinct name so it does not clash with the peephole weight above
    self.W_hid_projection = self.add_param(spec=init.Orthogonal(),
                                           shape=(num_units, num_prj),
                                           name="W_hid_projection")

    # Setup initial values for the cell and the hidden units
    if isinstance(cell_init, Layer):
        self.cell_init = cell_init
    else:
        self.cell_init = self.add_param(cell_init, (1, num_units),
                                        name="cell_init",
                                        trainable=learn_init,
                                        regularizable=False)
    if isinstance(hid_init, Layer):
        self.hid_init = hid_init
    else:
        self.hid_init = self.add_param(hid_init, (1, num_prj),
                                       name="hid_init",
                                       trainable=learn_init,
                                       regularizable=False)
def build_model(
        batch_size,
        num_channels,
        input_length,
        output_dim,
):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='l_in',
    )
    l_sampling = SubsampleLayer(
        l_in,
        window=(None, None, 5),
        name='l_sampling',
    )
    l_window = WindowNormLayer(
        l_sampling,
        name='l_window',
    )
    l_conv1 = Conv1DLayer(
        l_window,
        name='conv1',
        num_filters=16,
        pad='same',
        filter_size=1,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_conv2 = Conv1DLayer(
        l_conv1,
        name='conv2',
        num_filters=8,
        pad='same',
        filter_size=1,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool2 = MaxPool1DLayer(
        l_conv2,
        name='pool2',
        pool_size=3,
        stride=2,
    )
    l_conv3 = Conv1DLayer(
        l_pool2,
        name='conv3',
        num_filters=32,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_conv4 = Conv1DLayer(
        l_conv3,
        name='conv4',
        num_filters=16,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool4 = MaxPool1DLayer(
        l_conv4,
        name='pool4',
        pool_size=3,
        stride=2,
    )
    l_conv5 = Conv1DLayer(
        l_pool4,
        name='conv5',
        num_filters=64,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_conv6 = Conv1DLayer(
        l_conv5,
        name='conv6',
        num_filters=32,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool6 = MaxPool1DLayer(
        l_conv6,
        name='pool6',
        pool_size=3,
        stride=2,
    )
    l_conv7 = Conv1DLayer(
        l_pool6,
        name='conv7',
        num_filters=64,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_conv8 = Conv1DLayer(
        l_conv7,
        name='conv8',
        num_filters=32,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool8 = MaxPool1DLayer(
        l_conv8,
        name='pool8',
        pool_size=3,
        stride=2,
    )
    l_dropout_dense1 = layers.DropoutLayer(
        l_pool8,
        p=0.5,
    )
    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        num_units=64,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_dropout_dense2 = layers.DropoutLayer(
        l_dense1,
        p=0.5,
    )
    l_dense2 = layers.DenseLayer(
        l_dropout_dense2,
        num_units=64,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_out = layers.DenseLayer(
        l_dense2,
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )
    return l_out
def build_model(
        batch_size,
        num_channels,
        input_length,
        output_dim,
):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='input',
    )
    # window size should be 1600 for this network
    l_ss_left = SubsampleLayer(
        l_in,
        window=(None, 1000, 10),
        name='l_ss_left',
    )
    l_ss_right = SubsampleLayer(
        l_in,
        window=(1000, None, 10),
        name='l_ss_right',
    )
    #l_window_left = WindowNormLayer(
    #    l_ss_left,
    #    name='l_window_left',
    #)
    #l_window_right = WindowNormLayer(
    #    l_ss_right,
    #    name='l_window_right',
    #)
    l_conv1_left = Conv1DLayer(
        #l_window_left,
        l_ss_left,
        name='conv1_left',
        num_filters=8,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_conv1_right = Conv1DLayer(
        #l_window_right,
        l_ss_right,
        name='conv1_right',
        num_filters=8,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool1_left = MaxPool1DLayer(
        l_conv1_left,
        name='pool1_left',
        pool_size=3,
        stride=2,
    )
    l_pool1_right = MaxPool1DLayer(
        l_conv1_right,
        name='pool1_right',
        pool_size=3,
        stride=2,
    )
    l_dropout_conv2_left = layers.DropoutLayer(
        l_pool1_left,
        name='drop_conv2_left',
        p=0.1,
    )
    l_dropout_conv2_right = layers.DropoutLayer(
        l_pool1_right,
        name='drop_conv2_right',
        p=0.1,
    )
    l_conv2_left = Conv1DLayer(
        l_dropout_conv2_left,
        name='conv2_left',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_conv2_right = Conv1DLayer(
        l_dropout_conv2_right,
        name='conv2_right',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_dropout_conv3_left = layers.DropoutLayer(
        l_conv2_left,
        name='drop_conv3_left',
        p=0.2,
    )
    l_dropout_conv3_right = layers.DropoutLayer(
        l_conv2_right,
        name='drop_conv3_right',
        p=0.2,
    )
    l_conv3_left = Conv1DLayer(
        l_dropout_conv3_left,
        name='conv3_left',
        num_filters=32,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_conv3_right = Conv1DLayer(
        l_dropout_conv3_right,
        name='conv3_right',
        num_filters=32,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool3_left = MaxPool1DLayer(
        l_conv3_left,
        name='pool3_left',
        pool_size=3,
        stride=2,
    )
    l_pool3_right = MaxPool1DLayer(
        l_conv3_right,
        name='pool3_right',
        pool_size=3,
        stride=2,
    )
    l_concat = layers.ConcatLayer(
        incomings=(l_pool3_left, l_pool3_right),
        name='concat',
    )
    l_dropout_dense1 = layers.DropoutLayer(
        l_concat,
        name='drop_dense1',
        p=0.5,
    )
    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        name='dense1',
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_dropout_dense2 = layers.DropoutLayer(
        l_dense1,
        name='drop_dense2',
        p=0.5,
    )
    l_dense2 = layers.DenseLayer(
        l_dropout_dense2,
        name='dense2',
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_out = layers.DenseLayer(
        l_dense2,
        name='output',
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )
    return l_out
class DrawLayer(Layer):
    '''
    Implements the DRAW model. The input to the model should be flattened
    images. Set the original image shape with imgshp.
    NB: Glorot init will not work.

    REFS
    Gregor, K., Danihelka, I., Graves, A., & Wierstra, D. (2015). DRAW: A
    Recurrent Neural Network For Image Generation. arXiv preprint
    arXiv:1502.04623.
    '''
    ini = init.Normal(std=0.01, mean=0.0)
    zero = init.Constant(0.)
    ortho = init.Orthogonal(np.sqrt(2))

    def __init__(self, input_layer, num_units_encoder_and_decoder, glimpses,
                 dimz, imgshp, N_filters_read, N_filters_write,
                 W_x_to_gates=ini,
                 W_cell_to_gates=zero,
                 b_gates=zero,
                 W_read=ini,
                 b_read=zero,
                 W_write=ini,
                 b_write=zero,
                 nonlinearity_ingate=nonlinearities.sigmoid,
                 nonlinearity_forgetgate=nonlinearities.sigmoid,
                 nonlinearity_modulationgate=nonlinearities.tanh,
                 nonlinearity_outgate=nonlinearities.sigmoid,
                 nonlinearities_out_encoder=nonlinearities.tanh,
                 nonlinearities_out_decoder=nonlinearities.tanh,
                 cell_init=zero,
                 hid_init=zero,
                 canvas_init=zero,
                 W_dec_to_canvas=ini,
                 W_enc_to_mu_z=ini,
                 learn_hid_init=False,
                 learn_canvas_init=True,
                 peepholes=False,
                 x_distribution='bernoulli',
                 qz_distribution='gaussian',
                 pz_distribution='gaussian',
                 read_init=None,
                 n_classes=None,
                 use_y=False,
                 grad_clip_vals_out=[-1.0, 1.0],
                 grad_clip_vals_in=[-10, 10]):
        """
        :param input_layer: Lasagne input layer
        :param num_units_encoder_and_decoder: Number of units in encoder and
            decoder
        :param glimpses: Number of times the network sees and tries to
            reconstruct the image
        :param dimz: Size of latent layer
        :param imgshp: list, [height, width]
        :param N_filters_read: int
        :param N_filters_write: int
        :param W_x_to_gates: function or np.ndarray or theano.shared
        :param W_cell_to_gates: function or np.ndarray or theano.shared
        :param b_gates: function or np.ndarray or theano.shared
        :param W_read: function or np.ndarray or theano.shared
        :param b_read: function or np.ndarray or theano.shared
        :param W_write: function or np.ndarray or theano.shared
        :param b_write: function or np.ndarray or theano.shared
        :param nonlinearity_ingate: function
        :param nonlinearity_forgetgate: function
        :param nonlinearity_modulationgate: function
        :param nonlinearity_outgate: function
        :param nonlinearities_out_encoder: function
        :param nonlinearities_out_decoder: function
        :param cell_init: function or np.ndarray or theano.shared
        :param hid_init: function or np.ndarray or theano.shared
        :param canvas_init: function or np.ndarray or theano.shared
        :param W_dec_to_canvas: function or np.ndarray or theano.shared
        :param W_enc_to_mu_z: function or np.ndarray or theano.shared
        :param learn_hid_init: boolean. If True, cell and hid inits are learned
        :param learn_canvas_init: boolean. Learn canvas init. To start with a
            blank canvas set this to False
        :param peepholes: boolean. LSTM with or without peepholes
        :param x_distribution: str. Distribution of input data. Only supports
            'bernoulli'
        :param qz_distribution: distribution of q(z|x), only supports
            'gaussianmarg'
        :param pz_distribution: prior on z, p(z), only supports 'gaussianmarg'
        :param read_init: None or nd.array of length 5 with initial values for
            the reading operation. If you want to change this you should
            probably change it so the model sees a blurry version of the
            entire image.
        :param n_classes: int, number of classes. Required if use_y=True
        :param use_y: boolean.
            If True, models p(x,y); otherwise p(x)
        :param grad_clip_vals_out: Clipping of gradients with grad_clip
        :param grad_clip_vals_in: Clipping of gradients with grad_clip
        """
        # Initialize parent layer
        super(DrawLayer, self).__init__(input_layer)

        # For any of the nonlinearities, if None is supplied, use identity
        if nonlinearity_ingate is None:
            self.nonlinearity_ingate = nonlinearities.identity
        else:
            self.nonlinearity_ingate = nonlinearity_ingate
        if nonlinearity_forgetgate is None:
            self.nonlinearity_forgetgate = nonlinearities.identity
        else:
            self.nonlinearity_forgetgate = nonlinearity_forgetgate
        if nonlinearity_modulationgate is None:
            self.nonlinearity_modulationgate = nonlinearities.identity
        else:
            self.nonlinearity_modulationgate = nonlinearity_modulationgate
        if nonlinearity_outgate is None:
            self.nonlinearity_outgate = nonlinearities.identity
        else:
            self.nonlinearity_outgate = nonlinearity_outgate

        if x_distribution not in ['bernoulli']:
            raise NotImplementedError
        if pz_distribution not in ['gaussianmarg']:
            raise NotImplementedError
        if qz_distribution not in ['gaussianmarg']:
            raise NotImplementedError
        if use_y is True and n_classes is None:
            raise ValueError('n_classes must be given when use_y is true')

        self.learn_hid_init = learn_hid_init
        self.learn_canvas_init = learn_canvas_init
        self.num_units_encoder_and_decoder = num_units_encoder_and_decoder
        self.peepholes = peepholes
        self.glimpses = glimpses
        self.dimz = dimz
        self.nonlinearity_out_encoder = nonlinearities_out_encoder
        self.nonlinearity_out_decoder = nonlinearities_out_decoder
        self.x_distribution = x_distribution
        self.qz_distribution = qz_distribution
        self.pz_distribution = pz_distribution
        self.N_filters_read = N_filters_read
        self.N_filters_write = N_filters_write
        self.imgshp = imgshp
        self.n_classes = n_classes
        self.use_y = use_y
        self.grad_clip_vals_out = grad_clip_vals_out
        self.grad_clip_vals_in = grad_clip_vals_in

        # Input dimensionality is the output dimensionality of the input layer
        num_batch, num_inputs = self.input_layer.output_shape
        self.num_batch = num_batch
        self.num_inputs = num_inputs

        if self.peepholes:
            self.W_cellenc_to_enc_gates = self.add_param(
                W_cell_to_gates, [3 * num_units_encoder_and_decoder])
            self.W_celldec_to_dec_gates = self.add_param(
                W_cell_to_gates, [3 * num_units_encoder_and_decoder])
            self.W_cellenc_to_enc_gates.name = "DrawLayer: W_cellenc_to_enc_gates"
            self.W_celldec_to_dec_gates.name = "DrawLayer: W_celldec_to_dec_gates"
        else:
            self.W_cellenc_to_enc_gates = []
            self.W_celldec_to_dec_gates = []

        # enc
        self.b_gates_enc = self.add_param(
            b_gates, [4 * num_units_encoder_and_decoder])

        # extra input applies to both encoder and decoder
        if self.use_y:
            # If y is modelled, it is concatenated to the x input to the
            # encoder and the z input to the decoder. We need to expand the
            # corresponding matrices to handle this.
            extra_input = self.n_classes
        else:
            extra_input = 0

        self.W_enc_gates = self.add_param(W_x_to_gates, [
            2 * N_filters_read * N_filters_read +
            num_units_encoder_and_decoder + extra_input,
            4 * num_units_encoder_and_decoder
        ])
        self.W_hid_to_gates_enc = self.add_param(
            W_x_to_gates,
            [num_units_encoder_and_decoder,
             4 * num_units_encoder_and_decoder])
        self.b_gates_dec = self.add_param(
            b_gates, [4 * num_units_encoder_and_decoder])
        self.W_z_to_gates_dec = self.add_param(
            W_x_to_gates,
            [dimz + extra_input, 4 * num_units_encoder_and_decoder])
        self.W_hid_to_gates_dec = self.add_param(
            W_x_to_gates,
            [num_units_encoder_and_decoder,
             4 * num_units_encoder_and_decoder])

        # Setup initial values for the cell and the lstm hidden units
        if self.learn_hid_init:
            self.cell_init_enc = self.add_param(
                cell_init, (1, num_units_encoder_and_decoder))
            self.hid_init_enc = self.add_param(
                hid_init, (1, num_units_encoder_and_decoder))
            self.cell_init_dec = self.add_param(
                cell_init, (1, num_units_encoder_and_decoder))
            self.hid_init_dec = self.add_param(
                hid_init, (1, num_units_encoder_and_decoder))
        else:
            # init at zero + they will not be returned as parameters
            self.cell_init_enc = T.zeros((1, num_units_encoder_and_decoder))
            self.hid_init_enc = T.zeros((1, num_units_encoder_and_decoder))
            self.cell_init_dec = T.zeros((1, num_units_encoder_and_decoder))
            self.hid_init_dec = T.zeros((1, num_units_encoder_and_decoder))

        if self.learn_canvas_init:
            self.canvas_init = self.add_param(canvas_init, (1, num_inputs))
        else:
            self.canvas_init = T.zeros((1, num_inputs))

        # decoder to canvas
        self.W_dec_to_canvas_patch = self.add_param(
            W_dec_to_canvas,
            (num_units_encoder_and_decoder,
             N_filters_write * N_filters_write))

        # variational weights
        # TODO: Make the sizes more flexible, they are not required to be equal
        self.W_enc_to_z_mu = self.add_param(
            W_enc_to_mu_z, (self.num_units_encoder_and_decoder, self.dimz))
        self.b_enc_to_z_mu = self.add_param(b_gates, (self.dimz, ))
        self.W_enc_to_z_sigma = self.add_param(
            W_enc_to_mu_z, (self.num_units_encoder_and_decoder, self.dimz))
        self.b_enc_to_z_sigma = self.add_param(b_gates, (self.dimz, ))

        self.b_gates_enc.name = "DrawLayer: b_gates_enc"
        self.b_gates_dec.name = "DrawLayer: b_gates_dec"
        self.W_enc_gates.name = "DrawLayer: W_x_to_gates_enc"
        self.W_hid_to_gates_enc.name = "DrawLayer: W_hid_to_gates_enc"
        self.W_z_to_gates_dec.name = "DrawLayer: W_z_to_gates_dec"
        self.W_hid_to_gates_dec.name = "DrawLayer: W_hid_to_gates_dec"
        self.W_enc_to_z_mu.name = "DrawLayer: W_enc_to_z_mu"
        self.b_enc_to_z_mu.name = "DrawLayer: b_enc_to_z_mu"
        self.W_enc_to_z_sigma.name = "DrawLayer: W_enc_to_z_sigma"
        self.b_enc_to_z_sigma.name = "DrawLayer: b_enc_to_z_sigma"
        self.W_dec_to_canvas_patch.name = "DrawLayer: W_dec_to_canvas"
        self.cell_init_enc.name = "DrawLayer: cell_init_enc"
        self.hid_init_enc.name = "DrawLayer: hid_init_enc"
        self.cell_init_dec.name = "DrawLayer: cell_init_dec"
        self.hid_init_dec.name = "DrawLayer: hid_init_dec"
        self.canvas_init.name = "DrawLayer: canvas_init"

        # init values for read operation
        delta_read = 1
        gamma = 1.0  # used in read_init below
        sigma_read = 1.0
        center_y = 0.
        center_x = 0.
        if read_init is None:
            read_init = np.array([[
                center_y, center_x,
                np.log(delta_read),
                np.log(sigma_read),
                np.log(gamma)
            ]])
        read_init = read_init.astype(theano.config.floatX)
        print("Read init is", read_init)
        self.W_read = self.add_param(W_read,
                                     (num_units_encoder_and_decoder, 5))
        self.W_write = self.add_param(W_write,
                                      (num_units_encoder_and_decoder, 5))
        self.b_read = self.add_param(b_read, (5, ))
        self.b_write = self.add_param(b_write, (5, ))
        self.read_init = self.add_param(read_init, (1, 5))
        self.W_read.name = "DrawLayer: W_read"
        self.W_write.name = "DrawLayer: W_write"
        self.b_read.name = "DrawLayer: b_read"
        self.b_write.name = "DrawLayer: b_write"

    def get_read_init(self):
        return self.read_init

    def get_params(self):
        '''
        Get all parameters of this layer.

        :returns:
            - params : list of theano.shared
                List of all parameters
        '''
        params = self.get_weight_params() + self.get_bias_params()
        if self.peepholes:
            params.extend(self.get_peephole_params())
        if self.learn_hid_init:
            params.extend(self.get_init_params())
        if self.learn_canvas_init:
            params += [self.canvas_init]
        return params

    def get_weight_params(self):
        '''
        Get all weights of this layer.

        :returns:
            - weight_params : list of theano.shared
                List of all weight parameters
        '''
        return [
            self.W_enc_gates, self.W_hid_to_gates_enc, self.W_z_to_gates_dec,
            self.W_hid_to_gates_dec, self.W_dec_to_canvas_patch,
            self.W_enc_to_z_mu, self.W_enc_to_z_sigma, self.W_read,
            self.W_write
        ]

    def get_peephole_params(self):
        '''
        Get all peephole parameters of this layer.

        :returns:
            - init_params : list of theano.shared
                List of all peephole parameters
        '''
        return [self.W_cellenc_to_enc_gates, self.W_celldec_to_dec_gates]

    def get_init_params(self):
        '''
        Get all initial parameters of this layer.

        :returns:
            - init_params : list of theano.shared
                List of all initial parameters
        '''
        if self.learn_hid_init:
            params = [
                self.hid_init_enc, self.cell_init_enc, self.hid_init_dec,
                self.cell_init_dec
            ]
        else:
            params = []
        return params

    def get_bias_params(self):
        '''
        Get all bias parameters of this layer.

        :returns:
            - bias_params : list of theano.shared
                List of all bias parameters
        '''
        params = [
            self.b_gates_enc, self.b_gates_dec, self.b_enc_to_z_mu,
            self.b_enc_to_z_sigma, self.b_read, self.b_write
        ]
        return params

    def get_output_shape_for(self, input_shape):
        '''
        Compute the expected output shape given the input.
        :parameters:
            - input_shape : tuple
                Dimensionality of expected input

        :returns:
            - output_shape : tuple
                Dimensionality of expected outputs given input_shape
        '''
        return self.input_shape

    def _lstm(self, gates, cell_previous, W_cell_to_gates, nonlinearity_out):
        # LSTM step
        # Gate names are taken from http://arxiv.org/abs/1409.2329 figure 1
        def slice_w(x, n):
            start = n * self.num_units_encoder_and_decoder
            stop = (n + 1) * self.num_units_encoder_and_decoder
            return x[:, start:stop]

        def slice_c(x, n):
            start = n * self.num_units_encoder_and_decoder
            stop = (n + 1) * self.num_units_encoder_and_decoder
            return x[start:stop]

        def clip(x):
            return theano.gradient.grad_clip(x, self.grad_clip_vals_in[0],
                                             self.grad_clip_vals_in[1])

        ingate = slice_w(gates, 0)
        forgetgate = slice_w(gates, 1)
        modulationgate = slice_w(gates, 2)
        outgate = slice_w(gates, 3)

        if self.peepholes:
            ingate += cell_previous * slice_c(W_cell_to_gates, 0)
            forgetgate += cell_previous * slice_c(W_cell_to_gates, 1)

        if self.grad_clip_vals_in is not None:
            print('STEP: Clipping gradients IN', self.grad_clip_vals_in)
            ingate = clip(ingate)
            forgetgate = clip(forgetgate)
            modulationgate = clip(modulationgate)

        ingate = self.nonlinearity_ingate(ingate)
        forgetgate = self.nonlinearity_forgetgate(forgetgate)
        modulationgate = self.nonlinearity_modulationgate(modulationgate)
        if self.grad_clip_vals_in is not None:
            ingate = clip(ingate)
            forgetgate = clip(forgetgate)
            modulationgate = clip(modulationgate)

        cell = forgetgate * cell_previous + ingate * modulationgate
        if self.peepholes:
            outgate += cell * slice_c(W_cell_to_gates, 2)

        if self.grad_clip_vals_in is not None:
            outgate = clip(outgate)
        outgate = self.nonlinearity_outgate(outgate)
        if self.grad_clip_vals_in is not None:
            outgate = clip(outgate)

        hid = outgate * nonlinearity_out(cell)
        return [cell, hid]

    def get_cost(self, x, y=None, *args, **kwargs):
        """
        Compute layer cost.
        :parameters:
            - input : theano.TensorType
                Symbolic input variable

        :returns:
            - layer_output : theano.TensorType
                Symbolic output variable
        """
        if y is None and self.use_y is True:
            raise ValueError('y must be given when use_y is true')

        def step(
                eps_n,
                ######### RECURRENT
                cell_previous_enc, hid_previous_enc,
                cell_previous_dec, hid_previous_dec,
                canvas_previous, mu_z_previous, log_sigma_z_previous,
                z_previous, l_read_previous, l_write_previous,
                #kl_previous,
                ######### x and WEIGHTS
                x, y,
                W_enc_gates, W_hid_to_gates_enc, b_gates_enc,
                W_cellenc_to_enc_gates,
                W_read, b_read,
                W_z_to_gates_dec, b_gates_dec, W_hid_to_gates_dec,
                W_celldec_to_dec_gates,
                W_enc_to_z_mu, b_enc_to_z_mu,
                W_enc_to_z_sigma, b_enc_to_z_sigma,
                W_dec_to_canvas_patch, W_write, b_write,
        ):
            # calculate gates pre-activations and slice
            N_read = self.N_filters_read
            N_write = self.N_filters_write
            img_shp = self.imgshp

            x_err = x - T.nnet.sigmoid(canvas_previous)
            att_read = nn2att(l_read_previous, N_read, img_shp)
            x_org_in, x_err_in = read(x, x_err, att_read, N_read, img_shp)
            x_org_in = att_read['gamma'] * x_org_in
            x_err_in = att_read['gamma'] * x_err_in

            if self.use_y:
                in_gates_enc = T.concatenate(
                    [y, x_org_in, x_err_in, hid_previous_dec], axis=1)
            else:
                in_gates_enc = T.concatenate(
                    [x_org_in, x_err_in, hid_previous_dec], axis=1)

            # equation (5)~ish
            #slice_gates_idx = 4*self.num_units_encoder_and_decoder

            # ENCODER
            gates_enc = T.dot(in_gates_enc, W_enc_gates) + b_gates_enc
            gates_enc += T.dot(hid_previous_enc, W_hid_to_gates_enc)
            #gates_enc += T.dot(hid_previous_enc, W_hidenc_to_enc_gates)
            cell_enc, hid_enc = self._lstm(gates_enc, cell_previous_enc,
                                           W_cellenc_to_enc_gates,
                                           self.nonlinearity_out_encoder)

            # VARIATIONAL
            # eq 6
            mu_z = T.dot(hid_enc, W_enc_to_z_mu) + b_enc_to_z_mu
            log_sigma_z = 0.5 * (T.dot(hid_enc, W_enc_to_z_sigma) +
                                 b_enc_to_z_sigma)
            z = mu_z + T.exp(log_sigma_z) * eps_n

            if self.use_y:
                print('STEP: using Y')
                in_gates_dec = T.concatenate([y, z], axis=1)
            else:
                print('STEP: Not using Y')
                in_gates_dec = z

            # DECODER
            gates_dec = T.dot(in_gates_dec, W_z_to_gates_dec) + b_gates_dec  # i_dec
            gates_dec += T.dot(hid_previous_dec, W_hid_to_gates_dec)
            # equation (7)
            cell_dec, hid_dec = self._lstm(gates_dec, cell_previous_dec,
                                           W_celldec_to_dec_gates,
                                           self.nonlinearity_out_decoder)

            # WRITE
            l_write = T.dot(hid_dec, W_write) + b_write
            w = T.dot(hid_dec, W_dec_to_canvas_patch)
            att_write = nn2att(l_write, N_write, img_shp)
            canvas_upd = write(w, att_write, N_write, img_shp)
            canvas_upd = 1.0 / (att_write['gamma'] + 1e-4) * canvas_upd
            canvas = canvas_previous + canvas_upd
            l_read = T.dot(hid_dec, W_read) + b_read

            # Todo: some of the (all?) gradient clips are redundant
            # + I'm unsure if I use grad_clip correctly and in the correct
            # places...
            # The description of gradient clipping is in
            # Generating sequences with recurrent neural networks
            # section: 2.1 Long Short-Term Memory
            #
            #if self.grad_clip_vals_out is not None:
            #    print('STEP: Clipping gradients Out', self.grad_clip_vals_out)
            #    cell_enc = theano.gradient.grad_clip(cell_enc, self.grad_clip_vals_out[0], self.grad_clip_vals_out[1])
            #    hid_enc = theano.gradient.grad_clip(hid_enc, self.grad_clip_vals_out[0], self.grad_clip_vals_out[1])
            #    cell_dec = theano.gradient.grad_clip(cell_dec, self.grad_clip_vals_out[0], self.grad_clip_vals_out[1])
            #    hid_dec = theano.gradient.grad_clip(hid_dec, self.grad_clip_vals_out[0], self.grad_clip_vals_out[1])
            return [
                cell_enc, hid_enc, cell_dec, hid_dec, canvas, mu_z,
                log_sigma_z, z, l_read, l_write
            ]

        ones = T.ones((self.num_batch, 1))
        mu_z_init = T.zeros((self.num_batch, self.dimz))
        log_sigma_z_init = T.zeros((self.num_batch, self.dimz))
        z_init = T.zeros((self.num_batch, self.dimz))
        att_vals_write_init = T.zeros((self.num_batch, 5))

        if theano.config.compute_test_value == 'off':
            eps = _srng.normal((self.glimpses, self.num_batch))
        else:
            # for testing
            print("draw.py: is not using random generator" + "!#>" * 30)
            eps = T.ones(
                (self.glimpses, self.num_batch), theano.config.floatX) * 0.3

        if y is None:
            y = T.zeros((1))

        # Todo: cleanup this somehow
        # Todo: Will it slow down theano optimization if I don't pass in
        # non seqs as arguments, but just call them with self.XXXX?
        seqs = [eps]
        init = [
            T.dot(ones, self.cell_init_enc),
            T.dot(ones, self.hid_init_enc),
            T.dot(ones, self.cell_init_dec),
            T.dot(ones, self.hid_init_dec),
            T.dot(ones, self.canvas_init),
            mu_z_init, log_sigma_z_init, z_init,
            T.dot(ones, self.read_init),
            att_vals_write_init
        ]
        nonseqs_input = [x, y]
        nonseqs_enc = [
            self.W_enc_gates, self.W_hid_to_gates_enc, self.b_gates_enc,
            self.W_cellenc_to_enc_gates, self.W_read, self.b_read
        ]
        nonseqs_dec = [
            self.W_z_to_gates_dec, self.b_gates_dec, self.W_hid_to_gates_dec,
            self.W_celldec_to_dec_gates
        ]
        nonseqs_variational = [
            self.W_enc_to_z_mu, self.b_enc_to_z_mu, self.W_enc_to_z_sigma,
            self.b_enc_to_z_sigma
        ]
        nonseqs_other = [
            self.W_dec_to_canvas_patch, self.W_write, self.b_write
        ]
        non_seqs = nonseqs_input + nonseqs_enc + nonseqs_dec + \
            nonseqs_variational + nonseqs_other

        output_scan = theano.scan(step,
                                  sequences=seqs,
                                  outputs_info=init,
                                  non_sequences=non_seqs,
                                  go_backwards=False)[0]
        cell_enc, hid_enc, cell_dec, hid_dec, canvas, mu_z, log_sigma_z, \
            z, l_read, l_write = output_scan

        # because we model the output as bernoulli we take sigmoid to ensure
        # range (0,1)
        last_reconstruction = T.nnet.sigmoid(canvas[-1, :, :])

        # select distribution of p(x|z)
        # LOSS
        # The loss is the negative loglikelihood of the data plus the
        # KL divergence between the variational approximation to z and
        # the prior on z:
        #     Loss = -logD(x) + D_kl(Q(z|h)||p(z))
        # If we assume that x is bernoulli then
        #     -logD(x) = -(t*log(o) + (1-t)*log(1-o)) = cross_ent(t,o)
        # D_kl(Q(z|h)||p(z)) can in some cases be solved analytically as
        #     D_kl(Q(z|h)||p(z)) = 0.5(sum_T(mu^2 + sigma^2 - 1 - log(sigma^2)))
        # We add these terms and return minus the cost, i.e. return the
        # lower bound
        L_x = T.nnet.binary_crossentropy(last_reconstruction, x).sum()
        #L_x = cross_ent(last_reconstruction, x).sum()
        L_z = T.sum(0.5 * (mu_z**2 + T.exp(log_sigma_z * 2) - 1 -
                           log_sigma_z * 2))
        self.L_x = L_x
        self.L_z = L_z
        L = L_x + L_z
        self.canvas = canvas
        self.att_vals_read = l_read
        self.att_vals_write = l_write
        return L / self.num_batch

    def get_canvas(self):
        return T.nnet.sigmoid(self.canvas.dimshuffle(1, 0, 2))
    def get_att_vals(self):
        return self.att_vals_read.dimshuffle(1, 0, 2), \
            self.att_vals_write.dimshuffle(1, 0, 2)

    def get_logx(self):
        return self.L_x / self.num_batch

    def get_KL(self):
        return self.L_z / self.num_batch

    def generate(self, n_digits, y=None, *args, **kwargs):
        '''
        Generate digits, see http://arxiv.org/abs/1502.04623v1 section 2.3
        '''
        if y is None and self.use_y is True:
            raise ValueError('y must be given when use_y is true')

        def step(z, cell_previous_dec, hid_previous_dec, canvas_previous,
                 l_write_previous, y,
                 W_z_to_gates_dec, b_gates_dec, W_hid_to_gates_dec,
                 W_celldec_to_dec_gates, W_dec_to_canvas_patch,
                 W_write, b_write):
            N_write = self.N_filters_write
            img_shp = self.imgshp

            # DECODER
            if self.use_y:
                print('STEP: using Y')
                in_gates_dec = T.concatenate([y, z], axis=1)
            else:
                print('STEP: Not using Y')
                in_gates_dec = z

            gates_dec = T.dot(in_gates_dec, W_z_to_gates_dec) + b_gates_dec
            gates_dec += T.dot(hid_previous_dec, W_hid_to_gates_dec)
            # equation (7)
            cell_dec, hid_dec = self._lstm(gates_dec, cell_previous_dec,
                                           W_celldec_to_dec_gates,
                                           self.nonlinearity_out_decoder)

            # WRITE
            l_write = T.dot(hid_dec, W_write) + b_write
            w = T.dot(hid_dec, W_dec_to_canvas_patch)
            att_write = nn2att(l_write, N_write, img_shp)
            canvas_upd = write(w, att_write, N_write, img_shp)
            canvas_upd = 1.0 / (att_write['gamma'] + 1e-4) * canvas_upd
            canvas = canvas_previous + canvas_upd
            return [cell_dec, hid_dec, canvas, l_write]

        ones = T.ones((n_digits, 1))
        if theano.config.compute_test_value == 'off':
            z_samples = _srng.normal((self.glimpses, n_digits, self.dimz))
        else:
            print("draw.py: is not using random generator" + "!#>" * 30)
            z_samples = T.ones((self.glimpses, n_digits, self.dimz),
                               theano.config.floatX) * 0.3
        if y is None:
            y = T.zeros((1))
        att_vals_write_init = T.zeros((n_digits, 5))
        seqs = [z_samples]
        init = [
            T.dot(ones, self.cell_init_dec),
            T.dot(ones, self.hid_init_dec),
            T.dot(ones, self.canvas_init),
            att_vals_write_init
        ]
        non_seqs = [
            y, self.W_z_to_gates_dec, self.b_gates_dec,
            self.W_hid_to_gates_dec, self.W_celldec_to_dec_gates,
            self.W_dec_to_canvas_patch, self.W_write, self.b_write
        ]
        output_scan = theano.scan(step,
                                  sequences=seqs,
                                  outputs_info=init,
                                  non_sequences=non_seqs,
                                  go_backwards=False)[0]
        canvas = output_scan[2]
        l_write = output_scan[3]
        return T.nnet.sigmoid(canvas.dimshuffle(1, 0, 2)), \
            l_write.dimshuffle(1, 0, 2)
def build_model(
        batch_size,
        num_channels,
        input_length,
        output_dim,
        subsample,
):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='input',
    )
    l_sampling = SubsampleLayer(
        l_in,
        window=(None, None, subsample),
        name='l_sampling',
    )
    l_conv1 = Conv1DLayer(
        l_sampling,
        name='conv1',
        num_filters=8,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool1 = MaxPool1DLayer(
        l_conv1,
        name='pool1',
        pool_size=3,
        stride=2,
    )
    l_dropout_conv2 = layers.DropoutLayer(
        l_pool1,
        name='drop_conv2',
        p=0.2,
    )
    l_conv2 = Conv1DLayer(
        l_dropout_conv2,
        name='conv2',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_dropout_conv3 = layers.DropoutLayer(
        l_conv2,
        name='drop_conv3',
        p=0.2,
    )
    l_conv3 = Conv1DLayer(
        l_dropout_conv3,
        name='conv3',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool3 = MaxPool1DLayer(
        l_conv3,
        name='pool3',
        pool_size=3,
        stride=2,
    )
    l_dropout_conv4 = layers.DropoutLayer(
        l_pool3,
        name='drop_conv4',
        p=0.3,
    )
    l_conv4 = Conv1DLayer(
        l_dropout_conv4,
        name='conv4',
        num_filters=32,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_dropout_conv5 = layers.DropoutLayer(
        l_conv4,
        name='drop_conv5',
        p=0.3,
    )
    l_conv5 = Conv1DLayer(
        l_dropout_conv5,
        name='conv5',
        num_filters=32,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool5 = MaxPool1DLayer(
        l_conv5,
        name='pool5',
        pool_size=3,
        stride=2,
    )
    l_dropout_conv6 = layers.DropoutLayer(
        l_pool5,
        name='drop_conv6',
        p=0.4,
    )
    l_conv6 = Conv1DLayer(
        l_dropout_conv6,
        name='conv6',
        num_filters=64,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_dropout_conv7 = layers.DropoutLayer(
        l_conv6,
        name='drop_conv7',
        p=0.4,
    )
    l_conv7 = Conv1DLayer(
        l_dropout_conv7,
        name='conv7',
        num_filters=64,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_pool7 = MaxPool1DLayer(
        l_conv7,
        name='pool7',
        pool_size=3,
        stride=2,
    )
    l_dropout_dense1 = layers.DropoutLayer(
        l_pool7,
        name='drop_dense1',
        p=0.5,
    )
    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        name='dense1',
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_dropout_dense2 = layers.DropoutLayer(
        l_dense1,
        name='drop_dense2',
        p=0.5,
    )
    l_dense2 = layers.DenseLayer(
        l_dropout_dense2,
        name='dense2',
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )
    l_out = layers.DenseLayer(
        l_dense2,
        name='output',
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )
    return l_out