def _create_nnet(self, input_dims, output_dims, learning_rate, num_hidden_units=15,
                 batch_size=32, max_train_epochs=1,
                 hidden_nonlinearity=nonlinearities.rectify,
                 output_nonlinearity=None, update_method=updates.sgd):
    """
    A subclass may override this if a different sort of network is desired.
    """
    nnlayers = [('input', layers.InputLayer),
                ('hidden', layers.DenseLayer),
                ('output', layers.DenseLayer)]

    nnet = NeuralNet(
        layers=nnlayers,

        # layer parameters:
        input_shape=(None, input_dims),
        hidden_num_units=num_hidden_units,
        hidden_nonlinearity=hidden_nonlinearity,
        output_nonlinearity=output_nonlinearity,
        output_num_units=output_dims,

        # optimization method:
        update=update_method,
        update_learning_rate=learning_rate,

        regression=True,  # flag to indicate we're dealing with regression problem
        max_epochs=max_train_epochs,
        batch_iterator_train=BatchIterator(batch_size=batch_size),
        train_split=nolearn.lasagne.TrainSplit(eval_size=0),
        verbose=0,
    )
    nnet.initialize()

    return nnet
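# For context, a minimal standalone sketch of the construct/initialize/predict
# pattern these snippets rely on: build a NeuralNet from a layer list, call
# initialize() to compile the Theano functions without training, then predict.
# Shapes, hyperparameters, and the dummy input below are illustrative only and
# assume nolearn and Lasagne are importable.
import numpy as np
from lasagne import layers, nonlinearities, updates
from nolearn.lasagne import BatchIterator, NeuralNet, TrainSplit

net = NeuralNet(
    layers=[('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            ('output', layers.DenseLayer)],
    input_shape=(None, 8),
    hidden_num_units=15,
    hidden_nonlinearity=nonlinearities.rectify,
    output_num_units=1,
    output_nonlinearity=None,
    update=updates.sgd,
    update_learning_rate=0.01,
    regression=True,
    max_epochs=1,
    batch_iterator_train=BatchIterator(batch_size=32),
    train_split=TrainSplit(eval_size=0),
    verbose=0,
)
net.initialize()  # compiles train/eval/predict functions; weights stay random
print net.predict(np.zeros((4, 8), dtype=np.float32)).shape  # -> (4, 1)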
def load_encoder(path):
    """
    load a pretrained dbn from path
    :param path: path to the .mat dbn
    :return: pretrained unrolled encoder
    """
    # create the network using weights from pretrain_nn.mat
    nn = sio.loadmat(path)
    w1 = nn['w1']
    w2 = nn['w2']
    w3 = nn['w3']
    w4 = nn['w4']
    b1 = nn['b1'][0]
    b2 = nn['b2'][0]
    b3 = nn['b3'][0]
    b4 = nn['b4'][0]

    encoder = NeuralNet(
        layers=[
            (InputLayer, {'name': 'input', 'shape': (None, 1200)}),
            (DenseLayer, {'name': 'l1', 'num_units': 2000, 'nonlinearity': sigmoid, 'W': w1, 'b': b1}),
            (DenseLayer, {'name': 'l2', 'num_units': 1000, 'nonlinearity': sigmoid, 'W': w2, 'b': b2}),
            (DenseLayer, {'name': 'l3', 'num_units': 500, 'nonlinearity': sigmoid, 'W': w3, 'b': b3}),
            (DenseLayer, {'name': 'l4', 'num_units': 50, 'nonlinearity': linear, 'W': w4, 'b': b4}),
        ],
        update=nesterov_momentum,
        update_learning_rate=0.001,
        update_momentum=0.5,
        objective_l2=0.005,
        verbose=1,
        regression=True,
    )
    encoder.initialize()
    return encoder
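# Hypothetical usage of load_encoder(); the .mat path and the data array are
# placeholders (not from the original project) and assume inputs of width 1200
# as declared in the input layer above.
import numpy as np

encoder = load_encoder('pretrain_nn.mat')
X = np.random.rand(10, 1200).astype(np.float32)
codes = encoder.predict(X)  # 50-dimensional codes from the 'l4' layer
print codes.shape           # -> (10, 50)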
def test_okay(self, NeuralNet):
    net = NeuralNet(
        layers=[('input', Mock), ('mylayer', Mock)],
        input_shape=(10, 10),
        mylayer_hey='hey',
        update_foo=1,
        update_bar=2,
    )
    net._create_iter_funcs = lambda *args: (1, 2, 3)
    net.initialize()
def test_unused(self, NeuralNet):
    net = NeuralNet(
        layers=[('input', Mock), ('mylayer', Mock)],
        input_shape=(10, 10),
        mylayer_hey='hey',
        yourlayer_ho='ho',
        update_foo=1,
        update_bar=2,
    )
    net._create_iter_funcs = lambda *args: (1, 2, 3)
    with pytest.raises(ValueError) as err:
        net.initialize()
    assert str(err.value) == 'Unused kwarg: yourlayer_ho'
def test_layers_included(self, NeuralNet):
    def objective(layers_, target, **kwargs):
        out_a_layer = layers_['output_a']
        out_b_layer = layers_['output_b']

        # Get the outputs
        out_a, out_b = get_output([out_a_layer, out_b_layer])

        # Get the targets
        gt_a = T.cast(target[:, 0], 'int32')
        gt_b = target[:, 1].reshape((-1, 1))

        # Calculate the multi task loss
        cls_loss = aggregate(categorical_crossentropy(out_a, gt_a))
        reg_loss = aggregate(categorical_crossentropy(out_b, gt_b))
        loss = cls_loss + reg_loss
        return loss

    # test that both branches of the multi output network are included,
    # and also that a single layer isn't included multiple times.
    l = InputLayer(shape=(None, 1, 28, 28), name="input")
    l = Conv2DLayer(l, name='conv1', filter_size=(5, 5), num_filters=8)
    l = Conv2DLayer(l, name='conv2', filter_size=(5, 5), num_filters=8)

    la = DenseLayer(l, name='hidden_a', num_units=128)
    la = DenseLayer(la, name='output_a', nonlinearity=softmax, num_units=10)

    lb = DenseLayer(l, name='hidden_b', num_units=128)
    lb = DenseLayer(lb, name='output_b', nonlinearity=sigmoid, num_units=1)

    net = NeuralNet(layers=[la, lb],
                    update_learning_rate=0.5,
                    y_tensor_type=None,
                    regression=True,
                    objective=objective)
    net.initialize()

    expected_names = sorted(["input", "conv1", "conv2",
                             "hidden_a", "output_a",
                             "hidden_b", "output_b"])
    network_names = sorted(list(net.layers_.keys()))

    assert expected_names == network_names
def extract_encoder(dbn):
    dbn_layers = dbn.get_all_layers()
    encoder = NeuralNet(
        layers=[
            (InputLayer, {'name': 'input', 'shape': dbn_layers[0].shape}),
            (DenseLayer, {'name': 'l1', 'num_units': dbn_layers[1].num_units,
                          'nonlinearity': sigmoid, 'W': dbn_layers[1].W, 'b': dbn_layers[1].b}),
            (DenseLayer, {'name': 'l2', 'num_units': dbn_layers[2].num_units,
                          'nonlinearity': sigmoid, 'W': dbn_layers[2].W, 'b': dbn_layers[2].b}),
            (DenseLayer, {'name': 'l3', 'num_units': dbn_layers[3].num_units,
                          'nonlinearity': sigmoid, 'W': dbn_layers[3].W, 'b': dbn_layers[3].b}),
            (DenseLayer, {'name': 'l4', 'num_units': dbn_layers[4].num_units,
                          'nonlinearity': linear, 'W': dbn_layers[4].W, 'b': dbn_layers[4].b}),
        ],
        update=nesterov_momentum,
        update_learning_rate=0.001,
        update_momentum=0.5,
        objective_l2=0.005,
        verbose=1,
        regression=True,
    )
    encoder.initialize()
    return encoder
def model_initial(X_train, y_train, max_iter=5):
    global params, val_acc
    params = []
    val_acc = np.zeros(max_iter)
    lr = theano.shared(np.float32(1e-4))
    for iteration in range(max_iter):
        print 'Initializing weights (%d/5) ...' % (iteration + 1)
        network_init = create_network()
        net_init = NeuralNet(
            network_init,
            max_epochs=3,
            update=adam,
            update_learning_rate=lr,
            train_split=TrainSplit(eval_size=0.1),
            batch_iterator_train=BatchIterator(batch_size=32),
            batch_iterator_test=BatchIterator(batch_size=64),
            on_training_finished=[SaveTrainHistory(iteration=iteration)],
            verbose=0)
        net_init.initialize()
        net_init.fit(X_train, y_train)
def net_color_non_square(NeuralNet):
    l = InputLayer(shape=(None, 3, 20, 28))
    l = Conv2DLayer(l, name='conv1', filter_size=(5, 5), num_filters=1)
    l = MaxPool2DLayer(l, name='pool1', pool_size=(2, 2))
    l = Conv2DLayer(l, name='conv2', filter_size=(5, 5), num_filters=8)
    l = MaxPool2DLayer(l, name='pool2', pool_size=(2, 2))
    l = DenseLayer(l, name='hidden1', num_units=128)
    l = DenseLayer(l, name='output', nonlinearity=softmax, num_units=10)
    net = NeuralNet(
        layers=l,
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,
        max_epochs=1,
    )
    net.initialize()
    return net
def make_net(W, H, size1=20, size2=15):
    net = NeuralNet(
        layers=[
            ('input', InputLayer),
            ('dense1', DenseLayer),
            ('dense2', DenseLayer),
            ('output', DenseLayer),
        ],
        input_shape=(None, W * H),
        dense1_num_units=size1,
        dense1_nonlinearity=LeakyRectify(leakiness=0.1),
        dense1_W=HeNormal(),
        dense1_b=Constant(),
        dense2_num_units=size2,
        dense2_nonlinearity=LeakyRectify(leakiness=0.1),
        dense2_W=HeNormal(),
        dense2_b=Constant(),
        output_num_units=4,
        output_nonlinearity=softmax,
        output_W=HeNormal(),
        output_b=Constant(),
        update=nesterov_momentum,  # todo
        update_learning_rate=shared(float32(1.)),
        update_momentum=0.9,
        max_epochs=200,
        on_epoch_finished=[
            StopWhenOverfitting(),
            StopAfterMinimum(),
            AdjustLearningRate(1., 0.0001),
        ],
        # label_encoder=False,
        regression=True,
        verbose=1,
        batch_iterator_train=BatchIterator(batch_size=128),  # todo
        batch_iterator_test=BatchIterator(batch_size=128),
        train_split=TrainSplit(eval_size=0.1),
    )
    net.initialize()
    return net
def create_nn():
    '''
    Create a neural net with one (or more) layers to fit the featurized data.
    A single softmax layer is equivalent to doing logistic regression on the
    featurized data.  Result: 53% accuracy.
    Adding a fully connected hidden layer boosts accuracy to 67%.
    '''
    nn = NeuralNet(
        layers=[
            (InputLayer, {'name': 'input', 'shape': (None, 4096)}),
            # (DropoutLayer, {'name': 'drop6', 'p': .5}),
            (DenseLayer, {'name': 'fc7', 'num_units': 4096}),
            (DenseLayer, {'name': 'output', 'num_units': 3, 'nonlinearity': softmax}),
        ],
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,
        # regression=True,  # flag to indicate we're dealing with regression problem
        max_epochs=1000,  # we want to train this many epochs
        verbose=1,
        train_split=TrainSplit(eval_size=0.25),
    )
    nn.initialize()
    return nn
def extract_encoder(dbn):
    dbn_layers = dbn.get_all_layers()
    encoder = NeuralNet(
        layers=[
            (InputLayer, {'name': 'input', 'shape': dbn_layers[0].shape}),
            (DenseLayer, {'name': 'l1', 'num_units': dbn_layers[1].num_units,
                          'nonlinearity': sigmoid, 'W': dbn_layers[1].W, 'b': dbn_layers[1].b}),
            (DenseLayer, {'name': 'l2', 'num_units': dbn_layers[2].num_units,
                          'nonlinearity': sigmoid, 'W': dbn_layers[2].W, 'b': dbn_layers[2].b}),
            (DenseLayer, {'name': 'l3', 'num_units': dbn_layers[3].num_units,
                          'nonlinearity': sigmoid, 'W': dbn_layers[3].W, 'b': dbn_layers[3].b}),
            (DenseLayer, {'name': 'l4', 'num_units': dbn_layers[4].num_units,
                          'nonlinearity': linear, 'W': dbn_layers[4].W, 'b': dbn_layers[4].b}),
        ],
        update=adadelta,
        update_learning_rate=0.01,
        objective_l2=0.005,
        verbose=1,
        regression=True,
    )
    encoder.initialize()
    return encoder
def net_with_nonlinearity_layer(NeuralNet):
    l = InputLayer(shape=(None, 1, 28, 28))
    l = Conv2DLayer(l, name='conv1', filter_size=(5, 5), num_filters=8)
    l = MaxPool2DLayer(l, name='pool1', pool_size=(2, 2))
    l = Conv2DLayer(l, name='conv2', filter_size=(5, 5), num_filters=8)
    l = MaxPool2DLayer(l, name='pool2', pool_size=(2, 2))
    l = DenseLayer(l, name='hidden1', num_units=128)
    l = DenseLayer(l, name='output', nonlinearity=softmax, num_units=10)
    l = NonlinearityLayer(l)

    net = NeuralNet(
        layers=l,
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,
        max_epochs=5,
        on_epoch_finished=[_OnEpochFinished()],
        verbose=99,
    )
    net.initialize()
    return net
    conv2d8_filter_size=(1, 1),
    conv2d8_nonlinearity=lasagne.nonlinearities.rectify,
    conv2d8_W=W[7],

    # output layer uses identity function
    # output_nonlinearity=lasagne.nonlinearities.softmax,
    # output_num_units=1000,  # 1000 target values
    # output_W=W[7],

    # optimization method params
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    max_epochs=10,
    verbose=1,
    regression=True)

for i, w in enumerate(W):
    print i, w.shape

net1.initialize()

import cv2
from training_images import simpleProcessImage

img = cv2.imread("/home/simon/python/sklearn-theano/sklearn_theano/datasets/images/cat_and_dog.jpg")
crop = simpleProcessImage(img)
cv2.imshow("X", crop)
res = net1.predict(crop.transpose(2, 0, 1).reshape(-1, 3, 231, 231))
print res
cv2.waitKey()
def create_pretrained_vgg_nn_nolearn():
    '''
    *** This function need only be run once to create and save a nolearn NeuralNet ***
    *** instance from the original lasagne layer weights for the vgg net.          ***

    Create a vgg neural net. Load pretrained weights.
    Pickle the entire net. Pickle the mean image.
    Return a nolearn.NeuralNet instance, mean_image numpy array
    '''
    # define the vgg_s network
    vgg_nn = NeuralNet(
        layers=[
            (InputLayer, {'name': 'input', 'shape': (None, 3, 224, 224)}),
            (ConvLayer, {'name': 'conv1', 'num_filters': 96, 'filter_size': (7, 7),
                         'stride': 2, 'flip_filters': False}),
            (NormLayer, {'name': 'norm1', 'alpha': .0001}),
            (PoolLayer, {'name': 'pool1', 'pool_size': (3, 3), 'stride': 3, 'ignore_border': False}),
            (ConvLayer, {'name': 'conv2', 'num_filters': 256, 'filter_size': (5, 5),
                         'flip_filters': False}),  # 'pad': 2, 'stride': 1
            (PoolLayer, {'name': 'pool2', 'pool_size': (2, 2), 'stride': 2, 'ignore_border': False}),
            (ConvLayer, {'name': 'conv3', 'num_filters': 512, 'filter_size': (3, 3),
                         'pad': 1, 'flip_filters': False}),  # 'stride': 1
            (ConvLayer, {'name': 'conv4', 'num_filters': 512, 'filter_size': (3, 3),
                         'pad': 1, 'flip_filters': False}),  # 'stride': 1
            (ConvLayer, {'name': 'conv5', 'num_filters': 512, 'filter_size': (3, 3),
                         'pad': 1, 'flip_filters': False}),  # 'stride': 1
            (PoolLayer, {'name': 'pool5', 'pool_size': (3, 3), 'stride': 3, 'ignore_border': False}),
            (DenseLayer, {'name': 'fc6', 'num_units': 4096}),
            (DropoutLayer, {'name': 'drop6', 'p': .5}),
            (DenseLayer, {'name': 'fc7', 'num_units': 4096}),
        ],

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,

        # Not needed unless training the net.
        # regression=True,   # flag to indicate we're dealing with regression problem
        # max_epochs=400,    # we want to train this many epochs
        # verbose=1,
    )

    # load pretrained weights
    vgg_nn.initialize()
    vgg_nn.load_params_from('./vgg_nolearn_saved_wts_biases.pkl')

    # load mean image
    model = pickle.load(open('./vgg_cnn_s.pkl'))
    mean_image = model['mean image']

    # pickle the model and the mean image
    with open("/data/mean_image.pkl", 'w') as f:
        pickle.dump(mean_image, f)
    with open("/data/full_vgg.pkl", 'w') as f:
        pickle.dump(vgg_nn, f)

    return vgg_nn, mean_image
class NNet(BaseEstimator, ClassifierMixin):

    def __init__(self,
                 name='nameless_net',          # used for saving, so maybe make it unique
                 dense1_size=60,
                 dense1_nonlinearity='tanh',
                 dense1_init='orthogonal',
                 dense2_size=None,
                 dense2_nonlinearity=None,     # inherits dense1
                 dense2_init=None,             # inherits dense1
                 dense3_size=None,
                 dense3_nonlinearity=None,     # inherits dense2
                 dense3_init=None,             # inherits dense2
                 learning_rate=0.001,
                 learning_rate_scaling=100,
                 momentum=0.9,
                 momentum_scaling=100,
                 max_epochs=3000,
                 epoch_steps=None,
                 dropout0_rate=0,              # this is the input layer
                 dropout1_rate=None,
                 dropout2_rate=None,           # inherits dropout1_rate
                 dropout3_rate=None,           # inherits dropout2_rate
                 weight_decay=0,
                 adaptive_weight_decay=False,
                 batch_size=128,
                 output_nonlinearity='softmax',
                 auto_stopping=True,
                 save_snapshots_stepsize=None,
                 ):
        """
        Create the network with the selected parameters.

        :param name: Name for save files
        :param dense1_size: Number of neurons for first hidden layer
        :param dense1_nonlinearity: The activation function for the first hidden layer
        :param dense1_init: The weight initialization for the first hidden layer
        :param learning_rate: The (initial) learning rate (how fast the network learns)
        :param learning_rate_scaling: The total factor to gradually decrease the learning rate by
        :param momentum: The (initial) momentum
        :param momentum_scaling: Similar to learning_rate_scaling
        :param max_epochs: Total number of epochs (at most)
        :param dropout1_rate: Percentage of connections dropped each step for first hidden layer
        :param weight_decay: Penalizes the weights by L2 norm (regularizes but decreases results)
        :param adaptive_weight_decay: Should the weight decay adapt automatically?
        :param batch_size: How many samples to send through the network at a time
        :param auto_stopping: Stop early if the network seems to stop performing well
        :param pretrain: Filepath of the previous weights to start at (or None)
        :return:
        """
        """
        Input argument storage: automatically store all locals, which should be exactly
        the arguments at this point, but storing a little too much is not a big problem.
        """
        params = locals()
        del params['self']
        # self.__dict__.update(params)
        self.parameter_names = sorted(params.keys())

        """
        Check the parameters and update some defaults (will be done for 'self', no need to store again).
        """
        self.set_params(**params)

    def init_net(self, feature_count, class_count=NCLASSES, verbosity=VERBOSITY >= 2):
        """
        Initialize the network (needs to be done when data is available in order to set dimensions).
        """
        if VERBOSITY >= 1:
            print 'initializing network {0:s} {1:d}x{2:d}x{3:d}'.format(
                self.name, self.dense1_size or 0, self.dense2_size or 0, self.dense3_size or 0)
        if VERBOSITY >= 2:
            print 'parameters: ' + ', '.join(
                '{0:s} = {1:}'.format(k, v) for k, v in self.get_params(deep=False).items())
        self.feature_count = feature_count
        self.class_count = class_count

        """
        Create the layers and their settings.
        """
        self.layers = [
            ('input', InputLayer),
        ]
        self.params = {
            'dense1_num_units': self.dense1_size,
            'dense1_nonlinearity': nonlinearities[self.dense1_nonlinearity],
            'dense1_W': initializers[self.dense1_init],
            'dense1_b': Constant(0.),
        }
        if self.dropout0_rate:
            self.layers += [('dropout0', DropoutLayer)]
            self.params['dropout0_p'] = self.dropout0_rate
        self.layers += [
            ('dense1', DenseLayer),
        ]
        if self.dropout1_rate:
            self.layers += [('dropout1', DropoutLayer)]
            self.params['dropout1_p'] = self.dropout1_rate
        if self.dense2_size:
            self.layers += [('dense2', DenseLayer)]
            self.params.update({
                'dense2_num_units': self.dense2_size,
                'dense2_nonlinearity': nonlinearities[self.dense2_nonlinearity],
                'dense2_W': initializers[self.dense2_init],
                'dense2_b': Constant(0.),
            })
        else:
            assert not self.dense3_size, 'There cannot be a third dense layer without a second one'
        if self.dropout2_rate:
            assert self.dense2_size is not None, 'There cannot be a second dropout layer without a second dense layer.'
            self.layers += [('dropout2', DropoutLayer)]
            self.params['dropout2_p'] = self.dropout2_rate
        if self.dense3_size:
            self.layers += [('dense3', DenseLayer)]
            self.params.update({
                'dense3_num_units': self.dense3_size,
                'dense3_nonlinearity': nonlinearities[self.dense3_nonlinearity],
                'dense3_W': initializers[self.dense3_init],
                'dense3_b': Constant(0.),
            })
        if self.dropout3_rate:
            assert self.dense3_size is not None, 'There cannot be a third dropout layer without a third dense layer.'
            self.layers += [('dropout3', DropoutLayer)]
            self.params['dropout3_p'] = self.dropout3_rate
        self.layers += [('output', DenseLayer)]
        self.params.update({
            'output_nonlinearity': nonlinearities[self.output_nonlinearity],
            'output_W': GlorotUniform(),
            'output_b': Constant(0.),
        })

        """
        Create meta parameters and special handlers.
        """
        if VERBOSITY >= 3:
            print 'learning rate: {0:.6f} -> {1:.6f}'.format(
                abs(self.learning_rate), abs(self.learning_rate) / float(self.learning_rate_scaling))
            print 'momentum: {0:.6f} -> {1:.6f}'.format(
                abs(self.momentum), 1 - ((1 - abs(self.momentum)) / float(self.momentum_scaling)))
        self.step_handlers = [
            LinearVariable('update_learning_rate', start=abs(self.learning_rate),
                           stop=abs(self.learning_rate) / float(self.learning_rate_scaling)),
            LinearVariable('update_momentum', start=abs(self.momentum),
                           stop=1 - ((1 - abs(self.momentum)) / float(self.momentum_scaling))),
            StopNaN(),
        ]
        self.end_handlers = [
            SnapshotEndSaver(base_name=self.name),
            TrainProgressPlotter(base_name=self.name),
        ]
        snapshot_name = 'nn_' + params_name(self.params, prefix=self.name)[0]
        if self.save_snapshots_stepsize:
            self.step_handlers += [
                SnapshotStepSaver(every=self.save_snapshots_stepsize, base_name=snapshot_name),
            ]
        if self.auto_stopping:
            self.step_handlers += [
                StopWhenOverfitting(loss_fraction=0.9, base_name=snapshot_name),
                StopAfterMinimum(patience=40, base_name=self.name),
            ]
        weight_decay = shared(float32(abs(self.weight_decay)), 'weight_decay')
        if self.adaptive_weight_decay:
            self.step_handlers += [
                AdaptiveWeightDecay(weight_decay),
            ]
        if self.epoch_steps:
            self.step_handlers += [
                BreakEveryN(self.epoch_steps),
            ]

        """
        Create the actual nolearn network with information from __init__.
        """
        self.net = NeuralNet(
            layers=self.layers,
            objective=partial(WeightDecayObjective, weight_decay=weight_decay),
            input_shape=(None, feature_count),
            output_num_units=class_count,
            update=nesterov_momentum,  # todo: make parameter
            update_learning_rate=shared(float32(self.learning_rate)),
            update_momentum=shared(float32(self.momentum)),
            on_epoch_finished=self.step_handlers,
            on_training_finished=self.end_handlers,
            regression=False,
            max_epochs=self.max_epochs,
            verbose=verbosity,
            batch_iterator_train=BatchIterator(batch_size=self.batch_size),
            batch_iterator_test=BatchIterator(batch_size=self.batch_size),
            eval_size=0.1,
            # custom_score=('custom_loss', categorical_crossentropy),
            **self.params)
        self.net.parent = self
        self.net.initialize()
        return self.net

    def get_params(self, deep=True):
        return OrderedDict((name, getattr(self, name)) for name in self.parameter_names)

    def set_params(self, **params):
        """
        Set all the parameters.
        """
        for name, val in params.items():
            assert name in self.parameter_names, '"{0:s}" is not a valid parameter name (known parameters: "{1:s}")'.format(
                name, '", "'.join(self.parameter_names))
            setattr(self, name, val)

        """
        Arguments checks.
        """
        assert self.dropout1_rate is None or 0 <= self.dropout1_rate < 1, \
            'Dropout rate 1 should be a value between 0 and 1 (value: {0})'.format(self.dropout1_rate)
        assert self.dropout2_rate is None or 0 <= self.dropout2_rate < 1, \
            'Dropout rate 2 should be a value between 0 and 1, or None for inheritance (value: {0})'.format(self.dropout2_rate)
        assert self.dropout3_rate is None or 0 <= self.dropout3_rate < 1, \
            'Dropout rate 3 should be a value between 0 and 1, or None for inheritance (value: {0})'.format(self.dropout3_rate)
        assert self.dense1_nonlinearity in nonlinearities.keys(), \
            'Linearity 1 should be one of "{0}", got "{1}" instead.'.format(
                '", "'.join(nonlinearities.keys()), self.dense1_nonlinearity)
        assert self.dense2_nonlinearity in nonlinearities.keys() + [None], \
            'Linearity 2 should be one of "{0}", got "{1}" instead.'.format(
                '", "'.join(nonlinearities.keys()), self.dense2_nonlinearity)
        assert self.dense3_nonlinearity in nonlinearities.keys() + [None], \
            'Linearity 3 should be one of "{0}", got "{1}" instead.'.format(
                '", "'.join(nonlinearities.keys()), self.dense3_nonlinearity)
        assert self.dense1_init in initializers.keys(), \
            'Initializer 1 should be one of "{0}", got "{1}" instead.'.format(
                '", "'.join(initializers.keys()), self.dense1_init)
        assert self.dense2_init in initializers.keys() + [None], \
            'Initializer 2 should be one of "{0}", got "{1}" instead.'.format(
                '", "'.join(initializers.keys()), self.dense2_init)
        assert self.dense3_init in initializers.keys() + [None], \
            'Initializer 3 should be one of "{0}", got "{1}" instead.'.format(
                '", "'.join(initializers.keys()), self.dense3_init)

        """
        Argument defaults.
        """
        if self.dense2_nonlinearity is None:
            self.dense2_nonlinearity = self.dense1_nonlinearity
        if self.dense2_init is None:
            self.dense2_init = self.dense1_init
        if self.dense3_nonlinearity is None:
            self.dense3_nonlinearity = self.dense2_nonlinearity
        if self.dense3_init is None:
            self.dense3_init = self.dense2_init
        if self.dropout2_rate is None and self.dense2_size:
            self.dropout2_rate = self.dropout1_rate
        if self.dropout3_rate is None and self.dense3_size:
            self.dropout3_rate = self.dropout2_rate

    def fit(self, X, y, random_sleep=None):
        if random_sleep:
            sleep(random_sleep * random())  # this is to prevent compiler lock problems
        labels = y - y.min()
        # todo: don't use labels.max(), occasionally (rarely) it will not have the highest class
        self.init_net(feature_count=X.shape[1], class_count=labels.max() + 1)
        net = self.net.fit(X, labels)
        self.save()
        return net

    def interrupted_fit(self, X, y):
        """ DEPRECATED """
        labels = y - y.min()
        self.init_net(feature_count=X.shape[1], class_count=labels.max() + 1)
        knowledge = get_knowledge(self.net)
        for epoch in range(0, self.max_epochs, self.epoch_steps):
            set_knowledge(self.net, knowledge)
            self.init_net(feature_count=X.shape[1], class_count=labels.max() + 1)
            print 'epoch {0:d}: learning {1:d} epochs'.format(epoch, self.epoch_steps)
            self.net.fit(X, labels)
            ratio = mean([d['valid_loss'] for d in self.net._train_history[-self.epoch_steps:]]) / \
                mean([d['train_loss'] for d in self.net._train_history[-self.epoch_steps:]])
            if ratio < 0.85:
                self.weight_decay *= 1.3
            if ratio > 0.95:
                self.weight_decay /= 1.2
            self.init_net(feature_count=X.shape[1], class_count=labels.max() + 1)
            knowledge = get_knowledge(self.net)
        exit()
        net = self.net.fit(X, labels)
        self.save()
        return net

    def predict_proba(self, X):
        probs = self.net.predict_proba(X)
        if not isfinite(probs).sum():
            errmsg = 'network "{0:s}" predicted infinite/NaN probabilities'.format(self.name)
            stderr.write(errmsg)
            raise DivergenceError(errmsg)
        return probs

    def predict(self, X):
        return self.net.predict(X)

    def score(self, X, y, **kwargs):
        return self.net.score(X, y)

    def save(self, filepath=None):
        assert hasattr(self, 'net'), \
            'Cannot save a network that is not initialized; .fit(X, y) something first ' \
            '[or use net.initialize(..) for random initialization].'
        parameters = self.get_params(deep=False)
        filepath = filepath or join(NNET_STATE_DIR, self.name)
        if VERBOSITY >= 1:
            print 'saving network to "{0:s}.net.npz|json"'.format(filepath)
        with open(filepath + '.net.json', 'w+') as fh:
            dump([parameters, self.feature_count, self.class_count], fp=fh, indent=2)
        save_knowledge(self.net, filepath + '.net.npz')

    @classmethod
    def load(cls, filepath=None, name=None):
        """
        :param filepath: The base path (without extension) to load the file from, OR:
        :param name: The name of the network to load (if filename is not given)
        :return: The loaded network
        """
        filepath = filepath or join(NNET_STATE_DIR, name)
        if VERBOSITY >= 1:
            print 'loading network from "{0:s}.net.npz|json"'.format(filepath)
        with open(filepath + '.net.json', 'r') as fh:
            [parameters, feature_count, class_count] = load(fp=fh)
        nnet = cls(**parameters)
        nnet.init_net(feature_count=feature_count, class_count=class_count)
        load_knowledge(nnet.net, filepath + '.net.npz')
        return nnet
def main():
    seed = 12345
    np.random.seed(seed)
    set_lasagne_rng(RandomState(seed))

    LOOKUP_PATH = os.path.join(WDIR, 'data', 'HIV.pkl')
    lookup = pickle.load(open(LOOKUP_PATH, 'rb'))
    data_list = lookup['data']
    y = lookup['y']
    labels = lookup['labels']
    nmark = len(labels)

    # event occurrence list
    occurred = [x for i, x in enumerate(data_list) if y[i, 1] == 1]
    not_occurred = [x for i, x in enumerate(data_list) if y[i, 1] == 0]
    y1 = y[y[:, 1] == 1]
    y0 = y[y[:, 1] == 0]

    # split the examples randomly into a training (2/3) and test (1/3) cohort
    # both cohorts should contain an equal percentage of censored data
    sep1 = len(y1) / 3
    sep0 = len(y0) / 3

    # include only uncensored data from the training cohort for training CellCnn
    tr_list = occurred[sep1:]
    tr_stime = y1[sep1:, 0].astype(float)

    # transform survival times to [-1, 1] interval by ranking them
    tr_stime = (ss.rankdata(tr_stime) / (0.5 * len(tr_stime))) - 1

    # fit scaler to all training data
    sc = StandardScaler()
    sc.fit(np.vstack(occurred[sep1:] + not_occurred[sep0:]))
    tr_list = [sc.transform(x) for x in tr_list]

    # the test cohort
    validation_list = [sc.transform(x) for x in (occurred[:sep1] + not_occurred[:sep0])]
    y_valid = np.vstack([y1[:sep1], y0[:sep0]])

    # cross validation on the training cohort
    nfold = 10
    nfilter = 3
    skf = KFold(len(tr_list), n_folds=nfold, shuffle=True)
    committee = []
    valid_accuracy = []
    accum_w = np.empty((nfilter * nfold, nmark + 2))

    for ifold, (train_index, test_index) in enumerate(skf):
        cv_train_samples = [tr_list[t_idx] for t_idx in train_index]
        cv_test_samples = [tr_list[t_idx] for t_idx in test_index]
        cv_y_train = list(tr_stime[train_index])
        cv_y_test = list(tr_stime[test_index])

        results = train_model(cv_train_samples, cv_y_train, labels,
                              valid_samples=cv_test_samples,
                              valid_phenotypes=cv_y_test,
                              ncell=500, nsubset=200, subset_selection='random',
                              nrun=3, pooling='mean', regression=True,
                              nfilter=nfilter, learning_rate=0.03, momentum=0.9,
                              l2_weight_decay_conv=1e-8, l2_weight_decay_out=1e-8,
                              max_epochs=20, verbose=1,
                              select_filters='best', accur_thres=-1)
        net_dict = results['best_net']

        # update the committee of networks
        committee.append(net_dict)
        valid_accuracy.append(results['best_accuracy'])
        w_tot = param_vector(net_dict, regression=True)

        # add weights to accumulator
        accum_w[ifold * nfilter:(ifold + 1) * nfilter] = w_tot

    save_path = os.path.join(OUTDIR, 'network_committee.pkl')
    with open(save_path, 'wb') as f:
        pickle.dump((committee, valid_accuracy), f, -1)

    '''
    committee, valid_accuracy = pickle.load(open(save_path, 'r'))
    # retrieve the filter weights
    for ifold, net_dict in enumerate(committee):
        w_tot = param_vector(net_dict, regression=True)
        # add weights to accumulator
        accum_w[ifold*nfilter:(ifold+1)*nfilter] = w_tot
    '''

    # choose the strong signatures (all of them)
    w_strong = accum_w

    # members of each cluster should have cosine similarity > 0.7
    # equivalently, cosine distance < 0.3
    Z = linkage(w_strong, 'average', metric='cosine')
    clusters = fcluster(Z, .3, criterion='distance') - 1
    n_clusters = len(np.unique(clusters))
    print '%d clusters chosen' % (n_clusters)

    # plot the discovered filter profiles
    plt.figure(figsize=(3, 2))
    idx = range(nmark) + [nmark + 1]
    clmap = sns.clustermap(pd.DataFrame(w_strong[:, idx], columns=labels + ['survival']),
                           method='average', metric='cosine', row_linkage=Z,
                           col_cluster=False, robust=True, yticklabels=clusters)
    clmap.cax.set_visible(False)
    fig_path = os.path.join(OUTDIR, 'HIV_clmap.eps')
    clmap.savefig(fig_path, format='eps')
    plt.close()

    # generate the consensus filter profiles
    c = Counter(clusters)
    cons = []
    for key, val in c.items():
        if val > nfold / 2:
            cons.append(np.mean(w_strong[clusters == key], axis=0))
    cons_mat = np.vstack(cons)

    # plot the consensus filter profiles
    plt.figure(figsize=(10, 3))
    idx = range(nmark) + [nmark + 1]
    ax = sns.heatmap(pd.DataFrame(cons_mat[:, idx], columns=labels + ['survival']),
                     robust=True, yticklabels=False)
    plt.xticks(rotation=90)
    ax.tick_params(axis='both', which='major', labelsize=20)
    plt.tight_layout()
    fig_path = os.path.join(OUTDIR, 'clmap_consensus.eps')
    plt.savefig(fig_path, format='eps')
    plt.close()

    # create an ensemble of neural networks
    ncell_cons = 3000
    ncell_voter = 3000
    layers_voter = [
        (layers.InputLayer, {'name': 'input', 'shape': (None, nmark, ncell_voter)}),
        (layers.Conv1DLayer, {'name': 'conv', 'num_filters': nfilter, 'filter_size': 1}),
        (layers.Pool1DLayer, {'name': 'meanPool', 'pool_size': ncell_voter,
                              'mode': 'average_exc_pad'}),
        (layers.DenseLayer, {'name': 'output', 'num_units': 1, 'nonlinearity': T.tanh}),
    ]

    # predict on the test cohort
    small_data_list_v = [x[:ncell_cons].T.reshape(1, nmark, ncell_cons) for x in validation_list]
    data_v = np.vstack(small_data_list_v)
    stime, censor = y_valid[:, 0], y_valid[:, 1]

    # committee of the best nfold/2 models
    voter_risk_pred = list()
    for ifold in np.argsort(valid_accuracy):
        voter = NeuralNet(layers=layers_voter,
                          update=nesterov_momentum,
                          update_learning_rate=0.001,
                          regression=True,
                          max_epochs=5,
                          verbose=0)
        voter.load_params_from(committee[ifold])
        voter.initialize()

        # rank the risk predictions
        voter_risk_pred.append(ss.rankdata(-np.squeeze(voter.predict(data_v))))

    all_voters = np.vstack(voter_risk_pred)

    # compute mean rank per individual
    risk_p = np.mean(all_voters, axis=0)
    g1 = np.squeeze(risk_p > np.median(risk_p))
    voters_pval_v = logrank_pval(stime, censor, g1)
    fig_v = os.path.join(OUTDIR, 'cellCnn_cox_test.eps')
    plot_KM(stime, censor, g1, voters_pval_v, fig_v)

    # filter-activating cells
    data_t = np.vstack(small_data_list_v)
    data_stack = np.vstack([x for x in np.swapaxes(data_t, 2, 1)])

    # finally define a network from the consensus filters
    nfilter_cons = cons_mat.shape[0]
    ncell_cons = 3000
    layers_cons = [
        (layers.InputLayer, {'name': 'input', 'shape': (None, nmark, ncell_cons)}),
        (layers.Conv1DLayer, {'name': 'conv',
                              'b': init.Constant(cons_mat[:, -2]),
                              'W': cons_mat[:, :-2].reshape(nfilter_cons, nmark, 1),
                              'num_filters': nfilter_cons, 'filter_size': 1}),
        (layers.Pool1DLayer, {'name': 'meanPool', 'pool_size': ncell_cons,
                              'mode': 'average_exc_pad'}),
        (layers.DenseLayer, {'name': 'output', 'num_units': 1,
                             'W': np.sign(cons_mat[:, -1:]),
                             'b': init.Constant(0.),
                             'nonlinearity': T.tanh}),
    ]
    net_cons = NeuralNet(layers=layers_cons,
                         update=nesterov_momentum,
                         update_learning_rate=0.001,
                         regression=True,
                         max_epochs=5,
                         verbose=0)
    net_cons.initialize()

    # get the representation after mean pooling
    xs = T.tensor3('xs').astype(theano.config.floatX)
    act_conv = theano.function([xs], lh.get_output(net_cons.layers_['conv'], xs))

    # and apply to the test data
    act_tot = act_conv(data_t)
    act_tot = np.swapaxes(act_tot, 2, 1)
    act_stack = np.vstack([x for x in act_tot])

    idx = range(7) + [8, 9]
    for i_map in range(nfilter_cons):
        val = act_stack[:, i_map]
        descending_order = np.argsort(val)[::-1]
        val_cumsum = np.cumsum(val[descending_order])
        data_sorted = data_stack[descending_order]
        thres = 0.75 * val_cumsum[-1]
        res_data = data_sorted[val_cumsum < thres]
        fig_path = os.path.join(OUTDIR, 'filter_' + str(i_map) + '_active.eps')
        plot_marker_distribution([res_data[:, idx], data_stack[:, idx]],
                                 ['filter ' + str(i_map), 'all'],
                                 [labels[l] for l in idx], (3, 3), fig_path, 24)
class LasagneToNolearn(object):
    """This class builds the VGG_CNN_S model from pickled weights and biases
    (vgg_cnn_s.pkl) in Lasagne and converts the model for use in Nolearn.
    Nolearn is a Lasagne wrapper that is used here to facilitate and increase
    the speed of vectorizing images.

    VGG_CNN_S is a Convolutional Neural Network (CNN) trained by the Visual
    Geometry Group at Oxford University. More information on this CNN can be
    found elsewhere:

        The Devil is in the Details: An evaluation of recent feature encoding methods
        K. Chatfield, V. Lempitsky, A. Vedaldi and A. Zisserman, In Proc. BMVC, 2011.
        http://www.robots.ox.ac.uk/~vgg/research/deep_eval/

    vgg_cnn_s.pkl was obtained from the Lasagne Model Zoo:
        https://s3.amazonaws.com/lasagne/recipes/pretrained/imagenet/vgg_cnn_s.pkl
    """

    def __init__(self, path_to_pkl):
        '''
        INPUT: Local path to vgg_cnn_s.pkl
        OUTPUT: Points to path of the stored weights and biases.
        '''
        self.path_to_pkl = path_to_pkl

    def lasagne_layers_method(self):
        '''
        INPUT: None
        OUTPUT: Dict

        Creates dictionary of vgg_cnn_s model Lasagne layer objects. Here the
        original output layer (softmax, 1000 classes) has been removed and the
        output layer returns a vector of shape (1, 4096).
        '''
        # Create dictionary of VGG_CNN_S model layers
        self.lasagne_layers = {}
        self.lasagne_layers['input'] = InputLayer((None, 3, 224, 224))
        self.lasagne_layers['conv1'] = ConvLayer(self.lasagne_layers['input'],
                                                 num_filters=96, filter_size=7,
                                                 stride=2, flip_filters=False)
        self.lasagne_layers['norm1'] = NormLayer(self.lasagne_layers['conv1'], alpha=0.0001)
        self.lasagne_layers['pool1'] = PoolLayer(self.lasagne_layers['norm1'],
                                                 pool_size=3, stride=3, ignore_border=False)
        self.lasagne_layers['conv2'] = ConvLayer(self.lasagne_layers['pool1'],
                                                 num_filters=256, filter_size=5,
                                                 flip_filters=False)
        self.lasagne_layers['pool2'] = PoolLayer(self.lasagne_layers['conv2'],
                                                 pool_size=2, stride=2, ignore_border=False)
        self.lasagne_layers['conv3'] = ConvLayer(self.lasagne_layers['pool2'],
                                                 num_filters=512, filter_size=3,
                                                 pad=1, flip_filters=False)
        self.lasagne_layers['conv4'] = ConvLayer(self.lasagne_layers['conv3'],
                                                 num_filters=512, filter_size=3,
                                                 pad=1, flip_filters=False)
        self.lasagne_layers['conv5'] = ConvLayer(self.lasagne_layers['conv4'],
                                                 num_filters=512, filter_size=3,
                                                 pad=1, flip_filters=False)
        self.lasagne_layers['pool5'] = PoolLayer(self.lasagne_layers['conv5'],
                                                 pool_size=3, stride=3, ignore_border=False)
        self.lasagne_layers['fc6'] = DenseLayer(self.lasagne_layers['pool5'], num_units=4096)
        self.lasagne_layers['drop6'] = DropoutLayer(self.lasagne_layers['fc6'], p=0.5)
        self.lasagne_layers['fc7'] = DenseLayer(self.lasagne_layers['drop6'], num_units=4096)

    def build_lasagne(self):
        '''
        INPUT: None
        OUTPUT: None

        Builds the CNN model using Lasagne.
        '''
        model = pickle.load(open(self.path_to_pkl))
        output_layer = self.lasagne_layers['fc7']
        self.mean_image = model['mean image']
        lasagne.layers.set_all_param_values(output_layer, model['values'][:14])

    def extract_layers(self):
        '''
        INPUT: None
        OUTPUT: None

        Extracts relevant layers from the Lasagne model for use with the Nolearn model.
        '''
        self.extracted_layers = {}
        for layer in self.lasagne_layers:
            if layer[:4] != 'drop' and layer != 'input' and \
               layer[:4] != 'pool' and layer[:4] != 'norm':
                self.extracted_layers[layer] = [self.lasagne_layers[layer].W.get_value(),
                                                self.lasagne_layers[layer].b.get_value()]

    def nolearn_layers_method(self):
        '''
        INPUT: None
        OUTPUT: None

        Creates list of layers for the Nolearn model.
        '''
        self.nolearn_layers = [
            (InputLayer, {'name': 'input', 'shape': (None, 3, 224, 224)}),
            (ConvLayer, {'name': 'conv1', 'num_filters': 96, 'filter_size': (7, 7),
                         'stride': 2, 'flip_filters': False,
                         'W': self.extracted_layers['conv1'][0],
                         'b': self.extracted_layers['conv1'][1]}),
            (NormLayer, {'name': 'norm11', 'alpha': .0001}),
            (PoolLayer, {'name': 'pool1', 'pool_size': (3, 3), 'stride': 3, 'ignore_border': False}),
            (ConvLayer, {'name': 'conv2', 'num_filters': 256, 'filter_size': (5, 5),
                         'flip_filters': False,
                         'W': self.extracted_layers['conv2'][0],
                         'b': self.extracted_layers['conv2'][1]}),
            (PoolLayer, {'name': 'pool2', 'pool_size': (2, 2), 'stride': 2, 'ignore_border': False}),
            (ConvLayer, {'name': 'conv3', 'num_filters': 512, 'filter_size': (3, 3),
                         'flip_filters': False, 'pad': 1,
                         'W': self.extracted_layers['conv3'][0],
                         'b': self.extracted_layers['conv3'][1]}),
            (ConvLayer, {'name': 'conv4', 'num_filters': 512, 'filter_size': (3, 3),
                         'flip_filters': False, 'pad': 1,
                         'W': self.extracted_layers['conv4'][0],
                         'b': self.extracted_layers['conv4'][1]}),
            (ConvLayer, {'name': 'conv5', 'num_filters': 512, 'filter_size': (3, 3),
                         'flip_filters': False, 'pad': 1,
                         'W': self.extracted_layers['conv5'][0],
                         'b': self.extracted_layers['conv5'][1]}),
            (PoolLayer, {'name': 'pool5', 'pool_size': (3, 3), 'stride': 3, 'ignore_border': False}),
            (DenseLayer, {'name': 'fc6', 'num_units': 4096,
                          'W': self.extracted_layers['fc6'][0],
                          'b': self.extracted_layers['fc6'][1]}),
            (DropoutLayer, {'name': 'drop6', 'p': 0.5}),
            (DenseLayer, {'name': 'fc7', 'num_units': 4096,
                          'W': self.extracted_layers['fc7'][0],
                          'b': self.extracted_layers['fc7'][1]}),
        ]

    def build_nolearn(self):
        '''
        INPUT: None
        OUTPUT: None

        Builds CNN model using Nolearn.
        '''
        self.nolearn_layers_method()
        self.nn = NeuralNet(layers=self.nolearn_layers,
                            update=adam,
                            update_learning_rate=0.0002)
        self.nn.initialize()

    def to_pickle(self, path):
        '''
        INPUT: Local path where pickle files will be stored
        OUTPUT: Two pickle files

        Pickles the Nolearn model as well as the mean image.
        '''
        joblib.dump(self.nn, '/home/ubuntu/vintage-classifier/pkls/nolearn_nn.pkl', compress=9)
        joblib.dump(self.mean_image, '/home/ubuntu/vintage-classifier/pkls/mean_image.pkl', compress=9)
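# Hypothetical end-to-end use of the LasagneToNolearn class above; the pickle
# path is a placeholder, the method order follows the class definition, and
# note that to_pickle()'s path argument is unused by the class (output paths
# are hard-coded).
converter = LasagneToNolearn('vgg_cnn_s.pkl')
converter.lasagne_layers_method()   # define the Lasagne layer dictionary
converter.build_lasagne()           # load pretrained weights and the mean image
converter.extract_layers()          # pull W/b arrays out of the Lasagne layers
converter.build_nolearn()           # rebuild as an initialized nolearn NeuralNet
converter.to_pickle('pkls/')        # persist the model and the mean image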
    input_shape=(None, num_features),
    dense_num_units=64,
    narrow_num_units=48,
    denseReverse1_num_units=64,
    denseReverse2_num_units=128,
    output_num_units=128,
    # input_nonlinearity=None,  # nonlinearities.sigmoid,
    # dense_nonlinearity=nonlinearities.tanh,
    narrow_nonlinearity=nonlinearities.softplus,
    # denseReverse1_nonlinearity=nonlinearities.tanh,
    denseReverse2_nonlinearity=nonlinearities.softplus,
    output_nonlinearity=nonlinearities.linear,  # nonlinearities.softmax,
    # dropout0_p=0.1,
    dropout1_p=0.01,
    dropout2_p=0.001,
    regression=True,
    verbose=1)

ae.initialize()
PrintLayerInfo()(ae)

maybe_this_is_a_history = ae.fit(Z, Z)

# learned_parameters = ae.get_all_params_values()
# np.save("task4/learned_parameter.npy", learned_parameters)
# SaveWeights(path='task4/koebi_train_history_AE')(ae, maybe_this_is_a_history)
ae.save_params_to('task4/koebi_train_history_AE2')
def make_net(
        NFEATS,
        name='hidden1_size',
        dense1_size=60,
        dense1_nonlinearity='tanh',
        dense1_init='orthogonal',
        dense2_size=None,
        dense2_nonlinearity=None,  # inherits dense1
        dense2_init=None,          # inherits dense1
        dense3_size=None,
        dense3_nonlinearity=None,  # inherits dense2
        dense3_init=None,          # inherits dense2
        learning_rate=0.001,
        learning_rate_scaling=100,
        momentum=0.9,
        momentum_scaling=100,
        max_epochs=3000,
        dropout1_rate=None,
        dropout2_rate=None,        # inherits dropout1_rate
        dropout3_rate=None,
        weight_decay=0,
        output_nonlinearity='softmax',
        auto_stopping=True,
        pretrain=False,
        save_snapshots_stepsize=None,
        verbosity=VERBOSITY >= 2,
):
    """
    Create the network with the selected parameters.

    :param name: Name for save files
    :param dense1_size: Number of neurons for first hidden layer
    :param dense1_nonlinearity: The activation function for the first hidden layer
    :param dense1_init: The weight initialization for the first hidden layer
    :param learning_rate_start: Start value at first epoch (logarithmic scale)
    :param learning_rate_end: End value at last epoch (logarithmic scale)
    :param momentum_start: Start value at first epoch (logarithmic scale)
    :param momentum_end: End value at last epoch (logarithmic scale)
    :param max_epochs: Total number of epochs (at most)
    :param dropout1_rate: Percentage of connections dropped each step.
    :param weight_decay: Constrain the weights by L2 norm.
    :param auto_stopping: Stop early if the network seems to stop performing well.
    :param pretrain: Filepath of the previous weights to start at (or None).
    :return:
    """
    """
    Initial arguments checks and defaults.
    """
    assert dropout1_rate is None or 0 <= dropout1_rate < 1, \
        'Dropout rate 1 should be a value between 0 and 1'
    assert dropout2_rate is None or 0 <= dropout2_rate < 1, \
        'Dropout rate 2 should be a value between 0 and 1, or None for inheritance'
    assert dropout3_rate is None or 0 <= dropout3_rate < 1, \
        'Dropout rate 3 should be a value between 0 and 1, or None for inheritance'
    assert dense1_nonlinearity in nonlinearities.keys(), \
        'Linearity 1 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(nonlinearities.keys()), dense1_nonlinearity)
    assert dense2_nonlinearity in nonlinearities.keys() + [None], \
        'Linearity 2 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(nonlinearities.keys()), dense2_nonlinearity)
    assert dense3_nonlinearity in nonlinearities.keys() + [None], \
        'Linearity 3 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(nonlinearities.keys()), dense3_nonlinearity)
    assert dense1_init in initializers.keys(), \
        'Initializer 1 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(initializers.keys()), dense1_init)
    assert dense2_init in initializers.keys() + [None], \
        'Initializer 2 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(initializers.keys()), dense2_init)
    assert dense3_init in initializers.keys() + [None], \
        'Initializer 3 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(initializers.keys()), dense3_init)

    if dense2_nonlinearity is None:
        dense2_nonlinearity = dense1_nonlinearity
    if dense2_init is None:
        dense2_init = dense1_init
    if dense3_nonlinearity is None:
        dense3_nonlinearity = dense2_nonlinearity
    if dense3_init is None:
        dense3_init = dense2_init
    if dropout2_rate is None and dense2_size:
        dropout2_rate = dropout1_rate
    if dropout3_rate is None and dense3_size:
        dropout3_rate = dropout2_rate

    """
    Create the layers and their settings.
    """
    params = {}
    layers = [
        ('input', InputLayer),
        ('dense1', DenseLayer),
    ]
    if dropout1_rate:
        layers += [('dropout1', DropoutLayer)]
        params['dropout1_p'] = dropout1_rate
    if dense2_size:
        layers += [('dense2', DenseLayer)]
        params.update({
            'dense2_num_units': dense2_size,
            'dense2_nonlinearity': nonlinearities[dense2_nonlinearity],
            'dense2_W': initializers[dense2_init],
            'dense2_b': Constant(0.),
        })
    else:
        assert dense3_size is None, 'There cannot be a third dense layer without a second one'
    if dropout2_rate:
        assert dense2_size is not None, 'There cannot be a second dropout layer without a second dense layer.'
        layers += [('dropout2', DropoutLayer)]
        params['dropout2_p'] = dropout2_rate
    if dense3_size:
        layers += [('dense3', DenseLayer)]
        params.update({
            'dense3_num_units': dense3_size,
            'dense3_nonlinearity': nonlinearities[dense3_nonlinearity],
            'dense3_W': initializers[dense3_init],
            'dense3_b': Constant(0.),
        })
    if dropout3_rate:
        assert dense3_size is not None, 'There cannot be a third dropout layer without a third dense layer.'
        layers += [('dropout3', DropoutLayer)]
        params['dropout3_p'] = dropout3_rate
    layers += [('output', DenseLayer)]

    """
    Create meta parameters and special handlers.
    """
    if VERBOSITY >= 3:
        print 'learning rate: {0:.6f} -> {1:.6f}'.format(
            learning_rate, learning_rate / float(learning_rate_scaling))
        print 'momentum: {0:.6f} -> {1:.6f}'.format(
            momentum, 1 - ((1 - momentum) / float(momentum_scaling)))
    handlers = [
        LogarithmicVariable('update_learning_rate', start=learning_rate,
                            stop=learning_rate / float(learning_rate_scaling)),
        LogarithmicVariable('update_momentum', start=momentum,
                            stop=1 - ((1 - momentum) / float(momentum_scaling))),
        StopNaN(),
    ]
    snapshot_name = 'nn_' + params_name(params, prefix=name)[0]
    if save_snapshots_stepsize:
        handlers += [
            SnapshotStepSaver(every=save_snapshots_stepsize, base_name=snapshot_name),
        ]
    if auto_stopping:
        handlers += [
            StopWhenOverfitting(loss_fraction=0.8, base_name=snapshot_name),
            StopAfterMinimum(patience=40, base_name=name),
        ]

    """
    Create the actual nolearn network with above information.
    """
    net = NeuralNet(
        layers=layers,
        objective=partial(WeightDecayObjective, weight_decay=weight_decay),
        input_shape=(None, NFEATS),
        dense1_num_units=dense1_size,
        dense1_nonlinearity=nonlinearities[dense1_nonlinearity],
        dense1_W=initializers[dense1_init],
        dense1_b=Constant(0.),
        output_nonlinearity=nonlinearities[output_nonlinearity],
        output_num_units=NCLASSES,
        output_W=Orthogonal(),
        update=nesterov_momentum,
        update_learning_rate=shared(float32(learning_rate)),
        update_momentum=shared(float32(momentum)),
        on_epoch_finished=handlers,
        regression=False,
        max_epochs=max_epochs,
        verbose=verbosity,
        **params)
    net.initialize()

    """
    Load weights from earlier training (by name, no auto-choosing).
    """
    if pretrain:
        assert isfile(pretrain), 'Pre-train file "{0:s}" not found'.format(pretrain)
        load_knowledge(net, pretrain)

    return net
def make_grnn(batch_size, emb_size, g_hidden_size, word_n, wc_num, dence, wsm_num=1,
              rnn_type='LSTM', rnn_size=12, dropout_d=0.5,  # pooling='mean',
              quest_na=4, gradient_steps=-1, valid_indices=None, lr=0.05, grad_clip=10):

    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

    # dence = dence + [1]
    RNN = select_rnn(rnn_type)

    # ------------------------------------------------------------------input layers
    layers = [
        (LL.InputLayer, {'name': 'l_in_se_q', 'shape': (None, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_se_a', 'shape': (None, quest_na, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_mask_q', 'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_a', 'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_q', 'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_a', 'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_q', 'shape': (None, word_n, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_a', 'shape': (None, word_n, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_act_', 'shape': (None, word_n, g_hidden_size)}),
        (LL.InputLayer, {'name': 'l_in_act__', 'shape': (None, word_n, word_n, g_hidden_size)}),
    ]

    # ------------------------------------------------------------------slice layers
    # l_qs = []
    # l_cas = []
    l_ase_names = ['l_ase_{}'.format(i) for i in range(quest_na)]
    l_amask_names = ['l_amask_{}'.format(i) for i in range(quest_na)]
    l_amask_ri_names = ['l_amask_ri_{}'.format(i) for i in range(quest_na)]
    l_awt_names = ['l_awt_{}'.format(i) for i in range(quest_na)]
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_ase_names[i], 'incoming': 'l_in_se_a',
                                        'indices': i, 'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_names[i], 'incoming': 'l_in_mask_a',
                                        'indices': i, 'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_ri_names[i], 'incoming': 'l_in_mask_ri_a',
                                        'indices': i, 'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_awt_names[i], 'incoming': 'l_in_wt_a',
                                        'indices': i, 'axis': 1})])

    # -------------------------------------------------------------------GRNN layers
    WC = theano.shared(np.random.randn(wc_num, g_hidden_size, g_hidden_size).astype('float32'))
    # WC = LI.Normal(0.1)
    WSM = theano.shared(np.random.randn(emb_size, g_hidden_size).astype('float32'))
    b = theano.shared(np.ones(g_hidden_size).astype('float32'))
    # b = lasagne.init.Constant(1.0)
    layers.extend([(GRNNLayer, {'name': 'l_q_grnn',
                                'incomings': ['l_in_se_q', 'l_in_mask_q', 'l_in_wt_q',
                                              'l_in_act_', 'l_in_act__'],
                                'emb_size': emb_size, 'hidden_size': g_hidden_size,
                                'word_n': word_n, 'wc_num': wc_num, 'wsm_num': wsm_num,
                                'only_return_final': False,
                                'WC': WC, 'WSM': WSM, 'b': b})])
    l_a_grnns_names = ['l_a_grnn_{}'.format(i) for i in range(quest_na)]
    for i, l_a_grnns_name in enumerate(l_a_grnns_names):
        layers.extend([(GRNNLayer, {'name': l_a_grnns_name,
                                    'incomings': [l_ase_names[i], l_amask_names[i],
                                                  l_awt_names[i], 'l_in_act_', 'l_in_act__'],
                                    'emb_size': emb_size, 'hidden_size': g_hidden_size,
                                    'word_n': word_n, 'wc_num': wc_num, 'wsm_num': wsm_num,
                                    'only_return_final': False,
                                    'WC': WC, 'WSM': WSM, 'b': b})])

    # ------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {'name': 'l_qa_concat',
                                     'incomings': ['l_q_grnn'] + l_a_grnns_names})])
    layers.extend([(LL.ConcatLayer, {'name': 'l_qamask_concat',
                                     'incomings': ['l_in_mask_ri_q'] + l_amask_ri_names})])

    # --------------------------------------------------------------------RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f', 'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat', 'num_units': rnn_size,
                          'backwards': False, 'only_return_final': True,
                          'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b', 'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat', 'num_units': rnn_size,
                          'backwards': True, 'only_return_final': True,
                          'grad_clipping': grad_clip})])
    layers.extend([(LL.ElemwiseSumLayer, {'name': 'l_qa_rnn_conc',
                                          'incomings': ['l_qa_rnn_f', 'l_qa_rnn_b']})])

    ## -----------------------------------------------------------------pooling layer
    ## l_qa_pool = layers.extend([(LL.ExpressionLayer, {'name': 'l_qa_pool',
    ##                                                  'incoming': l_qa_rnn_conc,
    ##                                                  'function': lambda X: X.mean(-1),
    ##                                                  'output_shape': 'auto'})])

    # ------------------------------------------------------------------dence layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence)]
    if dropout_d:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do' + 'do', 'p': dropout_d})])
    for i, d in enumerate(dence):
        if i < len(dence) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i], 'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence) - 1 and dropout_d:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do', 'p': dropout_d})])

    def loss(x, t):
        return LO.aggregate(LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
        # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    if isinstance(valid_indices, np.ndarray) or isinstance(valid_indices, list):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=LU.adagrad,
        update_learning_rate=lr,
        # update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=PermIterator(batch_size=batch_size),
        batch_iterator_test=BatchIterator(batch_size=batch_size / 2),
        # batch_iterator_train=BatchIterator(batch_size=batch_size),
        # batch_iterator_test=BatchIterator(batch_size=batch_size),
        # train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split,
    )
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
def make_grnn( batch_size, emb_size, g_hidden_size, word_n, wc_num, dence, wsm_num=1, rnn_type='LSTM', rnn_size=12, dropout_d=0.5, # pooling='mean', quest_na=4, gradient_steps=-1, valid_indices=None, lr=0.05, grad_clip=10): def select_rnn(x): return { 'RNN': LL.RecurrentLayer, 'LSTM': LL.LSTMLayer, 'GRU': LL.GRULayer, }.get(x, LL.LSTMLayer) # dence = dence + [1] RNN = select_rnn(rnn_type) #------------------------------------------------------------------input layers layers = [ (LL.InputLayer, { 'name': 'l_in_se_q', 'shape': (None, word_n, emb_size) }), (LL.InputLayer, { 'name': 'l_in_se_a', 'shape': (None, quest_na, word_n, emb_size) }), (LL.InputLayer, { 'name': 'l_in_mask_q', 'shape': (None, word_n) }), (LL.InputLayer, { 'name': 'l_in_mask_a', 'shape': (None, quest_na, word_n) }), (LL.InputLayer, { 'name': 'l_in_mask_ri_q', 'shape': (None, word_n) }), (LL.InputLayer, { 'name': 'l_in_mask_ri_a', 'shape': (None, quest_na, word_n) }), (LL.InputLayer, { 'name': 'l_in_wt_q', 'shape': (None, word_n, word_n) }), (LL.InputLayer, { 'name': 'l_in_wt_a', 'shape': (None, word_n, quest_na, word_n) }), (LL.InputLayer, { 'name': 'l_in_act_', 'shape': (None, word_n, g_hidden_size) }), (LL.InputLayer, { 'name': 'l_in_act__', 'shape': (None, word_n, word_n, g_hidden_size) }), ] #------------------------------------------------------------------slice layers # l_qs = [] # l_cas = [] l_ase_names = ['l_ase_{}'.format(i) for i in range(quest_na)] l_amask_names = ['l_amask_{}'.format(i) for i in range(quest_na)] l_amask_ri_names = ['l_amask_ri_{}'.format(i) for i in range(quest_na)] l_awt_names = ['l_awt_{}'.format(i) for i in range(quest_na)] for i in range(quest_na): layers.extend([(LL.SliceLayer, { 'name': l_ase_names[i], 'incoming': 'l_in_se_a', 'indices': i, 'axis': 1 })]) for i in range(quest_na): layers.extend([(LL.SliceLayer, { 'name': l_amask_names[i], 'incoming': 'l_in_mask_a', 'indices': i, 'axis': 1 })]) for i in range(quest_na): layers.extend([(LL.SliceLayer, { 'name': l_amask_ri_names[i], 'incoming': 'l_in_mask_ri_a', 'indices': i, 'axis': 1 })]) for i in range(quest_na): layers.extend([(LL.SliceLayer, { 'name': l_awt_names[i], 'incoming': 'l_in_wt_a', 'indices': i, 'axis': 1 })]) #-------------------------------------------------------------------GRNN layers WC = theano.shared( np.random.randn(wc_num, g_hidden_size, g_hidden_size).astype('float32')) # WC = LI.Normal(0.1) WSM = theano.shared( np.random.randn(emb_size, g_hidden_size).astype('float32')) b = theano.shared(np.ones(g_hidden_size).astype('float32')) # b = lasagne.init.Constant(1.0) layers.extend([(GRNNLayer, { 'name': 'l_q_grnn', 'incomings': ['l_in_se_q', 'l_in_mask_q', 'l_in_wt_q', 'l_in_act_', 'l_in_act__'], 'emb_size': emb_size, 'hidden_size': g_hidden_size, 'word_n': word_n, 'wc_num': wc_num, 'wsm_num': wsm_num, 'only_return_final': False, 'WC': WC, 'WSM': WSM, 'b': b })]) l_a_grnns_names = ['l_a_grnn_{}'.format(i) for i in range(quest_na)] for i, l_a_grnns_name in enumerate(l_a_grnns_names): layers.extend([(GRNNLayer, { 'name': l_a_grnns_name, 'incomings': [ l_ase_names[i], l_amask_names[i], l_awt_names[i], 'l_in_act_', 'l_in_act__' ], 'emb_size': emb_size, 'hidden_size': g_hidden_size, 'word_n': word_n, 'wc_num': wc_num, 'wsm_num': wsm_num, 'only_return_final': False, 'WC': WC, 'WSM': WSM, 'b': b })]) #------------------------------------------------------------concatenate layers layers.extend([(LL.ConcatLayer, { 'name': 'l_qa_concat', 'incomings': ['l_q_grnn'] + l_a_grnns_names })]) layers.extend([(LL.ConcatLayer, { 'name': 
'l_qamask_concat', 'incomings': ['l_in_mask_ri_q'] + l_amask_ri_names })]) #--------------------------------------------------------------------RNN layers layers.extend([(RNN, { 'name': 'l_qa_rnn_f', 'incoming': 'l_qa_concat', 'mask_input': 'l_qamask_concat', 'num_units': rnn_size, 'backwards': False, 'only_return_final': True, 'grad_clipping': grad_clip })]) layers.extend([(RNN, { 'name': 'l_qa_rnn_b', 'incoming': 'l_qa_concat', 'mask_input': 'l_qamask_concat', 'num_units': rnn_size, 'backwards': True, 'only_return_final': True, 'grad_clipping': grad_clip })]) layers.extend([(LL.ElemwiseSumLayer, { 'name': 'l_qa_rnn_conc', 'incomings': ['l_qa_rnn_f', 'l_qa_rnn_b'] })]) ##-----------------------------------------------------------------pooling layer ## l_qa_pool = layers.extend([(LL.ExpressionLayer, {'name': 'l_qa_pool', ## 'incoming': l_qa_rnn_conc, ## 'function': lambda X: X.mean(-1), ## 'output_shape'='auto'})]) #------------------------------------------------------------------dence layers l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence)] if dropout_d: layers.extend([(LL.DropoutLayer, { 'name': 'l_dence_do' + 'do', 'p': dropout_d })]) for i, d in enumerate(dence): if i < len(dence) - 1: nonlin = LN.tanh else: nonlin = LN.softmax layers.extend([(LL.DenseLayer, { 'name': l_dence_names[i], 'num_units': d, 'nonlinearity': nonlin })]) if i < len(dence) - 1 and dropout_d: layers.extend([(LL.DropoutLayer, { 'name': l_dence_names[i] + 'do', 'p': dropout_d })]) def loss(x, t): return LO.aggregate( LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t)) # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t)) if isinstance(valid_indices, np.ndarray) or isinstance( valid_indices, list): train_split = TrainSplit_indices(valid_indices=valid_indices) else: train_split = TrainSplit(eval_size=valid_indices, stratify=False) nnet = NeuralNet( y_tensor_type=T.ivector, layers=layers, update=LU.adagrad, update_learning_rate=lr, # update_epsilon=1e-7, objective_loss_function=loss, regression=False, verbose=2, batch_iterator_train=PermIterator(batch_size=batch_size), batch_iterator_test=BatchIterator(batch_size=batch_size / 2), # batch_iterator_train=BatchIterator(batch_size=batch_size), # batch_iterator_test=BatchIterator(batch_size=batch_size), #train_split=TrainSplit(eval_size=eval_size) train_split=train_split) nnet.initialize() PrintLayerInfo()(nnet) return nnet
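A minimal usage sketch for the factory above, assuming the custom GRNNLayer, PermIterator and TrainSplit_indices helpers are importable and that the preprocessed arrays already match the shapes declared in the input layers; the hyperparameter values and array names are illustrative only, and nolearn's fit accepts a dict keyed by input-layer name when a network has several InputLayers.

# Sketch only -- values and array names below are placeholders, not from the original code.
nnet = make_grnn(batch_size=32, emb_size=50, g_hidden_size=64, word_n=40,
                 wc_num=4, dence=[64, 4], rnn_type='GRU', rnn_size=32,
                 valid_indices=0.2, lr=0.05)
X = {'l_in_se_q': se_q, 'l_in_se_a': se_a,
     'l_in_mask_q': mask_q, 'l_in_mask_a': mask_a,
     'l_in_mask_ri_q': mask_ri_q, 'l_in_mask_ri_a': mask_ri_a,
     'l_in_wt_q': wt_q, 'l_in_wt_a': wt_a,
     'l_in_act_': act_, 'l_in_act__': act__}
nnet.fit(X, y.astype('int32'))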
('dropout1', DropoutLayer), ('narrow', DenseLayer), ] encoder = NeuralNet( layers=const_layers, update=nesterov_momentum, update_learning_rate=0.01, update_momentum=0.975, input_shape=(None, num_features), dense_num_units=64, narrow_num_units=num_encoder, narrow_nonlinearity=nonlinearities.softplus, regression=True, ) encoder.initialize() encoder.load_params_from('task4/koebi_train_history_AE2') # encode train and test data x_encoded = encoder.predict(X) test_encoded = encoder.predict(test_data) X_plus = np.hstack([X, x_encoded]) test_plus = np.hstack([test_data, test_encoded]) # supervised learning with the encoded data dynamic_layers = [ ('input', InputLayer), ('dense', DenseLayer), ('dropout', DropoutLayer), ('dense1', DenseLayer), ('dropout1', DropoutLayer),
#objective_loss_function=binary_crossentropy, objective_loss_function=multilabel_objective, custom_score=("validation score", lambda x, y: 1 - np.mean(np.abs(x - y))), max_epochs= 1200, #on_epoch_finished = [ # AdjustVariable('update_learning_rate',start=0.00001,stop=0.000001) #AdjustVariable('update_momentum',start=0.9,stop=0.999) #], batch_iterator_train=BatchIterator(batch_size=250), #batch_iterator_train = FlipBatchIterator(batch_size=25), verbose=2, ) print "Training NN..." print datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S') X_offset = np.mean(X_train, axis = 0) nnet.initialize() layer_info = PrintLayerInfo() layer_info(nnet) nnet.fit(X_train-X_offset,y_train) print "Using trained model to predict" print datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S') y_predictions = nnet.predict(X_test-X_offset) print datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S') score = 0 for i,j in zip(y_test,y_predictions): temp = [] for a in j: if a == max(j): temp.append(1.)
'num_units': 4096, 'W': layer_w_b['fc6'][0], 'b': layer_w_b['fc6'][1] }), (DropoutLayer, { 'name': 'drop6', 'p': 0.5 }), (DenseLayer, { 'name': 'fc7', 'num_units': 4096, 'W': layer_w_b['fc7'][0], 'b': layer_w_b['fc7'][1] }) ] net0 = NeuralNet( layers=layers0, update=nesterov_momentum, update_learning_rate=0.01, update_momentum=0.9, # regression=True, # flag to indicate we're dealing with regression problem # max_epochs=400, # we want to train this many epochs verbose=1, ) #initialize nolearn net net0.initialize() #save weights and biases to the file for future use net0.save_params_to('nolearn_with_w_b.pkl')
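The saved pickle can later be restored into a net with the same architecture; a sketch, assuming layers0 is rebuilt exactly as above.

# Sketch: restore the weights saved above into an identically-defined net.
net_restored = NeuralNet(
    layers=layers0,
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    verbose=1,
)
net_restored.initialize()
net_restored.load_params_from('nolearn_with_w_b.pkl')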
# prediction set might be too small to calculate a meaningful mean and standard deviation.
X_train_z = zscore(X_train, train_mean, train_sdev)        # scipy.stats.mstats.zscore(X_train)
X_validate_z = zscore(X_validate, train_mean, train_sdev)  # scipy.stats.mstats.zscore(X_validate)

# These can be used to check my zscore calculation against numpy:
# print(X_train_z)
# print(scipy.stats.mstats.zscore(X_train))

# Provide our own validation set
def my_split(self, X, y, eval_size):
    return X_train_z, X_validate_z, y_train, y_validate

net0.train_test_split = types.MethodType(my_split, net0)

# Train the network
net0.initialize()
d = extract_weights(net0)
print("D:" + str(len(d)))
#net0.fit(X_train_z, y_train)

# Predict the validation set
pred_y = net0.predict(X_validate_z)

# Display predictions and count the number of incorrect predictions.
species_names = ['setosa', 'versicolour', 'virginica']
count = 0
wrong = 0
for element in zip(X_validate, y_validate, pred_y):
    print("Input: sepal length: {}, sepal width: {}, petal length: {}, petal width: {}; Expected: {}; Actual: {}".format(
def make_memnn(vocab_size, cont_sl, cont_wl, quest_wl, answ_wl, rnn_size, rnn_type='LSTM', pool_size=4, answ_n=4, dence_l=[100], dropout=0.5, batch_size=16, emb_size=50, grad_clip=40, init_std=0.1, num_hops=3, rnn_style=False, nonlin=LN.softmax, init_W=None, rng=None, art_pool=4, lr=0.01, mom=0, updates=LU.adagrad, valid_indices=0.2, permute_answ=False, permute_cont=False): def select_rnn(x): return { 'RNN': LL.RecurrentLayer, 'LSTM': LL.LSTMLayer, 'GRU': LL.GRULayer, }.get(x, LL.LSTMLayer) # dence = dence + [1] RNN = select_rnn(rnn_type) #-----------------------------------------------------------------------weights tr_variables = {} tr_variables['WQ'] = theano.shared( init_std * np.random.randn(vocab_size, emb_size).astype('float32')) tr_variables['WA'] = theano.shared( init_std * np.random.randn(vocab_size, emb_size).astype('float32')) tr_variables['WC'] = theano.shared( init_std * np.random.randn(vocab_size, emb_size).astype('float32')) tr_variables['WTA'] = theano.shared( init_std * np.random.randn(cont_sl, emb_size).astype('float32')) tr_variables['WTC'] = theano.shared( init_std * np.random.randn(cont_sl, emb_size).astype('float32')) tr_variables['WAnsw'] = theano.shared( init_std * np.random.randn(vocab_size, emb_size).astype('float32')) #------------------------------------------------------------------input layers layers = [(LL.InputLayer, { 'name': 'l_in_q', 'shape': (batch_size, 1, quest_wl), 'input_var': T.itensor3('l_in_q_') }), (LL.InputLayer, { 'name': 'l_in_a', 'shape': (batch_size, answ_n, answ_wl), 'input_var': T.itensor3('l_in_a_') }), (LL.InputLayer, { 'name': 'l_in_q_pe', 'shape': (batch_size, 1, quest_wl, emb_size) }), (LL.InputLayer, { 'name': 'l_in_a_pe', 'shape': (batch_size, answ_n, answ_wl, emb_size) }), (LL.InputLayer, { 'name': 'l_in_cont', 'shape': (batch_size, cont_sl, cont_wl), 'input_var': T.itensor3('l_in_cont_') }), (LL.InputLayer, { 'name': 'l_in_cont_pe', 'shape': (batch_size, cont_sl, cont_wl, emb_size) })] #------------------------------------------------------------------slice layers # l_qs = [] # l_cas = [] l_a_names = ['l_a_{}'.format(i) for i in range(answ_n)] l_a_pe_names = ['l_a_pe{}'.format(i) for i in range(answ_n)] for i in range(answ_n): layers.extend([(LL.SliceLayer, { 'name': l_a_names[i], 'incoming': 'l_in_a', 'indices': slice(i, i + 1), 'axis': 1 })]) for i in range(answ_n): layers.extend([(LL.SliceLayer, { 'name': l_a_pe_names[i], 'incoming': 'l_in_a_pe', 'indices': slice(i, i + 1), 'axis': 1 })]) #------------------------------------------------------------------MEMNN layers #question---------------------------------------------------------------------- layers.extend([(EncodingFullLayer, { 'name': 'l_emb_f_q', 'incomings': ('l_in_q', 'l_in_q_pe'), 'vocab_size': vocab_size, 'emb_size': emb_size, 'W': tr_variables['WQ'], 'WT': None })]) l_mem_names = ['ls_mem_n2n_{}'.format(i) for i in range(num_hops)] layers.extend([(MemoryLayer, { 'name': l_mem_names[0], 'incomings': ('l_in_cont', 'l_in_cont_pe', 'l_emb_f_q'), 'vocab_size': vocab_size, 'emb_size': emb_size, 'A': tr_variables['WA'], 'C': tr_variables['WC'], 'AT': tr_variables['WTA'], 'CT': tr_variables['WTC'], 'nonlin': nonlin })]) for i in range(1, num_hops): if i % 2: WC, WA = tr_variables['WA'], tr_variables['WC'] WTC, WTA = tr_variables['WTA'], tr_variables['WTC'] else: WA, WC = tr_variables['WA'], tr_variables['WC'] WTA, WTC = tr_variables['WTA'], tr_variables['WTC'] layers.extend([(MemoryLayer, { 'name': l_mem_names[i], 'incomings': ('l_in_cont', 'l_in_cont_pe', l_mem_names[i - 
1]), 'vocab_size': vocab_size, 'emb_size': emb_size, 'A': WA, 'C': WC, 'AT': WTA, 'CT': WTC, 'nonlin': nonlin })]) #answers----------------------------------------------------------------------- l_emb_f_a_names = ['l_emb_f_a{}'.format(i) for i in range(answ_n)] for i in range(answ_n): layers.extend([(EncodingFullLayer, { 'name': l_emb_f_a_names[i], 'incomings': (l_a_names[i], l_a_pe_names[i]), 'vocab_size': vocab_size, 'emb_size': emb_size, 'W': tr_variables['WAnsw'], 'WT': None })]) #------------------------------------------------------------concatenate layers layers.extend([(LL.ConcatLayer, { 'name': 'l_qma_concat', 'incomings': l_mem_names + l_emb_f_a_names })]) #--------------------------------------------------------------------RNN layers layers.extend([( RNN, { 'name': 'l_qa_rnn_f', 'incoming': 'l_qma_concat', # 'mask_input': 'l_qamask_concat', 'num_units': rnn_size, 'backwards': False, 'only_return_final': False, 'grad_clipping': grad_clip })]) layers.extend([( RNN, { 'name': 'l_qa_rnn_b', 'incoming': 'l_qma_concat', # 'mask_input': 'l_qamask_concat', 'num_units': rnn_size, 'backwards': True, 'only_return_final': False, 'grad_clipping': grad_clip })]) layers.extend([(LL.SliceLayer, { 'name': 'l_qa_rnn_f_sl', 'incoming': 'l_qa_rnn_f', 'indices': slice(-answ_n, None), 'axis': 1 })]) layers.extend([(LL.SliceLayer, { 'name': 'l_qa_rnn_b_sl', 'incoming': 'l_qa_rnn_b', 'indices': slice(-answ_n, None), 'axis': 1 })]) layers.extend([(LL.ElemwiseMergeLayer, { 'name': 'l_qa_rnn_conc', 'incomings': ('l_qa_rnn_f_sl', 'l_qa_rnn_b_sl'), 'merge_function': T.add })]) #-----------------------------------------------------------------pooling layer # layers.extend([(LL.DimshuffleLayer, {'name': 'l_qa_rnn_conc_', # 'incoming': 'l_qa_rnn_conc', 'pattern': (0, 'x', 1)})]) layers.extend([(LL.Pool1DLayer, { 'name': 'l_qa_pool', 'incoming': 'l_qa_rnn_conc', 'pool_size': pool_size, 'mode': 'max' })]) #------------------------------------------------------------------dence layers l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence_l)] if dropout: layers.extend([(LL.DropoutLayer, { 'name': 'l_dence_do', 'p': dropout })]) for i, d in enumerate(dence_l): if i < len(dence_l) - 1: nonlin = LN.tanh else: nonlin = LN.softmax layers.extend([(LL.DenseLayer, { 'name': l_dence_names[i], 'num_units': d, 'nonlinearity': nonlin })]) if i < len(dence_l) - 1 and dropout: layers.extend([(LL.DropoutLayer, { 'name': l_dence_names[i] + 'do', 'p': dropout })]) if isinstance(valid_indices, np.ndarray) or isinstance( valid_indices, list): train_split = TrainSplit_indices(valid_indices=valid_indices) else: train_split = TrainSplit(eval_size=valid_indices, stratify=False) if permute_answ or permute_cont: batch_iterator_train = PermIterator(batch_size, permute_answ, permute_cont) else: batch_iterator_train = BatchIterator(batch_size=batch_size) def loss(x, t): return LO.aggregate( LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t)) # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. 
- 1e-6), t)) nnet = NeuralNet( y_tensor_type=T.ivector, layers=layers, update=updates, update_learning_rate=lr, # update_epsilon=1e-7, objective_loss_function=loss, regression=False, verbose=2, batch_iterator_train=batch_iterator_train, batch_iterator_test=BatchIterator(batch_size=batch_size / 2), # batch_iterator_train=BatchIterator(batch_size=batch_size), # batch_iterator_test=BatchIterator(batch_size=batch_size), #train_split=TrainSplit(eval_size=eval_size) train_split=train_split, on_batch_finished=[zero_memnn]) nnet.initialize() PrintLayerInfo()(nnet) return nnet
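zero_memnn is defined outside this snippet; nolearn invokes each on_batch_finished handler with the net and its training history after every training batch, so a handler of that shape looks like the sketch below (the body shown is a placeholder, not the original implementation).

# Sketch of the callback signature expected by on_batch_finished.
def zero_memnn(nnet, train_history):
    # Placeholder body: e.g. reset auxiliary shared state of the memory
    # layers between batches. The real zero_memnn is defined elsewhere.
    pass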
]

net1 = NeuralNet(
    layers=layers1,
    update_learning_rate=0.01,
    verbose=2,
)

# To see information about the capacity and coverage of each layer,
# we need to set the verbosity of the net to a value of 2 and
# then initialize the net. We next pass the initialized net to PrintLayerInfo
# to see some useful information. By the way, we could also just call the
# fit method of the net to get the same outcome, but since we don't want
# to fit just now, we proceed as shown below.
net1.initialize()
layer_info = PrintLayerInfo()
layer_info(net1)

# This net is fine. The capacity never falls below 1/6, which would be 16.7%,
# and the coverage of the image never exceeds 100%. However,
# with only 4 convolutional layers, this net is not very deep and will
# probably not achieve the best possible results. On the other hand,
# if we use max pooling too often, the coverage will quickly
# exceed 100% and we cannot go sufficiently deep.

# Too little max pooling
layers2 = [
    (InputLayer, {'shape': (None, 1, 28, 28)}),
    (Conv2DLayer, {'num_filters': 32, 'filter_size': (3, 3)}),
conv2d8_nonlinearity=lasagne.nonlinearities.rectify, conv2d8_W=W[7], #output_nonlinearity=lasagne.nonlinearities.softmax,#, # output layer uses identity function #output_num_units=1000, # 1000 target values #output_W = W[7], # optimization method params update=nesterov_momentum, update_learning_rate=0.01, update_momentum=0.9, max_epochs=10, verbose=1, regression=True) for i, w in enumerate(W): print i, w.shape net1.initialize() import cv2 from training_images import simpleProcessImage img = cv2.imread( "/home/simon/python/sklearn-theano/sklearn_theano/datasets/images/cat_and_dog.jpg" ) crop = simpleProcessImage(img) cv2.imshow("X", crop) res = net1.predict(crop.transpose(2, 0, 1).reshape(-1, 3, 231, 231)) print res cv2.waitKey()
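A small sanity check after loading the pretrained weights can confirm that every copied array landed in the expected layer; the sketch below only inspects parameter shapes and is not part of the original script.

# Sketch: print the parameter shapes actually stored in the initialized net.
for name, layer in net1.layers_.items():
    shapes = [p.get_value().shape for p in layer.get_params()]
    if shapes:
        print name, shapes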
#on_training_finished = None, verbose=bool(VERBOSITY), input_shape=(None, train.shape[1]), output_num_units=NCLASSES, dense1_num_units=500, dense2_num_units=500, dense3_num_units=400, dense1_nonlinearity=LeakyRectify(leakiness=0.1), dense2_nonlinearity=LeakyRectify(leakiness=0.1), dense3_nonlinearity=LeakyRectify(leakiness=0.1), output_nonlinearity=softmax, dense1_W=HeUniform(), dense2_W=HeUniform(), dense3_W=HeUniform(), dense1_b=Constant(0.), dense2_b=Constant(0.), dense3_b=Constant(0.), output_b=Constant(0.), dropout0_p=0.1, dropout1_p=0.6, dropout2_p=0.6, dropout3_p=0.6, update_learning_rate=shared(float32(0.02)), # update_momentum=shared(float32(0.9)), # batch_iterator_train=BatchIterator(batch_size=128), batch_iterator_test=BatchIterator(batch_size=128), ) net.initialize() net.fit(train, labels)
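The learning rate is wrapped in a Theano shared variable so that an on_epoch_finished handler can anneal it between epochs; the AdjustVariable helper referenced (commented out) in other snippets is commonly implemented along these lines. A sketch, assuming numpy is available as np:

class AdjustVariable(object):
    """Linearly anneal a shared hyperparameter from `start` to `stop`."""
    def __init__(self, name, start=0.02, stop=0.001):
        self.name, self.start, self.stop = name, start, stop
        self.ls = None

    def __call__(self, nnet, train_history):
        if self.ls is None:
            self.ls = np.linspace(self.start, self.stop, nnet.max_epochs)
        epoch = train_history[-1]['epoch']
        getattr(nnet, self.name).set_value(np.float32(self.ls[epoch - 1]))

# Typical wiring (illustrative):
# on_epoch_finished=[AdjustVariable('update_learning_rate', start=0.02, stop=0.001)]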
def main(): seed = 12345 np.random.seed(seed) set_lasagne_rng(RandomState(seed)) LOOKUP_PATH = os.path.join(WDIR, 'data', 'HIV.pkl') lookup = pickle.load(open(LOOKUP_PATH, 'rb')) data_list = lookup['data'] y = lookup['y'] labels = lookup['labels'] nmark = len(labels) # event occurence list occurred = [x for i, x in enumerate(data_list) if y[i,1] == 1] not_occurred = [x for i, x in enumerate(data_list) if y[i,1] == 0] y1 = y[y[:,1] == 1] y0 = y[y[:,1] == 0] # split the examples randomly into a training (2/3) and test (1/3) cohort # both cohorts should contain equal percentage of cencored data sep1 = len(y1) / 3 sep0 = len(y0) / 3 # include only uncensored data from the training cohort for training CellCnn tr_list = occurred[sep1:] tr_stime = y1[sep1:,0].astype(float) # transform survival times to [-1, 1] interval by ranking them tr_stime = (ss.rankdata(tr_stime) / (0.5 * len(tr_stime))) - 1 # fit scaler to all training data sc = StandardScaler() sc.fit(np.vstack(occurred[sep1:] + not_occurred[sep0:])) tr_list = [sc.transform(x) for x in tr_list] # the test cohort validation_list = [sc.transform(x) for x in (occurred[:sep1] + not_occurred[:sep0])] y_valid = np.vstack([y1[:sep1], y0[:sep0]]) # cross validation on the training cohort nfold = 10 nfilter = 3 skf = KFold(len(tr_list), n_folds=nfold, shuffle=True) committee = [] valid_accuracy = [] accum_w = np.empty((nfilter * nfold, nmark+2)) for ifold, (train_index, test_index) in enumerate(skf): cv_train_samples = [tr_list[t_idx] for t_idx in train_index] cv_test_samples = [tr_list[t_idx] for t_idx in test_index] cv_y_train = list(tr_stime[train_index]) cv_y_test = list(tr_stime[test_index]) results = train_model(cv_train_samples, cv_y_train, labels, valid_samples=cv_test_samples, valid_phenotypes=cv_y_test, ncell=500, nsubset=200, subset_selection='random', nrun=3, pooling='mean', regression=True, nfilter=nfilter, learning_rate=0.03, momentum=0.9, l2_weight_decay_conv=1e-8, l2_weight_decay_out=1e-8, max_epochs=20, verbose=1, select_filters='best', accur_thres=-1) net_dict = results['best_net'] # update the committee of networks committee.append(net_dict) valid_accuracy.append(results['best_accuracy']) w_tot = param_vector(net_dict, regression=True) # add weights to accumulator accum_w[ifold*nfilter:(ifold+1)*nfilter] = w_tot save_path = os.path.join(OUTDIR, 'network_committee.pkl') with open(save_path, 'wb') as f: pickle.dump((committee, valid_accuracy), f, -1) ''' committee, valid_accuracy = pickle.load(open(save_path, 'r')) # retrieve the filter weights for ifold, net_dict in enumerate(committee): w_tot = param_vector(net_dict, regression=True) # add weights to accumulator accum_w[ifold*nfilter:(ifold+1)*nfilter] = w_tot ''' # choose the strong signatures (all of them) w_strong = accum_w # members of each cluster should have cosine similarity > 0.7 # equivalently, cosine distance < 0.3 Z = linkage(w_strong, 'average', metric='cosine') clusters = fcluster(Z, .3, criterion='distance') - 1 n_clusters = len(np.unique(clusters)) print '%d clusters chosen' % (n_clusters) # plot the discovered filter profiles plt.figure(figsize=(3,2)) idx = range(nmark) + [nmark+1] clmap = sns.clustermap(pd.DataFrame(w_strong[:,idx], columns=labels+['survival']), method='average', metric='cosine', row_linkage=Z, col_cluster=False, robust=True, yticklabels=clusters) clmap.cax.set_visible(False) fig_path = os.path.join(OUTDIR, 'HIV_clmap.eps') clmap.savefig(fig_path, format='eps') plt.close() # generate the consensus filter profiles c = Counter(clusters) cons = [] 
for key, val in c.items(): if val > nfold/2: cons.append(np.mean(w_strong[clusters == key], axis=0)) cons_mat = np.vstack(cons) # plot the consensus filter profiles plt.figure(figsize=(10, 3)) idx = range(nmark) + [nmark+1] ax = sns.heatmap(pd.DataFrame(cons_mat[:,idx], columns=labels + ['survival']), robust=True, yticklabels=False) plt.xticks(rotation=90) ax.tick_params(axis='both', which='major', labelsize=20) plt.tight_layout() fig_path = os.path.join(OUTDIR, 'clmap_consensus.eps') plt.savefig(fig_path, format='eps') plt.close() # create an ensemble of neural networks ncell_cons = 3000 ncell_voter = 3000 layers_voter = [ (layers.InputLayer, {'name': 'input', 'shape': (None, nmark, ncell_voter)}), (layers.Conv1DLayer, {'name': 'conv', 'num_filters': nfilter, 'filter_size': 1}), (layers.Pool1DLayer, {'name': 'meanPool', 'pool_size' : ncell_voter, 'mode': 'average_exc_pad'}), (layers.DenseLayer, {'name': 'output', 'num_units': 1, 'nonlinearity': T.tanh})] # predict on the test cohort small_data_list_v = [x[:ncell_cons].T.reshape(1,nmark,ncell_cons) for x in validation_list] data_v = np.vstack(small_data_list_v) stime, censor = y_valid[:,0], y_valid[:,1] # committee of the best nfold/2 models voter_risk_pred = list() for ifold in np.argsort(valid_accuracy): voter = NeuralNet(layers = layers_voter, update = nesterov_momentum, update_learning_rate = 0.001, regression=True, max_epochs=5, verbose=0) voter.load_params_from(committee[ifold]) voter.initialize() # rank the risk predictions voter_risk_pred.append(ss.rankdata(- np.squeeze(voter.predict(data_v)))) all_voters = np.vstack(voter_risk_pred) # compute mean rank per individual risk_p = np.mean(all_voters, axis=0) g1 = np.squeeze(risk_p > np.median(risk_p)) voters_pval_v = logrank_pval(stime, censor, g1) fig_v = os.path.join(OUTDIR, 'cellCnn_cox_test.eps') plot_KM(stime, censor, g1, voters_pval_v, fig_v) # filter-activating cells data_t = np.vstack(small_data_list_v) data_stack = np.vstack([x for x in np.swapaxes(data_t, 2, 1)]) # finally define a network from the consensus filters nfilter_cons = cons_mat.shape[0] ncell_cons = 3000 layers_cons = [ (layers.InputLayer, {'name': 'input', 'shape': (None, nmark, ncell_cons)}), (layers.Conv1DLayer, {'name': 'conv', 'b': init.Constant(cons_mat[:,-2]), 'W': cons_mat[:,:-2].reshape(nfilter_cons, nmark, 1), 'num_filters': nfilter_cons, 'filter_size': 1}), (layers.Pool1DLayer, {'name': 'meanPool', 'pool_size' : ncell_cons, 'mode': 'average_exc_pad'}), (layers.DenseLayer, {'name': 'output', 'num_units': 1, 'W': np.sign(cons_mat[:,-1:]), 'b': init.Constant(0.), 'nonlinearity': T.tanh})] net_cons = NeuralNet(layers = layers_cons, update = nesterov_momentum, update_learning_rate = 0.001, regression=True, max_epochs=5, verbose=0) net_cons.initialize() # get the representation after mean pooling xs = T.tensor3('xs').astype(theano.config.floatX) act_conv = theano.function([xs], lh.get_output(net_cons.layers_['conv'], xs)) # and apply to the test data act_tot = act_conv(data_t) act_tot = np.swapaxes(act_tot, 2, 1) act_stack = np.vstack([x for x in act_tot]) idx = range(7) + [8,9] for i_map in range(nfilter_cons): val = act_stack[:, i_map] descending_order = np.argsort(val)[::-1] val_cumsum = np.cumsum(val[descending_order]) data_sorted = data_stack[descending_order] thres = 0.75 * val_cumsum[-1] res_data = data_sorted[val_cumsum < thres] fig_path = os.path.join(OUTDIR, 'filter_'+str(i_map)+'_active.eps') plot_marker_distribution([res_data[:,idx], data_stack[:,idx]], ['filter '+str(i_map), 'all'], [labels[l] 
for l in idx], (3,3), fig_path, 24)
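If this script is meant to be run directly, a standard entry-point guard (assumed here, not shown in the original) would invoke main():

if __name__ == '__main__':
    main()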
    coutput_num_units=10,
    #input_nonlinearity = None,  # nonlinearities.sigmoid,
    #dense_nonlinearity = nonlinearities.tanh,
    narrow_nonlinearity=nonlinearities.softplus,
    reverse_nonlinearity=nonlinearities.sigmoid,
    coutput_nonlinearity=nonlinearities.softmax,
    #dropout0_p=0.1,
    dropout1_p=0.01,
    #regression=True,
    regression=False,
    verbose=1)

nn.initialize()
nn.load_params_from('task4/koebi_train_history_AE')
PrintLayerInfo()(nn)
nn.fit(X, Y)

test = pd.read_hdf("task4/test.h5", "test")
id_col = test.index
test_data = np.array(test)
test_data = skpre.StandardScaler().fit_transform(test_data)
test_prediction = nn.predict(test_data)

# Write each prediction and its line number into a csv file
with open('task4/' + result_file_name + '.csv', 'wb') as csvfile: