    def _create_nnet(self, input_dims, output_dims, learning_rate, num_hidden_units=15,
                     batch_size=32, max_train_epochs=1, hidden_nonlinearity=nonlinearities.rectify,
                     output_nonlinearity=None, update_method=updates.sgd):
        """
        A subclass may override this if a different sort
        of network is desired.
        """
        nnlayers = [('input', layers.InputLayer), ('hidden', layers.DenseLayer), ('output', layers.DenseLayer)]
        nnet = NeuralNet(layers=nnlayers,

                           # layer parameters:
                           input_shape=(None, input_dims),
                           hidden_num_units=num_hidden_units,
                           hidden_nonlinearity=hidden_nonlinearity,
                           output_nonlinearity=output_nonlinearity,
                           output_num_units=output_dims,

                           # optimization method:
                           update=update_method,
                           update_learning_rate=learning_rate,

                           regression=True,  # flag to indicate we're dealing with regression problem
                           max_epochs=max_train_epochs,
                           batch_iterator_train=BatchIterator(batch_size=batch_size),
                           train_split=nolearn.lasagne.TrainSplit(eval_size=0),
                           verbose=0,
                         )
        nnet.initialize()
        return nnet
Example #2
def load_encoder(path):
    """
        load a pretrained dbn from path
        :param path: path to the .mat dbn
        :return: pretrained unrolled encoder
        """
    # create the network using weights from pretrain_nn.mat
    nn = sio.loadmat(path)
    w1 = nn['w1']
    w2 = nn['w2']
    w3 = nn['w3']
    w4 = nn['w4']
    b1 = nn['b1'][0]
    b2 = nn['b2'][0]
    b3 = nn['b3'][0]
    b4 = nn['b4'][0]

    encoder = NeuralNet(layers=[
        (InputLayer, {
            'name': 'input',
            'shape': (None, 1200)
        }),
        (DenseLayer, {
            'name': 'l1',
            'num_units': 2000,
            'nonlinearity': sigmoid,
            'W': w1,
            'b': b1
        }),
        (DenseLayer, {
            'name': 'l2',
            'num_units': 1000,
            'nonlinearity': sigmoid,
            'W': w2,
            'b': b2
        }),
        (DenseLayer, {
            'name': 'l3',
            'num_units': 500,
            'nonlinearity': sigmoid,
            'W': w3,
            'b': b3
        }),
        (DenseLayer, {
            'name': 'l4',
            'num_units': 50,
            'nonlinearity': linear,
            'W': w4,
            'b': b4
        }),
    ],
                        update=nesterov_momentum,
                        update_learning_rate=0.001,
                        update_momentum=0.5,
                        objective_l2=0.005,
                        verbose=1,
                        regression=True)
    encoder.initialize()
    return encoder
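
# A minimal usage sketch, not part of the original example: it assumes the
# pretrained weights file ('pretrain_nn.mat', as mentioned in the comment
# above) is available and that inputs are flattened to 1200 features, matching
# the input layer.
import numpy as np

encoder = load_encoder('pretrain_nn.mat')
X = np.random.rand(10, 1200).astype('float32')  # placeholder batch of inputs
codes = encoder.predict(X)                      # (10, 50) low-dimensional codes
print(codes.shape)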
Example #3
 def test_okay(self, NeuralNet):
     net = NeuralNet(
         layers=[('input', Mock), ('mylayer', Mock)],
         input_shape=(10, 10),
         mylayer_hey='hey',
         update_foo=1,
         update_bar=2,
         )
     net._create_iter_funcs = lambda *args: (1, 2, 3)
     net.initialize()
Example #4
 def test_okay(self, NeuralNet):
     net = NeuralNet(
         layers=[('input', Mock), ('mylayer', Mock)],
         input_shape=(10, 10),
         mylayer_hey='hey',
         update_foo=1,
         update_bar=2,
     )
     net._create_iter_funcs = lambda *args: (1, 2, 3)
     net.initialize()
Example #5
    def test_unused(self, NeuralNet):
        net = NeuralNet(
            layers=[('input', Mock), ('mylayer', Mock)],
            input_shape=(10, 10),
            mylayer_hey='hey',
            yourlayer_ho='ho',
            update_foo=1,
            update_bar=2,
        )
        net._create_iter_funcs = lambda *args: (1, 2, 3)

        with pytest.raises(ValueError) as err:
            net.initialize()
        assert str(err.value) == 'Unused kwarg: yourlayer_ho'
Example #6
    def test_unused(self, NeuralNet):
        net = NeuralNet(
            layers=[('input', Mock), ('mylayer', Mock)],
            input_shape=(10, 10),
            mylayer_hey='hey',
            yourlayer_ho='ho',
            update_foo=1,
            update_bar=2,
            )
        net._create_iter_funcs = lambda *args: (1, 2, 3)

        with pytest.raises(ValueError) as err:
            net.initialize()
        assert str(err.value) == 'Unused kwarg: yourlayer_ho'
Example #7
    def test_layers_included(self, NeuralNet):
        def objective(layers_, target, **kwargs):
            out_a_layer = layers_['output_a']
            out_b_layer = layers_['output_b']

            # Get the outputs
            out_a, out_b = get_output([out_a_layer, out_b_layer])

            # Get the targets
            gt_a = T.cast(target[:, 0], 'int32')
            gt_b = target[:, 1].reshape((-1, 1))

            # Calculate the multi task loss
            cls_loss = aggregate(categorical_crossentropy(out_a, gt_a))
            reg_loss = aggregate(categorical_crossentropy(out_b, gt_b))
            loss = cls_loss + reg_loss
            return loss

        # test that both branches of the multi output network are included,
        # and also that a single layer isn't included multiple times.
        l = InputLayer(shape=(None, 1, 28, 28), name="input")
        l = Conv2DLayer(l, name='conv1', filter_size=(5, 5), num_filters=8)
        l = Conv2DLayer(l, name='conv2', filter_size=(5, 5), num_filters=8)

        la = DenseLayer(l, name='hidden_a', num_units=128)
        la = DenseLayer(la,
                        name='output_a',
                        nonlinearity=softmax,
                        num_units=10)

        lb = DenseLayer(l, name='hidden_b', num_units=128)
        lb = DenseLayer(lb, name='output_b', nonlinearity=sigmoid, num_units=1)

        net = NeuralNet(layers=[la, lb],
                        update_learning_rate=0.5,
                        y_tensor_type=None,
                        regression=True,
                        objective=objective)
        net.initialize()

        expected_names = sorted([
            "input", "conv1", "conv2", "hidden_a", "output_a", "hidden_b",
            "output_b"
        ])
        network_names = sorted(list(net.layers_.keys()))

        assert (expected_names == network_names)
Example #8
def extract_encoder(dbn):
    dbn_layers = dbn.get_all_layers()
    encoder = NeuralNet(layers=[
        (InputLayer, {
            'name': 'input',
            'shape': dbn_layers[0].shape
        }),
        (DenseLayer, {
            'name': 'l1',
            'num_units': dbn_layers[1].num_units,
            'nonlinearity': sigmoid,
            'W': dbn_layers[1].W,
            'b': dbn_layers[1].b
        }),
        (DenseLayer, {
            'name': 'l2',
            'num_units': dbn_layers[2].num_units,
            'nonlinearity': sigmoid,
            'W': dbn_layers[2].W,
            'b': dbn_layers[2].b
        }),
        (DenseLayer, {
            'name': 'l3',
            'num_units': dbn_layers[3].num_units,
            'nonlinearity': sigmoid,
            'W': dbn_layers[3].W,
            'b': dbn_layers[3].b
        }),
        (DenseLayer, {
            'name': 'l4',
            'num_units': dbn_layers[4].num_units,
            'nonlinearity': linear,
            'W': dbn_layers[4].W,
            'b': dbn_layers[4].b
        }),
    ],
                        update=nesterov_momentum,
                        update_learning_rate=0.001,
                        update_momentum=0.5,
                        objective_l2=0.005,
                        verbose=1,
                        regression=True)
    encoder.initialize()
    return encoder
Example #9
    def test_layers_included(self, NeuralNet):
        def objective(layers_, target, **kwargs):
            out_a_layer = layers_['output_a']
            out_b_layer = layers_['output_b']

            # Get the outputs
            out_a, out_b = get_output([out_a_layer, out_b_layer])

            # Get the targets
            gt_a = T.cast(target[:, 0], 'int32')
            gt_b = target[:, 1].reshape((-1, 1))

            # Calculate the multi task loss
            cls_loss = aggregate(categorical_crossentropy(out_a, gt_a))
            reg_loss = aggregate(categorical_crossentropy(out_b, gt_b))
            loss = cls_loss + reg_loss
            return loss

        # test that both branches of the multi output network are included,
        # and also that a single layer isn't included multiple times.
        l = InputLayer(shape=(None, 1, 28, 28), name="input")
        l = Conv2DLayer(l, name='conv1', filter_size=(5, 5), num_filters=8)
        l = Conv2DLayer(l, name='conv2', filter_size=(5, 5), num_filters=8)

        la = DenseLayer(l, name='hidden_a', num_units=128)
        la = DenseLayer(la, name='output_a', nonlinearity=softmax,
                        num_units=10)

        lb = DenseLayer(l, name='hidden_b', num_units=128)
        lb = DenseLayer(lb, name='output_b', nonlinearity=sigmoid, num_units=1)

        net = NeuralNet(layers=[la, lb],
                        update_learning_rate=0.5,
                        y_tensor_type=None,
                        regression=True,
                        objective=objective)
        net.initialize()

        expected_names = sorted(["input", "conv1", "conv2",
                                 "hidden_a", "output_a",
                                 "hidden_b", "output_b"])
        network_names = sorted(list(net.layers_.keys()))

        assert (expected_names == network_names)
Example #10
def model_initial(X_train, y_train, max_iter=5):
    global params, val_acc
    params = []
    val_acc = np.zeros(max_iter)
    lr = theano.shared(np.float32(1e-4))
    for iteration in range(max_iter):
        print 'Initializing weights (%d/5) ...' % (iteration + 1)
        network_init = create_network()
        net_init = NeuralNet(
            network_init,
            max_epochs=3,
            update=adam,
            update_learning_rate=lr,
            train_split=TrainSplit(eval_size=0.1),
            batch_iterator_train=BatchIterator(batch_size=32),
            batch_iterator_test=BatchIterator(batch_size=64),
            on_training_finished=[SaveTrainHistory(iteration=iteration)],
            verbose=0)
        net_init.initialize()
        net_init.fit(X_train, y_train)
Example #11
def net_color_non_square(NeuralNet):
    l = InputLayer(shape=(None, 3, 20, 28))
    l = Conv2DLayer(l, name='conv1', filter_size=(5, 5), num_filters=1)
    l = MaxPool2DLayer(l, name='pool1', pool_size=(2, 2))
    l = Conv2DLayer(l, name='conv2', filter_size=(5, 5), num_filters=8)
    l = MaxPool2DLayer(l, name='pool2', pool_size=(2, 2))
    l = DenseLayer(l, name='hidden1', num_units=128)
    l = DenseLayer(l, name='output', nonlinearity=softmax, num_units=10)

    net = NeuralNet(
        layers=l,

        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,

        max_epochs=1,
        )
    net.initialize()
    return net
Example #12
def make_net(W, H, size1=20, size2=15):
    net = NeuralNet(
        layers=[
            ('input', InputLayer),
            ('dense1', DenseLayer),
            ('dense2', DenseLayer),
            ('output', DenseLayer),
        ],
        input_shape=(None, W * H),
        dense1_num_units=size1,
        dense1_nonlinearity=LeakyRectify(leakiness=0.1),
        dense1_W=HeNormal(),
        dense1_b=Constant(),
        dense2_num_units=size2,
        dense2_nonlinearity=LeakyRectify(leakiness=0.1),
        dense2_W=HeNormal(),
        dense2_b=Constant(),
        output_num_units=4,
        output_nonlinearity=softmax,
        output_W=HeNormal(),
        output_b=Constant(),
        update=nesterov_momentum,  # todo
        update_learning_rate=shared(float32(1.)),
        update_momentum=0.9,
        max_epochs=200,
        on_epoch_finished=[
            StopWhenOverfitting(),
            StopAfterMinimum(),
            AdjustLearningRate(1., 0.0001),
        ],

        #label_encoder = False,
        regression=True,
        verbose=1,
        batch_iterator_train=BatchIterator(batch_size=128),  # todo
        batch_iterator_test=BatchIterator(batch_size=128),
        train_split=TrainSplit(eval_size=0.1),
    )
    net.initialize()

    return net
Example #13
def create_nn():
    '''
    Create a neural net with one (or more) layers to fit the featurized data.
    A single softmax layer is equivalent to doing logistic regression on the featurized data.
    Result: 53% accuracy.
    Adding a fully connected hidden layer boosts accuracy to 67%.
    '''
    nn = NeuralNet(
        layers = [
            (InputLayer, {
                        'name':'input',
                        'shape':(None,4096)
                         }),
            # (DropoutLayer, {
            #             'name':'drop6',
            #             'p':.5
            #             }),
            (DenseLayer, {
                        'name':'fc7',
                        'num_units':4096,
                        }),
            (DenseLayer, {
                        'name':'output',
                        'num_units':3,
                        'nonlinearity':softmax,
                        })
                        ],
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,
    #         regression=True,  # flag to indicate we're dealing with regression problem
        max_epochs=1000,  # we want to train this many epochs
        verbose=1,
        train_split=TrainSplit(eval_size=0.25),

        )

    nn.initialize()

    return nn
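
# A minimal usage sketch, not part of the original example: X_feat is assumed
# to be a matrix of 4096-dimensional CNN features (matching the input layer)
# and y an integer vector with the three class labels the output layer expects.
import numpy as np

nn = create_nn()
X_feat = np.random.rand(60, 4096).astype('float32')   # placeholder featurized data
y = np.random.randint(0, 3, size=60).astype('int32')  # three target classes
nn.fit(X_feat, y)
preds = nn.predict(X_feat)                             # predicted class labels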
Example #14
def extract_encoder(dbn):
    dbn_layers = dbn.get_all_layers()
    encoder = NeuralNet(
        layers=[
            (InputLayer, {'name': 'input', 'shape': dbn_layers[0].shape}),
            (DenseLayer, {'name': 'l1', 'num_units': dbn_layers[1].num_units, 'nonlinearity': sigmoid,
                          'W': dbn_layers[1].W, 'b': dbn_layers[1].b}),
            (DenseLayer, {'name': 'l2', 'num_units': dbn_layers[2].num_units, 'nonlinearity': sigmoid,
                          'W': dbn_layers[2].W, 'b': dbn_layers[2].b}),
            (DenseLayer, {'name': 'l3', 'num_units': dbn_layers[3].num_units, 'nonlinearity': sigmoid,
                          'W': dbn_layers[3].W, 'b': dbn_layers[3].b}),
            (DenseLayer, {'name': 'l4', 'num_units': dbn_layers[4].num_units, 'nonlinearity': linear,
                          'W': dbn_layers[4].W, 'b': dbn_layers[4].b}),
        ],
        update=adadelta,
        update_learning_rate=0.01,
        objective_l2=0.005,
        verbose=1,
        regression=True
    )
    encoder.initialize()
    return encoder
Example #15
    def _create_nnet(self,
                     input_dims,
                     output_dims,
                     learning_rate,
                     num_hidden_units=15,
                     batch_size=32,
                     max_train_epochs=1,
                     hidden_nonlinearity=nonlinearities.rectify,
                     output_nonlinearity=None,
                     update_method=updates.sgd):
        """
        A subclass may override this if a different sort
        of network is desired.
        """
        nnlayers = [('input', layers.InputLayer),
                    ('hidden', layers.DenseLayer),
                    ('output', layers.DenseLayer)]
        nnet = NeuralNet(
            layers=nnlayers,

            # layer parameters:
            input_shape=(None, input_dims),
            hidden_num_units=num_hidden_units,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=output_nonlinearity,
            output_num_units=output_dims,

            # optimization method:
            update=update_method,
            update_learning_rate=learning_rate,
            regression=True,  # flag to indicate we're dealing with a regression problem
            max_epochs=max_train_epochs,
            batch_iterator_train=BatchIterator(batch_size=batch_size),
            train_split=nolearn.lasagne.TrainSplit(eval_size=0),
            verbose=0,
        )
        nnet.initialize()
        return nnet
Example #16
def load_encoder(path):
    """
        load a pretrained dbn from path
        :param path: path to the .mat dbn
        :return: pretrained unrolled encoder
        """
    # create the network using weights from pretrain_nn.mat
    nn = sio.loadmat(path)
    w1 = nn['w1']
    w2 = nn['w2']
    w3 = nn['w3']
    w4 = nn['w4']
    b1 = nn['b1'][0]
    b2 = nn['b2'][0]
    b3 = nn['b3'][0]
    b4 = nn['b4'][0]

    encoder = NeuralNet(
        layers=[
            (InputLayer, {'name': 'input', 'shape': (None, 1200)}),
            (DenseLayer, {'name': 'l1', 'num_units': 2000, 'nonlinearity': sigmoid,
                          'W': w1, 'b': b1}),
            (DenseLayer, {'name': 'l2', 'num_units': 1000, 'nonlinearity': sigmoid,
                          'W': w2, 'b': b2}),
            (DenseLayer, {'name': 'l3', 'num_units': 500, 'nonlinearity': sigmoid,
                          'W': w3, 'b': b3}),
            (DenseLayer, {'name': 'l4', 'num_units': 50, 'nonlinearity': linear,
                          'W': w4, 'b': b4}),
        ],
        update=nesterov_momentum,
        update_learning_rate=0.001,
        update_momentum=0.5,
        objective_l2=0.005,
        verbose=1,
        regression=True
    )
    encoder.initialize()
    return encoder
Example #17
def net_with_nonlinearity_layer(NeuralNet):
    l = InputLayer(shape=(None, 1, 28, 28))
    l = Conv2DLayer(l, name='conv1', filter_size=(5, 5), num_filters=8)
    l = MaxPool2DLayer(l, name='pool1', pool_size=(2, 2))
    l = Conv2DLayer(l, name='conv2', filter_size=(5, 5), num_filters=8)
    l = MaxPool2DLayer(l, name='pool2', pool_size=(2, 2))
    l = DenseLayer(l, name='hidden1', num_units=128)
    l = DenseLayer(l, name='output', nonlinearity=softmax, num_units=10)
    l = NonlinearityLayer(l)

    net = NeuralNet(
        layers=l,

        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,

        max_epochs=5,
        on_epoch_finished=[_OnEpochFinished()],
        verbose=99,
        )
    net.initialize()
    return net
Example #18
        conv2d8_filter_size=(1,1),
        conv2d8_nonlinearity=lasagne.nonlinearities.rectify,
        conv2d8_W=W[7],

        #output_nonlinearity=lasagne.nonlinearities.softmax,#,  # output layer uses identity function
        #output_num_units=1000,  # 1000 target values
        #output_W = W[7],

        # optimization method params
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,
        max_epochs=10,
        verbose=1,
        regression=True
    )
    for i, w in enumerate(W):
        print i, w.shape

    net1.initialize()
    import cv2
    from training_images import simpleProcessImage
    img = cv2.imread("/home/simon/python/sklearn-theano/sklearn_theano/datasets/images/cat_and_dog.jpg")

    crop = simpleProcessImage(img)
    cv2.imshow("X", crop)
    res = net1.predict(crop.transpose(2,0,1).reshape(-1,3,231,231))
    print res

    cv2.waitKey()
Example #19
def create_pretrained_vgg_nn_nolearn():
    '''
    *** This function need only be run once to create and save a nolearn NeuralNet ***
    ***     instance from the original lasagne layer weights for the vgg net.      ***
    Create a vgg neural net. Load pretrained weights.
    Pickle the entire net.
    Pickle the mean image.
    Return a nolearn.NeuralNet instance and the mean_image numpy array.
    '''
    # define the vgg_s network
    vgg_nn = NeuralNet(
        layers = [
            (InputLayer, {
                        'name':'input',
                        'shape':(None,3,224,224)
                         }),
            (ConvLayer, {
                        'name':'conv1',
                        'num_filters':96,
                        'filter_size':(7,7),
                        'stride':2,
                        'flip_filters':False
                        }),
            (NormLayer, {
                        'name':'norm1',
                        'alpha':.0001
                        }),
            (PoolLayer, {
                        'name':'pool1',
                        'pool_size':(3,3),
                        'stride':3,
                        'ignore_border':False
                        }),
            (ConvLayer, {
                        'name':'conv2',
                        'num_filters':256,
                        'filter_size':(5,5),
                        'flip_filters':False
    #                     'pad':2,
    #                     'stride':1
                       }),
            (PoolLayer, {
                        'name':'pool2',
                        'pool_size':(2,2),
                        'stride':2,
                        'ignore_border':False
                        }),
            (ConvLayer, {
                        'name':'conv3',
                        'num_filters':512,
                        'filter_size':(3,3),
                        'pad':1,
    #                     'stride':1
                        'flip_filters':False
                       }),
            (ConvLayer, {
                        'name':'conv4',
                        'num_filters':512,
                        'filter_size':(3,3),
                        'pad':1,
    #                     'stride':1
                        'flip_filters':False
                        }),
            (ConvLayer, {
                        'name':'conv5',
                        'num_filters':512,
                        'filter_size':(3,3),
                        'pad':1,
    #                     'stride':1
                        'flip_filters':False
                         }),
            (PoolLayer, {
                        'name':'pool5',
                        'pool_size':(3,3),
                        'stride':3,
                        'ignore_border':False
                        }),
            (DenseLayer,{
                        'name':'fc6',
                        'num_units':4096
                       }),
            (DropoutLayer, {
                        'name':'drop6',
                        'p':.5
                        }),
            (DenseLayer, {
                        'name':'fc7',
                        'num_units':4096
                        }),
        ],



    #        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,

    #  Do not need these unless training the net.
    #     regression=True,  # flag to indicate we're dealing with regression problem
    #     max_epochs=400,  # we want to train this many epochs
    #     verbose=1,
    )

    # load pretrained weights
    vgg_nn.initialize()
    vgg_nn.load_params_from('./vgg_nolearn_saved_wts_biases.pkl')

    # load mean image
    model = pickle.load(open('./vgg_cnn_s.pkl'))
    mean_image = model['mean image']

    # pickle the model and the mean image
    with open("/data/mean_image.pkl", 'w') as f:
        pickle.dump(mean_image, f)
    with open("/data/full_vgg.pkl", 'w') as f:
        pickle.dump(vgg_nn, f)

    return vgg_nn, mean_image
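
# A minimal usage sketch, not part of the original example: it assumes the
# pickled weight files referenced above exist and the /data output paths are
# writable. Because the last layer is 'fc7', predict_proba should return the
# raw 4096-dimensional activations, usable as image features. `batch` is
# assumed to be mean-subtracted already and shaped (n_images, 3, 224, 224).
import numpy as np

vgg_nn, mean_image = create_pretrained_vgg_nn_nolearn()
batch = np.random.rand(2, 3, 224, 224).astype('float32')  # placeholder preprocessed images
fc7_features = vgg_nn.predict_proba(batch)                 # shape (2, 4096)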
Example #20
class NNet(BaseEstimator, ClassifierMixin):
    def __init__(
        self,
        name='nameless_net',  # used for saving, so maybe make it unique
        dense1_size=60,
        dense1_nonlinearity='tanh',
        dense1_init='orthogonal',
        dense2_size=None,
        dense2_nonlinearity=None,  # inherits dense1
        dense2_init=None,  # inherits dense1
        dense3_size=None,
        dense3_nonlinearity=None,  # inherits dense2
        dense3_init=None,  # inherits dense2
        learning_rate=0.001,
        learning_rate_scaling=100,
        momentum=0.9,
        momentum_scaling=100,
        max_epochs=3000,
        epoch_steps=None,
        dropout0_rate=0,  # this is the input layer
        dropout1_rate=None,
        dropout2_rate=None,  # inherits dropout1_rate
        dropout3_rate=None,  # inherits dropout2_rate
        weight_decay=0,
        adaptive_weight_decay=False,
        batch_size=128,
        output_nonlinearity='softmax',
        auto_stopping=True,
        save_snapshots_stepsize=None,
    ):
        """
			Create the network with the selected parameters.

			:param name: Name for save files
			:param dense1_size: Number of neurons for first hidden layer
			:param dense1_nonlinearity: The activation function for the first hidden layer
			:param dense1_init: The weight initialization for the first hidden layer
			:param learning_rate: The (initial) learning rate (how fast the network learns)
			:param learning_rate_scaling: The total factor to gradually decrease the learning rate by
			:param momentum: The (initial) momentum
			:param momentum_scaling: Similar to learning_rate_scaling
			:param max_epochs: Total number of epochs (at most)
			:param dropout1_rate: Percentage of connections dropped each step for first hidden layer
			:param weight_decay: Penalizes the weights by L2 norm (regularizes but decreases results)
			:param adaptive_weight_decay: Should the weight decay adapt automatically?
			:param batch_size: How many samples to send through the network at a time
			:param auto_stopping: Stop early if the network seems to stop performing well
			:param pretrain: Filepath of the previous weights to start at (or None)
			:return:
		"""
        """
			Input argument storage: automatically store all locals, which should be exactly the arguments at this point, but storing a little too much is not a big problem.
		"""
        params = locals()
        del params['self']
        #self.__dict__.update(params)
        self.parameter_names = sorted(params.keys())
        """
			Check the parameters and update some defaults (will be done for 'self', no need to store again).
		"""
        self.set_params(**params)

    def init_net(self,
                 feature_count,
                 class_count=NCLASSES,
                 verbosity=VERBOSITY >= 2):
        """
			Initialize the network (needs to be done when data is available in order to set dimensions).
		"""
        if VERBOSITY >= 1:
            print 'initializing network {0:s} {1:d}x{2:d}x{3:d}'.format(
                self.name, self.dense1_size or 0, self.dense2_size or 0,
                self.dense3_size or 0)
            if VERBOSITY >= 2:
                print 'parameters: ' + ', '.join(
                    '{0:s} = {1:}'.format(k, v)
                    for k, v in self.get_params(deep=False).items())
        self.feature_count = feature_count
        self.class_count = class_count
        """
			Create the layers and their settings.
		"""
        self.layers = [
            ('input', InputLayer),
        ]
        self.params = {
            'dense1_num_units': self.dense1_size,
            'dense1_nonlinearity': nonlinearities[self.dense1_nonlinearity],
            'dense1_W': initializers[self.dense1_init],
            'dense1_b': Constant(0.),
        }
        if self.dropout0_rate:
            self.layers += [('dropout0', DropoutLayer)]
            self.params['dropout0_p'] = self.dropout0_rate
        self.layers += [
            ('dense1', DenseLayer),
        ]
        if self.dropout1_rate:
            self.layers += [('dropout1', DropoutLayer)]
            self.params['dropout1_p'] = self.dropout1_rate
        if self.dense2_size:
            self.layers += [('dense2', DenseLayer)]
            self.params.update({
                'dense2_num_units': self.dense2_size,
                'dense2_nonlinearity': nonlinearities[self.dense2_nonlinearity],
                'dense2_W': initializers[self.dense2_init],
                'dense2_b': Constant(0.),
            })
        else:
            assert not self.dense3_size, 'There cannot be a third dense layer without a second one'
        if self.dropout2_rate:
            assert self.dense2_size is not None, 'There cannot be a second dropout layer without a second dense layer.'
            self.layers += [('dropout2', DropoutLayer)]
            self.params['dropout2_p'] = self.dropout2_rate
        if self.dense3_size:
            self.layers += [('dense3', DenseLayer)]
            self.params.update({
                'dense3_num_units': self.dense3_size,
                'dense3_nonlinearity': nonlinearities[self.dense3_nonlinearity],
                'dense3_W': initializers[self.dense3_init],
                'dense3_b': Constant(0.),
            })
        if self.dropout3_rate:
            assert self.dense3_size is not None, 'There cannot be a third dropout layer without a third dense layer.'
            self.layers += [('dropout3', DropoutLayer)]
            self.params['dropout3_p'] = self.dropout3_rate
        self.layers += [('output', DenseLayer)]
        self.params.update({
            'output_nonlinearity': nonlinearities[self.output_nonlinearity],
            'output_W': GlorotUniform(),
            'output_b': Constant(0.),
        })
        """
			Create meta parameters and special handlers.
		"""
        if VERBOSITY >= 3:
            print 'learning rate: {0:.6f} -> {1:.6f}'.format(
                abs(self.learning_rate),
                abs(self.learning_rate) / float(self.learning_rate_scaling))
            print 'momentum:      {0:.6f} -> {1:.6f}'.format(
                abs(self.momentum),
                1 - ((1 - abs(self.momentum)) / float(self.momentum_scaling)))
        self.step_handlers = [
            LinearVariable('update_learning_rate',
                           start=abs(self.learning_rate),
                           stop=abs(self.learning_rate) /
                           float(self.learning_rate_scaling)),
            LinearVariable(
                'update_momentum',
                start=abs(self.momentum),
                stop=1 -
                ((1 - abs(self.momentum)) / float(self.momentum_scaling))),
            StopNaN(),
        ]
        self.end_handlers = [
            SnapshotEndSaver(base_name=self.name),
            TrainProgressPlotter(base_name=self.name),
        ]
        snapshot_name = 'nn_' + params_name(self.params, prefix=self.name)[0]
        if self.save_snapshots_stepsize:
            self.step_handlers += [
                SnapshotStepSaver(every=self.save_snapshots_stepsize,
                                  base_name=snapshot_name),
            ]
        if self.auto_stopping:
            self.step_handlers += [
                StopWhenOverfitting(loss_fraction=0.9,
                                    base_name=snapshot_name),
                StopAfterMinimum(patience=40, base_name=self.name),
            ]
        weight_decay = shared(float32(abs(self.weight_decay)), 'weight_decay')
        if self.adaptive_weight_decay:
            self.step_handlers += [
                AdaptiveWeightDecay(weight_decay),
            ]
        if self.epoch_steps:
            self.step_handlers += [
                BreakEveryN(self.epoch_steps),
            ]
        """
			Create the actual nolearn network with information from __init__.
		"""
        self.net = NeuralNet(
            layers=self.layers,
            objective=partial(WeightDecayObjective, weight_decay=weight_decay),
            input_shape=(None, feature_count),
            output_num_units=class_count,
            update=nesterov_momentum,  # todo: make parameter
            update_learning_rate=shared(float32(self.learning_rate)),
            update_momentum=shared(float32(self.momentum)),
            on_epoch_finished=self.step_handlers,
            on_training_finished=self.end_handlers,
            regression=False,
            max_epochs=self.max_epochs,
            verbose=verbosity,
            batch_iterator_train=BatchIterator(batch_size=self.batch_size),
            batch_iterator_test=BatchIterator(batch_size=self.batch_size),
            eval_size=0.1,

            #custom_score = ('custom_loss', categorical_crossentropy),
            **self.params)
        self.net.parent = self

        self.net.initialize()

        return self.net

    def get_params(self, deep=True):
        return OrderedDict(
            (name, getattr(self, name)) for name in self.parameter_names)

    def set_params(self, **params):
        """
			Set all the parameters.
		"""
        for name, val in params.items():
            assert name in self.parameter_names, '"{0:s}" is not a valid parameter name (known parameters: "{1:s}")'.format(
                name, '", "'.join(self.parameter_names))
            setattr(self, name, val)
        """
			Arguments checks.
		"""
        assert self.dropout1_rate is None or 0 <= self.dropout1_rate < 1, 'Dropout rate 1 should be a value between 0 and 1 (value: {0})'.format(
            self.dropout1_rate)
        assert self.dropout2_rate is None or 0 <= self.dropout2_rate < 1, 'Dropout rate 2 should be a value between 0 and 1, or None for inheritance (value: {0})'.format(
            self.dropout2_rate)
        assert self.dropout3_rate is None or 0 <= self.dropout3_rate < 1, 'Dropout rate 3 should be a value between 0 and 1, or None for inheritance (value: {0})'.format(
            self.dropout3_rate)
        assert self.dense1_nonlinearity in nonlinearities.keys(
        ), 'Linearity 1 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(nonlinearities.keys()), self.dense1_nonlinearity)
        assert self.dense2_nonlinearity in nonlinearities.keys() + [
            None
        ], 'Linearity 2 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(nonlinearities.keys()), self.dense2_nonlinearity)
        assert self.dense3_nonlinearity in nonlinearities.keys() + [
            None
        ], 'Linearity 3 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(nonlinearities.keys()), self.dense3_nonlinearity)
        assert self.dense1_init in initializers.keys(
        ), 'Initializer 1 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(initializers.keys()), self.dense1_init)
        assert self.dense2_init in initializers.keys() + [
            None
        ], 'Initializer 2 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(initializers.keys()), self.dense2_init)
        assert self.dense3_init in initializers.keys() + [
            None
        ], 'Initializer 3 should be one of "{0}", got "{1}" instead.'.format(
            '", "'.join(initializers.keys()), self.dense3_init)
        """
			Argument defaults.
		"""
        if self.dense2_nonlinearity is None:
            self.dense2_nonlinearity = self.dense1_nonlinearity
        if self.dense2_init is None:
            self.dense2_init = self.dense1_init
        if self.dense3_nonlinearity is None:
            self.dense3_nonlinearity = self.dense2_nonlinearity
        if self.dense3_init is None:
            self.dense3_init = self.dense2_init
        if self.dropout2_rate is None and self.dense2_size:
            self.dropout2_rate = self.dropout1_rate
        if self.dropout3_rate is None and self.dense3_size:
            self.dropout3_rate = self.dropout2_rate

    def fit(self, X, y, random_sleep=None):
        if random_sleep:
            sleep(random_sleep *
                  random())  # this is to prevent compiler lock problems
        labels = y - y.min()
        #todo: don't use labels.max(), occasionally (rarely) it will not have the highest class
        self.init_net(feature_count=X.shape[1], class_count=labels.max() + 1)
        net = self.net.fit(X, labels)
        self.save()
        return net

    def interrupted_fit(self, X, y):
        """ DEPRECATED """
        labels = y - y.min()
        self.init_net(feature_count=X.shape[1], class_count=labels.max() + 1)
        knowledge = get_knowledge(self.net)
        for epoch in range(0, self.max_epochs, self.epoch_steps):
            set_knowledge(self.net, knowledge)
            self.init_net(feature_count=X.shape[1],
                          class_count=labels.max() + 1)
            print 'epoch {0:d}: learning {1:d} epochs'.format(
                epoch, self.epoch_steps)
            self.net.fit(X, labels)
            ratio = mean([d['valid_loss'] for d in self.net._train_history[-self.epoch_steps:]]) / \
              mean([d['train_loss'] for d in self.net._train_history[-self.epoch_steps:]])
            if ratio < 0.85:
                self.weight_decay *= 1.3
            if ratio > 0.95:
                self.weight_decay /= 1.2
            self.init_net(feature_count=X.shape[1],
                          class_count=labels.max() + 1)
            knowledge = get_knowledge(self.net)
        exit()
        net = self.net.fit(X, labels)
        self.save()
        return net

    def predict_proba(self, X):
        probs = self.net.predict_proba(X)
        if not isfinite(probs).sum():
            errmsg = 'network "{0:s}" predicted infinite/NaN probabilities'.format(
                self.name)
            stderr.write(errmsg)
            raise DivergenceError(errmsg)
        return probs

    def predict(self, X):
        return self.net.predict(X)

    def score(self, X, y, **kwargs):
        return self.net.score(X, y)

    def save(self, filepath=None):
        assert hasattr(
            self, 'net'
        ), 'Cannot save a network that is not initialized; .fit(X, y) something first [or use net.initialize(..) for random initialization].'
        parameters = self.get_params(deep=False)
        filepath = filepath or join(NNET_STATE_DIR, self.name)
        if VERBOSITY >= 1:
            print 'saving network to "{0:s}.net.npz|json"'.format(filepath)
        with open(filepath + '.net.json', 'w+') as fh:
            dump([parameters, self.feature_count, self.class_count],
                 fp=fh,
                 indent=2)
        save_knowledge(self.net, filepath + '.net.npz')

    @classmethod
    def load(cls, filepath=None, name=None):
        """
			:param filepath: The base path (without extension) to load the file from, OR:
			:param name: The name of the network to load (if filename is not given)
			:return: The loaded network
		"""
        filepath = filepath or join(NNET_STATE_DIR, name)
        if VERBOSITY >= 1:
            print 'loading network from "{0:s}.net.npz|json"'.format(filepath)
        with open(filepath + '.net.json', 'r') as fh:
            [parameters, feature_count, class_count] = load(fp=fh)
        nnet = cls(**parameters)
        nnet.init_net(feature_count=feature_count, class_count=class_count)
        load_knowledge(nnet.net, filepath + '.net.npz')
        return nnet
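
# A minimal usage sketch, not part of the original class: NNet follows the
# scikit-learn estimator protocol, so it can be fit on a feature matrix and
# integer labels. Note that fit() also writes a snapshot via save(), so
# NNET_STATE_DIR must exist and be writable.
import numpy as np

clf = NNet(name='demo_net', dense1_size=64, dense2_size=32, max_epochs=10)
X = np.random.rand(200, 20).astype('float32')  # placeholder features
y = np.random.randint(0, 4, size=200)          # placeholder labels for 4 classes
clf.fit(X, y)
proba = clf.predict_proba(X)                   # per-class probabilities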
Example #21
def create_pretrained_vgg_nn_nolearn():
    '''
    *** This function need only be run once to create and save a nolearn NeuralNet ***
    ***     instance from the original lasagne layer weights for the vgg net.      ***
    Create a vgg neural net. Load pretrained weights.
    Pickle the entire net.
    Pickle the mean image.
    Return a nolearn.NeuralNet instance and the mean_image numpy array.
    '''
    # define the vgg_s network
    vgg_nn = NeuralNet(
        layers=[
            (InputLayer, {
                'name': 'input',
                'shape': (None, 3, 224, 224)
            }),
            (ConvLayer, {
                'name': 'conv1',
                'num_filters': 96,
                'filter_size': (7, 7),
                'stride': 2,
                'flip_filters': False
            }),
            (NormLayer, {
                'name': 'norm1',
                'alpha': .0001
            }),
            (PoolLayer, {
                'name': 'pool1',
                'pool_size': (3, 3),
                'stride': 3,
                'ignore_border': False
            }),
            (
                ConvLayer,
                {
                    'name': 'conv2',
                    'num_filters': 256,
                    'filter_size': (5, 5),
                    'flip_filters': False
                    #                     'pad':2,
                    #                     'stride':1
                }),
            (PoolLayer, {
                'name': 'pool2',
                'pool_size': (2, 2),
                'stride': 2,
                'ignore_border': False
            }),
            (
                ConvLayer,
                {
                    'name': 'conv3',
                    'num_filters': 512,
                    'filter_size': (3, 3),
                    'pad': 1,
                    #                     'stride':1
                    'flip_filters': False
                }),
            (
                ConvLayer,
                {
                    'name': 'conv4',
                    'num_filters': 512,
                    'filter_size': (3, 3),
                    'pad': 1,
                    #                     'stride':1
                    'flip_filters': False
                }),
            (
                ConvLayer,
                {
                    'name': 'conv5',
                    'num_filters': 512,
                    'filter_size': (3, 3),
                    'pad': 1,
                    #                     'stride':1
                    'flip_filters': False
                }),
            (PoolLayer, {
                'name': 'pool5',
                'pool_size': (3, 3),
                'stride': 3,
                'ignore_border': False
            }),
            (DenseLayer, {
                'name': 'fc6',
                'num_units': 4096
            }),
            (DropoutLayer, {
                'name': 'drop6',
                'p': .5
            }),
            (DenseLayer, {
                'name': 'fc7',
                'num_units': 4096
            }),
        ],

        #        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,

        #  Do not need these unless training the net.
        #     regression=True,  # flag to indicate we're dealing with regression problem
        #     max_epochs=400,  # we want to train this many epochs
        #     verbose=1,
    )

    # load pretrained weights
    vgg_nn.initialize()
    vgg_nn.load_params_from('./vgg_nolearn_saved_wts_biases.pkl')

    # load mean image
    model = pickle.load(open('./vgg_cnn_s.pkl'))
    mean_image = model['mean image']

    # pickle the model and the mean image
    with open("/data/mean_image.pkl", 'w') as f:
        pickle.dump(mean_image, f)
    with open("/data/full_vgg.pkl", 'w') as f:
        pickle.dump(vgg_nn, f)

    return vgg_nn, mean_image
Example #22
def main():

    seed = 12345
    np.random.seed(seed)
    set_lasagne_rng(RandomState(seed))

    LOOKUP_PATH = os.path.join(WDIR, 'data', 'HIV.pkl')
    lookup = pickle.load(open(LOOKUP_PATH, 'rb'))
    data_list = lookup['data']
    y = lookup['y']
    labels = lookup['labels']
    nmark = len(labels)

    # event occurence list
    occurred = [x for i, x in enumerate(data_list) if y[i, 1] == 1]
    not_occurred = [x for i, x in enumerate(data_list) if y[i, 1] == 0]
    y1 = y[y[:, 1] == 1]
    y0 = y[y[:, 1] == 0]

    # split the examples randomly into a training (2/3) and test (1/3) cohort
    # both cohorts should contain an equal percentage of censored data
    sep1 = len(y1) / 3
    sep0 = len(y0) / 3

    # include only uncensored data from the training cohort for training CellCnn
    tr_list = occurred[sep1:]
    tr_stime = y1[sep1:, 0].astype(float)

    # transform survival times to [-1, 1] interval by ranking them
    tr_stime = (ss.rankdata(tr_stime) / (0.5 * len(tr_stime))) - 1

    # fit scaler to all training data
    sc = StandardScaler()
    sc.fit(np.vstack(occurred[sep1:] + not_occurred[sep0:]))
    tr_list = [sc.transform(x) for x in tr_list]

    # the test cohort
    validation_list = [
        sc.transform(x) for x in (occurred[:sep1] + not_occurred[:sep0])
    ]
    y_valid = np.vstack([y1[:sep1], y0[:sep0]])

    # cross validation on the training cohort
    nfold = 10
    nfilter = 3

    skf = KFold(len(tr_list), n_folds=nfold, shuffle=True)
    committee = []
    valid_accuracy = []
    accum_w = np.empty((nfilter * nfold, nmark + 2))

    for ifold, (train_index, test_index) in enumerate(skf):
        cv_train_samples = [tr_list[t_idx] for t_idx in train_index]
        cv_test_samples = [tr_list[t_idx] for t_idx in test_index]
        cv_y_train = list(tr_stime[train_index])
        cv_y_test = list(tr_stime[test_index])

        results = train_model(cv_train_samples,
                              cv_y_train,
                              labels,
                              valid_samples=cv_test_samples,
                              valid_phenotypes=cv_y_test,
                              ncell=500,
                              nsubset=200,
                              subset_selection='random',
                              nrun=3,
                              pooling='mean',
                              regression=True,
                              nfilter=nfilter,
                              learning_rate=0.03,
                              momentum=0.9,
                              l2_weight_decay_conv=1e-8,
                              l2_weight_decay_out=1e-8,
                              max_epochs=20,
                              verbose=1,
                              select_filters='best',
                              accur_thres=-1)

        net_dict = results['best_net']

        # update the committee of networks
        committee.append(net_dict)
        valid_accuracy.append(results['best_accuracy'])
        w_tot = param_vector(net_dict, regression=True)

        # add weights to accumulator
        accum_w[ifold * nfilter:(ifold + 1) * nfilter] = w_tot

    save_path = os.path.join(OUTDIR, 'network_committee.pkl')
    with open(save_path, 'wb') as f:
        pickle.dump((committee, valid_accuracy), f, -1)
    '''
    committee, valid_accuracy = pickle.load(open(save_path, 'r'))    
    # retrieve the filter weights
    for ifold, net_dict in enumerate(committee):
        w_tot = param_vector(net_dict, regression=True)
                
        # add weights to accumulator    
        accum_w[ifold*nfilter:(ifold+1)*nfilter] = w_tot
    '''

    # choose the strong signatures (all of them)
    w_strong = accum_w

    # members of each cluster should have cosine similarity > 0.7
    # equivalently, cosine distance < 0.3
    Z = linkage(w_strong, 'average', metric='cosine')
    clusters = fcluster(Z, .3, criterion='distance') - 1

    n_clusters = len(np.unique(clusters))
    print '%d clusters chosen' % (n_clusters)

    # plot the discovered filter profiles
    plt.figure(figsize=(3, 2))
    idx = range(nmark) + [nmark + 1]
    clmap = sns.clustermap(pd.DataFrame(w_strong[:, idx],
                                        columns=labels + ['survival']),
                           method='average',
                           metric='cosine',
                           row_linkage=Z,
                           col_cluster=False,
                           robust=True,
                           yticklabels=clusters)
    clmap.cax.set_visible(False)
    fig_path = os.path.join(OUTDIR, 'HIV_clmap.eps')
    clmap.savefig(fig_path, format='eps')
    plt.close()

    # generate the consensus filter profiles
    c = Counter(clusters)
    cons = []
    for key, val in c.items():
        if val > nfold / 2:
            cons.append(np.mean(w_strong[clusters == key], axis=0))
    cons_mat = np.vstack(cons)

    # plot the consensus filter profiles
    plt.figure(figsize=(10, 3))
    idx = range(nmark) + [nmark + 1]
    ax = sns.heatmap(pd.DataFrame(cons_mat[:, idx],
                                  columns=labels + ['survival']),
                     robust=True,
                     yticklabels=False)
    plt.xticks(rotation=90)
    ax.tick_params(axis='both', which='major', labelsize=20)
    plt.tight_layout()
    fig_path = os.path.join(OUTDIR, 'clmap_consensus.eps')
    plt.savefig(fig_path, format='eps')
    plt.close()

    # create an ensemble of neural networks
    ncell_cons = 3000
    ncell_voter = 3000
    layers_voter = [(layers.InputLayer, {
        'name': 'input',
        'shape': (None, nmark, ncell_voter)
    }),
                    (layers.Conv1DLayer, {
                        'name': 'conv',
                        'num_filters': nfilter,
                        'filter_size': 1
                    }),
                    (layers.Pool1DLayer, {
                        'name': 'meanPool',
                        'pool_size': ncell_voter,
                        'mode': 'average_exc_pad'
                    }),
                    (layers.DenseLayer, {
                        'name': 'output',
                        'num_units': 1,
                        'nonlinearity': T.tanh
                    })]

    # predict on the test cohort
    small_data_list_v = [
        x[:ncell_cons].T.reshape(1, nmark, ncell_cons) for x in validation_list
    ]
    data_v = np.vstack(small_data_list_v)
    stime, censor = y_valid[:, 0], y_valid[:, 1]

    # committee of the best nfold/2 models
    voter_risk_pred = list()
    for ifold in np.argsort(valid_accuracy):
        voter = NeuralNet(layers=layers_voter,
                          update=nesterov_momentum,
                          update_learning_rate=0.001,
                          regression=True,
                          max_epochs=5,
                          verbose=0)
        voter.load_params_from(committee[ifold])
        voter.initialize()
        # rank the risk predictions
        voter_risk_pred.append(ss.rankdata(-np.squeeze(voter.predict(data_v))))
    all_voters = np.vstack(voter_risk_pred)

    # compute mean rank per individual
    risk_p = np.mean(all_voters, axis=0)
    g1 = np.squeeze(risk_p > np.median(risk_p))
    voters_pval_v = logrank_pval(stime, censor, g1)
    fig_v = os.path.join(OUTDIR, 'cellCnn_cox_test.eps')
    plot_KM(stime, censor, g1, voters_pval_v, fig_v)

    # filter-activating cells
    data_t = np.vstack(small_data_list_v)
    data_stack = np.vstack([x for x in np.swapaxes(data_t, 2, 1)])

    # finally define a network from the consensus filters
    nfilter_cons = cons_mat.shape[0]
    ncell_cons = 3000
    layers_cons = [(layers.InputLayer, {
        'name': 'input',
        'shape': (None, nmark, ncell_cons)
    }),
                   (layers.Conv1DLayer, {
                       'name': 'conv',
                       'b': init.Constant(cons_mat[:, -2]),
                       'W': cons_mat[:, :-2].reshape(nfilter_cons, nmark, 1),
                       'num_filters': nfilter_cons,
                       'filter_size': 1
                   }),
                   (layers.Pool1DLayer, {
                       'name': 'meanPool',
                       'pool_size': ncell_cons,
                       'mode': 'average_exc_pad'
                   }),
                   (layers.DenseLayer, {
                       'name': 'output',
                       'num_units': 1,
                       'W': np.sign(cons_mat[:, -1:]),
                       'b': init.Constant(0.),
                       'nonlinearity': T.tanh
                   })]

    net_cons = NeuralNet(layers=layers_cons,
                         update=nesterov_momentum,
                         update_learning_rate=0.001,
                         regression=True,
                         max_epochs=5,
                         verbose=0)
    net_cons.initialize()

    # get the representation after mean pooling
    xs = T.tensor3('xs').astype(theano.config.floatX)
    act_conv = theano.function([xs], lh.get_output(net_cons.layers_['conv'],
                                                   xs))

    # and apply to the test data
    act_tot = act_conv(data_t)
    act_tot = np.swapaxes(act_tot, 2, 1)
    act_stack = np.vstack([x for x in act_tot])
    idx = range(7) + [8, 9]

    for i_map in range(nfilter_cons):
        val = act_stack[:, i_map]
        descending_order = np.argsort(val)[::-1]
        val_cumsum = np.cumsum(val[descending_order])
        data_sorted = data_stack[descending_order]
        thres = 0.75 * val_cumsum[-1]
        res_data = data_sorted[val_cumsum < thres]
        fig_path = os.path.join(OUTDIR, 'filter_' + str(i_map) + '_active.eps')
        plot_marker_distribution([res_data[:, idx], data_stack[:, idx]],
                                 ['filter ' + str(i_map), 'all'],
                                 [labels[l]
                                  for l in idx], (3, 3), fig_path, 24)
Example #23
class LasagneToNolearn(object):
    """This class builds the VGG_CNN_S model from pickled weights and
    biases (vgg_cnn_s.pkl) in Lasagne and converts the model for use in Nolearn.
    Nolearn is a Lasagne wrapper that is used here to facilitate and increase speed
    of vectorizing images.

    VGG_CNN_S is a Convolutional Neural Network (CNN) trained by the Visual
    Geometry Group at Oxford Univeristy. More information on this CNN can be
    found elsewhere:

    The Devil is in the Details: An evaluation of recent feature encoding methods
    K. Chatfield, V. Lempitsky, A. Vedaldi and A. Zisserman, In Proc. BMVC, 2011.
    http://www.robots.ox.ac.uk/~vgg/research/deep_eval/

    vgg_cnn_s.pkl was obtained from the Lasagne Model Zoo:
    https://s3.amazonaws.com/lasagne/recipes/pretrained/imagenet/vgg_cnn_s.pkl
    """
    def __init__(self, path_to_pkl):
        '''
        INPUT: Local path to vgg_cnn_s.pkl
        OUTPUT: None

        Points to the path of the stored weights and biases.
        '''
        self.path_to_pkl = path_to_pkl
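
    # A hedged usage sketch, not part of the original class: the conversion is
    # expected to run in roughly this order, assuming vgg_cnn_s.pkl is present
    # locally.
    #
    #   converter = LasagneToNolearn('vgg_cnn_s.pkl')
    #   converter.lasagne_layers_method()   # build the Lasagne layer dict
    #   converter.build_lasagne()           # load the pickled weights into it
    #   converter.extract_layers()          # pull out W/b arrays per layer
    #   converter.nolearn_layers_method()   # assemble the nolearn layer spec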

    def lasagne_layers_method(self):
        '''
        INPUT: None
        OUTPUT: Dict

        Creates dictionary of vgg_cnn_s model Lasagne layer objects. Here the
        original output layer (softmax, 1000 classes) has been removed and
        the output layer returns a vector of shape (1,4096).
        '''
        # Create dictionary of VGG_CNN_S model layers
        self.lasagne_layers = {}
        self.lasagne_layers['input'] = InputLayer((None, 3, 224, 224))
        self.lasagne_layers['conv1'] = ConvLayer(self.lasagne_layers['input'],
                                                 num_filters=96,
                                                 filter_size=7,
                                                 stride=2,
                                                 flip_filters=False)
        self.lasagne_layers['norm1'] = NormLayer(self.lasagne_layers['conv1'],
                                                 alpha=0.0001)
        self.lasagne_layers['pool1'] = PoolLayer(self.lasagne_layers['norm1'],
                                                 pool_size=3,
                                                 stride=3,
                                                 ignore_border=False)
        self.lasagne_layers['conv2'] = ConvLayer(self.lasagne_layers['pool1'],
                                                 num_filters=256,
                                                 filter_size=5,
                                                 flip_filters=False)
        self.lasagne_layers['pool2'] = PoolLayer(self.lasagne_layers['conv2'],
                                                 pool_size=2,
                                                 stride=2,
                                                 ignore_border=False)
        self.lasagne_layers['conv3'] = ConvLayer(self.lasagne_layers['pool2'],
                                                 num_filters=512,
                                                 filter_size=3,
                                                 pad=1,
                                                 flip_filters=False)
        self.lasagne_layers['conv4'] = ConvLayer(self.lasagne_layers['conv3'],
                                                 num_filters=512,
                                                 filter_size=3,
                                                 pad=1,
                                                 flip_filters=False)
        self.lasagne_layers['conv5'] = ConvLayer(self.lasagne_layers['conv4'],
                                                 num_filters=512,
                                                 filter_size=3,
                                                 pad=1,
                                                 flip_filters=False)
        self.lasagne_layers['pool5'] = PoolLayer(self.lasagne_layers['conv5'],
                                                 pool_size=3,
                                                 stride=3,
                                                 ignore_border=False)
        self.lasagne_layers['fc6'] = DenseLayer(self.lasagne_layers['pool5'],
                                                num_units=4096)
        self.lasagne_layers['drop6'] = DropoutLayer(self.lasagne_layers['fc6'],
                                                    p=0.5)
        self.lasagne_layers['fc7'] = DenseLayer(self.lasagne_layers['drop6'],
                                                num_units=4096)

    def build_lasagne(self):
        '''
        INPUT: None
        OUTPUT: None

        Builds the CNN model using Lasagne and loads the pretrained weights
        into it. Expects lasagne_layers_method() to have been called first.
        '''
        with open(self.path_to_pkl, 'rb') as f:
            model = pickle.load(f)
        output_layer = self.lasagne_layers['fc7']
        self.mean_image = model['mean image']
        lasagne.layers.set_all_param_values(output_layer, model['values'][:14])

    def extract_layers(self):
        '''
        INPUT: None
        OUTPUT: None

        Extracts the relevant layers (weights and biases) from the Lasagne
        model for use with the Nolearn model.
        '''
        self.extracted_layers = {}
        for layer in self.lasagne_layers:
            if layer[:4] != 'drop' and layer != 'input' and \
                layer[:4] != 'pool' and layer[:4] != 'norm':
                self.extracted_layers[layer] = [
                    self.lasagne_layers[layer].W.get_value(),
                    self.lasagne_layers[layer].b.get_value()
                ]

    def nolearn_layers_method(self):
        '''
        INPUT: None
        OUTPUT: None

        Creates list of layers for Nolearn model.
        '''
        self.nolearn_layers = [(InputLayer, {
            'name': 'input',
            'shape': (None, 3, 224, 224)
        }),
                               (ConvLayer, {
                                   'name': 'conv1',
                                   'num_filters': 96,
                                   'filter_size': (7, 7),
                                   'stride': 2,
                                   'flip_filters': False,
                                   'W': self.extracted_layers['conv1'][0],
                                   'b': self.extracted_layers['conv1'][1]
                               }),
                               (NormLayer, {
                                    'name': 'norm1',
                                   'alpha': .0001
                               }),
                               (PoolLayer, {
                                   'name': 'pool1',
                                   'pool_size': (3, 3),
                                   'stride': 3,
                                   'ignore_border': False
                               }),
                               (ConvLayer, {
                                   'name': 'conv2',
                                   'num_filters': 256,
                                   'filter_size': (5, 5),
                                   'flip_filters': False,
                                   'W': self.extracted_layers['conv2'][0],
                                   'b': self.extracted_layers['conv2'][1]
                               }),
                               (PoolLayer, {
                                   'name': 'pool2',
                                   'pool_size': (2, 2),
                                   'stride': 2,
                                   'ignore_border': False
                               }),
                               (ConvLayer, {
                                   'name': 'conv3',
                                   'num_filters': 512,
                                   'filter_size': (3, 3),
                                   'flip_filters': False,
                                   'pad': 1,
                                   'W': self.extracted_layers['conv3'][0],
                                   'b': self.extracted_layers['conv3'][1]
                               }),
                               (ConvLayer, {
                                   'name': 'conv4',
                                   'num_filters': 512,
                                   'filter_size': (3, 3),
                                   'flip_filters': False,
                                   'pad': 1,
                                   'W': self.extracted_layers['conv4'][0],
                                   'b': self.extracted_layers['conv4'][1]
                               }),
                               (ConvLayer, {
                                   'name': 'conv5',
                                   'num_filters': 512,
                                   'filter_size': (3, 3),
                                   'flip_filters': False,
                                   'pad': 1,
                                   'W': self.extracted_layers['conv5'][0],
                                   'b': self.extracted_layers['conv5'][1]
                               }),
                               (PoolLayer, {
                                   'name': 'pool5',
                                   'pool_size': (3, 3),
                                   'stride': 3,
                                   'ignore_border': False
                               }),
                               (DenseLayer, {
                                   'name': 'fc6',
                                   'num_units': 4096,
                                   'W': self.extracted_layers['fc6'][0],
                                   'b': self.extracted_layers['fc6'][1]
                               }), (DropoutLayer, {
                                   'name': 'drop6',
                                   'p': 0.5
                               }),
                               (DenseLayer, {
                                   'name': 'fc7',
                                   'num_units': 4096,
                                   'W': self.extracted_layers['fc7'][0],
                                   'b': self.extracted_layers['fc7'][1]
                               })]

    def build_nolearn(self):
        '''
        INPUT: None
        OUTPUT: None

        Builds CNN model using Nolearn.
        '''
        self.nolearn_layers_method()
        self.nn = NeuralNet(layers=self.nolearn_layers,
                            update=adam,
                            update_learning_rate=0.0002)
        self.nn.initialize()

    def to_pickle(self, path):
        '''
        INPUT: Local path where pickle files will be stored
        OUTPUT: Two pickle files

        Pickles the Nolearn model as well as the mean image.
        '''
        joblib.dump(self.nn,
                    os.path.join(path, 'nolearn_nn.pkl'),  # assumes `import os` at module level
                    compress=9)
        joblib.dump(self.mean_image,
                    os.path.join(path, 'mean_image.pkl'),
                    compress=9)
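
# A hypothetical end-to-end usage of the converter class above. The class name
# VGGConverter and this call order are assumptions, not part of the original
# source; only the methods themselves come from the code above.
converter = VGGConverter('vgg_cnn_s.pkl')
converter.lasagne_layers_method()   # build the Lasagne layer dictionary
converter.build_lasagne()           # load the pretrained weights into it
converter.extract_layers()          # copy W/b arrays out of the trained layers
converter.build_nolearn()           # rebuild the truncated net with Nolearn
converter.to_pickle('pkls')         # persist the Nolearn model and the mean image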
Beispiel #24
0
    input_shape=(None, num_features),
    dense_num_units=64,
    narrow_num_units=48,
    denseReverse1_num_units=64,
    denseReverse2_num_units=128,
    output_num_units=128,

    #input_nonlinearity = None, #nonlinearities.sigmoid,
    #dense_nonlinearity = nonlinearities.tanh,
    narrow_nonlinearity=nonlinearities.softplus,
    #denseReverse1_nonlinearity = nonlinearities.tanh,
    denseReverse2_nonlinearity=nonlinearities.softplus,
    output_nonlinearity=nonlinearities.linear,  #nonlinearities.softmax,

    #dropout0_p=0.1,
    dropout1_p=0.01,
    dropout2_p=0.001,
    regression=True,
    verbose=1)

ae.initialize()
PrintLayerInfo()(ae)

maybe_this_is_a_history = ae.fit(Z, Z)

#learned_parameters = ae.get_all_params_values()
#np.save("task4/learned_parameter.npy", learned_parameters)

#SaveWeights(path='task4/koebi_train_history_AE')(ae, maybe_this_is_a_history)
ae.save_params_to('task4/koebi_train_history_AE2')
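
# A quick reconstruction sanity check could be run on the trained autoencoder
# (a sketch; Z is the training matrix used in the fit call above):
reconstruction = ae.predict(Z)
print 'mean reconstruction error:', np.mean((reconstruction - Z) ** 2)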
Beispiel #25
0
def make_net(
    NFEATS,
    name='hidden1_size',
    dense1_size=60,
    dense1_nonlinearity='tanh',
    dense1_init='orthogonal',
    dense2_size=None,
    dense2_nonlinearity=None,  # inherits dense1
    dense2_init=None,  # inherits dense1
    dense3_size=None,
    dense3_nonlinearity=None,  # inherits dense2
    dense3_init=None,  # inherits dense2
    learning_rate=0.001,
    learning_rate_scaling=100,
    momentum=0.9,
    momentum_scaling=100,
    max_epochs=3000,
    dropout1_rate=None,
    dropout2_rate=None,  # inherits dropout1_rate
    dropout3_rate=None,
    weight_decay=0,
    output_nonlinearity='softmax',
    auto_stopping=True,
    pretrain=False,
    save_snapshots_stepsize=None,
    verbosity=VERBOSITY >= 2,
):
    """
		Create the network with the selected parameters.

		:param name: Name for save files
		:param dense1_size: Number of neurons for first hidden layer
		:param dense1_nonlinearity: The activation function for the first hidden layer
		:param dense1_init: The weight initialization for the first hidden layer
		:param learning_rate: Learning rate at the first epoch (decayed logarithmically afterwards)
		:param learning_rate_scaling: Factor by which the learning rate is divided by the final epoch
		:param momentum: Momentum at the first epoch (increased logarithmically afterwards)
		:param momentum_scaling: Factor by which the gap between momentum and 1 is divided by the final epoch
		:param max_epochs: Total number of epochs (at most)
		:param dropout1_rate: Fraction of connections dropped each step.
		:param weight_decay: Constrain the weights by L2 norm.
		:param auto_stopping: Stop early if the network seems to stop performing well.
		:param pretrain: Filepath of the previous weights to start at (or None).
		:return: The initialized nolearn NeuralNet.
	"""
    """
		Initial arguments checks and defaults.
	"""
    assert dropout1_rate is None or 0 <= dropout1_rate < 1, 'Dropout rate 1 should be a value between 0 and 1'
    assert dropout2_rate is None or 0 <= dropout2_rate < 1, 'Dropout rate 2 should be a value between 0 and 1, or None for inheritance'
    assert dropout3_rate is None or 0 <= dropout3_rate < 1, 'Dropout rate 3 should be a value between 0 and 1, or None for inheritance'
    assert dense1_nonlinearity in nonlinearities.keys(
    ), 'Linearity 1 should be one of "{0}", got "{1}" instead.'.format(
        '", "'.join(nonlinearities.keys()), dense1_nonlinearity)
    assert dense2_nonlinearity in nonlinearities.keys() + [
        None
    ], 'Linearity 2 should be one of "{0}", got "{1}" instead.'.format(
        '", "'.join(nonlinearities.keys()), dense2_nonlinearity)
    assert dense3_nonlinearity in nonlinearities.keys() + [
        None
    ], 'Linearity 3 should be one of "{0}", got "{1}" instead.'.format(
        '", "'.join(nonlinearities.keys()), dense3_nonlinearity)
    assert dense1_init in initializers.keys(
    ), 'Initializer 1 should be one of "{0}", got "{1}" instead.'.format(
        '", "'.join(initializers.keys()), dense1_init)
    assert dense2_init in initializers.keys() + [
        None
    ], 'Initializer 2 should be one of "{0}", got "{1}" instead.'.format(
        '", "'.join(initializers.keys()), dense2_init)
    assert dense3_init in initializers.keys() + [
        None
    ], 'Initializer 3 should be one of "{0}", got "{1}" instead.'.format(
        '", "'.join(initializers.keys()), dense3_init)

    if dense2_nonlinearity is None:
        dense2_nonlinearity = dense1_nonlinearity
    if dense2_init is None:
        dense2_init = dense1_init
    if dense3_nonlinearity is None:
        dense3_nonlinearity = dense2_nonlinearity
    if dense3_init is None:
        dense3_init = dense2_init
    if dropout2_rate is None and dense2_size:
        dropout2_rate = dropout1_rate
    if dropout3_rate is None and dense3_size:
        dropout3_rate = dropout2_rate
    """
		Create the layers and their settings.
	"""
    params = {}
    layers = [
        ('input', InputLayer),
        ('dense1', DenseLayer),
    ]
    if dropout1_rate:
        layers += [('dropout1', DropoutLayer)]
        params['dropout1_p'] = dropout1_rate
    if dense2_size:
        layers += [('dense2', DenseLayer)]
        params.update({
            'dense2_num_units':
            dense2_size,
            'dense2_nonlinearity':
            nonlinearities[dense2_nonlinearity],
            'dense2_W':
            initializers[dense2_init],
            'dense2_b':
            Constant(0.),
        })
    else:
        assert dense3_size is None, 'There cannot be a third dense layer without a second one'
    if dropout2_rate:
        assert dense2_size is not None, 'There cannot be a second dropout layer without a second dense layer.'
        layers += [('dropout2', DropoutLayer)]
        params['dropout2_p'] = dropout2_rate
    if dense3_size:
        layers += [('dense3', DenseLayer)]
        params.update({
            'dense3_num_units':
            dense3_size,
            'dense3_nonlinearity':
            nonlinearities[dense3_nonlinearity],
            'dense3_W':
            initializers[dense3_init],
            'dense3_b':
            Constant(0.),
        })
    if dropout3_rate:
        assert dense3_size is not None, 'There cannot be a third dropout layer without a third dense layer.'
        layers += [('dropout3', DropoutLayer)]
        params['dropout3_p'] = dropout3_rate
    layers += [('output', DenseLayer)]
    """
		Create meta parameters and special handlers.
	"""
    if VERBOSITY >= 3:
        print 'learning rate: {0:.6f} -> {1:.6f}'.format(
            learning_rate, learning_rate / float(learning_rate_scaling))
        print 'momentum:      {0:.6f} -> {1:.6f}'.format(
            momentum, 1 - ((1 - momentum) / float(momentum_scaling)))
    handlers = [
        LogarithmicVariable('update_learning_rate',
                            start=learning_rate,
                            stop=learning_rate / float(learning_rate_scaling)),
        LogarithmicVariable('update_momentum',
                            start=momentum,
                            stop=1 -
                            ((1 - momentum) / float(momentum_scaling))),
        StopNaN(),
    ]
    snapshot_name = 'nn_' + params_name(params, prefix=name)[0]
    if save_snapshots_stepsize:
        handlers += [
            SnapshotStepSaver(every=save_snapshots_stepsize,
                              base_name=snapshot_name),
        ]
    if auto_stopping:
        handlers += [
            StopWhenOverfitting(loss_fraction=0.8, base_name=snapshot_name),
            StopAfterMinimum(patience=40, base_name=name),
        ]
    """
		Create the actual nolearn network with above information.
	"""
    net = NeuralNet(layers=layers,
                    objective=partial(WeightDecayObjective,
                                      weight_decay=weight_decay),
                    input_shape=(None, NFEATS),
                    dense1_num_units=dense1_size,
                    dense1_nonlinearity=nonlinearities[dense1_nonlinearity],
                    dense1_W=initializers[dense1_init],
                    dense1_b=Constant(0.),
                    output_nonlinearity=nonlinearities[output_nonlinearity],
                    output_num_units=NCLASSES,
                    output_W=Orthogonal(),
                    update=nesterov_momentum,
                    update_learning_rate=shared(float32(learning_rate)),
                    update_momentum=shared(float32(momentum)),
                    on_epoch_finished=handlers,
                    regression=False,
                    max_epochs=max_epochs,
                    verbose=verbosity,
                    **params)

    net.initialize()
    """
		Load weights from earlier training (by name, no auto-choosing).
	"""
    if pretrain:
        assert isfile(pretrain), 'Pre-train file "{0:s}" not found'.format(
            pretrain)
        load_knowledge(net, pretrain)

    return net
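
# A hypothetical call of make_net above (a sketch: the feature count and the
# training arrays `train`/`labels` are made up, and the string keys must exist
# in the module-level `nonlinearities` and `initializers` maps this function uses).
net = make_net(
    93,                              # NFEATS: number of input features
    name='example_net',
    dense1_size=256,
    dense1_nonlinearity='tanh',
    dense1_init='orthogonal',
    dense2_size=128,                 # nonlinearity and init inherited from dense1
    dropout1_rate=0.5,
    learning_rate=0.001,
    max_epochs=1000,
)
net.fit(train, labels)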
def make_grnn(batch_size, emb_size, g_hidden_size, word_n,
              wc_num, dence, wsm_num=1, rnn_type='LSTM',
              rnn_size=12, dropout_d=0.5,# pooling='mean',
              quest_na=4, gradient_steps = -1,
              valid_indices=None, lr=0.05, grad_clip=10):

    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)
    
#    dence = dence + [1]
    RNN = select_rnn(rnn_type)
#------------------------------------------------------------------input layers
    layers = [
        (LL.InputLayer, {'name': 'l_in_se_q', 'shape': (None, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_se_a', 'shape': (None, quest_na, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_mask_q', 'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_a', 'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_q', 'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_a', 'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_q', 'shape': (None, word_n, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_a', 'shape': (None, word_n, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_act_', 'shape': (None, word_n, g_hidden_size)}),
        (LL.InputLayer, {'name': 'l_in_act__', 'shape': (None, word_n, word_n, g_hidden_size)}),
    ]
#------------------------------------------------------------------slice layers
#    l_qs = []
#    l_cas = []
    l_ase_names = ['l_ase_{}'.format(i) for i in range(quest_na)]
    l_amask_names = ['l_amask_{}'.format(i) for i in range(quest_na)]
    l_amask_ri_names = ['l_amask_ri_{}'.format(i) for i in range(quest_na)]
    l_awt_names = ['l_awt_{}'.format(i) for i in range(quest_na)]
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_ase_names[i], 'incoming': 'l_in_se_a',
                                        'indices': i, 'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_names[i], 'incoming': 'l_in_mask_a',
                                        'indices': i, 'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_ri_names[i], 'incoming': 'l_in_mask_ri_a',
                                        'indices': i, 'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_awt_names[i], 'incoming': 'l_in_wt_a',
                                        'indices': i, 'axis': 1})])
#-------------------------------------------------------------------GRNN layers
    WC = theano.shared(np.random.randn(wc_num, g_hidden_size, g_hidden_size).astype('float32'))
#    WC = LI.Normal(0.1)
    WSM = theano.shared(np.random.randn(emb_size, g_hidden_size).astype('float32'))
    b = theano.shared(np.ones(g_hidden_size).astype('float32'))
#    b = lasagne.init.Constant(1.0)
    layers.extend([(GRNNLayer, {'name': 'l_q_grnn',
                                'incomings': ['l_in_se_q', 'l_in_mask_q', 'l_in_wt_q', 'l_in_act_', 'l_in_act__'],
                                'emb_size': emb_size, 'hidden_size': g_hidden_size,
                                'word_n': word_n, 'wc_num': wc_num, 'wsm_num': wsm_num,
                                'only_return_final': False,
                                'WC': WC, 'WSM': WSM, 'b': b})])
    l_a_grnns_names = ['l_a_grnn_{}'.format(i) for i in range(quest_na)]
    for i, l_a_grnns_name in enumerate(l_a_grnns_names):
        layers.extend([(GRNNLayer, {'name': l_a_grnns_name,
                                    'incomings': [l_ase_names[i], l_amask_names[i], l_awt_names[i], 'l_in_act_', 'l_in_act__'],
                                    'emb_size': emb_size, 'hidden_size': g_hidden_size,
                                    'word_n': word_n, 'wc_num': wc_num, 'wsm_num': wsm_num,
                                    'only_return_final': False,
                                    'WC': WC, 'WSM': WSM, 'b': b})])
#------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {'name': 'l_qa_concat',
                                     'incomings': ['l_q_grnn'] + l_a_grnns_names})])
    layers.extend([(LL.ConcatLayer, {'name': 'l_qamask_concat',
                                     'incomings': ['l_in_mask_ri_q'] + l_amask_ri_names})])
#--------------------------------------------------------------------RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f', 'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': False, 'only_return_final': True,
                          'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b', 'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': True, 'only_return_final': True,
                          'grad_clipping': grad_clip})])
    layers.extend([(LL.ElemwiseSumLayer, {'name': 'l_qa_rnn_conc',
                                          'incomings': ['l_qa_rnn_f', 'l_qa_rnn_b']})])
##-----------------------------------------------------------------pooling layer
##    l_qa_pool = layers.extend([(LL.ExpressionLayer, {'name': 'l_qa_pool',
##                                                     'incoming': l_qa_rnn_conc,
##                                                     'function': lambda X: X.mean(-1),
##                                                     'output_shape'='auto'})])
#------------------------------------------------------------------dence layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence)]
    if dropout_d:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do' + 'do', 'p': dropout_d})])
    for i, d in enumerate(dence):
        if i < len(dence) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i], 'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence) - 1 and dropout_d:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do', 'p': dropout_d})])


    def loss(x, t):
        return LO.aggregate(LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
#        return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    if isinstance(valid_indices, np.ndarray) or isinstance(valid_indices, list):
        train_split=TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split=TrainSplit(eval_size=valid_indices, stratify=False)
    nnet = NeuralNet(
            y_tensor_type=T.ivector,
            layers=layers,
            update=LU.adagrad,
            update_learning_rate=lr,
#            update_epsilon=1e-7,
            objective_loss_function=loss,
            regression=False,
            verbose=2,
            batch_iterator_train=PermIterator(batch_size=batch_size),
            batch_iterator_test=BatchIterator(batch_size=batch_size/2),
#            batch_iterator_train=BatchIterator(batch_size=batch_size),
#            batch_iterator_test=BatchIterator(batch_size=batch_size),            
            #train_split=TrainSplit(eval_size=eval_size)
            train_split=train_split
        )
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
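
# A minimal smoke-test sketch for make_grnn (assumed usage, not from the original
# source). Recent nolearn versions accept a dict of arrays keyed by input-layer
# name when a network has several InputLayers; that behaviour is assumed here.
# All shapes are small made-up values, and GRNNLayer / PermIterator /
# TrainSplit_indices are the custom classes defined in this module.
import numpy as np
word_n, emb, hid, na, n = 20, 50, 16, 4, 8
net = make_grnn(batch_size=4, emb_size=emb, g_hidden_size=hid, word_n=word_n,
                wc_num=3, dence=[32, 4], valid_indices=0.25)
X = {
    'l_in_se_q': np.zeros((n, word_n, emb), dtype='float32'),
    'l_in_se_a': np.zeros((n, na, word_n, emb), dtype='float32'),
    'l_in_mask_q': np.zeros((n, word_n), dtype='float32'),
    'l_in_mask_a': np.zeros((n, na, word_n), dtype='float32'),
    'l_in_mask_ri_q': np.zeros((n, word_n), dtype='float32'),
    'l_in_mask_ri_a': np.zeros((n, na, word_n), dtype='float32'),
    'l_in_wt_q': np.zeros((n, word_n, word_n), dtype='float32'),
    'l_in_wt_a': np.zeros((n, word_n, na, word_n), dtype='float32'),
    'l_in_act_': np.zeros((n, word_n, hid), dtype='float32'),
    'l_in_act__': np.zeros((n, word_n, word_n, hid), dtype='float32'),
}
y = np.zeros(n, dtype='int32')
net.fit(X, y)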
    ('dropout1', DropoutLayer),
    ('narrow', DenseLayer),
]
encoder = NeuralNet(
    layers=const_layers,
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.975,
    input_shape=(None, num_features),
    dense_num_units=64,
    narrow_num_units=num_encoder,
    narrow_nonlinearity=nonlinearities.softplus,
    regression=True,
)

encoder.initialize()
encoder.load_params_from('task4/koebi_train_history_AE2')

# encode train and test data
x_encoded = encoder.predict(X)
test_encoded = encoder.predict(test_data)
X_plus = np.hstack([X, x_encoded])
test_plus = np.hstack([test_data, test_encoded])

# supervised learning with the encoded data
dynamic_layers = [
    ('input', InputLayer),
    ('dense', DenseLayer),
    ('dropout', DropoutLayer),
    ('dense1', DenseLayer),
    ('dropout1', DropoutLayer),
        #objective_loss_function=binary_crossentropy,
        objective_loss_function=multilabel_objective,
        custom_score=("validation score", lambda x, y: 1 - np.mean(np.abs(x - y))),
        max_epochs=1200,
        #on_epoch_finished=[
        #    AdjustVariable('update_learning_rate', start=0.00001, stop=0.000001),
        #    AdjustVariable('update_momentum', start=0.9, stop=0.999),
        #],
        batch_iterator_train=BatchIterator(batch_size=250),
        #batch_iterator_train = FlipBatchIterator(batch_size=25),
        verbose=2,
        )
    print "Training NN..."
    print datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
    X_offset = np.mean(X_train, axis = 0)
    nnet.initialize()
    layer_info = PrintLayerInfo()
    layer_info(nnet)
    nnet.fit(X_train-X_offset,y_train)

    print "Using trained model to predict"
    print datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
    y_predictions = nnet.predict(X_test-X_offset)

    print datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
    score = 0
    for i,j in zip(y_test,y_predictions):
        temp = []
        for a in j:
            if a == max(j):
                temp.append(1.)
Beispiel #30
0
        'num_units': 4096,
        'W': layer_w_b['fc6'][0],
        'b': layer_w_b['fc6'][1]
    }),
    (DropoutLayer, {
        'name': 'drop6',
        'p': 0.5
    }),
    (DenseLayer, {
        'name': 'fc7',
        'num_units': 4096,
        'W': layer_w_b['fc7'][0],
        'b': layer_w_b['fc7'][1]
    })
]

net0 = NeuralNet(
    layers=layers0,
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    #  regression=True,  # flag to indicate we're dealing with regression problem
    #  max_epochs=400,  # we want to train this many epochs
    verbose=1,
)

#initialize nolearn net
net0.initialize()

#save weights and biases to the file for future use
net0.save_params_to('nolearn_with_w_b.pkl')
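
# The saved parameters could later be restored into a net built from the same
# layer definitions (a sketch; `layers0` is the list defined above):
net_restored = NeuralNet(
    layers=layers0,
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    verbose=1,
)
net_restored.initialize()
net_restored.load_params_from('nolearn_with_w_b.pkl')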
Beispiel #31
0
# prediction set might be too small to calculate a meaningful mean and standard deviation.
X_train_z = zscore(X_train, train_mean, train_sdev) #scipy.stats.mstats.zscore(X_train)
X_validate_z = zscore(X_validate, train_mean, train_sdev)  #scipy.stats.mstats.zscore(X_validate)

#These can be used to check my zscore calculation against scipy's
#print(X_train_z)
#print(scipy.stats.mstats.zscore(X_train))

# Provide our own validation set
def my_split(self, X, y, eval_size):
    return X_train_z,X_validate_z,y_train,y_validate

net0.train_test_split = types.MethodType(my_split, net0)

# Train the network
net0.initialize()
d = extract_weights(net0)
print("D:" + str(len(d)))

#net0.fit(X_train_z,y_train)

# Predict the validation set
pred_y = net0.predict(X_validate_z)

# Display predictions and count the number of incorrect predictions.
species_names = ['setosa','versicolour','virginica']

count = 0
wrong = 0
for element in zip(X_validate,y_validate,pred_y):
    print("Input: sepal length: {}, sepal width: {}, petal length: {}, petal width: {}; Expected: {}; Actual: {}".format(
Beispiel #32
0
def make_memnn(vocab_size,
               cont_sl,
               cont_wl,
               quest_wl,
               answ_wl,
               rnn_size,
               rnn_type='LSTM',
               pool_size=4,
               answ_n=4,
               dence_l=[100],
               dropout=0.5,
               batch_size=16,
               emb_size=50,
               grad_clip=40,
               init_std=0.1,
               num_hops=3,
               rnn_style=False,
               nonlin=LN.softmax,
               init_W=None,
               rng=None,
               art_pool=4,
               lr=0.01,
               mom=0,
               updates=LU.adagrad,
               valid_indices=0.2,
               permute_answ=False,
               permute_cont=False):
    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

#    dence = dence + [1]

    RNN = select_rnn(rnn_type)
    #-----------------------------------------------------------------------weights
    tr_variables = {}
    tr_variables['WQ'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WA'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WC'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WTA'] = theano.shared(
        init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WTC'] = theano.shared(
        init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WAnsw'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))

    #------------------------------------------------------------------input layers
    layers = [(LL.InputLayer, {
        'name': 'l_in_q',
        'shape': (batch_size, 1, quest_wl),
        'input_var': T.itensor3('l_in_q_')
    }),
              (LL.InputLayer, {
                  'name': 'l_in_a',
                  'shape': (batch_size, answ_n, answ_wl),
                  'input_var': T.itensor3('l_in_a_')
              }),
              (LL.InputLayer, {
                  'name': 'l_in_q_pe',
                  'shape': (batch_size, 1, quest_wl, emb_size)
              }),
              (LL.InputLayer, {
                  'name': 'l_in_a_pe',
                  'shape': (batch_size, answ_n, answ_wl, emb_size)
              }),
              (LL.InputLayer, {
                  'name': 'l_in_cont',
                  'shape': (batch_size, cont_sl, cont_wl),
                  'input_var': T.itensor3('l_in_cont_')
              }),
              (LL.InputLayer, {
                  'name': 'l_in_cont_pe',
                  'shape': (batch_size, cont_sl, cont_wl, emb_size)
              })]
    #------------------------------------------------------------------slice layers
    #    l_qs = []
    #    l_cas = []
    l_a_names = ['l_a_{}'.format(i) for i in range(answ_n)]
    l_a_pe_names = ['l_a_pe{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {
            'name': l_a_names[i],
            'incoming': 'l_in_a',
            'indices': slice(i, i + 1),
            'axis': 1
        })])
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {
            'name': l_a_pe_names[i],
            'incoming': 'l_in_a_pe',
            'indices': slice(i, i + 1),
            'axis': 1
        })])
#------------------------------------------------------------------MEMNN layers
#question----------------------------------------------------------------------
    layers.extend([(EncodingFullLayer, {
        'name': 'l_emb_f_q',
        'incomings': ('l_in_q', 'l_in_q_pe'),
        'vocab_size': vocab_size,
        'emb_size': emb_size,
        'W': tr_variables['WQ'],
        'WT': None
    })])

    l_mem_names = ['ls_mem_n2n_{}'.format(i) for i in range(num_hops)]

    layers.extend([(MemoryLayer, {
        'name': l_mem_names[0],
        'incomings': ('l_in_cont', 'l_in_cont_pe', 'l_emb_f_q'),
        'vocab_size': vocab_size,
        'emb_size': emb_size,
        'A': tr_variables['WA'],
        'C': tr_variables['WC'],
        'AT': tr_variables['WTA'],
        'CT': tr_variables['WTC'],
        'nonlin': nonlin
    })])
    for i in range(1, num_hops):
        if i % 2:
            WC, WA = tr_variables['WA'], tr_variables['WC']
            WTC, WTA = tr_variables['WTA'], tr_variables['WTC']
        else:
            WA, WC = tr_variables['WA'], tr_variables['WC']
            WTA, WTC = tr_variables['WTA'], tr_variables['WTC']
        layers.extend([(MemoryLayer, {
            'name':
            l_mem_names[i],
            'incomings': ('l_in_cont', 'l_in_cont_pe', l_mem_names[i - 1]),
            'vocab_size':
            vocab_size,
            'emb_size':
            emb_size,
            'A':
            WA,
            'C':
            WC,
            'AT':
            WTA,
            'CT':
            WTC,
            'nonlin':
            nonlin
        })])
#answers-----------------------------------------------------------------------
    l_emb_f_a_names = ['l_emb_f_a{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(EncodingFullLayer, {
            'name': l_emb_f_a_names[i],
            'incomings': (l_a_names[i], l_a_pe_names[i]),
            'vocab_size': vocab_size,
            'emb_size': emb_size,
            'W': tr_variables['WAnsw'],
            'WT': None
        })])
#------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {
        'name': 'l_qma_concat',
        'incomings': l_mem_names + l_emb_f_a_names
    })])
    #--------------------------------------------------------------------RNN layers
    layers.extend([(
        RNN,
        {
            'name': 'l_qa_rnn_f',
            'incoming': 'l_qma_concat',
            #                          'mask_input': 'l_qamask_concat',
            'num_units': rnn_size,
            'backwards': False,
            'only_return_final': False,
            'grad_clipping': grad_clip
        })])
    layers.extend([(
        RNN,
        {
            'name': 'l_qa_rnn_b',
            'incoming': 'l_qma_concat',
            #                          'mask_input': 'l_qamask_concat',
            'num_units': rnn_size,
            'backwards': True,
            'only_return_final': False,
            'grad_clipping': grad_clip
        })])

    layers.extend([(LL.SliceLayer, {
        'name': 'l_qa_rnn_f_sl',
        'incoming': 'l_qa_rnn_f',
        'indices': slice(-answ_n, None),
        'axis': 1
    })])
    layers.extend([(LL.SliceLayer, {
        'name': 'l_qa_rnn_b_sl',
        'incoming': 'l_qa_rnn_b',
        'indices': slice(-answ_n, None),
        'axis': 1
    })])

    layers.extend([(LL.ElemwiseMergeLayer, {
        'name': 'l_qa_rnn_conc',
        'incomings': ('l_qa_rnn_f_sl', 'l_qa_rnn_b_sl'),
        'merge_function': T.add
    })])
    #-----------------------------------------------------------------pooling layer
    #    layers.extend([(LL.DimshuffleLayer, {'name': 'l_qa_rnn_conc_',
    #                                         'incoming': 'l_qa_rnn_conc', 'pattern': (0, 'x', 1)})])
    layers.extend([(LL.Pool1DLayer, {
        'name': 'l_qa_pool',
        'incoming': 'l_qa_rnn_conc',
        'pool_size': pool_size,
        'mode': 'max'
    })])
    #------------------------------------------------------------------dence layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence_l)]
    if dropout:
        layers.extend([(LL.DropoutLayer, {
            'name': 'l_dence_do',
            'p': dropout
        })])
    for i, d in enumerate(dence_l):
        if i < len(dence_l) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {
            'name': l_dence_names[i],
            'num_units': d,
            'nonlinearity': nonlin
        })])
        if i < len(dence_l) - 1 and dropout:
            layers.extend([(LL.DropoutLayer, {
                'name': l_dence_names[i] + 'do',
                'p': dropout
            })])

    if isinstance(valid_indices, np.ndarray) or isinstance(
            valid_indices, list):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    if permute_answ or permute_cont:
        batch_iterator_train = PermIterator(batch_size, permute_answ,
                                            permute_cont)
    else:
        batch_iterator_train = BatchIterator(batch_size=batch_size)

    def loss(x, t):
        return LO.aggregate(
            LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
#        return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=updates,
        update_learning_rate=lr,
        #            update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=batch_iterator_train,
        batch_iterator_test=BatchIterator(batch_size=batch_size / 2),
        #            batch_iterator_train=BatchIterator(batch_size=batch_size),
        #            batch_iterator_test=BatchIterator(batch_size=batch_size),
        #train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split,
        on_batch_finished=[zero_memnn])
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
Beispiel #33
0
]

net1 = NeuralNet(
    layers=layers1,
    update_learning_rate=0.01,
    verbose=2,
)

# To see information about the capacity and coverage of each layer,
# we need to set the verbosity of the net to a value of 2 and
# then initialize the net. We next pass the initialized net to PrintLayerInfo
# to see some useful information. By the way, we could also just call the
# fit method of the net to get the same outcome, but since we don't want
# to fit just now, we proceed as shown below.

net1.initialize()
layer_info = PrintLayerInfo()
layer_info(net1)
# This net is fine. The capacity never falls below 1/6, which would be 16.7%,
# and the coverage of the image never exceeds 100%. However,
# with only 4 convolutional layers, this net is not very deep and will
# probably not achieve the best possible results.

# if we use max pooling too often, the coverage will quickly
# exceed 100% and we cannot go sufficiently deep.

# Too little maxpooling
layers2 = [
    (InputLayer, {'shape': (None, 1, 28, 28)}),

    (Conv2DLayer, {'num_filters': 32, 'filter_size': (3, 3)}),
Beispiel #34
0
        conv2d8_nonlinearity=lasagne.nonlinearities.rectify,
        conv2d8_W=W[7],

        #output_nonlinearity=lasagne.nonlinearities.softmax,#,  # output layer uses identity function
        #output_num_units=1000,  # 1000 target values
        #output_W = W[7],

        # optimization method params
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,
        max_epochs=10,
        verbose=1,
        regression=True)
    for i, w in enumerate(W):
        print i, w.shape

    net1.initialize()
    import cv2
    from training_images import simpleProcessImage
    img = cv2.imread(
        "/home/simon/python/sklearn-theano/sklearn_theano/datasets/images/cat_and_dog.jpg"
    )

    crop = simpleProcessImage(img)
    cv2.imshow("X", crop)
    res = net1.predict(crop.transpose(2, 0, 1).reshape(-1, 3, 231, 231))
    print res

    cv2.waitKey()
Beispiel #35
0
    #on_training_finished = None,
    verbose=bool(VERBOSITY),
    input_shape=(None, train.shape[1]),
    output_num_units=NCLASSES,
    dense1_num_units=500,
    dense2_num_units=500,
    dense3_num_units=400,
    dense1_nonlinearity=LeakyRectify(leakiness=0.1),
    dense2_nonlinearity=LeakyRectify(leakiness=0.1),
    dense3_nonlinearity=LeakyRectify(leakiness=0.1),
    output_nonlinearity=softmax,
    dense1_W=HeUniform(),
    dense2_W=HeUniform(),
    dense3_W=HeUniform(),
    dense1_b=Constant(0.),
    dense2_b=Constant(0.),
    dense3_b=Constant(0.),
    output_b=Constant(0.),
    dropout0_p=0.1,
    dropout1_p=0.6,
    dropout2_p=0.6,
    dropout3_p=0.6,
    update_learning_rate=shared(float32(0.02)),  #
    update_momentum=shared(float32(0.9)),  #
    batch_iterator_train=BatchIterator(batch_size=128),
    batch_iterator_test=BatchIterator(batch_size=128),
)
net.initialize()

net.fit(train, labels)
Beispiel #36
0
def main():
       
    seed = 12345
    np.random.seed(seed)
    set_lasagne_rng(RandomState(seed))
     
    LOOKUP_PATH = os.path.join(WDIR, 'data', 'HIV.pkl')
    lookup =  pickle.load(open(LOOKUP_PATH, 'rb'))
    data_list = lookup['data']
    y = lookup['y']
    labels = lookup['labels']
    nmark = len(labels)
    
    # event occurrence list
    occurred = [x for i, x in enumerate(data_list) if y[i,1] == 1]
    not_occurred = [x for i, x in enumerate(data_list) if y[i,1] == 0]
    y1 = y[y[:,1] == 1]
    y0 = y[y[:,1] == 0]
    
    # split the examples randomly into a training (2/3) and test (1/3) cohort
    # both cohorts should contain an equal percentage of censored data
    sep1 = len(y1) / 3
    sep0 = len(y0) / 3
        
    # include only uncensored data from the training cohort for training CellCnn
    tr_list = occurred[sep1:]
    tr_stime = y1[sep1:,0].astype(float)
            
    # transform survival times to [-1, 1] interval by ranking them
    tr_stime = (ss.rankdata(tr_stime) / (0.5 * len(tr_stime))) - 1
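    # e.g. with 4 samples, ranks 1..4 map to r/(0.5*4) - 1 = -0.5, 0.0, 0.5, 1.0,
    # so the longest survival time gets +1 and shorter times move toward -1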
                
    # fit scaler to all training data
    sc = StandardScaler()
    sc.fit(np.vstack(occurred[sep1:] + not_occurred[sep0:]))
    tr_list = [sc.transform(x) for x in tr_list]
            
    # the test cohort
    validation_list = [sc.transform(x) for x in (occurred[:sep1] + not_occurred[:sep0])]
    y_valid = np.vstack([y1[:sep1], y0[:sep0]])
    
    # cross validation on the training cohort    
    nfold = 10
    nfilter = 3
           
    skf = KFold(len(tr_list), n_folds=nfold, shuffle=True)
    committee = []
    valid_accuracy = []
    accum_w = np.empty((nfilter * nfold, nmark+2))
    
    for ifold, (train_index, test_index) in enumerate(skf):
        cv_train_samples = [tr_list[t_idx] for t_idx in train_index]
        cv_test_samples = [tr_list[t_idx] for t_idx in test_index]
        cv_y_train = list(tr_stime[train_index])
        cv_y_test = list(tr_stime[test_index])
        
        results = train_model(cv_train_samples, cv_y_train, labels,
                                valid_samples=cv_test_samples, valid_phenotypes=cv_y_test, 
                                ncell=500, nsubset=200, subset_selection='random',
                                nrun=3, pooling='mean', regression=True, nfilter=nfilter,
                                learning_rate=0.03, momentum=0.9, l2_weight_decay_conv=1e-8,
                                l2_weight_decay_out=1e-8, max_epochs=20, verbose=1,
                                select_filters='best', accur_thres=-1)
            
        net_dict = results['best_net']
            
        # update the committee of networks        
        committee.append(net_dict)
        valid_accuracy.append(results['best_accuracy'])
        w_tot = param_vector(net_dict, regression=True)
                
        # add weights to accumulator    
        accum_w[ifold*nfilter:(ifold+1)*nfilter] = w_tot
         
    save_path = os.path.join(OUTDIR, 'network_committee.pkl')
    with open(save_path, 'wb') as f:
        pickle.dump((committee, valid_accuracy), f, -1)    
        
    '''
    committee, valid_accuracy = pickle.load(open(save_path, 'r'))    
    # retrieve the filter weights
    for ifold, net_dict in enumerate(committee):
        w_tot = param_vector(net_dict, regression=True)
                
        # add weights to accumulator    
        accum_w[ifold*nfilter:(ifold+1)*nfilter] = w_tot
    '''    
    
    # choose the strong signatures (all of them)
    w_strong = accum_w
    
    # members of each cluster should have cosine similarity > 0.7 
    # equivalently, cosine distance < 0.3
    Z = linkage(w_strong, 'average', metric='cosine')
    clusters = fcluster(Z, .3, criterion='distance') - 1
        
    n_clusters = len(np.unique(clusters))
    print '%d clusters chosen' % (n_clusters)   
            
    # plot the discovered filter profiles
    plt.figure(figsize=(3,2))
    idx = range(nmark) + [nmark+1]
    clmap = sns.clustermap(pd.DataFrame(w_strong[:,idx], columns=labels+['survival']),
                                method='average', metric='cosine', row_linkage=Z,
                                col_cluster=False, robust=True, yticklabels=clusters)
    clmap.cax.set_visible(False)
    fig_path = os.path.join(OUTDIR, 'HIV_clmap.eps')
    clmap.savefig(fig_path, format='eps')
    plt.close()
        
        
    # generate the consensus filter profiles
    c = Counter(clusters)
    cons = []
    for key, val in c.items():
        if val > nfold/2:
            cons.append(np.mean(w_strong[clusters == key], axis=0))
    cons_mat = np.vstack(cons)
        
    # plot the consensus filter profiles
    plt.figure(figsize=(10, 3))
    idx = range(nmark) + [nmark+1]
    ax = sns.heatmap(pd.DataFrame(cons_mat[:,idx], columns=labels + ['survival']),
                            robust=True, yticklabels=False)
    plt.xticks(rotation=90)
    ax.tick_params(axis='both', which='major', labelsize=20)
    plt.tight_layout()
    fig_path = os.path.join(OUTDIR, 'clmap_consensus.eps')
    plt.savefig(fig_path, format='eps')
    plt.close()
       
    # create an ensemble of neural networks
    ncell_cons = 3000
    ncell_voter = 3000
    layers_voter = [
                    (layers.InputLayer, {'name': 'input', 'shape': (None, nmark, ncell_voter)}),
                    (layers.Conv1DLayer, {'name': 'conv', 
                                        'num_filters': nfilter, 'filter_size': 1}),
                    (layers.Pool1DLayer, {'name': 'meanPool', 'pool_size' : ncell_voter,
                                        'mode': 'average_exc_pad'}),
                    (layers.DenseLayer, {'name': 'output',
                                        'num_units': 1,
                                        'nonlinearity': T.tanh})]
             
    # predict on the test cohort
    small_data_list_v = [x[:ncell_cons].T.reshape(1,nmark,ncell_cons) for x in validation_list]
    data_v = np.vstack(small_data_list_v)
    stime, censor = y_valid[:,0], y_valid[:,1]
    
    # committee of the best nfold/2 models
    voter_risk_pred = list()
    for ifold in np.argsort(valid_accuracy):
        voter = NeuralNet(layers = layers_voter,                
                                    update = nesterov_momentum,
                                    update_learning_rate = 0.001,
                                    regression=True,
                                    max_epochs=5,
                                    verbose=0)
        voter.load_params_from(committee[ifold])
        voter.initialize()
        # rank the risk predictions
        voter_risk_pred.append(ss.rankdata(- np.squeeze(voter.predict(data_v))))
    all_voters = np.vstack(voter_risk_pred)
                
    # compute mean rank per individual
    risk_p = np.mean(all_voters, axis=0)
    g1 = np.squeeze(risk_p > np.median(risk_p))
    voters_pval_v = logrank_pval(stime, censor, g1)
    fig_v = os.path.join(OUTDIR, 'cellCnn_cox_test.eps')
    plot_KM(stime, censor, g1, voters_pval_v, fig_v)

    # filter-activating cells
    data_t = np.vstack(small_data_list_v)
    data_stack = np.vstack([x for x in np.swapaxes(data_t, 2, 1)])
                
    # finally define a network from the consensus filters
    nfilter_cons = cons_mat.shape[0]
    ncell_cons = 3000
    layers_cons = [
                    (layers.InputLayer, {'name': 'input', 'shape': (None, nmark, ncell_cons)}),
                    (layers.Conv1DLayer, {'name': 'conv', 
                                        'b': init.Constant(cons_mat[:,-2]),
                                        'W': cons_mat[:,:-2].reshape(nfilter_cons, nmark, 1),
                                        'num_filters': nfilter_cons, 'filter_size': 1}),
                    (layers.Pool1DLayer, {'name': 'meanPool', 'pool_size' : ncell_cons,
                                        'mode': 'average_exc_pad'}),
                    (layers.DenseLayer, {'name': 'output',
                                        'num_units': 1,
                                        'W': np.sign(cons_mat[:,-1:]),
                                        'b': init.Constant(0.),
                                        'nonlinearity': T.tanh})]
            
    net_cons = NeuralNet(layers=layers_cons,
                         update=nesterov_momentum,
                         update_learning_rate=0.001,
                         regression=True,
                         max_epochs=5,
                         verbose=0)
    net_cons.initialize()

    # compile a Theano function returning the per-cell filter activations
    # (output of the convolutional layer, before mean pooling)
    xs = T.tensor3('xs').astype(theano.config.floatX)
    act_conv = theano.function([xs], lh.get_output(net_cons.layers_['conv'], xs)) 
    
    # and apply to the test data
    act_tot = act_conv(data_t)
    act_tot = np.swapaxes(act_tot, 2, 1)
    act_stack = np.vstack([x for x in act_tot])
    idx = list(range(7)) + [8, 9]
                
    for i_map in range(nfilter_cons):
        val = act_stack[:, i_map]
        descending_order = np.argsort(val)[::-1]
        val_cumsum = np.cumsum(val[descending_order])
        data_sorted = data_stack[descending_order]
        thres = 0.75 * val_cumsum[-1]
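        # keep the cells that together account for 75% of this filter's total activation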
        res_data = data_sorted[val_cumsum < thres] 
        fig_path = os.path.join(OUTDIR, 'filter_'+str(i_map)+'_active.eps')       
        plot_marker_distribution([res_data[:,idx], data_stack[:,idx]],
                                            ['filter '+str(i_map), 'all'],
                                            [labels[l] for l in idx],
                                            (3,3), fig_path, 24)
def make_memnn(vocab_size, cont_sl,
               cont_wl, quest_wl,
               answ_wl, rnn_size, rnn_type='LSTM', pool_size=4,
               answ_n=4, dence_l=[100], dropout=0.5,
               batch_size=16, emb_size=50, grad_clip=40, init_std=0.1,
               num_hops=3, rnn_style=False, nonlin=LN.softmax,
               init_W=None, rng=None, art_pool=4,
               lr=0.01, mom=0, updates=LU.adagrad, valid_indices=0.2,
               permute_answ=False, permute_cont=False):

    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)
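    # note: unrecognised rnn_type values silently fall back to LL.LSTMLayer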
    
#    dence = dence + [1]
    RNN = select_rnn(rnn_type)
#-----------------------------------------------------------------------weights
    tr_variables = {}
    tr_variables['WQ'] = theano.shared(init_std*np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WA'] = theano.shared(init_std*np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WC'] = theano.shared(init_std*np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WTA'] = theano.shared(init_std*np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WTC'] = theano.shared(init_std*np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WAnsw'] = theano.shared(init_std*np.random.randn(vocab_size, emb_size).astype('float32'))

#------------------------------------------------------------------input layers
    layers = [
        (LL.InputLayer, {'name': 'l_in_q', 'shape': (batch_size, 1, quest_wl), 'input_var': T.itensor3('l_in_q_')}),
        (LL.InputLayer, {'name': 'l_in_a', 'shape': (batch_size, answ_n, answ_wl), 'input_var': T.itensor3('l_in_a_')}),
        (LL.InputLayer, {'name': 'l_in_q_pe', 'shape': (batch_size, 1, quest_wl, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_a_pe', 'shape': (batch_size, answ_n, answ_wl, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_cont', 'shape': (batch_size, cont_sl, cont_wl), 'input_var': T.itensor3('l_in_cont_')}),
        (LL.InputLayer, {'name': 'l_in_cont_pe', 'shape': (batch_size, cont_sl, cont_wl, emb_size)})
    ]
#------------------------------------------------------------------slice layers
#    l_qs = []
#    l_cas = []
    l_a_names = ['l_a_{}'.format(i) for i in range(answ_n)]
    l_a_pe_names = ['l_a_pe{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_names[i], 'incoming': 'l_in_a',
                                        'indices': slice(i, i+1), 'axis': 1})])
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_pe_names[i], 'incoming': 'l_in_a_pe',
                                        'indices': slice(i, i+1), 'axis': 1})])
#------------------------------------------------------------------MEMNN layers
#question----------------------------------------------------------------------
    layers.extend([(EncodingFullLayer, {'name': 'l_emb_f_q', 'incomings': ('l_in_q', 'l_in_q_pe'),
                                        'vocab_size': vocab_size, 'emb_size': emb_size,
                                        'W': tr_variables['WQ'], 'WT': None})])

    l_mem_names = ['ls_mem_n2n_{}'.format(i) for i in range(num_hops)]

    layers.extend([(MemoryLayer, {'name': l_mem_names[0],
                                  'incomings': ('l_in_cont', 'l_in_cont_pe', 'l_emb_f_q'),
                                  'vocab_size': vocab_size, 'emb_size': emb_size,
                                  'A': tr_variables['WA'], 'C': tr_variables['WC'],
                                  'AT': tr_variables['WTA'], 'CT': tr_variables['WTC'], 'nonlin': nonlin})])
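    # later hops: odd-numbered hops swap the roles of the A and C embedding
    # matrices (an alternating weight-sharing scheme across hops)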
    for i in range(1, num_hops):
        if i%2:
            WC, WA = tr_variables['WA'], tr_variables['WC']
            WTC, WTA = tr_variables['WTA'], tr_variables['WTC']
        else:
            WA, WC = tr_variables['WA'], tr_variables['WC']
            WTA, WTC = tr_variables['WTA'], tr_variables['WTC']
        layers.extend([(MemoryLayer, {'name': l_mem_names[i],
                                      'incomings': ('l_in_cont', 'l_in_cont_pe', l_mem_names[i-1]),
                                      'vocab_size': vocab_size, 'emb_size': emb_size,
                                      'A': WA, 'C': WC, 'AT': WTA, 'CT': WTC, 'nonlin': nonlin})])
#answers-----------------------------------------------------------------------
    l_emb_f_a_names = ['l_emb_f_a{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(EncodingFullLayer, {'name': l_emb_f_a_names[i], 'incomings': (l_a_names[i], l_a_pe_names[i]),
                                            'vocab_size': vocab_size, 'emb_size': emb_size,
                                            'W': tr_variables['WAnsw'], 'WT': None})])
#------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {'name': 'l_qma_concat',
                                     'incomings': l_mem_names + l_emb_f_a_names})])
#--------------------------------------------------------------------RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f', 'incoming': 'l_qma_concat',
#                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': False, 'only_return_final': False,
                          'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b', 'incoming': 'l_qma_concat',
#                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': True, 'only_return_final': False,
                          'grad_clipping': grad_clip})])

    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_f_sl', 'incoming': 'l_qa_rnn_f',
                                    'indices': slice(-answ_n, None), 'axis': 1})])
    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_b_sl', 'incoming': 'l_qa_rnn_b',
                                    'indices': slice(-answ_n, None), 'axis': 1})])

    layers.extend([(LL.ElemwiseMergeLayer, {'name': 'l_qa_rnn_conc',
                                            'incomings': ('l_qa_rnn_f_sl', 'l_qa_rnn_b_sl'),
                                            'merge_function': T.add})])
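    # the forward and backward RNN outputs are combined by elementwise addition
    # rather than concatenation, so the feature dimension stays at rnn_size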
#-----------------------------------------------------------------pooling layer
#    layers.extend([(LL.DimshuffleLayer, {'name': 'l_qa_rnn_conc_',
#                                         'incoming': 'l_qa_rnn_conc', 'pattern': (0, 'x', 1)})])
    layers.extend([(LL.Pool1DLayer, {'name': 'l_qa_pool',
                                     'incoming': 'l_qa_rnn_conc',
                                     'pool_size': pool_size, 'mode': 'max'})])
#------------------------------------------------------------------dense layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence_l)]
    if dropout:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do', 'p': dropout})])
    for i, d in enumerate(dence_l):
        if i < len(dence_l) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i], 'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence_l) - 1 and dropout:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do', 'p': dropout})])

    # an array/list of indices selects an explicit validation set,
    # otherwise the value is treated as a hold-out fraction
    if isinstance(valid_indices, (np.ndarray, list)):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    if permute_answ or permute_cont:
        batch_iterator_train = PermIterator(batch_size, permute_answ, permute_cont)
    else:
        batch_iterator_train = BatchIterator(batch_size=batch_size)

    def loss(x, t):
        return LO.aggregate(LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
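        # predictions are clipped away from 0 and 1 so the log inside the
        # cross-entropy stays numerically stable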
#        return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    nnet = NeuralNet(
            y_tensor_type=T.ivector,
            layers=layers,
            update=updates,
            update_learning_rate=lr,
#            update_epsilon=1e-7,
            objective_loss_function=loss,
            regression=False,
            verbose=2,
            batch_iterator_train=batch_iterator_train,
            batch_iterator_test=BatchIterator(batch_size=batch_size // 2),
#            batch_iterator_train=BatchIterator(batch_size=batch_size),
#            batch_iterator_test=BatchIterator(batch_size=batch_size),            
            #train_split=TrainSplit(eval_size=eval_size)
            train_split=train_split,
            on_batch_finished=[zero_memnn]
        )
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
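# A minimal, hypothetical usage sketch (all sizes below are made up and the call
# assumes the custom pieces used above -- EncodingFullLayer, MemoryLayer,
# TrainSplit_indices, PermIterator, zero_memnn -- are available in this scope;
# the final entry of dence_l sets the number of output classes):
#
#   memnn = make_memnn(vocab_size=10000, cont_sl=20, cont_wl=30, quest_wl=30,
#                      answ_wl=10, rnn_size=64, rnn_type='GRU', answ_n=4,
#                      dence_l=[100, 4], num_hops=3, batch_size=16,
#                      emb_size=50, lr=0.01, valid_indices=0.2)
#   # X is a dict keyed by the input-layer names, y an int32 vector of answer ids
#   memnn.fit(X_train, y_train)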
Beispiel #38
0
    coutput_num_units=10,

    #input_nonlinearity = None, #nonlinearities.sigmoid,
    #dense_nonlinearity = nonlinearities.tanh,
    narrow_nonlinearity=nonlinearities.softplus,
    reverse_nonlinearity=nonlinearities.sigmoid,
    coutput_nonlinearity=nonlinearities.softmax,

    #dropout0_p=0.1,
    dropout1_p=0.01,

    #regression=True,
    regression=False,
    verbose=1)

nn.initialize()

nn.load_params_from('task4/koebi_train_history_AE')

PrintLayerInfo()(nn)

nn.fit(X, Y)

test = pd.read_hdf("task4/test.h5", "test")
id_col = test.index
test_data = np.array(test)
test_data = skpre.StandardScaler().fit_transform(test_data)
test_prediction = nn.predict(test_data)

# Write each prediction and its line number into a CSV file
with open('task4/' + result_file_name + '.csv', 'wb') as csvfile: