예제 #1
0
    def __init__(
        self,
        conv_activations,
        num_channels,
        image_shape,
        filter_sizes,
        feature_maps,
        pooling_sizes,
        top_mlp_activations,
        top_mlp_dims,
        conv_step=None,
        border_mode="valid",
        **kwargs
    ):
        """Assemble a batch-normalized LeNet: conv sequence, flattener, top MLP.

        Parameters
        ----------
        conv_activations : iterable
            Items interleaved after each batch-normalization brick
            (presumably activation bricks, one per convolution).
        num_channels
            Forwarded to ``ConvolutionalSequence`` as its second argument.
        image_shape
            Forwarded to ``ConvolutionalSequence`` as ``image_size``.
        filter_sizes, feature_maps : iterables
            Zipped pairwise; each pair supplies ``filter_size`` and
            ``num_filters`` of one ``Convolutional`` brick.
        pooling_sizes : iterable
            One entry per ``MaxPooling`` brick.
        top_mlp_activations, top_mlp_dims
            Forwarded to ``MLP``.
        conv_step : optional
            Step shared by all convolutions; ``(1, 1)`` when ``None``.
        border_mode : str
            Border mode shared by all convolutions.
        **kwargs
            Forwarded to the parent constructor.
        """
        self.conv_step = (1, 1) if conv_step is None else conv_step
        self.num_channels = num_channels
        self.image_shape = image_shape
        self.top_mlp_activations = top_mlp_activations
        self.top_mlp_dims = top_mlp_dims
        self.border_mode = border_mode

        # Materialize the pairs: they are needed for both the convolution and
        # the batch-normalization bricks, and a bare zip() is a one-shot
        # iterator on Python 3.
        conv_parameters = list(zip(filter_sizes, feature_maps))

        # Construct convolutional, batch-normalization and pooling layers with
        # corresponding parameters.
        self.convolution_layer = [
            Convolutional(
                filter_size=filter_size,
                num_filters=num_filter,
                step=self.conv_step,
                border_mode=self.border_mode,
                name="conv_{}".format(i),
            )
            for i, (filter_size, num_filter) in enumerate(conv_parameters)
        ]

        # One batch normalization per convolution; only the index goes into
        # the brick name.
        self.BN_layer = [BatchNormalization(name="bn_conv_{}".format(i)) for i in range(len(conv_parameters))]

        self.pooling_layer = [MaxPooling(size, name="pool_{}".format(i)) for i, size in enumerate(pooling_sizes)]

        # Resulting order: conv_0, bn_conv_0, activation_0, pool_0, conv_1, ...
        self.layers = list(interleave([self.convolution_layer, self.BN_layer, conv_activations, self.pooling_layer]))

        self.conv_sequence = ConvolutionalSequence(self.layers, num_channels, image_size=image_shape)

        # Construct a top MLP
        self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)

        # Construct a top batch normalized MLP
        # mlp_class = BatchNormalizedMLP
        # extra_kwargs = {'conserve_memory': False}
        # self.top_mlp = mlp_class(top_mlp_activations, top_mlp_dims, **extra_kwargs)

        # We need to flatten the output of the last convolutional layer.
        # This brick accepts a tensor of dimension (batch_size, ...) and
        # returns a matrix (batch_size, features)
        self.flattener = Flattener()
        application_methods = [self.conv_sequence.apply, self.flattener.apply, self.top_mlp.apply]
        super(LeNet, self).__init__(application_methods, **kwargs)
예제 #2
0
    def __init__(self,
                 conv_activations,
                 num_channels,
                 image_shape,
                 filter_sizes,
                 feature_maps,
                 pooling_sizes,
                 top_mlp_activations,
                 top_mlp_dims,
                 conv_step=None,
                 border_mode='valid',
                 **kwargs):
        """Build the convolutional sequence, flattener and top MLP.

        Each convolution is followed by a batch normalization, one item of
        ``conv_activations`` and a max pooling.

        Parameters
        ----------
        conv_activations : iterable
            Items interleaved after each batch-normalization brick
            (presumably activation bricks).
        num_channels
            Second positional argument of ``ConvolutionalSequence``.
        image_shape
            Passed to ``ConvolutionalSequence`` as ``image_size``.
        filter_sizes, feature_maps : iterables
            Zipped pairwise into ``filter_size`` / ``num_filters`` of each
            ``Convolutional`` brick.
        pooling_sizes : iterable
            One entry per ``MaxPooling`` brick.
        top_mlp_activations, top_mlp_dims
            Passed to ``MLP``.
        conv_step : optional
            Step used by every convolution; defaults to ``(1, 1)``.
        border_mode : str
            Border mode used by every convolution.
        **kwargs
            Passed on to the parent constructor.
        """
        self.conv_step = (1, 1) if conv_step is None else conv_step
        self.num_channels = num_channels
        self.image_shape = image_shape
        self.top_mlp_activations = top_mlp_activations
        self.top_mlp_dims = top_mlp_dims
        self.border_mode = border_mode

        # A list, not a bare zip(): the pairs are used for two groups of
        # bricks, and on Python 3 a zip object can only be iterated once.
        conv_parameters = list(zip(filter_sizes, feature_maps))

        # Construct convolutional, batch-normalization and pooling layers
        # with corresponding parameters
        self.convolution_layer = [
            Convolutional(filter_size=filter_size,
                          num_filters=num_filter,
                          step=self.conv_step,
                          border_mode=self.border_mode,
                          name='conv_{}'.format(i))
            for i, (filter_size, num_filter) in enumerate(conv_parameters)
        ]

        # Only the layer index is needed to name the normalization bricks.
        self.BN_layer = [
            BatchNormalization(name='bn_conv_{}'.format(i))
            for i in range(len(conv_parameters))
        ]

        self.pooling_layer = [
            MaxPooling(size, name='pool_{}'.format(i))
            for i, size in enumerate(pooling_sizes)
        ]

        self.layers = list(
            interleave([
                self.convolution_layer, self.BN_layer, conv_activations,
                self.pooling_layer
            ]))

        self.conv_sequence = ConvolutionalSequence(self.layers,
                                                   num_channels,
                                                   image_size=image_shape)

        # Construct a top MLP
        self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)

        # We need to flatten the output of the last convolutional layer.
        # This brick accepts a tensor of dimension (batch_size, ...) and
        # returns a matrix (batch_size, features)
        self.flattener = Flattener()
        application_methods = [
            self.conv_sequence.apply, self.flattener.apply, self.top_mlp.apply
        ]
        super(LeNet, self).__init__(application_methods, **kwargs)
    def __init__(
        self,
        conv_activations,
        num_channels,
        image_shape,
        filter_sizes,
        feature_maps,
        conv_steps,
        pooling_sizes,
        top_mlp_activations,
        top_mlp_dims,
        border_mode="valid",
        **kwargs
    ):
        """Build a LeNet whose input passes through spatial batch normalization.

        The convolutional sequence is a ``SpatialBatchNormalization`` brick
        followed by interleaved convolution / ``conv_activations`` item /
        max-pooling bricks. Its output is flattened and fed to a top MLP.

        Parameters
        ----------
        conv_activations : iterable
            Items interleaved between convolutions and poolings.
        num_channels, image_shape
            Passed to ``ConvolutionalSequence`` (``image_shape`` as
            ``image_size``).
        filter_sizes, feature_maps, conv_steps : iterables
            Zipped into per-convolution ``filter_size``, ``num_filters`` and
            ``step``.
        pooling_sizes : iterable
            One entry per ``MaxPooling`` brick.
        top_mlp_activations, top_mlp_dims
            Passed to ``MLP``.
        border_mode : str
            Border mode used by every convolution.
        **kwargs
            Passed on to the parent constructor.
        """
        self.num_channels = num_channels
        self.image_shape = image_shape
        self.top_mlp_activations = top_mlp_activations
        self.top_mlp_dims = top_mlp_dims
        self.border_mode = border_mode

        # Lazy stream of convolutions, one per (size, maps, step) triple.
        convolutions = (
            Convolutional(
                filter_size=size,
                num_filters=maps,
                step=step,
                border_mode=self.border_mode,
                name="conv_{}".format(index),
            )
            for index, (size, maps, step) in enumerate(zip(filter_sizes, feature_maps, conv_steps))
        )

        # Lazy stream of poolings, one per entry of pooling_sizes.
        poolings = (
            MaxPooling(pool_size, name="pool_{}".format(index))
            for index, pool_size in enumerate(pooling_sizes)
        )

        # conv_0, activation_0, pool_0, conv_1, ...
        conv_layers = list(interleave([convolutions, conv_activations, poolings]))

        # The inputs are normalized before the first convolution.
        self.layers = [SpatialBatchNormalization()] + conv_layers
        self.conv_sequence = ConvolutionalSequence(self.layers, num_channels, image_size=image_shape)

        # Top MLP applied to the flattened convolutional features.
        self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)

        # The flattener turns the (batch_size, ...) tensor produced by the
        # last convolutional layer into a (batch_size, features) matrix.
        self.flattener = Flattener()
        super(LeNet, self).__init__(
            [self.conv_sequence.apply, self.flattener.apply, self.top_mlp.apply],
            **kwargs
        )
예제 #4
0
    def __init__(self, image_shape=None, output_size=None, **kwargs):
        """Build the all-convolutional network.

        Nine convolutions (with two ``ChannelMask`` bricks inserted among
        them) are each followed by a ``Rectifier``; the result goes through a
        ``GlobalAverageFlattener`` and a ``Softmax``.

        Parameters
        ----------
        image_shape : optional
            Stored as ``self.image_shape``; ``(32, 32)`` when falsy.
        output_size : optional
            Stored as ``self.output_size``; ``10`` when falsy.
        **kwargs
            Passed on to the parent constructor.
        """
        self.num_channels = 3
        self.image_shape = image_shape or (32, 32)
        self.output_size = output_size or 10

        # One row per convolution: (num_filters, filter_size, step, border_mode)
        layer_specs = (
            (96, 3, 1, 'half'),
            (96, 3, 1, 'half'),
            (96, 3, 2, 'half'),
            (192, 3, 1, 'half'),
            (192, 3, 1, 'half'),
            (192, 3, 2, 'half'),
            (192, 3, 1, 'half'),
            (192, 1, 1, 'valid'),
            (10, 1, 1, 'valid'),
        )

        bricks = []
        for index, (maps, size, stride, mode) in enumerate(layer_specs):
            bricks.append(
                Convolutional(filter_size=(size, size),
                              num_filters=maps,
                              step=(stride, stride),
                              border_mode=mode,
                              tied_biases=True,
                              name='conv_{}'.format(index)))
        self.convolutions = bricks

        # Add two trivial channel masks to allow by-channel dropout.
        # The later position is inserted first so that index 3 still refers
        # to the original list layout.
        self.convolutions.insert(6, ChannelMask(name='mask_1'))
        self.convolutions.insert(3, ChannelMask(name='mask_0'))

        # Every brick (masks included) is followed by its own Rectifier.
        rectifiers = (Rectifier() for _ in self.convolutions)
        sequence_layers = list(interleave([self.convolutions, rectifiers]))
        self.conv_sequence = ConvolutionalSequence(
            sequence_layers, self.num_channels, self.image_shape)

        # The AllConvNet applies average pooling to combine top-level
        # features across the image.
        self.flattener = GlobalAverageFlattener()

        # No fully-connected head is used: the softmax is applied directly
        # to the flattened features.
        self.top_softmax = Softmax()

        super(AllConvNet, self).__init__(
            [
                self.conv_sequence.apply,
                self.flattener.apply,
                self.top_softmax.apply,
            ],
            **kwargs)
예제 #5
0
    def __init__(self, conv_activations, num_channels, image_shape,
                 noise_batch_size,
                 filter_sizes, feature_maps, pooling_sizes,
                 top_mlp_activations, top_mlp_dims,
                 conv_step=None, border_mode='valid',
                 tied_biases=True, **kwargs):
        """Build a LeNet made of noisy convolutions and a noisy top MLP.

        Parameters
        ----------
        conv_activations : iterable
            Items interleaved between convolutions and poolings.
        num_channels, image_shape
            Passed to ``ConvolutionalSequence`` (``image_shape`` as
            ``image_size``).
        noise_batch_size
            Passed to every ``NoisyConvolutional`` and to the ``NoisyLinear``
            prototype of the top MLP.
        filter_sizes, feature_maps : iterables
            Zipped into per-convolution ``filter_size`` and ``num_filters``.
        pooling_sizes : iterable
            One entry per ``MaxPooling`` brick.
        top_mlp_activations, top_mlp_dims
            Passed to ``MLP``.
        conv_step : optional
            Step used by every convolution; ``(1, 1)`` when ``None``.
        border_mode : str
            Border mode used by every convolution.
        tied_biases : bool
            Passed to every convolution.
        **kwargs
            Passed on to the parent constructor.
        """
        self.conv_step = (1, 1) if conv_step is None else conv_step
        self.num_channels = num_channels
        self.image_shape = image_shape
        self.noise_batch_size = noise_batch_size
        self.top_mlp_activations = top_mlp_activations
        self.top_mlp_dims = top_mlp_dims
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        # Lazy stream of noisy convolutions, one per (size, maps) pair.
        noisy_convs = (
            NoisyConvolutional(filter_size=size,
                               num_filters=maps,
                               step=self.conv_step,
                               border_mode=self.border_mode,
                               tied_biases=self.tied_biases,
                               noise_batch_size=self.noise_batch_size,
                               name='conv_{}'.format(idx))
            for idx, (size, maps)
            in enumerate(zip(filter_sizes, feature_maps)))

        # Lazy stream of poolings, one per entry of pooling_sizes.
        poolings = (MaxPooling(pool_size, name='pool_{}'.format(idx))
                    for idx, pool_size in enumerate(pooling_sizes))

        # conv_0, activation_0, pool_0, conv_1, ...
        self.layers = list(
            interleave([noisy_convs, conv_activations, poolings]))

        self.conv_sequence = ConvolutionalSequence(
            self.layers, num_channels, image_size=image_shape)
        self.conv_sequence.name = 'cs'

        # Top MLP whose linear layers are cloned from a noisy prototype.
        noisy_prototype = NoisyLinear(noise_batch_size=self.noise_batch_size)
        self.top_mlp = MLP(top_mlp_activations, top_mlp_dims,
                           prototype=noisy_prototype)

        # The flattener turns the (batch_size, ...) tensor produced by the
        # last convolutional layer into a (batch_size, features) matrix.
        self.flattener = Flattener()
        super(NoisyLeNet, self).__init__(
            [self.conv_sequence.apply, self.flattener.apply,
             self.top_mlp.apply],
            **kwargs)
예제 #6
0
    def __init__(self, image_shape=None, output_size=None, **kwargs):
        """Set up the convolution stack, global-average flattener and softmax.

        Parameters
        ----------
        image_shape : optional
            Stored as ``self.image_shape``; falls back to ``(32, 32)``.
        output_size : optional
            Stored as ``self.output_size``; falls back to ``10``.
        **kwargs
            Forwarded to the parent constructor.
        """
        self.num_channels = 3
        self.image_shape = image_shape or (32, 32)
        self.output_size = output_size or 10

        # Columns: num_filters, filter_size, step, border_mode
        specs = [
            (96, 3, 1, 'half'),
            (96, 3, 1, 'half'),
            (96, 3, 2, 'half'),
            (192, 3, 1, 'half'),
            (192, 3, 1, 'half'),
            (192, 3, 2, 'half'),
            (192, 3, 1, 'half'),
            (192, 1, 1, 'valid'),
            (10, 1, 1, 'valid'),
        ]

        def build_conv(position, spec):
            # Square filters and square steps are derived from scalar specs.
            maps, size, stride, mode = spec
            return Convolutional(filter_size=(size, size),
                                 num_filters=maps,
                                 step=(stride, stride),
                                 border_mode=mode,
                                 tied_biases=True,
                                 name='conv_{}'.format(position))

        self.convolutions = [
            build_conv(position, spec) for position, spec in enumerate(specs)
        ]

        # Add two trivial channel masks to allow by-channel dropout
        # (higher index first, so the lower index is not shifted).
        self.convolutions.insert(6, ChannelMask(name='mask_1'))
        self.convolutions.insert(3, ChannelMask(name='mask_0'))

        # Pair every brick in the list with a fresh Rectifier.
        activations = (Rectifier() for _ in self.convolutions)
        self.conv_sequence = ConvolutionalSequence(
            list(interleave([self.convolutions, activations])),
            self.num_channels, self.image_shape)

        # The AllConvNet applies average pooling to combine top-level
        # features across the image.
        self.flattener = GlobalAverageFlattener()

        # The softmax is applied directly on the flattened features; no
        # fully-connected layer is inserted in between.
        self.top_softmax = Softmax()

        application_methods = [
            self.conv_sequence.apply,
            self.flattener.apply,
            self.top_softmax.apply,
        ]
        super(AllConvNet, self).__init__(application_methods, **kwargs)
예제 #7
0
def test_interleave():
    """interleave alternates items and continues once a shorter input ends."""
    cases = (
        (('ABC', '123'), 'A1B2C3'),
        (('ABC', '1'), 'A1BC'),
    )
    for sequences, expected in cases:
        assert ''.join(interleave(sequences)) == expected
예제 #8
0
    def fit(self, X, X_tar, y, y_tar, max_iter=500, warm_start=False):
        """Train a shared hidden layer with one softmax head per task.

        Source batches (task 1) and target batches (task 2) are interleaved
        and processed with mini-batch gradient descent for ``max_iter``
        passes over all batches.

        Parameters
        ----------
        X, y
            Source samples (one per row of ``X``) and their labels.
        X_tar, y_tar
            Target samples and labels. When ``y_tar`` is empty only the
            source task is trained.
        max_iter : int
            Number of passes over the interleaved batches.
        warm_start : bool
            When true, the weights already stored on the instance are kept
            instead of being re-initialized.

        Returns
        -------
        self
        """
        m = X.shape[0]
        m_tar = X_tar.shape[0]

        n_x = X.shape[1]
        n_class_src = len(set(y))
        n_class_tar = len(set(y_tar))

        has_target = len(y_tar) > 0

        if not warm_start:
            # weight and bias initialization
            # shared weights
            self.W1 = np.random.randn(self.nn_hidden, n_x)
            self.b1 = np.zeros((self.nn_hidden, 1))

            # task 1 specific weights
            self.W2_1 = np.random.randn(n_class_src, self.nn_hidden)
            self.b2_1 = np.zeros((n_class_src, 1))

            # task 2 specific weights
            self.W2_2 = np.random.randn(n_class_tar, self.nn_hidden)
            self.b2_2 = np.zeros((n_class_tar, 1))

        X_shuf, y_shuf = shuffle(X, y)
        if has_target:
            X_tar_shuf, y_tar_shuf = shuffle(X_tar, y_tar)

        # One label binarizer per task: each head has as many output rows as
        # its own task has classes.
        le = LabelBinarizer()
        le.fit(y)
        le_tar = None
        if has_target:
            le_tar = LabelBinarizer()
            le_tar.fit(y_tar)

        bs = np.min([self.batch_size, X_shuf.shape[0]])
        n_src_batches = m // bs
        batches_X = np.array_split(X_shuf, n_src_batches)
        batches_y = np.array_split(y_shuf, n_src_batches)
        tasks_1 = [1] * len(batches_y)

        batches_X_tar = []
        batches_y_tar = []
        if has_target:
            n_tar_batches = max(1, m_tar // self.batch_size)
            batches_X_tar = np.array_split(X_tar_shuf, n_tar_batches)
            batches_y_tar = np.array_split(y_tar_shuf, n_tar_batches)
        tasks_2 = [2] * len(batches_y_tar)

        # TO DO: hstack source and target batches in alternating way
        all_batches_X = list(itertoolz.interleave([batches_X,
                                                   batches_X_tar]))[::-1]
        all_batches_y = list(itertoolz.interleave([batches_y,
                                                   batches_y_tar]))[::-1]
        all_tasks = list(itertoolz.interleave([tasks_1, tasks_2]))[::-1]

        for j in range(1, max_iter + 1):
            batch_errors = []

            for task, X_batch, y_batch in zip(all_tasks, all_batches_X,
                                              all_batches_y):
                X_new = X_batch.T

                # Encode the labels with the binarizer of the batch's task.
                binarizer = le if task == 1 else le_tar
                y_new = binarizer.transform(y_batch).T

                # Forward pass through the shared layer.
                Z1 = np.matmul(self.W1, X_new) + self.b1
                A1 = relu(Z1)

                reg = np.linalg.norm(self.W1, ord=2)

                # Select the output head that belongs to this batch's task.
                if task == 1:
                    W2, b2 = self.W2_1, self.b2_1
                else:
                    W2, b2 = self.W2_2, self.b2_2

                # Softmax over the class axis (rows); nan_to_num guards
                # against overflow in exp().
                Z2 = np.matmul(W2, A1) + b2
                A2 = np.nan_to_num(
                    np.nan_to_num(np.exp(Z2)) /
                    np.nan_to_num(np.sum(np.exp(Z2), axis=0)))

                cost = loss(y_new, A2, reg)

                # Backward pass. The hidden-layer gradient is propagated
                # through the head that produced the prediction.
                dZ2 = A2 - y_new
                dW2 = (1. / m) * np.matmul(dZ2, A1.T)
                db2 = (1. / m) * np.sum(dZ2, axis=1, keepdims=True)

                dA1 = np.matmul(W2.T, dZ2)
                dZ1 = dA1 * relu_der(Z1)
                dW1 = (1. / m) * np.matmul(dZ1, X_new.T)
                db1 = (1. / m) * np.sum(dZ1, axis=1, keepdims=True)

                W2 = W2 - self.learning_rate * dW2
                b2 = b2 - self.learning_rate * db2

                if task == 1:
                    self.W2_1, self.b2_1 = W2, b2
                else:
                    self.W2_2, self.b2_2 = W2, b2
                    # NOTE(review): only task-2 (target) costs are recorded,
                    # as in the original code; confirm this is intended.
                    batch_errors.append(cost)

                self.W1 = self.W1 - self.learning_rate * dW1
                self.b1 = self.b1 - self.learning_rate * db1

            if (j % 100 == 0):
                print("Batch %s loss: %s" % (j, np.mean(batch_errors)))

        return self
예제 #9
0
    def fit(self,
            X,
            X_tar,
            y,
            y_tar,
            max_iter=500,
            warm_start=False,
            use_dropout=False,
            desc='',
            regularize=True):
        """Fit the shared layer and both task heads with ``adam``.

        The objective is the loss on the current mini-batch plus 0.3 times a
        Bhattacharyya-style distance between the mean hidden activations of
        the source and target data.

        Parameters
        ----------
        X, y
            Source samples (one per row of ``X``) and labels.
        X_tar, y_tar
            Target samples and labels; ``y_tar`` may be empty, in which case
            the distance penalty is skipped.
        max_iter : int
            Not used by the current implementation (the number of optimizer
            steps is fixed at ``30 * 500``).
        warm_start : bool
            When true, existing weights are kept instead of re-initialized.
        use_dropout : bool
            Apply inverted dropout (rate ``self.dropout_percent``) to the
            hidden activations.
        desc : str
            Not used by the current implementation.
        regularize : bool
            Controls whether the task-relatedness term is computed. That
            term is currently not added to the returned objective.

        Returns
        -------
        self
        """
        m = X.shape[0]
        n_x = X.shape[1]

        print(n_x)

        has_target = len(y_tar) > 0

        n_class_src = len(set(y))
        if has_target:
            m_tar = X_tar.shape[0]

        if not warm_start:
            # weight and bias initialization
            # shared weights
            self.W1 = np.random.randn(self.nn_hidden, n_x)
            self.b1 = np.zeros((self.nn_hidden, 1))

            # task 1 (source) specific weights
            self.task_1 = Task(self.nn_hidden, n_class_src, self.learning_rate,
                               m, self.T)

            # task 2 (target) specific weights
            self.task_2 = Task(self.nn_hidden, n_class_src, self.learning_rate,
                               m, self.T)

        X_shuf, y_shuf = shuffle(X, y)

        if has_target:
            X_tar_shuf, y_tar_shuf = shuffle(X_tar, y_tar)

        # transform labels into one-hot vectors (shared label space)
        le = LabelBinarizer()
        le.fit(list(y) + list(y_tar))

        bs = np.min([self.batch_size, X_shuf.shape[0]])
        batches_X = np.array_split(X_shuf, m / bs)
        batches_y = np.array_split(y_shuf, m / bs)
        tasks_1 = [1 for i in range(len(batches_y))]

        batches_X_tar = np.array([])
        batches_y_tar = np.array([])
        if has_target:
            batches_X_tar = np.array_split(X_tar_shuf,
                                           max(1, m_tar / self.batch_size))
            batches_y_tar = np.array_split(y_tar_shuf,
                                           max(1, m_tar / self.batch_size))
        tasks_2 = [2 for i in range(len(batches_y_tar))]

        # TO DO: hstack source and target batches in alternating way
        all_batches_X = list(itertoolz.interleave([batches_X,
                                                   batches_X_tar]))[::-1]
        all_batches_y = list(itertoolz.interleave([batches_y,
                                                   batches_y_tar]))[::-1]
        all_tasks = list(itertoolz.interleave([tasks_1, tasks_2]))[::-1]

        def get_batch(step):
            # Cycle through the interleaved batches as the step count grows.
            idx = step % len(all_tasks)
            task = all_tasks[idx]
            X_new = all_batches_X[idx].T
            y_new = le.transform(all_batches_y[idx]).T
            return X_new, y_new, task

        def model_loss(params, step):
            W, b1, W2_1, b2_1, W2_2, b2_2 = params

            X_batch, y_batch, task = get_batch(step)
            prod = W @ X_batch + b1
            nonlin = relu(prod)
            if use_dropout:
                # Inverted dropout: rescale so the expected activation is
                # unchanged.
                keep_prob = 1 - self.dropout_percent
                mask = np.random.binomial(
                    [np.ones((len(prod), nonlin.shape[1]))],
                    keep_prob)[0] * (1.0 / keep_prob)
                nonlin = nonlin * mask

            if task == 1:
                out = (W2_1 @ nonlin) + b2_1
            else:
                out = (W2_2 @ nonlin) + b2_2

            # Temperature softmax, normalized per sample (columns), so each
            # sample's class probabilities sum to one.
            exp_out = np.exp(out / self.T)
            prob = exp_out / np.sum(exp_out, axis=0)
            L = loss(y_batch, prob)

            # task relatedness
            # NOTE(review): computed from the weights stored on the tasks
            # rather than from ``params`` and currently left out of the
            # returned objective; kept so it can be re-enabled.
            if regularize:
                a_bar = (flatten(self.task_1.W)[0] +
                         flatten(self.task_2.W)[0]) / 2
                a_bar_norm = np.linalg.norm(a_bar, 2)
                source_norm = np.linalg.norm(
                    flatten(self.task_1.W)[0] - a_bar, 2)
                tar_norm = np.linalg.norm(flatten(self.task_2.W)[0] - a_bar, 2)

                reg = a_bar_norm + 0.1 * (source_norm + tar_norm) / 2
            else:
                reg = 0

            # bhattacharyya penalty between the normalized mean hidden
            # activations of source and target; needs target data.
            if has_target:
                P_s_prime = relu(((W @ X_shuf.T) + b1)).T.mean(axis=0)
                P_t_prime = relu(((W @ X_tar_shuf.T) + b1)).T.mean(axis=0)
                P_s = P_s_prime / (np.sum(P_s_prime))
                P_t = P_t_prime / (np.sum(P_t_prime))
                bt_distance = -(np.log(np.sum(P_s * P_t)))
            else:
                bt_distance = 0.0

            return L + 0.3 * bt_distance  #+ 0.3 * reg

        params = [
            self.W1, self.b1, self.task_1.W, self.task_1.b, self.task_2.W,
            self.task_2.b
        ]

        model_loss_grad = grad(model_loss)

        max_epoch = 500

        def callback(params, step, g):
            # Report the objective once every ``max_epoch`` optimizer steps.
            if step % max_epoch == 0:
                print("Iteration {0:3d} objective {1:1.2e}; task {2}".format(
                    step // max_epoch + 1, model_loss(params, step), '-'))

        self.W1, self.b1, self.task_1.W, self.task_1.b, self.task_2.W, self.task_2.b = adam(
            model_loss_grad,
            params,
            step_size=self.learning_rate,
            num_iters=30 * max_epoch,
            callback=callback)
        return self
예제 #10
0
파일: convnet.py 프로젝트: bordesf/IFT6266
output_size = 2
activation = [Rectifier().apply for _ in num_filter]
mlp_activation = [Rectifier().apply for _ in mlp_hiddens] + [Softmax().apply]

# Create the symbolic variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Per-layer (activation, filter size, number of filters) triples
conv_parameters = zip(activation, filter_size, num_filter)

# Create the convolution layers: each convolution (with its activation) is
# followed by a max pooling
conv_layers = list(interleave([
    (ConvolutionalActivation(filter_size=f_size,
                             num_filters=n_maps,
                             activation=act,
                             name='conv_{}'.format(idx))
     for idx, (act, f_size, n_maps) in enumerate(conv_parameters)),
    (MaxPooling(pool_size, name='pool_{}'.format(idx))
     for idx, pool_size in enumerate(pooling_sizes)),
]))

# Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers,
                                      num_channels,
                                      image_size=image_shape,
                                      weights_init=Uniform(width=0.2),
                                      biases_init=Constant(0.))
# Initialize the convnet
conv_sequence.initialize()
# Add the MLP: its input dimension is the flattened convnet output
top_mlp_dims = ([np.prod(conv_sequence.get_dim('output'))]
                + mlp_hiddens + [output_size])
out = Flattener().apply(conv_sequence.apply(x))
mlp = MLP(mlp_activation,
          top_mlp_dims,
          weights_init=Uniform(0, 0.2),
          biases_init=Constant(0.))
# Initialize the MLP
mlp.initialize()
예제 #11
0
# Convolutional layers

filter_sizes = [5, 5]
num_filters = [50, 256]
pooling_sizes = [2, 2]
conv_step = (1, 1)
border_mode = 'full'
conv_activations = [Logistic() for _ in num_filters]

# conv_0, activation_0, pool_0, conv_1, activation_1, pool_1
conv_layers = list(interleave([
    (Convolutional(filter_size=f_size,
                   num_filters=n_maps,
                   step=conv_step,
                   border_mode=border_mode,
                   name='conv_{}'.format(idx))
     for idx, (f_size, n_maps) in enumerate(zip(filter_sizes, num_filters))),
    conv_activations,
    # NOTE(review): every pooling brick receives the whole ``pooling_sizes``
    # list rather than its own entry -- confirm this is intended.
    (MaxPooling(pooling_sizes, name='pool_{}'.format(idx))
     for idx, _ in enumerate(pooling_sizes)),
]))

convnet = ConvolutionalSequence(
    conv_layers,
    num_channels=3,
    image_size=(32, 32),
    weights_init=Uniform(0, 0.2),
    biases_init=Constant(0.),
)

# Propagate the initialization schemes to the child bricks, then initialize.
convnet.push_initialization_config()

convnet.initialize()
예제 #12
0
num_filter = [20, 40, 60, 80, 100, 120, 1024, 2]
pooling_sizes = [(2,2),(2,2),(2,2),(2,2),(2,2),(2,2),(2,2),(1,1)]
output_size = 2

# Create the symbolic variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Get the parameters. They are materialized as a list because three of the
# generators below iterate over them; on Python 3 a bare zip() is a one-shot
# iterator and the lazily interleaved generators would steal items from
# each other.
conv_parameters = list(zip(filter_size, num_filter))

# Create the convolution layers:
# convolution -> batch normalization -> rectifier -> max pooling
conv_layers = list(interleave([
    (Convolutional(filter_size=f_size,
                   num_filters=num_f,
                   name='conv_{}'.format(i))
     for i, (f_size, num_f) in enumerate(conv_parameters)),
    (BatchNormalization(name='batch_{}'.format(i))
     for i, _ in enumerate(conv_parameters)),
    (Rectifier() for _ in conv_parameters),
    (MaxPooling(size, name='pool_{}'.format(i))
     for i, size in enumerate(pooling_sizes)),
]))

# Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_shape, use_bias=False)
# Add the Softmax function
out = Flattener().apply(conv_sequence.apply(x))
predict = NDimensionalSoftmax().apply(out)

#get the test stream
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
from fuel.schemes import ShuffledScheme, SequentialExampleScheme
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, MaximumImageDimensions, Random2DRotation
# Symbolic inputs
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Conv net model
conv_activation = [Rectifier() for _ in num_filters]
mlp_activation = [Rectifier() for _ in mlp_hiddens] + [Softmax()]

# conv_parameters = zip(filter_sizes, num_filters)
sbn = SpatialBatchNormalization()
conv_parameters = zip(filter_sizes, num_filters)

# conv_0, activation_0, pool_0, conv_1, ...
conv_layers = list(interleave([
  (Convolutional(
  filter_size=filter_size,
  num_filters=num_filter,
  step=conv_step,
  border_mode=border_mode,
  name='conv_{}'.format(i)) for i, (filter_size, num_filter) in enumerate(conv_parameters)),
  conv_activation,
  (MaxPooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))
# conv_layers = [sbn] + conv_layers
conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_size,weights_init=Uniform(width=0.2), biases_init=Constant(0.))
conv_sequence.initialize()
out = Flattener().apply(conv_sequence.apply(x))

# The MLP input dimension is the flattened size of the convnet output.
top_mlp_dims = [numpy.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
# Uniform needs exactly one of ``width``/``std``; a lone positional argument
# only sets the mean, so the width is given by keyword as for the convnet.
top_mlp = MLP(mlp_activation, top_mlp_dims,weights_init=Uniform(width=0.1),biases_init=Constant(0.))
top_mlp.initialize()

predict = top_mlp.apply(out)
예제 #14
0
def main():
    """Split the annotated sentences into train/dev/test text files.

    Reads every ``*.xml`` file below ``--input_dir`` (recursively), splits the
    extracted items into train, dev and test sets and writes them to
    ``train.txt``, ``dev.txt`` and ``test.txt`` inside ``--split_dir``.
    Any existing ``--split_dir`` is deleted first.

    With ``--paragraphs`` the split is made on paragraphs, which are turned
    into sentences afterwards; otherwise the sentences of all files are
    interleaved, formatted and split directly.
    """
    parser = argparse.ArgumentParser(
        description=
        'Splits available (multilingual) sentences in train-dev-test')
    parser.add_argument('--input_dir',
                        type=str,
                        default='samples_man_annotated',
                        help='location of the annotated Eltec files')
    parser.add_argument('--split_dir',
                        type=str,
                        default='multilingual_splits',
                        help='location of the train-dev-test files')
    parser.add_argument(
        '--train_prop',
        type=float,
        default=.8,
        help='Proportion of training items (dev and test are equal-size)')
    parser.add_argument('--seed', type=int, default=43438, help='Random seed')
    parser.add_argument('--paragraphs', action='store_true', help='')
    args = parser.parse_args()
    print(args)

    # Start from an empty output directory.
    try:
        shutil.rmtree(args.split_dir)
    except FileNotFoundError:
        pass
    os.makedirs(args.split_dir)

    xml_files = glob.iglob(f'{args.input_dir}/**/*.xml', recursive=True)

    if args.paragraphs:
        paragraphs = []
        for filename in xml_files:
            print(filename)
            paragraphs.extend(annotated2paragraphs(filename))
        train, dev, test = split_paragraphs(paragraphs,
                                            train_ratio=args.train_prop,
                                            random_state=args.seed)
        train = get_sentences(train)
        dev = get_sentences(dev)
        test = get_sentences(test)
    else:
        sentence_iters = []
        for filename in xml_files:
            print(filename)
            sentence_iters.append(annotated2sentences(filename))

        # Alternate between the files so that the sources are mixed.
        mixed_sentences = interleave(sentence_iters)

        # One token per line (fields joined by spaces), blank line at the end.
        formatted_sentences = [
            '\n'.join([' '.join(token) for token in sentence]) + '\n'
            for sentence in mixed_sentences
        ]

        train, rest = split(formatted_sentences,
                            train_size=args.train_prop,
                            shuffle=True,
                            random_state=args.seed)
        # The remainder is divided evenly between dev and test.
        dev, test = split(rest,
                          train_size=0.5,
                          shuffle=True,
                          random_state=args.seed)

    print(f'# train items: {len(train)}')
    print(f'# dev test: {len(dev)}')
    print(f'# test items: {len(test)}')

    # Explicit name -> items mapping instead of eval() on variable names.
    splits = {'train': train, 'dev': dev, 'test': test}
    for name, items in splits.items():
        with open(os.path.join(args.split_dir, name + '.txt'),
                  'w',
                  encoding='utf8') as f:
            f.write('\n'.join(items))
예제 #15
0
activation = [Rectifier().apply for _ in num_filter]
mlp_activation = [Rectifier().apply for _ in mlp_hiddens] + [Softmax().apply]

# Create the symbolic variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Per-layer (activation, filter size, number of filters) triples
conv_parameters = zip(activation, filter_size, num_filter)

# Create the convolution layers: each convolution (with its activation) is
# followed by a max pooling
conv_layers = list(interleave([
    (ConvolutionalActivation(filter_size=f_size,
                             num_filters=n_maps,
                             activation=act,
                             name='conv_{}'.format(idx))
     for idx, (act, f_size, n_maps) in enumerate(conv_parameters)),
    (MaxPooling(pool_size, name='pool_{}'.format(idx))
     for idx, pool_size in enumerate(pooling_sizes)),
]))

# Create the sequence
conv_sequence = ConvolutionalSequence(
    conv_layers,
    num_channels,
    image_size=image_shape,
    weights_init=Uniform(width=0.2),
    biases_init=Constant(0.),
)
# Initialize the convnet
conv_sequence.initialize()
# Add the MLP: its input dimension is the flattened convnet output
top_mlp_dims = ([np.prod(conv_sequence.get_dim('output'))]
                + mlp_hiddens + [output_size])
예제 #16
0
# Create the symbolic variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Per-layer (filter size, number of filters) pairs.
# Materialized as a list on purpose: several generators below iterate over
# it, and on Python 3 a bare ``zip`` is a one-shot iterator, so they would
# otherwise steal pairs from each other while ``interleave`` advances them
# round-robin.
conv_parameters = list(zip(filter_size, num_filter))

# Create the convolution layers. ``interleave`` alternates the bricks:
# conv_0, batch_0, rectifier, pool_0, conv_1, batch_1, rectifier, pool_1, ...
conv_layers = list(
    interleave([
        (Convolutional(filter_size=fsize,
                       num_filters=nfilters,
                       name='conv_{}'.format(i))
         for i, (fsize, nfilters) in enumerate(conv_parameters)),
        (BatchNormalization(name='batch_{}'.format(i))
         for i in range(len(conv_parameters))),
        (Rectifier() for _ in conv_parameters),
        (MaxPooling(size, name='pool_{}'.format(i))
         for i, size in enumerate(pooling_sizes))
    ]))

# Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers,
                                      num_channels,
                                      image_size=image_shape,
                                      use_bias=False)
# Flatten the convolutional output and apply the softmax
out = Flattener().apply(conv_sequence.apply(x))
predict = NDimensionalSoftmax().apply(out)
예제 #17
0
def build_and_run(label, config):
    """Build a convolutional classifier from ``config`` and run its training.

    Parameters
    ----------
    label : str
        Name of the run; used as the plot title and in the checkpoint
        path ``models/best_<label>.tar``.
    config : mapping
        Must provide the keys ``num_epochs``, ``num_batches``,
        ``num_channels``, ``image_shape``, ``filter_size``, ``num_filter``,
        ``pooling_sizes``, ``mlp_hiddens``, ``output_size``, ``batch_size``,
        ``activation`` and ``mlp_activation``. If a ``test`` key is present
        (whatever its value), ``get_stream`` is called with ``test=True``.

    Raises
    ------
    KeyError
        If one of the required keys is missing from ``config``.
    """
    ############## CREATE THE NETWORK ###############
    # Define the parameters
    num_epochs = config['num_epochs']
    num_batches = config['num_batches']
    num_channels = config['num_channels']
    image_shape = config['image_shape']
    filter_size = config['filter_size']
    num_filter = config['num_filter']
    pooling_sizes = config['pooling_sizes']
    mlp_hiddens = config['mlp_hiddens']
    output_size = config['output_size']
    batch_size = config['batch_size']
    activation = config['activation']
    mlp_activation = config['mlp_activation']
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    # Create the symbolic variables
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    # Per-layer (filter size, number of filters) pairs; consumed exactly
    # once by the convolution generator below.
    conv_parameters = zip(filter_size, num_filter)

    # Create the convolution layers. ``interleave`` alternates the bricks:
    # conv_0, activation[0], pool_0, conv_1, activation[1], pool_1, ...
    conv_layers = list(
        interleave([
            (Convolutional(filter_size=fsize,
                           num_filters=nfilters,
                           name='conv_{}'.format(i))
             for i, (fsize, nfilters) in enumerate(conv_parameters)),
            activation,
            (MaxPooling(size, name='pool_{}'.format(i))
             for i, size in enumerate(pooling_sizes)),
        ]))

    # Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    # Initialize the convnet
    conv_sequence.initialize()
    # Add the MLP: its input size is the flattened convolutional output
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))
                    ] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation,
              top_mlp_dims,
              weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    # Initialize the MLP
    mlp.initialize()
    # Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)

    # Two differently named copies of the same error so that it can be
    # shown in two separate plots.
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # The same L2 and L1 coefficients are applied to every weight and bias.
    l2_penalty = T.sum([lambda_l2 * (W**2).sum() for W in weights + biases])
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum() for z in weights + biases])
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    algorithm = GradientDescent(cost=costreg,
                                parameters=cg.parameters,
                                step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config
    # The third stream returned by get_stream is not used here.
    train_stream, valid_stream, _ = get_stream(batch_size,
                                               image_shape,
                                               test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'], predicate=OnLogRecord('valid_error_rate_best_so_far'))

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    # Live plot on the bokeh server. Channel names are the monitoring
    # prefix plus the variable name, so the gradient norm (renamed to
    # 'grad_norm' above) is logged as 'train_grad_norm'.
    plot = Plot(
        label,
        channels=[
            ['train_error_rate', 'valid_error_rate'],
            ['valid_cost', 'valid_error_rate2'],
            [
                'train_costreg', 'train_grad_norm',
                'train_l2_penalty', 'train_l1_penalty'
            ]
        ],
        server_url="http://hades.calculquebec.ca:5042")

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream,
                             prefix="valid"),
        TrainingDataMonitoring([
            costreg, error_rate, error_rate2, grad_norm, l2_penalty, l1_penalty
        ],
                               prefix="train",
                               after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        # Keep best. ``min`` must be passed by keyword: the second positional
        # parameter of TrackTheBest is the notification name, and passing
        # ``min`` there would replace the 'valid_error_rate_best_so_far'
        # record that the checkpoint and the early stopping rely on.
        TrackTheBest('valid_error_rate', choose_best=min),
        checkpoint,  # Save best
        # Early stopping
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4)
    ]
    model = Model(cost)
    main_loop = MainLoop(algorithm,
                         data_stream=train_stream,
                         model=model,
                         extensions=extensions)
    main_loop.run()
예제 #18
0
def build_and_run(label, config):
    """Build a convolutional classifier from ``config`` and run its training.

    Parameters
    ----------
    label : str
        Name of the run; used as the plot title and in the checkpoint
        path ``models/best_<label>.tar``.
    config : mapping
        Must provide the keys ``num_epochs``, ``num_batches``,
        ``num_channels``, ``image_shape``, ``filter_size``, ``num_filter``,
        ``pooling_sizes``, ``mlp_hiddens``, ``output_size``, ``batch_size``,
        ``activation`` and ``mlp_activation``. If a ``test`` key is present
        (whatever its value), ``get_stream`` is called with ``test=True``.

    Raises
    ------
    KeyError
        If one of the required keys is missing from ``config``.
    """
    ############## CREATE THE NETWORK ###############
    # Define the parameters
    num_epochs = config['num_epochs']
    num_batches = config['num_batches']
    num_channels = config['num_channels']
    image_shape = config['image_shape']
    filter_size = config['filter_size']
    num_filter = config['num_filter']
    pooling_sizes = config['pooling_sizes']
    mlp_hiddens = config['mlp_hiddens']
    output_size = config['output_size']
    batch_size = config['batch_size']
    activation = config['activation']
    mlp_activation = config['mlp_activation']
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    # Create the symbolic variables
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    # Per-layer (filter size, number of filters) pairs; consumed exactly
    # once by the convolution generator below.
    conv_parameters = zip(filter_size, num_filter)

    # Create the convolution layers. ``interleave`` alternates the bricks:
    # conv_0, activation[0], pool_0, conv_1, activation[1], pool_1, ...
    conv_layers = list(
        interleave([
            (Convolutional(filter_size=fsize,
                           num_filters=nfilters,
                           name='conv_{}'.format(i))
             for i, (fsize, nfilters) in enumerate(conv_parameters)),
            activation,
            (MaxPooling(size, name='pool_{}'.format(i))
             for i, size in enumerate(pooling_sizes)),
        ]))

    # Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    # Initialize the convnet
    conv_sequence.initialize()
    # Add the MLP: its input size is the flattened convolutional output
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))
                    ] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation,
              top_mlp_dims,
              weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    # Initialize the MLP
    mlp.initialize()
    # Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)

    # Two differently named copies of the same error so that it can be
    # shown in two separate plots.
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # The same L2 and L1 coefficients are applied to every weight and bias.
    l2_penalty = T.sum([lambda_l2 * (W**2).sum() for W in weights + biases])
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum() for z in weights + biases])
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    algorithm = GradientDescent(cost=costreg,
                                parameters=cg.parameters,
                                step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config
    # The third stream returned by get_stream is not used here.
    train_stream, valid_stream, _ = get_stream(batch_size,
                                               image_shape,
                                               test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'], predicate=OnLogRecord('valid_error_rate_best_so_far'))

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    # Live plot on the bokeh server. Channel names are the monitoring
    # prefix plus the variable name, so the gradient norm (renamed to
    # 'grad_norm' above) is logged as 'train_grad_norm'.
    plot = Plot(
        label,
        channels=[
            ['train_error_rate', 'valid_error_rate'],
            ['valid_cost', 'valid_error_rate2'],
            [
                'train_costreg', 'train_grad_norm',
                'train_l2_penalty', 'train_l1_penalty'
            ]
        ],
        server_url="http://hades.calculquebec.ca:5042")

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream,
                             prefix="valid"),
        TrainingDataMonitoring([
            costreg, error_rate, error_rate2, grad_norm, l2_penalty, l1_penalty
        ],
                               prefix="train",
                               after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        # Keep best. ``min`` must be passed by keyword: the second positional
        # parameter of TrackTheBest is the notification name, and passing
        # ``min`` there would replace the 'valid_error_rate_best_so_far'
        # record that the checkpoint and the early stopping rely on.
        TrackTheBest('valid_error_rate', choose_best=min),
        checkpoint,  # Save best
        # Early stopping
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4)
    ]
    model = Model(cost)
    main_loop = MainLoop(algorithm,
                         data_stream=train_stream,
                         model=model,
                         extensions=extensions)
    main_loop.run()