def __init__(self, conv_activations, num_channels, image_shape,
             filter_sizes, feature_maps, pooling_sizes,
             top_mlp_activations, top_mlp_dims,
             conv_step=None, border_mode="valid", **kwargs):
    if conv_step is None:
        self.conv_step = (1, 1)
    else:
        self.conv_step = conv_step
    self.num_channels = num_channels
    self.image_shape = image_shape
    self.top_mlp_activations = top_mlp_activations
    self.top_mlp_dims = top_mlp_dims
    self.border_mode = border_mode
    # Materialize the zip so it can be iterated by more than one generator.
    conv_parameters = list(zip(filter_sizes, feature_maps))

    # Construct convolutional, batch normalization, activation, and pooling
    # layers with the corresponding parameters.
    self.convolution_layer = (
        Convolutional(filter_size=filter_size,
                      num_filters=num_filter,
                      step=self.conv_step,
                      border_mode=self.border_mode,
                      name="conv_{}".format(i))
        for i, (filter_size, num_filter) in enumerate(conv_parameters))
    self.BN_layer = (BatchNormalization(name="bn_conv_{}".format(i))
                     for i, _ in enumerate(conv_parameters))
    self.pooling_layer = (MaxPooling(size, name="pool_{}".format(i))
                          for i, size in enumerate(pooling_sizes))

    self.layers = list(interleave([self.convolution_layer, self.BN_layer,
                                   conv_activations, self.pooling_layer]))
    self.conv_sequence = ConvolutionalSequence(self.layers, num_channels,
                                               image_size=image_shape)

    # Construct a top MLP
    self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)
    # Construct a top batch-normalized MLP
    # mlp_class = BatchNormalizedMLP
    # extra_kwargs = {'conserve_memory': False}
    # self.top_mlp = mlp_class(top_mlp_activations, top_mlp_dims, **extra_kwargs)

    # We need to flatten the output of the last convolutional layer.
    # This brick accepts a tensor of dimension (batch_size, ...) and
    # returns a matrix (batch_size, features).
    self.flattener = Flattener()

    application_methods = [self.conv_sequence.apply, self.flattener.apply,
                           self.top_mlp.apply]
    super(LeNet, self).__init__(application_methods, **kwargs)
def __init__(self, conv_activations, num_channels, image_shape,
             filter_sizes, feature_maps, pooling_sizes,
             top_mlp_activations, top_mlp_dims,
             conv_step=None, border_mode='valid', **kwargs):
    if conv_step is None:
        self.conv_step = (1, 1)
    else:
        self.conv_step = conv_step
    self.num_channels = num_channels
    self.image_shape = image_shape
    self.top_mlp_activations = top_mlp_activations
    self.top_mlp_dims = top_mlp_dims
    self.border_mode = border_mode
    # Materialize the zip so it can be iterated by more than one generator.
    conv_parameters = list(zip(filter_sizes, feature_maps))

    # Construct convolutional, batch normalization, activation, and pooling
    # layers with the corresponding parameters.
    self.convolution_layer = (
        Convolutional(filter_size=filter_size,
                      num_filters=num_filter,
                      step=self.conv_step,
                      border_mode=self.border_mode,
                      name='conv_{}'.format(i))
        for i, (filter_size, num_filter) in enumerate(conv_parameters))
    self.BN_layer = (BatchNormalization(name='bn_conv_{}'.format(i))
                     for i, _ in enumerate(conv_parameters))
    self.pooling_layer = (MaxPooling(size, name='pool_{}'.format(i))
                          for i, size in enumerate(pooling_sizes))

    self.layers = list(interleave([self.convolution_layer, self.BN_layer,
                                   conv_activations, self.pooling_layer]))
    self.conv_sequence = ConvolutionalSequence(self.layers, num_channels,
                                               image_size=image_shape)

    # Construct a top MLP
    self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)

    # We need to flatten the output of the last convolutional layer.
    # This brick accepts a tensor of dimension (batch_size, ...) and
    # returns a matrix (batch_size, features).
    self.flattener = Flattener()

    application_methods = [self.conv_sequence.apply, self.flattener.apply,
                           self.top_mlp.apply]
    super(LeNet, self).__init__(application_methods, **kwargs)
def __init__(self, conv_activations, num_channels, image_shape,
             filter_sizes, feature_maps, conv_steps, pooling_sizes,
             top_mlp_activations, top_mlp_dims,
             border_mode="valid", **kwargs):
    self.num_channels = num_channels
    self.image_shape = image_shape
    self.top_mlp_activations = top_mlp_activations
    self.top_mlp_dims = top_mlp_dims
    self.border_mode = border_mode
    conv_parameters = zip(filter_sizes, feature_maps, conv_steps)

    # Construct convolutional, activation, and pooling layers with
    # corresponding parameters.
    conv_layers = list(interleave([
        (Convolutional(filter_size=filter_size,
                       num_filters=num_filter,
                       step=conv_step,
                       border_mode=self.border_mode,
                       name="conv_{}".format(i))
         for i, (filter_size, num_filter, conv_step)
         in enumerate(conv_parameters)),
        conv_activations,
        (MaxPooling(size, name="pool_{}".format(i))
         for i, size in enumerate(pooling_sizes)),
    ]))

    # Apply SpatialBatchNormalization to the inputs.
    self.layers = [SpatialBatchNormalization()] + conv_layers
    # self.layers = conv_layers
    self.conv_sequence = ConvolutionalSequence(self.layers, num_channels,
                                               image_size=image_shape)

    # Construct a top MLP
    self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)

    # We need to flatten the output of the last convolutional layer.
    # This brick accepts a tensor of dimension (batch_size, ...) and
    # returns a matrix (batch_size, features).
    self.flattener = Flattener()

    application_methods = [self.conv_sequence.apply, self.flattener.apply,
                           self.top_mlp.apply]
    super(LeNet, self).__init__(application_methods, **kwargs)
def __init__(self, image_shape=None, output_size=None, **kwargs):
    self.num_channels = 3
    self.image_shape = image_shape or (32, 32)
    self.output_size = output_size or 10
    # (num_filters, filter_size, conv_step, border_mode) per layer
    conv_parameters = [(96, 3, 1, 'half'),
                       (96, 3, 1, 'half'),
                       (96, 3, 2, 'half'),
                       (192, 3, 1, 'half'),
                       (192, 3, 1, 'half'),
                       (192, 3, 2, 'half'),
                       (192, 3, 1, 'half'),
                       (192, 1, 1, 'valid'),
                       (10, 1, 1, 'valid')]
    fc_layer = 10
    self.convolutions = [
        Convolutional(filter_size=(filter_size, filter_size),
                      num_filters=num_filters,
                      step=(conv_step, conv_step),
                      border_mode=border_mode,
                      tied_biases=True,
                      name='conv_{}'.format(i))
        for i, (num_filters, filter_size, conv_step, border_mode)
        in enumerate(conv_parameters)]
    # Add two trivial channel masks to allow by-channel dropout.
    self.convolutions.insert(6, ChannelMask(name='mask_1'))
    self.convolutions.insert(3, ChannelMask(name='mask_0'))
    self.conv_sequence = ConvolutionalSequence(
        list(interleave([self.convolutions,
                         (Rectifier() for _ in self.convolutions)])),
        self.num_channels, self.image_shape)
    # The AllConvNet applies average pooling to combine top-level
    # features across the image.
    self.flattener = GlobalAverageFlattener()
    # Then it inserts one final 10-way FC layer before the softmax.
    # self.top_mlp = MLP([Rectifier(), Softmax()],
    #                    [conv_parameters[-1][0], fc_layer, self.output_size])
    self.top_softmax = Softmax()
    application_methods = [self.conv_sequence.apply, self.flattener.apply,
                           self.top_softmax.apply]
    super(AllConvNet, self).__init__(application_methods, **kwargs)
def __init__(self, conv_activations, num_channels, image_shape,
             noise_batch_size, filter_sizes, feature_maps, pooling_sizes,
             top_mlp_activations, top_mlp_dims,
             conv_step=None, border_mode='valid', tied_biases=True, **kwargs):
    if conv_step is None:
        self.conv_step = (1, 1)
    else:
        self.conv_step = conv_step
    self.num_channels = num_channels
    self.image_shape = image_shape
    self.noise_batch_size = noise_batch_size
    self.top_mlp_activations = top_mlp_activations
    self.top_mlp_dims = top_mlp_dims
    self.border_mode = border_mode
    self.tied_biases = tied_biases
    conv_parameters = zip(filter_sizes, feature_maps)

    # Construct convolutional layers with corresponding parameters.
    self.layers = list(interleave([
        (NoisyConvolutional(filter_size=filter_size,
                            num_filters=num_filter,
                            step=self.conv_step,
                            border_mode=self.border_mode,
                            tied_biases=self.tied_biases,
                            noise_batch_size=self.noise_batch_size,
                            name='conv_{}'.format(i))
         for i, (filter_size, num_filter) in enumerate(conv_parameters)),
        conv_activations,
        (MaxPooling(size, name='pool_{}'.format(i))
         for i, size in enumerate(pooling_sizes))]))
    self.conv_sequence = ConvolutionalSequence(self.layers, num_channels,
                                               image_size=image_shape)
    self.conv_sequence.name = 'cs'

    # Construct a top MLP
    self.top_mlp = MLP(top_mlp_activations, top_mlp_dims,
                       prototype=NoisyLinear(noise_batch_size=self.noise_batch_size))

    # We need to flatten the output of the last convolutional layer.
    # This brick accepts a tensor of dimension (batch_size, ...) and
    # returns a matrix (batch_size, features).
    self.flattener = Flattener()

    application_methods = [self.conv_sequence.apply, self.flattener.apply,
                           self.top_mlp.apply]
    super(NoisyLeNet, self).__init__(application_methods, **kwargs)
def __init__(self, image_shape=None, output_size=None, **kwargs):
    self.num_channels = 3
    self.image_shape = image_shape or (32, 32)
    self.output_size = output_size or 10
    # (num_filters, filter_size, conv_step, border_mode) per layer
    conv_parameters = [(96, 3, 1, 'half'),
                       (96, 3, 1, 'half'),
                       (96, 3, 2, 'half'),
                       (192, 3, 1, 'half'),
                       (192, 3, 1, 'half'),
                       (192, 3, 2, 'half'),
                       (192, 3, 1, 'half'),
                       (192, 1, 1, 'valid'),
                       (10, 1, 1, 'valid')]
    fc_layer = 10
    self.convolutions = [
        Convolutional(filter_size=(filter_size, filter_size),
                      num_filters=num_filters,
                      step=(conv_step, conv_step),
                      border_mode=border_mode,
                      tied_biases=True,
                      name='conv_{}'.format(i))
        for i, (num_filters, filter_size, conv_step, border_mode)
        in enumerate(conv_parameters)]
    # Add two trivial channel masks to allow by-channel dropout.
    self.convolutions.insert(6, ChannelMask(name='mask_1'))
    self.convolutions.insert(3, ChannelMask(name='mask_0'))
    self.conv_sequence = ConvolutionalSequence(
        list(interleave([self.convolutions,
                         (Rectifier() for _ in self.convolutions)])),
        self.num_channels, self.image_shape)
    # The AllConvNet applies average pooling to combine top-level
    # features across the image.
    self.flattener = GlobalAverageFlattener()
    # Then it inserts one final 10-way FC layer before the softmax.
    # self.top_mlp = MLP([Rectifier(), Softmax()],
    #                    [conv_parameters[-1][0], fc_layer, self.output_size])
    self.top_softmax = Softmax()
    application_methods = [self.conv_sequence.apply, self.flattener.apply,
                           self.top_softmax.apply]
    super(AllConvNet, self).__init__(application_methods, **kwargs)
def test_interleave():
    assert ''.join(interleave(('ABC', '123'))) == 'A1B2C3'
    assert ''.join(interleave(('ABC', '1'))) == 'A1BC'
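# The snippets in this section all rely on the same trick: build each kind of
# layer as its own iterable and let `interleave` weave them into a single
# conv -> activation -> pool ordering. Below is a minimal, self-contained
# sketch of that pattern; it assumes `interleave` is toolz.interleave (the
# round-robin behaviour exercised by test_interleave above) and uses
# placeholder strings instead of real bricks.
from toolz import interleave

convs = ['conv_0', 'conv_1']   # stand-ins for Convolutional bricks
acts = ['relu_0', 'relu_1']    # stand-ins for activation bricks
pools = ['pool_0', 'pool_1']   # stand-ins for MaxPooling bricks

layers = list(interleave([convs, acts, pools]))
print(layers)
# ['conv_0', 'relu_0', 'pool_0', 'conv_1', 'relu_1', 'pool_1']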
def fit(self, X, X_tar, y, y_tar, max_iter=500, warm_start=False):
    m = X.shape[0]
    m_tar = X_tar.shape[0]
    n_x = X.shape[1]
    n_class_src = len(set(y))
    n_class_tar = len(set(y_tar))

    if not warm_start:
        '''weight and bias initialization'''
        # shared weights
        self.W1 = np.random.randn(self.nn_hidden, n_x)
        self.b1 = np.zeros((self.nn_hidden, 1))
        # task 1 specific weights
        self.W2_1 = np.random.randn(n_class_src, self.nn_hidden)
        self.b2_1 = np.zeros((n_class_src, 1))
        # task 2 specific weights
        self.W2_2 = np.random.randn(n_class_tar, self.nn_hidden)
        self.b2_2 = np.zeros((n_class_tar, 1))

    X_shuf, y_shuf = shuffle(X, y)
    if len(y_tar) > 0:
        X_tar_shuf, y_tar_shuf = shuffle(X_tar, y_tar)

    le = LabelBinarizer()
    le.fit(y)
    if len(y_tar) > 0:
        le_tar = LabelBinarizer()
        le_tar.fit(y_tar)

    bs = np.min([self.batch_size, X_shuf.shape[0]])
    batches_X = np.array_split(X_shuf, m // bs)
    batches_y = np.array_split(y_shuf, m // bs)
    tasks_1 = [1 for i in range(len(batches_y))]
    batches_X_tar = np.array([])
    batches_y_tar = np.array([])
    if len(y_tar) > 0:
        batches_X_tar = np.array_split(X_tar_shuf, max(1, m_tar // self.batch_size))
        batches_y_tar = np.array_split(y_tar_shuf, max(1, m_tar // self.batch_size))
    tasks_2 = [2 for i in range(len(batches_y_tar))]

    # TODO: hstack source and target batches in an alternating way
    all_batches_X = list(itertoolz.interleave([batches_X, batches_X_tar]))[::-1]
    all_batches_y = list(itertoolz.interleave([batches_y, batches_y_tar]))[::-1]
    all_tasks = list(itertoolz.interleave([tasks_1, tasks_2]))[::-1]

    for j in range(1, max_iter + 1):  # progressbar.progressbar(range(max_iter))
        batch_errors = []
        for i in range(len(all_batches_X)):
            task = all_tasks[i]
            X_new = all_batches_X[i].T
            y_new = all_batches_y[i]
            y_new = le.transform(y_new)
            y_new = y_new.T

            # Forward pass through the shared layer.
            Z1 = np.matmul(self.W1, X_new) + self.b1
            A1 = relu(Z1)
            reg = np.linalg.norm(self.W1, ord=2)

            if task == 1:
                Z2 = np.matmul(self.W2_1, A1) + self.b2_1
                A2 = np.nan_to_num(
                    np.nan_to_num(np.exp(Z2)) /
                    np.nan_to_num(np.sum(np.exp(Z2), axis=0)))
                cost = loss(y_new, A2, reg)
                dZ2 = A2 - y_new
                dW2 = (1. / m) * np.matmul(dZ2, A1.T)
                db2 = (1. / m) * np.sum(dZ2, axis=1, keepdims=True)
                dA1 = np.matmul(self.W2_1.T, dZ2)
                dZ1 = dA1 * relu_der(Z1)
                dW1 = (1. / m) * np.matmul(dZ1, X_new.T)
                db1 = (1. / m) * np.sum(dZ1, axis=1, keepdims=True)
                self.W2_1 = self.W2_1 - self.learning_rate * dW2
                self.b2_1 = self.b2_1 - self.learning_rate * db2

            if task == 2:
                Z2 = np.matmul(self.W2_2, A1) + self.b2_2
                A2 = np.nan_to_num(
                    np.nan_to_num(np.exp(Z2)) /
                    np.nan_to_num(np.sum(np.exp(Z2), axis=0)))
                cost = loss(y_new, A2, reg)
                dZ2 = A2 - y_new
                dW2 = (1. / m) * np.matmul(dZ2, A1.T)
                db2 = (1. / m) * np.sum(dZ2, axis=1, keepdims=True)
                # Backpropagate through the task-2 head (W2_2, not W2_1).
                dA1 = np.matmul(self.W2_2.T, dZ2)
                dZ1 = dA1 * relu_der(Z1)
                dW1 = (1. / m) * np.matmul(dZ1, X_new.T)
                db1 = (1. / m) * np.sum(dZ1, axis=1, keepdims=True)
                self.W2_2 = self.W2_2 - self.learning_rate * dW2
                self.b2_2 = self.b2_2 - self.learning_rate * db2

            batch_errors.append(cost)
            # Shared-layer update.
            self.W1 = self.W1 - self.learning_rate * dW1
            self.b1 = self.b1 - self.learning_rate * db1

        if j % 100 == 0:
            print("Batch %s loss: %s" % (j, np.mean(batch_errors)))
    return self
def fit(self, X, X_tar, y, y_tar, max_iter=500, warm_start=False,
        use_dropout=False, desc='', regularize=True):
    m = X.shape[0]
    n_x = X.shape[1]
    print(n_x)
    n_class_src = len(set(y))
    if len(set(y_tar)) > 0:
        n_class_tar = len(set(y_tar))
        m_tar = X_tar.shape[0]

    if not warm_start:
        '''weight and bias initialization'''
        # shared weights
        self.W1 = np.random.randn(self.nn_hidden, n_x)
        self.b1 = np.zeros((self.nn_hidden, 1))
        # task 1 (source) specific weights
        self.task_1 = Task(self.nn_hidden, n_class_src, self.learning_rate, m, self.T)
        # task 2 (target) specific weights
        self.task_2 = Task(self.nn_hidden, n_class_src, self.learning_rate, m, self.T)

    X_shuf, y_shuf = shuffle(X, y)
    if len(y_tar) > 0:
        X_tar_shuf, y_tar_shuf = shuffle(X_tar, y_tar)

    # transform labels into one-hot vectors
    le = LabelBinarizer()
    le.fit(list(y) + list(y_tar))
    if len(y_tar) > 0:
        le_tar = LabelBinarizer()
        le_tar.fit(y_tar)

    bs = np.min([self.batch_size, X_shuf.shape[0]])
    batches_X = np.array_split(X_shuf, m // bs)
    batches_y = np.array_split(y_shuf, m // bs)
    tasks_1 = [1 for i in range(len(batches_y))]
    batches_X_tar = np.array([])
    batches_y_tar = np.array([])
    if len(y_tar) > 0:
        batches_X_tar = np.array_split(X_tar_shuf, max(1, m_tar // self.batch_size))
        batches_y_tar = np.array_split(y_tar_shuf, max(1, m_tar // self.batch_size))
    tasks_2 = [2 for i in range(len(batches_y_tar))]

    # TODO: hstack source and target batches in an alternating way
    all_batches_X = list(itertoolz.interleave([batches_X, batches_X_tar]))[::-1]
    all_batches_y = list(itertoolz.interleave([batches_y, batches_y_tar]))[::-1]
    all_tasks = list(itertoolz.interleave([tasks_1, tasks_2]))[::-1]

    def get_batch(step):
        idx = step % len(all_tasks)
        task = all_tasks[idx]
        X_new = all_batches_X[idx].T
        y_new = all_batches_y[idx]
        y_new = le.transform(y_new)
        y_new = y_new.T
        return X_new, y_new, task

    def batch_normalize(W):
        mu = np.mean(W, axis=0)
        var = np.var(W, axis=0)
        W = (W - mu) / np.sqrt(var + 1)
        return W

    def bhattacharyya(a, b):
        """Bhattacharyya distance between distributions (lists of floats)."""
        if not len(a) == len(b):
            raise ValueError("a and b must be of the same size")
        return -np.log(sum(np.sqrt(u * w) for u, w in zip(a, b)))

    def model_loss(params, step):
        W, b1, W2_1, b2_1, W2_2, b2_2 = params
        W_norm = W  # batch_normalize(W)
        # W2_1 = batch_normalize(W2_1)
        # W2_2 = batch_normalize(W2_2)
        X, y, task = get_batch(step)

        prod = W_norm @ X + b1
        nonlin = relu(prod)
        if use_dropout:
            nonlin *= np.random.binomial(
                [np.ones((len(prod), nonlin.shape[1]))],
                1 - self.dropout_percent)[0] * (1.0 / (1 - self.dropout_percent))
        if task == 1:
            out = (W2_1 @ nonlin) + b2_1
        else:
            out = (W2_2 @ nonlin) + b2_2
        prob = np.exp(out / self.T) / np.sum(np.exp(out / self.T))
        L = loss(y, prob)

        # task relatedness regularizer
        if regularize:
            a_bar = (flatten(self.task_1.W)[0] + flatten(self.task_2.W)[0]) / 2
            a_bar_norm = np.linalg.norm(a_bar, 2)
            source_norm = np.linalg.norm(flatten(self.task_1.W)[0] - a_bar, 2)
            tar_norm = np.linalg.norm(flatten(self.task_2.W)[0] - a_bar, 2)
            reg = a_bar_norm + 0.1 * (source_norm + tar_norm) / 2
        else:
            reg = 0

        # Bhattacharyya penalty between source and target hidden activations.
        P_s_prime = relu((W_norm @ X_shuf.T) + b1).T.mean(axis=0)
        P_t_prime = relu((W_norm @ X_tar_shuf.T) + b1).T.mean(axis=0)
        P_s = P_s_prime / np.sum(P_s_prime)
        P_t = P_t_prime / np.sum(P_t_prime)
        bt_distance = -np.log(np.sum(P_s * P_t))
        return L + 0.3 * bt_distance  # + 0.3 * reg

    params = [self.W1, self.b1, self.task_1.W, self.task_1.b,
              self.task_2.W, self.task_2.b]
    model_loss_grad = grad(model_loss)

    max_epoch = 500

    def callback(params, step, g):
        if step % max_epoch == 0:
            print("Iteration {0:3d} objective {1:1.2e}; task {2}".format(
                step // max_epoch + 1, model_loss(params, step), '-'))

    self.W1, self.b1, self.task_1.W, self.task_1.b, self.task_2.W, self.task_2.b = adam(
        model_loss_grad, params, step_size=self.learning_rate,
        num_iters=30 * max_epoch, callback=callback)
    return self
output_size = 2
activation = [Rectifier().apply for _ in num_filter]
mlp_activation = [Rectifier().apply for _ in mlp_hiddens] + [Softmax().apply]

# Create the symbolic variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Get the parameters
conv_parameters = zip(activation, filter_size, num_filter)

# Create the convolutional layers
conv_layers = list(interleave([
    (ConvolutionalActivation(filter_size=filter_size,
                             num_filters=num_filter,
                             activation=activation,
                             name='conv_{}'.format(i))
     for i, (activation, filter_size, num_filter) in enumerate(conv_parameters)),
    (MaxPooling(size, name='pool_{}'.format(i))
     for i, size in enumerate(pooling_sizes))]))

# Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                      image_size=image_shape,
                                      weights_init=Uniform(width=0.2),
                                      biases_init=Constant(0.))
# Initialize the convnet
conv_sequence.initialize()

# Add the MLP
top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
out = Flattener().apply(conv_sequence.apply(x))
mlp = MLP(mlp_activation, top_mlp_dims,
          weights_init=Uniform(0, 0.2), biases_init=Constant(0.))
# Initialize the MLP
mlp.initialize()
# Convolutional layers
filter_sizes = [5, 5]
num_filters = [50, 256]
pooling_sizes = [2, 2]
conv_step = (1, 1)
border_mode = 'full'
conv_activations = [Logistic() for _ in num_filters]
conv_layers = list(interleave([
    (Convolutional(filter_size=filter_size,
                   num_filters=num_filter,
                   step=conv_step,
                   border_mode=border_mode,
                   name='conv_{}'.format(i))
     for i, (filter_size, num_filter)
     in enumerate(zip(filter_sizes, num_filters))),
    conv_activations,
    (MaxPooling(pooling_sizes, name='pool_{}'.format(i))
     for i, size in enumerate(pooling_sizes))
]))
convnet = ConvolutionalSequence(conv_layers, num_channels=3,
                                image_size=(32, 32),
                                weights_init=Uniform(0, 0.2),
                                biases_init=Constant(0.))
convnet.push_initialization_config()
convnet.initialize()
num_filter = [20, 40, 60, 80, 100, 120, 1024, 2]
pooling_sizes = [(2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (1, 1)]
output_size = 2

# Create the symbolic variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Get the parameters (materialized so several generators can iterate them)
conv_parameters = list(zip(filter_size, num_filter))

# Create the convolutional layers
conv_layers = list(interleave([
    (Convolutional(filter_size=filter_size,
                   num_filters=num_filter,
                   name='conv_{}'.format(i))
     for i, (filter_size, num_filter) in enumerate(conv_parameters)),
    (BatchNormalization(name='batch_{}'.format(i))
     for i, _ in enumerate(conv_parameters)),
    (Rectifier() for i, (f_size, num_f) in enumerate(conv_parameters)),
    (MaxPooling(size, name='pool_{}'.format(i))
     for i, size in enumerate(pooling_sizes))]))

# Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                      image_size=image_shape, use_bias=False)

# Add the softmax function
out = Flattener().apply(conv_sequence.apply(x))
predict = NDimensionalSoftmax().apply(out)

# Get the test stream
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
from fuel.schemes import ShuffledScheme, SequentialExampleScheme
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, \
    MaximumImageDimensions, Random2DRotation
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Conv net model
conv_activation = [Rectifier() for _ in num_filters]
mlp_activation = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
# conv_parameters = zip(filter_sizes, num_filters)
sbn = SpatialBatchNormalization()
conv_parameters = zip(filter_sizes, num_filters)
conv_layers = list(interleave([
    (Convolutional(filter_size=filter_size,
                   num_filters=num_filter,
                   step=conv_step,
                   border_mode=border_mode,
                   name='conv_{}'.format(i))
     for i, (filter_size, num_filter) in enumerate(conv_parameters)),
    conv_activation,
    (MaxPooling(size, name='pool_{}'.format(i))
     for i, size in enumerate(pooling_sizes))]))
# conv_layers = [sbn] + conv_layers
conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                      image_size=image_size,
                                      weights_init=Uniform(width=0.2),
                                      biases_init=Constant(0.))
conv_sequence.initialize()
out = Flattener().apply(conv_sequence.apply(x))
top_mlp_dims = [numpy.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
top_mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=Uniform(0.1), biases_init=Constant(0.))
top_mlp.initialize()
predict = top_mlp.apply(out)
def main():
    parser = argparse.ArgumentParser(
        description='Splits available (multilingual) sentences in train-dev-test')
    parser.add_argument('--input_dir', type=str, default='samples_man_annotated',
                        help='location of the annotated Eltec files')
    parser.add_argument('--split_dir', type=str, default='multilingual_splits',
                        help='location of the train-dev-test files')
    parser.add_argument('--train_prop', type=float, default=.8,
                        help='Proportion of training items (dev and test are equal-size)')
    parser.add_argument('--seed', type=int, default=43438,
                        help='Random seed')
    parser.add_argument('--paragraphs', action='store_true', help='')
    args = parser.parse_args()
    print(args)

    try:
        shutil.rmtree(args.split_dir)
    except FileNotFoundError:
        pass
    os.mkdir(args.split_dir)

    sentence_iters = []
    paragraphs = []
    if args.paragraphs:
        for filename in glob.iglob(f'{args.input_dir}/**/*.xml', recursive=True):
            print(filename)
            paragraphs += annotated2paragraphs(filename)
        train, dev, test = split_paragraphs(paragraphs,
                                            train_ratio=args.train_prop,
                                            random_state=args.seed)
        train = get_sentences(train)
        dev = get_sentences(dev)
        test = get_sentences(test)
    else:
        for filename in glob.iglob(f'{args.input_dir}/**/*.xml', recursive=True):
            print(filename)
            sentences = annotated2sentences(filename)
            sentence_iters.append(sentences)
        # Mix the sentences from all files in a round-robin fashion.
        mixed_sentences = interleave(sentence_iters)

        formatted_sentences = []
        for sentence in mixed_sentences:
            s = '\n'.join([' '.join(token) for token in sentence]) + '\n'
            formatted_sentences.append(s)
        # for sentence in formatted_sentences:
        #     print(sentence)
        train, rest = split(formatted_sentences, train_size=args.train_prop,
                            shuffle=True, random_state=args.seed)
        dev, test = split(rest, train_size=0.5, shuffle=True,
                          random_state=args.seed)

    print(f'# train items: {len(train)}')
    print(f'# dev items: {len(dev)}')
    print(f'# test items: {len(test)}')

    for items in ('train', 'dev', 'test'):
        with open(os.sep.join((args.split_dir, items + '.txt')), 'w',
                  encoding='utf8') as f:
            f.write('\n'.join(eval(items)))
activation = [Rectifier().apply for _ in num_filter]
mlp_activation = [Rectifier().apply for _ in mlp_hiddens] + [Softmax().apply]

# Create the symbolic variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Get the parameters
conv_parameters = zip(activation, filter_size, num_filter)

# Create the convolutional layers
conv_layers = list(interleave([
    (ConvolutionalActivation(filter_size=filter_size,
                             num_filters=num_filter,
                             activation=activation,
                             name='conv_{}'.format(i))
     for i, (activation, filter_size, num_filter) in enumerate(conv_parameters)),
    (MaxPooling(size, name='pool_{}'.format(i))
     for i, size in enumerate(pooling_sizes))]))

# Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                      image_size=image_shape,
                                      weights_init=Uniform(width=0.2),
                                      biases_init=Constant(0.))
# Initialize the convnet
conv_sequence.initialize()

# Add the MLP
top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
# Create the symbolic variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Get the parameters (materialized so several generators can iterate them)
conv_parameters = list(zip(filter_size, num_filter))

# Create the convolutional layers
conv_layers = list(interleave([
    (Convolutional(filter_size=filter_size,
                   num_filters=num_filter,
                   name='conv_{}'.format(i))
     for i, (filter_size, num_filter) in enumerate(conv_parameters)),
    (BatchNormalization(name='batch_{}'.format(i))
     for i, _ in enumerate(conv_parameters)),
    (Rectifier() for i, (f_size, num_f) in enumerate(conv_parameters)),
    (MaxPooling(size, name='pool_{}'.format(i))
     for i, size in enumerate(pooling_sizes))
]))

# Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                      image_size=image_shape, use_bias=False)

# Add the softmax function
out = Flattener().apply(conv_sequence.apply(x))
predict = NDimensionalSoftmax().apply(out)
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    # Define the parameters
    num_epochs, num_batches, num_channels, image_shape, filter_size, \
        num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, \
        activation, mlp_activation = (
            config['num_epochs'], config['num_batches'], config['num_channels'],
            config['image_shape'], config['filter_size'], config['num_filter'],
            config['pooling_sizes'], config['mlp_hiddens'], config['output_size'],
            config['batch_size'], config['activation'], config['mlp_activation'])
    # print(num_epochs, num_channels, image_shape, filter_size, num_filter,
    #       pooling_sizes, mlp_hiddens, output_size, batch_size, activation,
    #       mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025
    print("Building model")

    # Create the symbolic variables
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    # Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    # Create the convolutional layers
    conv_layers = list(interleave([
        (Convolutional(filter_size=filter_size,
                       num_filters=num_filter,
                       name='conv_{}'.format(i))
         for i, (filter_size, num_filter) in enumerate(conv_parameters)),
        (activation),
        (MaxPooling(size, name='pool_{}'.format(i))
         for i, size in enumerate(pooling_sizes))]))
    # (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

    # Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    # Initialize the convnet
    conv_sequence.initialize()

    # Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=Uniform(0, 0.2), biases_init=Constant(0.))
    # Initialize the MLP
    mlp.initialize()

    # Get the output
    predict = mlp.apply(out)
    cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)
    # Little trick to plot the error rate in two different plots (we can't use
    # the same data twice in the plot, for an unknown reason).
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum()
    #                             for i, W in enumerate(weights)])  # Gradually increase penalty per layer
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum()
                        for i, W in enumerate(weights + biases)])
    # l2_penalty_bias = T.sum([lambda_l2 * (B ** 2).sum() for B in biases])
    # l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum() for z in weights + biases])
    # l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum()
    #                             for i, W in enumerate(weights)])  # Gradually increase penalty per layer
    # l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
    # l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    # algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg, parameters=cg.parameters, step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size, image_shape, test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(['after_epoch'],
                             predicate=OnLogRecord('valid_error_rate_best_so_far'))
    # Adding a live plot with the bokeh server
    plot = Plot(label,
                channels=[['train_error_rate', 'valid_error_rate'],
                          ['valid_cost', 'valid_error_rate2'],
                          # ['train_costreg', 'train_grad_norm']],
                          ['train_costreg', 'train_total_gradient_norm',
                           'train_l2_penalty', 'train_l1_penalty']],
                server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream, prefix="valid"),
        TrainingDataMonitoring([costreg, error_rate, error_rate2,
                                grad_norm, l2_penalty, l1_penalty],
                               prefix="train", after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),  # Keep best
        checkpoint,  # Save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4),  # Early stopping
    ]

    model = Model(cost)
    main_loop = MainLoop(algorithm, data_stream=train_stream,
                         model=model, extensions=extensions)
    main_loop.run()
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    # Define the parameters
    num_epochs, num_batches, num_channels, image_shape, filter_size, \
        num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, \
        activation, mlp_activation = (
            config['num_epochs'], config['num_batches'], config['num_channels'],
            config['image_shape'], config['filter_size'], config['num_filter'],
            config['pooling_sizes'], config['mlp_hiddens'], config['output_size'],
            config['batch_size'], config['activation'], config['mlp_activation'])
    # print(num_epochs, num_channels, image_shape, filter_size, num_filter,
    #       pooling_sizes, mlp_hiddens, output_size, batch_size, activation,
    #       mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025
    print("Building model")

    # Create the symbolic variables
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    # Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    # Create the convolutional layers
    conv_layers = list(interleave([
        (Convolutional(filter_size=filter_size,
                       num_filters=num_filter,
                       name='conv_{}'.format(i))
         for i, (filter_size, num_filter) in enumerate(conv_parameters)),
        (activation),
        (MaxPooling(size, name='pool_{}'.format(i))
         for i, size in enumerate(pooling_sizes))]))
    # (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

    # Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    # Initialize the convnet
    conv_sequence.initialize()

    # Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=Uniform(0, 0.2), biases_init=Constant(0.))
    # Initialize the MLP
    mlp.initialize()

    # Get the output
    predict = mlp.apply(out)
    cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)
    # Little trick to plot the error rate in two different plots (we can't use
    # the same data twice in the plot, for an unknown reason).
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum()
    #                             for i, W in enumerate(weights)])  # Gradually increase penalty per layer
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum()
                        for i, W in enumerate(weights + biases)])
    # l2_penalty_bias = T.sum([lambda_l2 * (B ** 2).sum() for B in biases])
    # l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum() for z in weights + biases])
    # l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum()
    #                             for i, W in enumerate(weights)])  # Gradually increase penalty per layer
    # l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
    # l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    # algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg, parameters=cg.parameters, step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size, image_shape, test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(['after_epoch'],
                             predicate=OnLogRecord('valid_error_rate_best_so_far'))
    # Adding a live plot with the bokeh server
    plot = Plot(label,
                channels=[['train_error_rate', 'valid_error_rate'],
                          ['valid_cost', 'valid_error_rate2'],
                          # ['train_costreg', 'train_grad_norm']],
                          ['train_costreg', 'train_total_gradient_norm',
                           'train_l2_penalty', 'train_l1_penalty']],
                server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream, prefix="valid"),
        TrainingDataMonitoring([costreg, error_rate, error_rate2,
                                grad_norm, l2_penalty, l1_penalty],
                               prefix="train", after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),  # Keep best
        checkpoint,  # Save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4),  # Early stopping
    ]

    model = Model(cost)
    main_loop = MainLoop(algorithm, data_stream=train_stream,
                         model=model, extensions=extensions)
    main_loop.run()