import time

import numpy as np
import theano
import theano.tensor as T
import lasagne


def one_hot(x, m=None):
    """One-hot representation of integer vector.

    Given a vector of integers from 0 to m-1, returns a matrix
    with a one-hot representation, where each row corresponds
    to an element of x.

    Parameters
    ----------
    x : integer vector
        The integer vector to convert to a one-hot representation.
    m : int, optional
        The number of different columns for the one-hot representation. This
        needs to be strictly greater than the maximum value of `x`. Defaults
        to ``max(x) + 1``.

    Returns
    -------
    Theano tensor variable
        A Theano tensor variable of shape (``n``, `m`), where ``n`` is the
        length of `x`, with the one-hot representation of `x`.

    Notes
    -----
    If your integer vector represents target class memberships, and you wish
    to compute the cross-entropy between predictions and the target class
    memberships, then there is no need to use this function, since the
    function :func:`lasagne.objectives.categorical_crossentropy()` can
    compute the cross-entropy from the integer vector directly.
    """
    if m is None:
        m = T.cast(T.max(x) + 1, 'int32')
    return T.eye(m)[T.cast(x, 'int32')]
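# Hypothetical usage sketch (not part of the original module): one_hot
# returns a symbolic expression, so it has to be evaluated or compiled.
labels = T.ivector('labels')
onehot_expr = one_hot(labels)
print(onehot_expr.eval({labels: np.array([0, 2, 1], dtype='int32')}))
# expected: a 3x3 matrix with ones at columns 0, 2 and 1, respectively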
def _linspace(start, stop, num):
    # Theano linspace. Behaves similarly to np.linspace
    start = T.cast(start, theano.config.floatX)
    stop = T.cast(stop, theano.config.floatX)
    num = T.cast(num, theano.config.floatX)
    step = (stop - start) / (num - 1)
    return T.arange(num, dtype=theano.config.floatX) * step + start
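# Hypothetical sanity check (not part of the original module): the symbolic
# helper should match its NumPy counterpart for the same arguments.
vals = theano.function([], _linspace(-1.0, 1.0, 5))()
print(vals)                        # expected: [-1.  -0.5  0.   0.5  1. ]
print(np.linspace(-1.0, 1.0, 5))   # [-1.  -0.5  0.   0.5  1. ]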
def _transform(theta, input, downsample_factor):
    num_batch, num_channels, height, width = input.shape
    theta = T.reshape(theta, (-1, 2, 3))

    # grid of (x_t, y_t, 1), eq (1) in ref [1]
    out_height = T.cast(height / downsample_factor[0], 'int64')
    out_width = T.cast(width / downsample_factor[1], 'int64')
    grid = _meshgrid(out_height, out_width)

    # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
    T_g = T.dot(theta, grid)
    x_s = T_g[:, 0]
    y_s = T_g[:, 1]
    x_s_flat = x_s.flatten()
    y_s_flat = y_s.flatten()

    # dimshuffle input to (bs, height, width, channels)
    input_dim = input.dimshuffle(0, 2, 3, 1)
    input_transformed = _interpolate(
        input_dim, x_s_flat, y_s_flat,
        out_height, out_width)

    output = T.reshape(
        input_transformed, (num_batch, out_height, out_width, num_channels))
    output = output.dimshuffle(0, 3, 1, 2)  # dimshuffle to conv format
    return output
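# _transform calls a _meshgrid helper that is not included in this section.
# The sketch below is an assumption about that missing helper: it builds the
# (x_t, y_t, 1) sampling grid of eq (1) from the _linspace helper above.
def _meshgrid(height, width):
    # Assumed grid generator: a regular grid of normalized coordinates in
    # [-1, 1], stacked as rows (x_t, y_t, 1) into a (3, height*width) matrix.
    x_t = T.dot(T.ones((height, 1)),
                _linspace(-1.0, 1.0, width).dimshuffle('x', 0))
    y_t = T.dot(_linspace(-1.0, 1.0, height).dimshuffle(0, 'x'),
                T.ones((1, width)))
    x_t_flat = x_t.reshape((1, -1))
    y_t_flat = y_t.reshape((1, -1))
    ones = T.ones_like(x_t_flat)
    grid = T.concatenate([x_t_flat, y_t_flat, ones], axis=0)
    return grid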
def _interpolate(im, x, y, out_height, out_width):
    # *_f are floats
    num_batch, height, width, channels = im.shape
    height_f = T.cast(height, theano.config.floatX)
    width_f = T.cast(width, theano.config.floatX)

    # clip coordinates to [-1, 1]
    x = T.clip(x, -1, 1)
    y = T.clip(y, -1, 1)

    # scale coordinates from [-1, 1] to [0, width/height - 1]
    x = (x + 1) / 2 * (width_f - 1)
    y = (y + 1) / 2 * (height_f - 1)

    # obtain indices of the 2x2 pixel neighborhood surrounding the
    # coordinates; we need those in floatX for interpolation and in int64
    # for indexing. for indexing, we need to take care they do not extend
    # past the image.
    x0_f = T.floor(x)
    y0_f = T.floor(y)
    x1_f = x0_f + 1
    y1_f = y0_f + 1
    x0 = T.cast(x0_f, 'int64')
    y0 = T.cast(y0_f, 'int64')
    x1 = T.cast(T.minimum(x1_f, width_f - 1), 'int64')
    y1 = T.cast(T.minimum(y1_f, height_f - 1), 'int64')

    # The input is [num_batch, height, width, channels]. We do the lookup in
    # the flattened input, i.e [num_batch*height*width, channels]. We need
    # to offset all indices to match the flat version
    dim2 = width
    dim1 = width * height
    base = T.repeat(
        T.arange(num_batch, dtype='int64') * dim1, out_height * out_width)
    base_y0 = base + y0 * dim2
    base_y1 = base + y1 * dim2
    idx_a = base_y0 + x0
    idx_b = base_y1 + x0
    idx_c = base_y0 + x1
    idx_d = base_y1 + x1

    # use indices to lookup pixels for all samples
    im_flat = im.reshape((-1, channels))
    Ia = im_flat[idx_a]
    Ib = im_flat[idx_b]
    Ic = im_flat[idx_c]
    Id = im_flat[idx_d]

    # calculate interpolated values
    wa = ((x1_f - x) * (y1_f - y)).dimshuffle(0, 'x')
    wb = ((x1_f - x) * (y - y0_f)).dimshuffle(0, 'x')
    wc = ((x - x0_f) * (y1_f - y)).dimshuffle(0, 'x')
    wd = ((x - x0_f) * (y - y0_f)).dimshuffle(0, 'x')
    output = T.sum([wa*Ia, wb*Ib, wc*Ic, wd*Id], axis=0)
    return output
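# Hypothetical smoke test (not part of the original module): with an identity
# affine transform and no downsampling, the sampler should reproduce the
# input up to floating-point error. Relies on the _meshgrid sketch above.
theta_var = T.matrix('theta')
input_images = T.tensor4('input_images')
transform_fn = theano.function(
    [theta_var, input_images],
    _transform(theta_var, input_images, downsample_factor=(1, 1)))

images = np.random.rand(2, 3, 16, 16).astype(theano.config.floatX)
identity_theta = np.tile(
    np.array([1, 0, 0, 0, 1, 0], dtype=theano.config.floatX), (2, 1))
out = transform_fn(identity_theta, images)
print(out.shape)  # expected: (2, 3, 16, 16)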
def main(model='mlp', num_epochs=500):
    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var)
    elif model.startswith('custom_mlp:'):
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = T.mean(loss)
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = T.mean(test_loss)
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
print("Starting training...") # We iterate over epochs: for epoch in range(num_epochs): # In each epoch, we do a full pass over the training data: train_err = 0 train_batches = 0 start_time = time.time() for batch in iterate_minibatches(X_train, y_train, 128, shuffle=True): inputs, targets = batch train_err += train_fn(inputs, targets) train_batches += 1 # And a full pass over the validation data: val_err = 0 val_acc = 0 val_batches = 0 for batch in iterate_minibatches(X_val, y_val, 128, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) val_err += err val_acc += acc val_batches += 1 # Then we print the results for this epoch: print("Epoch {} of {} took {:.3f}s".format( epoch + 1, num_epochs, time.time() - start_time)) print(" training loss:\t\t{:.6f}".format(train_err / train_batches)) print(" validation loss:\t\t{:.6f}".format(val_err / val_batches)) print(" validation accuracy:\t\t{:.2f} %".format( val_acc / val_batches * 100)) # After training, we compute and print the test error: test_err = 0 test_acc = 0 test_batches = 0 for batch in iterate_minibatches(X_test, y_test, 128, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) test_err += err test_acc += acc test_batches += 1 print("Final results:") print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches)) print(" test accuracy:\t\t{:.2f} %".format( test_acc / test_batches * 100))