def setup_lr(optimizer, full_log, opt):
    # annealing learning rate
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, verbose=True,
        factor=opt.lr_reduce_factor,
        min_lr=opt.lr_min_value,
        threshold=opt.lr_quantity_epsilon,
        threshold_mode='rel',
        mode=opt.lr_quantity_mode,
        patience=ceildiv(opt.lr_patience, opt.eval_iter),
        cooldown=ceildiv(opt.lr_cooldown, opt.eval_iter))

    # create a function and a closure
    averaging_buffer_max_length = ceildiv(opt.lr_quantity_smoothness, opt.eval_iter)
    if averaging_buffer_max_length <= 1:
        averaging_buffer_max_length = 1
    averaging_buffer = []

    def anneal_lr_func(anneal_now=True):
        value_to_monitor = full_log[opt.lr_quantity_to_monitor][-1]
        averaging_buffer.append(value_to_monitor)
        if len(averaging_buffer) > averaging_buffer_max_length:
            averaging_buffer.pop(0)
        averaged_value = sum(averaging_buffer) / float(len(averaging_buffer))
        counter = len(full_log[opt.lr_quantity_to_monitor])
        if opt.anneal_learning_rate and anneal_now:
            lr_scheduler.step(averaged_value, counter)
        return get_learning_rate(optimizer)

    return lr_scheduler, anneal_lr_func
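# Neither `ceildiv` nor `get_learning_rate` is defined in this snippet; a
# minimal sketch of what they are assumed to look like for the PyTorch code
# above (hypothetical helpers, not necessarily the project's own):

def ceildiv(a, b):
    # Integer division that rounds up, e.g. ceildiv(7, 2) == 4.
    return -(-a // b)


def get_learning_rate(optimizer):
    # Current learning rate(s) of the optimizer's parameter groups.
    return [group['lr'] for group in optimizer.param_groups]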
def brute_force_oracle(block_size, prefix_size, encryption_oracle):
    decrypted = b''
    # Pad the attacker-controlled input so the unknown prefix fills whole
    # blocks; those blocks are then skipped via `prefix_offset`.
    prefix_pad = bytes([0] * (-prefix_size % block_size))
    prefix_offset = utils.ceildiv(prefix_size, block_size) * block_size
    while True:
        # Number of ciphertext bytes to compare: always one block beyond the
        # bytes recovered so far, so the next unknown byte lands at the end.
        decrypted_size = utils.ceildiv(len(decrypted) + 1, block_size) * block_size
        prefix = prefix_pad + bytes([0] * (decrypted_size - len(decrypted) - 1))

        # Build a dictionary mapping ciphertext block(s) -> candidate last byte
        lookup = {}
        for b in range(256):
            p = prefix + decrypted + bytes([b])
            enc_p = encryption_oracle(p)[prefix_offset:prefix_offset + decrypted_size]
            lookup[enc_p] = b

        # Perform oracle encryption
        enc = encryption_oracle(prefix)[prefix_offset:prefix_offset + decrypted_size]

        # Extract the next byte by dictionary attack
        if enc in lookup:
            decrypted += bytes([lookup[enc]])
        else:
            break
    return decrypted
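# A usage sketch of the byte-at-a-time attack above against a toy, hash-based
# "ECB" oracle (not a real cipher; the prefix, secret and helper below are
# made up for illustration, and utils.ceildiv is assumed available):

import hashlib

_PREFIX = b'salt!'            # unknown fixed prefix, 5 bytes
_SECRET = b'attack at dawn'   # suffix the attack recovers

def toy_ecb_oracle(plaintext, block_size=16):
    data = _PREFIX + plaintext + _SECRET
    data += bytes(-len(data) % block_size)   # zero-pad to a block boundary
    out = b''
    for i in range(0, len(data), block_size):
        block = data[i:i + block_size]
        # Deterministic per-block "encryption": same block -> same ciphertext,
        # which is the only property the dictionary attack relies on.
        out += hashlib.sha256(b'key' + block).digest()[:block_size]
    return out

# brute_force_oracle(16, len(_PREFIX), toy_ecb_oracle) should return _SECRET
# followed by some of the zero padding.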
def get_output_shape_for(self, input_shape):
    pheight, pwidth = self.patch_size
    npatchesH = ceildiv(input_shape[2], pheight)
    npatchesW = ceildiv(input_shape[3], pwidth)

    if self.stack_sublayers:
        dim = 2 * self.n_hidden
    else:
        dim = 4 * self.n_hidden

    return input_shape[0], dim, npatchesH, npatchesW
def __iter__(self):
    ''' Produce batches according to the given lengths '''
    buckets = {}
    num_batches = 0

    sequence_lengths = list(enumerate(self.sequence_lengths))
    if self.shuffle:
        np.random.shuffle(sequence_lengths)

    for idx, lengths in sequence_lengths:
        bucket_idx = ceildiv(max(lengths), self.granularity)
        bucket = buckets.get(bucket_idx, None)
        if not bucket:
            bucket = TokenBucket(self.max_lengths)

        batch = bucket.try_add(idx, sum(lengths))
        if batch:
            # Bucket was full so yield a batch
            num_batches += 1
            yield batch

            # Add to the bucket now that it's been emptied
            bucket.try_add(idx, sum(lengths))

        buckets[bucket_idx] = bucket

    # Go through all buckets to see if any can yield a batch
    for bucket in buckets.values():
        batch = bucket.get_batch()
        if all(batch):
            # Bucket had a non-empty batch left
            num_batches += 1
            yield batch

    # Update the batch estimate
    self.num_batches = num_batches
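# `TokenBucket` is not defined in this snippet. A rough sketch of the
# interface the sampler above relies on (try_add hands back a finished batch
# once the token budget would be exceeded, get_batch returns the leftovers);
# the real class presumably also splits batches across devices, which this
# hypothetical version ignores:

class TokenBucket(object):
    def __init__(self, max_lengths):
        self.budget = sum(max_lengths)   # total tokens allowed per batch
        self.indices = []
        self.tokens = 0

    def try_add(self, idx, num_tokens):
        if self.tokens + num_tokens > self.budget and self.indices:
            # Bucket is full: return the finished batch and start over.
            batch, self.indices, self.tokens = self.indices, [], 0
            return batch
        self.indices.append(idx)
        self.tokens += num_tokens
        return None

    def get_batch(self):
        # Whatever is left once iteration over the dataset ends.
        return self.indices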
def random_coeff_order_combinations(number):
    "Generate distinct random combinations of unitary coefficients and polynomial orders."
    coeffs_set = {-1, 1}
    max_order = ceildiv(number, len(coeffs_set)) + 3
    # avoid order 0 because it falls on the boundary
    pairings = it.product(coeffs_set, range(1, max_order))
    return random.sample(list(pairings), number)
def __init__(self, max_lengths, sequence_lengths, shuffle=False, granularity=5):
    ''' Initialize the sequence length sampler

    Inputs:
    max_lengths - a list of the desired total sequence lengths for each device
    sequence_lengths - a list containing the lengths for each example in the dataset
    '''
    super(SequenceLengthSampler, self).__init__(sequence_lengths)

    self.shuffle = shuffle
    self.granularity = granularity
    self.max_lengths = max_lengths
    self.sequence_lengths = sequence_lengths

    # Initial estimate of the number of batches
    self.num_batches = ceildiv(np.sum(sequence_lengths), np.sum(max_lengths))
def split(path, prefix=None, num_lines=1000, approx_lines=0):
    ''' Split a file into chunks of num_lines lines each.

    If approx_lines is provided, make sure the suffix length is long enough
    for the resulting number of files.
    '''
    prefix = prefix or f'{path}.chunk.'
    cmd = ['split']
    if approx_lines:
        approx_files = ceildiv(approx_lines, num_lines)
        suffix_len = math.ceil(math.log(approx_files) / math.log(26))
        cmd += ['-a', f'{suffix_len}']
    cmd += ['-l', f'{num_lines}', path, prefix]

    try:
        subprocess.check_call(cmd, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError:
        raise RuntimeError(f'Unable to split {path}')

    return glob.glob(f'{prefix}*')
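# Hypothetical numbers, just to illustrate the suffix-length computation
# above: a ~2.6M-line file split into 1000-line chunks needs 2600 output
# files, which no longer fits split's default two-letter suffixes (26**2 = 676).

import math

approx_lines, num_lines = 2_600_000, 1000
approx_files = -(-approx_lines // num_lines)                   # ceildiv -> 2600
suffix_len = math.ceil(math.log(approx_files) / math.log(26))  # -> 3
# resulting command: split -a 3 -l 1000 corpus.txt corpus.txt.chunk.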
async def retrieve_my_msgs(ctx: Context, as_images: str = ""):
    my_mrs = session.query(MessageRequest).filter_by(user_id=ctx.author.id)
    if my_mrs.count() == 0:
        await ctx.send(constants.NO_REQUESTS)

    for req in my_mrs:
        mc = req.responses.count()
        if mc == 0:
            await ctx.send(render_template("no_replies.j2", req=req))
        elif as_images == "images":
            files = [get_image(r.message) for r in req.responses]
            # Send at most 10 attachments per message, paginating the rest
            page_count = ceildiv(len(files), 10)
            for i in range(page_count):
                await ctx.send(render_template("read_replies_img.j2", req=req,
                                               i=i, pc=page_count),
                               files=files[i * 10:i * 10 + 10])
        else:
            await ctx.channel.send(
                render_template("read_replies.j2", req=req, mc=mc))
            if mc > DELETE_THRESHOLD:
                req.delete(session)
# Perform computation at double precision
for l in lbann.traverse_layer_graph(input_):
    l.datatype = lbann.DataType.DOUBLE
    for w in l.weights:
        w.datatype = lbann.DataType.DOUBLE

# ----------------------------------
# Run LBANN
# ----------------------------------

# Create optimizer
opt = lbann.SGD(learn_rate=args.learning_rate)

# Create LBANN objects
iterations_per_epoch = utils.ceildiv(epoch_size, args.mini_batch_size)
num_epochs = utils.ceildiv(args.num_iterations, iterations_per_epoch)
trainer = lbann.Trainer(
    mini_batch_size=args.mini_batch_size,
    num_parallel_readers=0,
)
callbacks = [
    lbann.CallbackPrint(),
    lbann.CallbackTimer(),
    lbann.CallbackDumpWeights(
        directory='embeddings',
        epoch_interval=num_epochs,
        format='distributed_binary',
    ),
]
model = lbann.Model(
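# Made-up numbers, only to illustrate the iteration-to-epoch conversion above
# (assuming utils.ceildiv is ordinary ceiling division): with epoch_size =
# 10000 and a mini-batch size of 512, iterations_per_epoch = ceildiv(10000,
# 512) = 20, so a budget of 1000 iterations becomes ceildiv(1000, 20) = 50 epochs.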
def __init__(self,
             l_in,
             n_layers,
             pheight,
             pwidth,
             dim_proj,
             nclasses,
             stack_sublayers,
             # outsampling
             out_upsampling_type,
             out_nfilters,
             out_filters_size,
             out_filters_stride,
             out_W_init=lasagne.init.GlorotUniform(),
             out_b_init=lasagne.init.Constant(0.),
             out_nonlinearity=lasagne.nonlinearities.identity,
             hypotetical_fm_size=np.array((100.0, 100.0)),
             # input ConvLayers
             in_nfilters=None,
             in_filters_size=((3, 3), (3, 3)),
             in_filters_stride=((1, 1), (1, 1)),
             in_W_init=lasagne.init.GlorotUniform(),
             in_b_init=lasagne.init.Constant(0.),
             in_nonlinearity=lasagne.nonlinearities.rectify,
             in_vgg_layer='conv3_3',
             # common recurrent layer params
             RecurrentNet=lasagne.layers.GRULayer,
             nonlinearity=lasagne.nonlinearities.rectify,
             hid_init=lasagne.init.Constant(0.),
             grad_clipping=0,
             precompute_input=True,
             mask_input=None,
             # 1x1 Conv layer for dimensional reduction
             conv_dim_red=False,
             conv_dim_red_nonlinearity=lasagne.nonlinearities.identity,
             # GRU specific params
             gru_resetgate=lasagne.layers.Gate(W_cell=None),
             gru_updategate=lasagne.layers.Gate(W_cell=None),
             gru_hidden_update=lasagne.layers.Gate(
                 W_cell=None,
                 nonlinearity=lasagne.nonlinearities.tanh),
             gru_hid_init=lasagne.init.Constant(0.),
             # LSTM specific params
             lstm_ingate=lasagne.layers.Gate(),
             lstm_forgetgate=lasagne.layers.Gate(),
             lstm_cell=lasagne.layers.Gate(
                 W_cell=None,
                 nonlinearity=lasagne.nonlinearities.tanh),
             lstm_outgate=lasagne.layers.Gate(),
             # RNN specific params
             rnn_W_in_to_hid=lasagne.init.Uniform(),
             rnn_W_hid_to_hid=lasagne.init.Uniform(),
             rnn_b=lasagne.init.Constant(0.),
             # Special layers
             batch_norm=False,
             name=''):
    """A ReSeg layer

    The ReSeg layer is composed of multiple ReNet layers and an
    upsampling layer.

    Parameters
    ----------
    l_in : lasagne.layers.Layer
        The input layer, in bc01 format
    n_layers : int
        The number of layers
    pheight : tuple
        The height of the patches, for each layer
    pwidth : tuple
        The width of the patches, for each layer
    dim_proj : tuple
        The number of hidden units of each RNN, for each layer
    nclasses : int
        The number of classes of the data
    stack_sublayers : bool
        If True the bidirectional RNNs in the ReNet layers will be
        stacked one over the other. See ReNet for more details.
    out_upsampling_type : string
        The kind of upsampling to be used
    out_nfilters : int
        The number of hidden units of the upsampling layer
    out_filters_size : tuple
        The size of the upsampling filters, if any
    out_filters_stride : tuple
        The stride of the upsampling filters, if any
    out_W_init : Theano shared variable, numpy array or callable
        Initializer for W
    out_b_init : Theano shared variable, numpy array or callable
        Initializer for b
    out_nonlinearity : Theano shared variable, numpy array or callable
        The nonlinearity to be applied after the upsampling
    hypotetical_fm_size : float
        The hypothetical size of the feature map that would be input of
        the layer if the input image of the whole network was of size
        (100, 100)
    RecurrentNet : lasagne.layers.Layer
        A recurrent layer class
    nonlinearity : callable or None
        The nonlinearity that is applied to the output. If None is
        provided, no nonlinearity will be applied.
    hid_init : callable, np.ndarray, theano.shared or lasagne.layers.Layer
        Initializer for initial hidden state
    grad_clipping : float
        If nonzero, the gradient messages are clipped to the given value
        during the backward pass.
    precompute_input : bool
        If True, precompute input_to_hid before iterating through the
        sequence. This can result in a speedup at the expense of an
        increase in memory usage.
    mask_input : lasagne.layers.Layer
        Layer which allows for a sequence mask to be input, for when
        sequences are of variable length. Default None, which means no
        mask will be supplied (i.e. all sequences are of the same length).
    gru_resetgate : lasagne.layers.Gate
        Parameters for the reset gate, if RecurrentNet is GRU
    gru_updategate : lasagne.layers.Gate
        Parameters for the update gate, if RecurrentNet is GRU
    gru_hidden_update : lasagne.layers.Gate
        Parameters for the hidden update, if RecurrentNet is GRU
    gru_hid_init : callable, np.ndarray, theano.shared or lasagne.layers.Layer
        Initializer for initial hidden state, if RecurrentNet is GRU
    lstm_ingate : lasagne.layers.Gate
        Parameters for the input gate, if RecurrentNet is LSTM
    lstm_forgetgate : lasagne.layers.Gate
        Parameters for the forget gate, if RecurrentNet is LSTM
    lstm_cell : lasagne.layers.Gate
        Parameters for the cell computation, if RecurrentNet is LSTM
    lstm_outgate : lasagne.layers.Gate
        Parameters for the output gate, if RecurrentNet is LSTM
    rnn_W_in_to_hid : Theano shared variable, numpy array or callable
        Initializer for input-to-hidden weight matrix, if RecurrentNet is
        RecurrentLayer
    rnn_W_hid_to_hid : Theano shared variable, numpy array or callable
        Initializer for hidden-to-hidden weight matrix, if RecurrentNet is
        RecurrentLayer
    rnn_b : Theano shared variable, numpy array, callable or None
        Initializer for bias vector, if RecurrentNet is RecurrentLayer.
        If None is provided there will be no bias
    batch_norm : bool
        If True, add a batch normalization layer right after each gradient
        upsampling layer
    name : string
        The name of the layer, optional
    """
    super(ReSegLayer, self).__init__(l_in, name)
    self.l_in = l_in
    self.n_layers = n_layers
    self.pheight = pheight
    self.pwidth = pwidth
    self.dim_proj = dim_proj
    self.nclasses = nclasses
    self.stack_sublayers = stack_sublayers
    # upsampling
    self.out_upsampling_type = out_upsampling_type
    self.out_nfilters = out_nfilters
    self.out_filters_size = out_filters_size
    self.out_filters_stride = out_filters_stride
    self.out_W_init = out_W_init
    self.out_b_init = out_b_init
    self.out_nonlinearity = out_nonlinearity
    self.hypotetical_fm_size = hypotetical_fm_size
    # input ConvLayers
    self.in_nfilters = in_nfilters
    self.in_filters_size = in_filters_size
    self.in_filters_stride = in_filters_stride
    self.in_W_init = in_W_init
    self.in_b_init = in_b_init
    self.in_nonlinearity = in_nonlinearity
    self.in_vgg_layer = in_vgg_layer
    # common recurrent layer params
    self.RecurrentNet = RecurrentNet
    self.nonlinearity = nonlinearity
    self.hid_init = hid_init
    self.grad_clipping = grad_clipping
    self.precompute_input = precompute_input
    self.mask_input = mask_input
    # GRU specific params
    self.gru_resetgate = gru_resetgate
    self.gru_updategate = gru_updategate
    self.gru_hidden_update = gru_hidden_update
    self.gru_hid_init = gru_hid_init
    # LSTM specific params
    self.lstm_ingate = lstm_ingate
    self.lstm_forgetgate = lstm_forgetgate
    self.lstm_cell = lstm_cell
    self.lstm_outgate = lstm_outgate
    # RNN specific params
    self.rnn_W_in_to_hid = rnn_W_in_to_hid
    self.rnn_W_hid_to_hid = rnn_W_hid_to_hid
    self.name = name
    self.sublayers = []

    expand_height = expand_width = 1

    # Input ConvLayers
    l_conv = l_in
    if isinstance(in_nfilters, Iterable) and not isinstance(in_nfilters, str):
        for i, (nf, f_size, stride) in enumerate(
                zip(in_nfilters, in_filters_size, in_filters_stride)):
            l_conv = ConvLayer(
                l_conv,
                num_filters=nf,
                filter_size=f_size,
                stride=stride,
                W=in_W_init,
                b=in_b_init,
                pad='valid',
                name=self.name + '_input_conv_layer' + str(i))
            self.sublayers.append(l_conv)
            self.hypotetical_fm_size = (
                (self.hypotetical_fm_size - 1) * stride + f_size)
            # TODO This is right only if stride == filter...
            expand_height *= f_size[0]
            expand_width *= f_size[1]

            # Print shape
            out_shape = get_output_shape(l_conv)
            print('ConvNet: After in-convnet: {}'.format(out_shape))

    # Pretrained vgg16
    elif type(in_nfilters) == str:
        from vgg16 import Vgg16Layer
        l_conv = Vgg16Layer(l_in, self.in_nfilters, False, False)
        self.hypotetical_fm_size /= 8
        expand_height = expand_width = 8
        self.sublayers.append(l_conv)

        # Print shape
        out_shape = get_output_shape(l_conv)
        print('Vgg: After vgg: {}'.format(out_shape))

    # ReNet layers
    l_renet = l_conv
    for lidx in xrange(n_layers):
        l_renet = ReNetLayer(l_renet,
                             patch_size=(pwidth[lidx], pheight[lidx]),
                             n_hidden=dim_proj[lidx],
                             stack_sublayers=stack_sublayers[lidx],
                             RecurrentNet=RecurrentNet,
                             nonlinearity=nonlinearity,
                             hid_init=hid_init,
                             grad_clipping=grad_clipping,
                             precompute_input=precompute_input,
                             mask_input=mask_input,
                             # GRU specific params
                             gru_resetgate=gru_resetgate,
                             gru_updategate=gru_updategate,
                             gru_hidden_update=gru_hidden_update,
                             gru_hid_init=gru_hid_init,
                             # LSTM specific params
                             lstm_ingate=lstm_ingate,
                             lstm_forgetgate=lstm_forgetgate,
                             lstm_cell=lstm_cell,
                             lstm_outgate=lstm_outgate,
                             # RNN specific params
                             rnn_W_in_to_hid=rnn_W_in_to_hid,
                             rnn_W_hid_to_hid=rnn_W_hid_to_hid,
                             rnn_b=rnn_b,
                             batch_norm=batch_norm,
                             name=self.name + '_renet' + str(lidx))
        self.sublayers.append(l_renet)
        self.hypotetical_fm_size /= (pwidth[lidx], pheight[lidx])

        # Print shape
        out_shape = get_output_shape(l_renet)
        if stack_sublayers:
            msg = 'ReNet: After 2 rnns {}x{}@{} and 2 rnns 1x1@{}: {}'
            print(msg.format(pheight[lidx], pwidth[lidx], dim_proj[lidx],
                             dim_proj[lidx], out_shape))
        else:
            print('ReNet: After 4 rnns {}x{}@{}: {}'.format(
                pheight[lidx], pwidth[lidx], dim_proj[lidx], out_shape))

        # 1x1 conv layer : dimensionality reduction layer
        if conv_dim_red:
            l_renet = lasagne.layers.Conv2DLayer(
                l_renet,
                num_filters=dim_proj[lidx],
                filter_size=(1, 1),
                W=lasagne.init.GlorotUniform(),
                b=lasagne.init.Constant(0.),
                pad='valid',
                nonlinearity=conv_dim_red_nonlinearity,
                name=self.name + '_1x1_conv_layer' + str(lidx))

            # Print shape
            out_shape = get_output_shape(l_renet)
            print('Dim reduction: After 1x1 convnet: {}'.format(out_shape))

    # Upsampling
    if out_upsampling_type == 'autograd':
        raise NotImplementedError(
            'This will not work as the dynamic cropping will crop '
            'part of the image.')
        nlayers = len(out_nfilters)
        assert nlayers > 1

        # Compute the upsampling ratio and the corresponding params
        h2 = np.array((100., 100.))
        up_ratio = (h2 / self.hypotetical_fm_size) ** (1. / nlayers)
        h1 = h2 / up_ratio
        h0 = h1 / up_ratio
        stride = to_int(ceildiv(h2 - h1, h1 - h0))
        filter_size = to_int(ceildiv((h1 * (h1 - 1) + h2 - h2 * h0),
                                     (h1 - h0)))

        target_shape = get_output(l_renet).shape[2:]
        l_upsampling = l_renet
        for l in range(nlayers):
            target_shape = target_shape * up_ratio
            l_upsampling = TransposedConv2DLayer(
                l_upsampling,
                num_filters=out_nfilters[l],
                filter_size=filter_size,
                stride=stride,
                W=out_W_init,
                b=out_b_init,
                nonlinearity=out_nonlinearity)
            self.sublayers.append(l_upsampling)
            up_shape = get_output(l_upsampling).shape[2:]

            # Print shape
            out_shape = get_output_shape(l_upsampling)
            print('Transposed autograd: {}x{} (str {}x{}) @ {}:{}'.format(
                filter_size[0], filter_size[1], stride[0], stride[1],
                out_nfilters[l], out_shape))

            # CROP
            # pad in TransposeConv2DLayer cannot be a tensor --> we cannot
            # crop unless we know in advance by how much!
            crop = T.max(T.stack([up_shape - target_shape, T.zeros(2)]),
                         axis=0)
            crop = crop.astype('uint8')  # round down

            l_upsampling = CropLayer(l_upsampling, crop, data_format='bc01')
            self.sublayers.append(l_upsampling)

            # Print shape
            print('Dynamic cropping')

    elif out_upsampling_type == 'grad':
        l_upsampling = l_renet
        for i, (nf, f_size, stride) in enumerate(
                zip(out_nfilters, out_filters_size, out_filters_stride)):
            l_upsampling = TransposedConv2DLayer(
                l_upsampling,
                num_filters=nf,
                filter_size=f_size,
                stride=stride,
                crop=0,
                W=out_W_init,
                b=out_b_init,
                nonlinearity=out_nonlinearity)
            self.sublayers.append(l_upsampling)

            if batch_norm:
                l_upsampling = lasagne.layers.batch_norm(l_upsampling,
                                                         axes='auto')
                self.sublayers.append(l_upsampling)
                print('Batch normalization after Grad layer')

            # Print shape
            out_shape = get_output_shape(l_upsampling)
            print('Transposed conv: {}x{} (str {}x{}) @ {}:{}'.format(
                f_size[0], f_size[1], stride[0], stride[1], nf, out_shape))

    elif out_upsampling_type == 'linear':
        # Go to b01c
        l_upsampling = lasagne.layers.DimshuffleLayer(
            l_renet,
            (0, 2, 3, 1),
            name=self.name + '_grad_undimshuffle')
        self.sublayers.append(l_upsampling)

        expand_height *= np.prod(pheight)
        expand_width *= np.prod(pwidth)
        l_upsampling = LinearUpsamplingLayer(l_upsampling,
                                             expand_height,
                                             expand_width,
                                             nclasses,
                                             batch_norm=batch_norm,
                                             name="linear_upsample_layer")
        self.sublayers.append(l_upsampling)
        print('Linear upsampling')

        if batch_norm:
            l_upsampling = lasagne.layers.batch_norm(l_upsampling,
                                                     axes=(0, 1, 2))
            self.sublayers.append(l_upsampling)
            print('Batch normalization after Linear upsampling layer')

        # Go back to bc01
        l_upsampling = lasagne.layers.DimshuffleLayer(
            l_upsampling,
            (0, 3, 1, 2),
            name=self.name + '_grad_undimshuffle')
        self.sublayers.append(l_upsampling)

    self.l_out = l_upsampling

    # HACK LASAGNE
    # This will set `self.input_layer`, which is needed by Lasagne to find
    # the layers with the get_all_layers() helper function in the
    # case of a layer with sublayers
    if isinstance(self.l_out, tuple):
        self.input_layer = None
    else:
        self.input_layer = self.l_out
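# A small numeric illustration of the 'autograd' upsampling parameters above
# (made-up feature-map size; to_int/ceildiv are assumed to round up
# element-wise on float arrays): with hypotetical_fm_size = (25, 25) and
# nlayers = 2,
#
#   up_ratio    = (100 / 25) ** (1 / 2) = 2   per upsampling layer
#   h1, h0      = 50, 25
#   stride      = ceildiv(100 - 50, 50 - 25)             = 2
#   filter_size = ceildiv(50*49 + 100 - 100*25, 50 - 25)  = 2
#
# i.e. each of the two transposed convolutions doubles the spatial size.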
def fit_generator(self, generator=None, steps_per_epoch=None, epochs=250,
                  verbose=1, callbacks=None, validation_data=None,
                  validation_steps=None, lr=1e-4, batch_size=32,
                  source='path', **kwargs):
    self.freeze_top_layers(self.model, self.freeze_layers_num)
    assert source in {'path', 'tensor'}

    if generator is None:
        datagen_train = ImageDataGenerator(
            preprocessing_function=self.preprocess_fun,
            rotation_range=30.,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True)

        x_train_path = config.get_x_train_path(self.base_model)
        y_train_path = config.y_train_path
        if (source == 'tensor' and os.path.exists(x_train_path)
                and os.path.exists(y_train_path)):
            x_train = utils.load_h5file(x_train_path)
            y_train = utils.load_h5file(y_train_path)
            generator = datagen_train.flow(x_train, y_train, batch_size)
            n_train = len(x_train)
        else:
            generator = datagen_train.flow_from_directory(
                config.train_dir,
                target_size=self.image_size,
                batch_size=batch_size)
            n_train = len(utils.images_under_subdirs(config.train_dir,
                                                     subdirs=self.classes))

        if not steps_per_epoch:
            steps_per_epoch = utils.ceildiv(n_train, batch_size)

    if steps_per_epoch is None:
        steps_per_epoch = 500

    if not callbacks:
        callbacks = self.get_callbacks(self.model_weights_path, patience=50)

    if validation_data is None:
        datagen_valid = ImageDataGenerator(
            preprocessing_function=self.preprocess_fun)

        x_valid_path = config.get_x_valid_path(self.base_model)
        y_valid_path = config.y_valid_path
        if (source == 'tensor' and os.path.exists(x_valid_path)
                and os.path.exists(y_valid_path)):
            x_valid = utils.load_h5file(x_valid_path)
            y_valid = utils.load_h5file(y_valid_path)
            validation_data = datagen_valid.flow(x_valid, y_valid, batch_size)
            n_valid = len(x_valid)
        else:
            validation_data = datagen_valid.flow_from_directory(
                config.valid_dir,
                target_size=self.image_size,
                batch_size=batch_size)
            n_valid = len(utils.images_under_subdirs(config.valid_dir,
                                                     subdirs=self.classes))

        if not validation_steps:
            validation_steps = utils.ceildiv(n_valid, batch_size)

    if validation_steps is None:
        validation_steps = 100

    self.model.compile(loss='categorical_crossentropy',
                       optimizer=SGD(lr=lr, momentum=0.9),
                       metrics=['accuracy'])

    self.model.fit_generator(generator,
                             steps_per_epoch=steps_per_epoch,
                             epochs=epochs,
                             validation_data=validation_data,
                             validation_steps=validation_steps,
                             callbacks=callbacks,
                             verbose=verbose,
                             **kwargs)