Example #1
def setup_lr(optimizer, full_log, opt):
    # annealing learning rate
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        verbose=True,
        factor=opt.lr_reduce_factor,
        min_lr=opt.lr_min_value,
        threshold=opt.lr_quantity_epsilon,
        threshold_mode='rel',
        mode=opt.lr_quantity_mode,
        patience=ceildiv(opt.lr_patience, opt.eval_iter),
        cooldown=ceildiv(opt.lr_cooldown, opt.eval_iter))

    # create a function and a closure
    averaging_buffer_max_length = ceildiv(opt.lr_quantity_smoothness,
                                          opt.eval_iter)
    averaging_buffer_max_length = max(averaging_buffer_max_length, 1)
    averaging_buffer = []

    def anneal_lr_func(anneal_now=True):
        value_to_monitor = full_log[opt.lr_quantity_to_monitor][-1]
        averaging_buffer.append(value_to_monitor)
        if len(averaging_buffer) > averaging_buffer_max_length:
            averaging_buffer.pop(0)
        averaged_value = sum(averaging_buffer) / float(len(averaging_buffer))
        counter = len(full_log[opt.lr_quantity_to_monitor])
        if opt.anneal_learning_rate and anneal_now:
            lr_scheduler.step(averaged_value, counter)
        return get_learning_rate(optimizer)

    return lr_scheduler, anneal_lr_func
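The snippet above assumes a ceildiv helper and a get_learning_rate accessor that are defined elsewhere. A minimal sketch of what they might look like (the exact originals are not shown and may differ):

def ceildiv(a, b):
    # Ceiling division with integer arithmetic only: ceildiv(7, 2) == 4.
    return -(-a // b)


def get_learning_rate(optimizer):
    # Assumed helper: report the current learning rate of each param group.
    return [group['lr'] for group in optimizer.param_groups]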
Example #2
def brute_force_oracle(block_size, prefix_size, encryption_oracle):
    decrypted = b''
    prefix_pad = bytes([0] * (block_size - prefix_size % block_size))
    prefix_block = utils.ceildiv(prefix_size, block_size) * block_size
    while True:
        # Calculate prefix
        decrypted_blocks = max(utils.ceildiv(len(decrypted), block_size), 1)
        decrypted_size = decrypted_blocks * block_size
        prefix = prefix_pad + bytes([0] *
                                    (decrypted_size - len(decrypted) - 1))
        # Build a dictionary mapping ciphertext block to candidate byte
        lookup = {}
        for b in range(256):
            p = prefix + decrypted + bytes([b])
            enc_p = encryption_oracle(p)[prefix_block:prefix_block +
                                         decrypted_size]
            lookup[enc_p] = b
        # Perform oracle encryption
        enc = encryption_oracle(prefix)[prefix_block:prefix_block +
                                        decrypted_size]

        # Recover the next plaintext byte by dictionary lookup
        if enc in lookup:
            decrypted += bytes([lookup[enc]])
        else:
            break

    return decrypted
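This is the classic byte-at-a-time ECB decryption with a random prefix. A hypothetical oracle to exercise it could look like the sketch below (PyCryptodome assumed; the key, prefix and secret are made up):

import os
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad

_KEY = os.urandom(16)
_PREFIX = os.urandom(5)        # attacker-unknown prefix
_SECRET = b'attack at dawn'    # suffix that the attack recovers


def toy_encryption_oracle(data):
    # ECB-encrypt prefix || attacker-controlled data || secret, PKCS#7 padded.
    cipher = AES.new(_KEY, AES.MODE_ECB)
    return cipher.encrypt(pad(_PREFIX + data + _SECRET, AES.block_size))

# brute_force_oracle(16, len(_PREFIX), toy_encryption_oracle) should then
# return _SECRET, possibly followed by a single trailing padding byte.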
Example #3
    def get_output_shape_for(self, input_shape):
        pheight, pwidth = self.patch_size
        npatchesH = ceildiv(input_shape[2], pheight)
        npatchesW = ceildiv(input_shape[3], pwidth)

        if self.stack_sublayers:
            dim = 2 * self.n_hidden
        else:
            dim = 4 * self.n_hidden

        return input_shape[0], dim, npatchesH, npatchesW
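As a quick sanity check of the arithmetic above (shapes and sizes are made up): a bc01 input of shape (8, 3, 32, 30) with patch_size (2, 3) gives 16 patches along the height and 10 along the width, so the output is (8, dim, 16, 10) with dim equal to 2 or 4 times n_hidden.

def ceildiv(a, b):
    # stand-in for the ceildiv used above
    return -(-a // b)

assert ceildiv(32, 2) == 16 and ceildiv(30, 3) == 10
# stack_sublayers=True with n_hidden=100 -> (8, 200, 16, 10)
# stack_sublayers=False with n_hidden=100 -> (8, 400, 16, 10)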
Example #4
    def __iter__(self):
        ''' Produce batches according to the given lengths '''
        buckets = {}
        num_batches = 0
        sequence_lengths = list(enumerate(self.sequence_lengths))
        if self.shuffle:
            np.random.shuffle(sequence_lengths)

        for idx, lengths in sequence_lengths:
            bucket_idx = ceildiv(max(lengths), self.granularity)
            bucket = buckets.get(bucket_idx, None)
            if not bucket:
                bucket = TokenBucket(self.max_lengths)

            batch = bucket.try_add(idx, sum(lengths))
            if batch:
                # Bucket was full so yield a batch
                num_batches += 1
                yield batch

                # Add to the bucket now that it's been emptied
                bucket.try_add(idx, sum(lengths))

            buckets[bucket_idx] = bucket

        # Go through all buckets to see if any can yield a batch
        for bucket in buckets.values():
            batch = bucket.get_batch()
            if all(batch):
                # Bucket had a non-empty batch left
                num_batches += 1
                yield batch

        # Update the batch estimate
        self.num_batches = num_batches
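The sampler relies on a TokenBucket class that is not shown here. Judging only from how it is used above (try_add returns a full batch once a token budget is exceeded, get_batch drains the leftovers, and the caller checks all(batch) over per-device sub-batches), a hypothetical minimal version might look like this sketch; the real implementation may differ:

class TokenBucket(object):
    ''' Hypothetical sketch: one sub-batch per device, each with its own
    token budget. '''

    def __init__(self, max_lengths):
        self.max_lengths = max_lengths
        self.batches = [[] for _ in max_lengths]
        self.tokens = [0] * len(max_lengths)
        self.device = 0

    def try_add(self, idx, num_tokens):
        # Fill the devices round-robin; once every device is full, hand the
        # whole batch back (the caller re-adds the rejected example).
        if self.tokens[self.device] + num_tokens > self.max_lengths[self.device]:
            self.device += 1
            if self.device == len(self.max_lengths):
                full, self.batches = self.batches, [[] for _ in self.max_lengths]
                self.tokens = [0] * len(self.max_lengths)
                self.device = 0
                return full
        self.batches[self.device].append(idx)
        self.tokens[self.device] += num_tokens
        return None

    def get_batch(self):
        # Leftover per-device batches; the caller only yields them when every
        # device received at least one example (all(batch)).
        return self.batches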
Example #5
def random_coeff_order_combinations(number):
    "Generate distinct random combinations of unitary coefficients and polynomial orders."
    coeffs_set = {-1, 1}
    max_order = ceildiv(number, len(coeffs_set)) + 3
    pairings = it.product(coeffs_set, range(
        1, max_order))  # avoid order 0 because it falls on the boundary
    return random.sample(list(pairings), number)
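A quick illustration of the sizing logic with made-up numbers: for number = 5 the candidate pool is {-1, 1} crossed with orders 1 through 5, i.e. 10 distinct pairs, comfortably more than the 5 samples requested.

import itertools as it
import random


def ceildiv(a, b):
    # stand-in for the ceildiv used above
    return -(-a // b)

coeffs_set = {-1, 1}
max_order = ceildiv(5, len(coeffs_set)) + 3                  # == 6
pairings = list(it.product(coeffs_set, range(1, max_order)))
assert len(pairings) == 10                                   # 2 coefficients * 5 orders
print(random.sample(pairings, 5))                            # e.g. [(1, 3), (-1, 1), ...]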
Example #6
    def __init__(self, max_lengths, sequence_lengths, shuffle=False, granularity=5):
        '''
        Initialize the sequence length sampler

        Inputs:
        max_lengths - a list of the desired total sequence lengths, one per device
        sequence_lengths - a list containing the lengths for each example in the dataset
        '''
        super(SequenceLengthSampler, self).__init__(sequence_lengths)

        self.shuffle = shuffle
        self.granularity = granularity
        self.max_lengths = max_lengths
        self.sequence_lengths = sequence_lengths

        # Initial estimate of the number of batches
        self.num_batches = ceildiv(np.sum(sequence_lengths), np.sum(max_lengths))
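The initial estimate simply divides the total number of tokens by the total per-device budget, rounded up. With made-up numbers:

import numpy as np


def ceildiv(a, b):
    # stand-in for the ceildiv used above
    return -(-a // b)

sequence_lengths = [(12, 15), (7, 9), (20, 22)]   # (source, target) lengths, made up
max_lengths = [40, 40]                            # per-device token budgets, made up
estimate = ceildiv(np.sum(sequence_lengths), np.sum(max_lengths))
assert int(np.sum(sequence_lengths)) == 85 and estimate == 2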
Example #7
def split(path, prefix=None, num_lines=1000, approx_lines=0):
    '''
    Split a file into chunks of num_lines lines each.

    If approx_lines is provided, make sure the suffix length is long enough for the resultant
    number of files.
    '''
    prefix = prefix or f'{path}.chunk.'
    cmd = ['split']
    if approx_lines:
        approx_files = ceildiv(approx_lines, num_lines)
        suffix_len = math.ceil(math.log(approx_files) / math.log(26))
        cmd += ['-a', f'{suffix_len}']
    cmd += ['-l', f'{num_lines}', path, prefix]

    try:
        subprocess.check_call(cmd, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError:
        raise RuntimeError(f'Unable to split {path}')

    return glob.glob(f'{prefix}*')
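A hypothetical call (the file name and line counts are made up): splitting a file expected to hold about 2.5 million lines into 100000-line chunks gives approx_files = 25, so suffix_len = ceil(log(25) / log(26)) = 1 and the command run is split -a 1 -l 100000 corpus.txt corpus.txt.chunk.

chunks = split('corpus.txt', num_lines=100000, approx_lines=2500000)
print(len(chunks))   # roughly 25 files matching corpus.txt.chunk.*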
Example #8
async def retrieve_my_msgs(ctx: Context, as_images: str = ""):
    my_mrs = session.query(MessageRequest).filter_by(user_id=ctx.author.id)
    if my_mrs.count() == 0:
        await ctx.send(constants.NO_REQUESTS)
    for req in my_mrs:
        mc = req.responses.count()
        if mc == 0:
            await ctx.send(render_template("no_replies.j2", req=req))
        elif as_images == "images":
            files = [get_image(r.message) for r in req.responses]
            page_count = ceildiv(len(files), 10)
            for i in range(page_count):
                await ctx.send(render_template("read_replies_img.j2",
                                               req=req,
                                               i=i,
                                               pc=page_count),
                               files=files[i * 10:i * 10 + 10])
        else:
            await ctx.channel.send(
                render_template("read_replies.j2", req=req, mc=mc))
            if mc > DELETE_THRESHOLD:
                req.delete(session)
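The pagination hinges on the limit of ten attachments per Discord message: ceildiv rounds the reply count up so that a final, partially filled page is still sent. With made-up counts:

def ceildiv(a, b):
    # stand-in for the ceildiv used above
    return -(-a // b)

files = list(range(23))                  # stand-in for 23 reply images
page_count = ceildiv(len(files), 10)     # 3 pages: 10 + 10 + 3
pages = [files[i * 10:i * 10 + 10] for i in range(page_count)]
assert [len(p) for p in pages] == [10, 10, 3]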
Example #9
# Perform computation at double precision
for l in lbann.traverse_layer_graph(input_):
    l.datatype = lbann.DataType.DOUBLE
    for w in l.weights:
        w.datatype = lbann.DataType.DOUBLE

# ----------------------------------
# Run LBANN
# ----------------------------------

# Create optimizer
opt = lbann.SGD(learn_rate=args.learning_rate)

# Create LBANN objects
iterations_per_epoch = utils.ceildiv(epoch_size, args.mini_batch_size)
num_epochs = utils.ceildiv(args.num_iterations, iterations_per_epoch)
trainer = lbann.Trainer(
    mini_batch_size=args.mini_batch_size,
    num_parallel_readers=0,
)
callbacks = [
    lbann.CallbackPrint(),
    lbann.CallbackTimer(),
    lbann.CallbackDumpWeights(
        directory='embeddings',
        epoch_interval=num_epochs,
        format='distributed_binary',
    ),
]
model = lbann.Model(
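In the snippet above, the two ceildiv calls turn an iteration budget into whole epochs: iterations_per_epoch rounds the dataset size up to whole mini-batches and num_epochs rounds the iteration count up to whole epochs. With made-up numbers:

def ceildiv(a, b):
    # stand-in for utils.ceildiv
    return -(-a // b)

epoch_size, mini_batch_size, num_iterations = 100000, 512, 5000   # made up
iterations_per_epoch = ceildiv(epoch_size, mini_batch_size)       # 196
num_epochs = ceildiv(num_iterations, iterations_per_epoch)        # 26
assert (iterations_per_epoch, num_epochs) == (196, 26)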
Example #10
    def __init__(
            self,
            l_in,
            n_layers,
            pheight,
            pwidth,
            dim_proj,
            nclasses,
            stack_sublayers,
            # outsampling
            out_upsampling_type,
            out_nfilters,
            out_filters_size,
            out_filters_stride,
            out_W_init=lasagne.init.GlorotUniform(),
            out_b_init=lasagne.init.Constant(0.),
            out_nonlinearity=lasagne.nonlinearities.identity,
            hypotetical_fm_size=np.array((100.0, 100.0)),
            # input ConvLayers
            in_nfilters=None,
            in_filters_size=((3, 3), (3, 3)),
            in_filters_stride=((1, 1), (1, 1)),
            in_W_init=lasagne.init.GlorotUniform(),
            in_b_init=lasagne.init.Constant(0.),
            in_nonlinearity=lasagne.nonlinearities.rectify,
            in_vgg_layer='conv3_3',
            # common recurrent layer params
            RecurrentNet=lasagne.layers.GRULayer,
            nonlinearity=lasagne.nonlinearities.rectify,
            hid_init=lasagne.init.Constant(0.),
            grad_clipping=0,
            precompute_input=True,
            mask_input=None,
            # 1x1 Conv layer for dimensional reduction
            conv_dim_red=False,
            conv_dim_red_nonlinearity=lasagne.nonlinearities.identity,
            # GRU specific params
            gru_resetgate=lasagne.layers.Gate(W_cell=None),
            gru_updategate=lasagne.layers.Gate(W_cell=None),
            gru_hidden_update=lasagne.layers.Gate(
                W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
            gru_hid_init=lasagne.init.Constant(0.),
            # LSTM specific params
            lstm_ingate=lasagne.layers.Gate(),
            lstm_forgetgate=lasagne.layers.Gate(),
            lstm_cell=lasagne.layers.Gate(
                W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
            lstm_outgate=lasagne.layers.Gate(),
            # RNN specific params
            rnn_W_in_to_hid=lasagne.init.Uniform(),
            rnn_W_hid_to_hid=lasagne.init.Uniform(),
            rnn_b=lasagne.init.Constant(0.),
            # Special layers
            batch_norm=False,
            name=''):
        """A ReSeg layer

        The ReSeg layer is composed of multiple ReNet layers and an
        upsampling layer

        Parameters
        ----------
        l_in : lasagne.layers.Layer
            The input layer, in bc01 format
        n_layers : int
            The number of layers
        pheight : tuple
            The height of the patches, for each layer
        pwidth : tuple
            The width of the patches, for each layer
        dim_proj : tuple
            The number of hidden units of each RNN, for each layer
        nclasses : int
            The number of classes of the data
        stack_sublayers : bool
            If True the bidirectional RNNs in the ReNet layers will be
            stacked one over the other. See ReNet for more details.
        out_upsampling_type : string
            The kind of upsampling to be used
        out_nfilters : int
            The number of hidden units of the upsampling layer
        out_filters_size : tuple
            The size of the upsampling filters, if any
        out_filters_stride : tuple
            The stride of the upsampling filters, if any
        out_W_init : Theano shared variable, numpy array or callable
            Initializer for W
        out_b_init : Theano shared variable, numpy array or callable
            Initializer for b
        out_nonlinearity : Theano shared variable, numpy array or callable
            The nonlinearity to be applied after the upsampling
        hypotetical_fm_size : float
            The hypothetical size of the feature map that this layer
            would receive as input if the input image of the whole
            network were of size (100, 100)
        RecurrentNet : lasagne.layers.Layer
            A recurrent layer class
        nonlinearity : callable or None
            The nonlinearity that is applied to the output. If
            None is provided, no nonlinearity will be applied.
        hid_init : callable, np.ndarray, theano.shared or
                   lasagne.layers.Layer
            Initializer for initial hidden state
        grad_clipping : float
            If nonzero, the gradient messages are clipped to the given value
            during the backward pass.
        precompute_input : bool
            If True, precompute input_to_hid before iterating through the
            sequence. This can result in a speedup at the expense of an
            increase in memory usage.
        mask_input : lasagne.layers.Layer
            Layer which allows for a sequence mask to be input, for when
            sequences are of variable length. Default None, which means no mask
            will be supplied (i.e. all sequences are of the same length).
        gru_resetgate : lasagne.layers.Gate
            Parameters for the reset gate, if RecurrentNet is GRU
        gru_updategate : lasagne.layers.Gate
            Parameters for the update gate, if RecurrentNet is GRU
        gru_hidden_update : lasagne.layers.Gate
            Parameters for the hidden update, if RecurrentNet is GRU
        gru_hid_init : callable, np.ndarray, theano.shared or
                       lasagne.layers.Layer
            Initializer for initial hidden state, if RecurrentNet is GRU
        lstm_ingate : lasagne.layers.Gate
            Parameters for the input gate, if RecurrentNet is LSTM
        lstm_forgetgate : lasagne.layers.Gate
            Parameters for the forget gate, if RecurrentNet is LSTM
        lstm_cell : lasagne.layers.Gate
            Parameters for the cell computation, if RecurrentNet is LSTM
        lstm_outgate : lasagne.layers.Gate
            Parameters for the output gate, if RecurrentNet is LSTM
        rnn_W_in_to_hid : Theano shared variable, numpy array or callable
            Initializer for input-to-hidden weight matrix, if
            RecurrentNet is RecurrentLayer
        rnn_W_hid_to_hid : Theano shared variable, numpy array or callable
            Initializer for hidden-to-hidden weight matrix, if
            RecurrentNet is RecurrentLayer
        rnn_b : Theano shared variable, numpy array, callable or None
            Initializer for bias vector, if RecurrentNet is
            RecurrentLayer. If None is provided there will be no bias
        batch_norm : bool
            If True, add a batch normalization layer right after each
            gradient upsampling layer at the end of the network
        name : string
            The name of the layer, optional
        """

        super(ReSegLayer, self).__init__(l_in, name)
        self.l_in = l_in
        self.n_layers = n_layers
        self.pheight = pheight
        self.pwidth = pwidth
        self.dim_proj = dim_proj
        self.nclasses = nclasses
        self.stack_sublayers = stack_sublayers
        # upsampling
        self.out_upsampling_type = out_upsampling_type
        self.out_nfilters = out_nfilters
        self.out_filters_size = out_filters_size
        self.out_filters_stride = out_filters_stride
        self.out_W_init = out_W_init
        self.out_b_init = out_b_init
        self.out_nonlinearity = out_nonlinearity
        self.hypotetical_fm_size = hypotetical_fm_size
        # input ConvLayers
        self.in_nfilters = in_nfilters
        self.in_filters_size = in_filters_size
        self.in_filters_stride = in_filters_stride
        self.in_W_init = in_W_init
        self.in_b_init = in_b_init
        self.in_nonlinearity = in_nonlinearity
        self.in_vgg_layer = in_vgg_layer
        # common recurrent layer params
        self.RecurrentNet = RecurrentNet
        self.nonlinearity = nonlinearity
        self.hid_init = hid_init
        self.grad_clipping = grad_clipping
        self.precompute_input = precompute_input
        self.mask_input = mask_input
        # GRU specific params
        self.gru_resetgate = gru_resetgate
        self.gru_updategate = gru_updategate
        self.gru_hidden_update = gru_hidden_update
        self.gru_hid_init = gru_hid_init
        # LSTM specific params
        self.lstm_ingate = lstm_ingate
        self.lstm_forgetgate = lstm_forgetgate
        self.lstm_cell = lstm_cell
        self.lstm_outgate = lstm_outgate
        # RNN specific params
        self.rnn_W_in_to_hid = rnn_W_in_to_hid
        self.rnn_W_hid_to_hid = rnn_W_hid_to_hid
        self.name = name
        self.sublayers = []

        expand_height = expand_width = 1

        # Input ConvLayers
        l_conv = l_in
        if isinstance(in_nfilters,
                      Iterable) and not isinstance(in_nfilters, str):
            for i, (nf, f_size, stride) in enumerate(
                    zip(in_nfilters, in_filters_size, in_filters_stride)):

                l_conv = ConvLayer(l_conv,
                                   num_filters=nf,
                                   filter_size=f_size,
                                   stride=stride,
                                   W=in_W_init,
                                   b=in_b_init,
                                   pad='valid',
                                   name=self.name + '_input_conv_layer' +
                                   str(i))
                self.sublayers.append(l_conv)
                self.hypotetical_fm_size = (
                    (self.hypotetical_fm_size - 1) * stride + f_size)
                # TODO This is right only if stride == filter...
                expand_height *= f_size[0]
                expand_width *= f_size[1]

                # Print shape
                out_shape = get_output_shape(l_conv)
                print('ConvNet: After in-convnet: {}'.format(out_shape))

        # Pretrained vgg16
        elif type(in_nfilters) == str:
            from vgg16 import Vgg16Layer
            l_conv = Vgg16Layer(l_in, self.in_nfilters, False, False)
            hypotetical_fm_size /= 8
            expand_height = expand_width = 8
            self.sublayers.append(l_conv)
            # Print shape
            out_shape = get_output_shape(l_conv)
            print('Vgg: After vgg: {}'.format(out_shape))

        # ReNet layers
        l_renet = l_conv
        for lidx in xrange(n_layers):
            l_renet = ReNetLayer(
                l_renet,
                patch_size=(pwidth[lidx], pheight[lidx]),
                n_hidden=dim_proj[lidx],
                stack_sublayers=stack_sublayers[lidx],
                RecurrentNet=RecurrentNet,
                nonlinearity=nonlinearity,
                hid_init=hid_init,
                grad_clipping=grad_clipping,
                precompute_input=precompute_input,
                mask_input=mask_input,
                # GRU specific params
                gru_resetgate=gru_resetgate,
                gru_updategate=gru_updategate,
                gru_hidden_update=gru_hidden_update,
                gru_hid_init=gru_hid_init,
                # LSTM specific params
                lstm_ingate=lstm_ingate,
                lstm_forgetgate=lstm_forgetgate,
                lstm_cell=lstm_cell,
                lstm_outgate=lstm_outgate,
                # RNN specific params
                rnn_W_in_to_hid=rnn_W_in_to_hid,
                rnn_W_hid_to_hid=rnn_W_hid_to_hid,
                rnn_b=rnn_b,
                batch_norm=batch_norm,
                name=self.name + '_renet' + str(lidx))
            self.sublayers.append(l_renet)
            self.hypotetical_fm_size /= (pwidth[lidx], pheight[lidx])

            # Print shape
            out_shape = get_output_shape(l_renet)
            if stack_sublayers:
                msg = 'ReNet: After 2 rnns {}x{}@{} and 2 rnns 1x1@{}: {}'
                print(
                    msg.format(pheight[lidx], pwidth[lidx], dim_proj[lidx],
                               dim_proj[lidx], out_shape))
            else:
                print('ReNet: After 4 rnns {}x{}@{}: {}'.format(
                    pheight[lidx], pwidth[lidx], dim_proj[lidx], out_shape))

            # 1x1 conv layer : dimensionality reduction layer
            if conv_dim_red:
                l_renet = lasagne.layers.Conv2DLayer(
                    l_renet,
                    num_filters=dim_proj[lidx],
                    filter_size=(1, 1),
                    W=lasagne.init.GlorotUniform(),
                    b=lasagne.init.Constant(0.),
                    pad='valid',
                    nonlinearity=conv_dim_red_nonlinearity,
                    name=self.name + '_1x1_conv_layer' + str(lidx))

                # Print shape
                out_shape = get_output_shape(l_renet)
                print('Dim reduction: After 1x1 convnet: {}'.format(out_shape))

        # Upsampling
        if out_upsampling_type == 'autograd':
            raise NotImplementedError(
                'This will not work as the dynamic cropping will crop '
                'part of the image.')
            nlayers = len(out_nfilters)
            assert nlayers > 1

            # Compute the upsampling ratio and the corresponding params
            h2 = np.array((100., 100.))
            up_ratio = (h2 / self.hypotetical_fm_size)**(1. / nlayers)
            h1 = h2 / up_ratio
            h0 = h1 / up_ratio
            stride = to_int(ceildiv(h2 - h1, h1 - h0))
            filter_size = to_int(
                ceildiv((h1 * (h1 - 1) + h2 - h2 * h0), (h1 - h0)))

            target_shape = get_output(l_renet).shape[2:]
            l_upsampling = l_renet
            for l in range(nlayers):
                target_shape = target_shape * up_ratio
                l_upsampling = TransposedConv2DLayer(
                    l_upsampling,
                    num_filters=out_nfilters[l],
                    filter_size=filter_size,
                    stride=stride,
                    W=out_W_init,
                    b=out_b_init,
                    nonlinearity=out_nonlinearity)
                self.sublayers.append(l_upsampling)
                up_shape = get_output(l_upsampling).shape[2:]

                # Print shape
                out_shape = get_output_shape(l_upsampling)
                print('Transposed autograd: {}x{} (str {}x{}) @ {}:{}'.format(
                    filter_size[0], filter_size[1], stride[0], stride[1],
                    out_nfilters[l], out_shape))

                # CROP
                # pad in TransposeConv2DLayer cannot be a tensor --> we cannot
                # crop unless we know in advance by how much!
                crop = T.max(T.stack([up_shape - target_shape,
                                      T.zeros(2)]),
                             axis=0)
                crop = crop.astype('uint8')  # round down
                l_upsampling = CropLayer(l_upsampling,
                                         crop,
                                         data_format='bc01')
                self.sublayers.append(l_upsampling)

                # Print shape
                print('Dynamic cropping')

        elif out_upsampling_type == 'grad':
            l_upsampling = l_renet
            for i, (nf, f_size, stride) in enumerate(
                    zip(out_nfilters, out_filters_size, out_filters_stride)):
                l_upsampling = TransposedConv2DLayer(
                    l_upsampling,
                    num_filters=nf,
                    filter_size=f_size,
                    stride=stride,
                    crop=0,
                    W=out_W_init,
                    b=out_b_init,
                    nonlinearity=out_nonlinearity)
                self.sublayers.append(l_upsampling)

                if batch_norm:
                    l_upsampling = lasagne.layers.batch_norm(l_upsampling,
                                                             axes='auto')
                    self.sublayers.append(l_upsampling)
                    print "Batch normalization after Grad layer "

                # Print shape
                out_shape = get_output_shape(l_upsampling)
                print('Transposed conv: {}x{} (str {}x{}) @ {}:{}'.format(
                    f_size[0], f_size[1], stride[0], stride[1], nf, out_shape))

        elif out_upsampling_type == 'linear':
            # Go to b01c
            l_upsampling = lasagne.layers.DimshuffleLayer(
                l_renet, (0, 2, 3, 1), name=self.name + '_grad_undimshuffle')
            self.sublayers.append(l_upsampling)

            expand_height *= np.prod(pheight)
            expand_width *= np.prod(pwidth)
            l_upsampling = LinearUpsamplingLayer(l_upsampling,
                                                 expand_height,
                                                 expand_width,
                                                 nclasses,
                                                 batch_norm=batch_norm,
                                                 name="linear_upsample_layer")
            self.sublayers.append(l_upsampling)
            print('Linear upsampling')

            if batch_norm:
                l_upsampling = lasagne.layers.batch_norm(l_upsampling,
                                                         axes=(0, 1, 2))

                self.sublayers.append(l_upsampling)
                print "Batch normalization after Linear upsampling layer "

            # Go back to bc01
            l_upsampling = lasagne.layers.DimshuffleLayer(
                l_upsampling, (0, 3, 1, 2),
                name=self.name + '_grad_undimshuffle')
            self.sublayers.append(l_upsampling)

        self.l_out = l_upsampling

        # HACK LASAGNE
        # This will set `self.input_layer`, which is needed by Lasagne to find
        # the layers with the get_all_layers() helper function in the
        # case of a layer with sublayers
        if isinstance(self.l_out, tuple):
            self.input_layer = None
        else:
            self.input_layer = self.l_out
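One detail worth noting in the 'autograd' branch: ceildiv is applied to numpy arrays (h2 - h1, h1 - h0, ...), not scalars. The usual integer idiom extends elementwise, as in the sketch below; to_int here is an illustrative stand-in for the helper used above, which may be implemented differently:

import numpy as np


def ceildiv(a, b):
    # works for scalars and elementwise for numpy arrays
    return -(-a // b)


def to_int(a):
    # illustrative stand-in: round and cast to integers
    return np.asarray(np.round(a), dtype='int64')

h2 = np.array((100., 100.))
h1 = h2 / 2.0
h0 = h1 / 2.0
print(to_int(ceildiv(h2 - h1, h1 - h0)))   # [2 2]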
Example #11
    def __init__(self,
                 l_in,
                 n_layers,
                 pheight,
                 pwidth,
                 dim_proj,
                 nclasses,
                 stack_sublayers,
                 # outsampling
                 out_upsampling_type,
                 out_nfilters,
                 out_filters_size,
                 out_filters_stride,
                 out_W_init=lasagne.init.GlorotUniform(),
                 out_b_init=lasagne.init.Constant(0.),
                 out_nonlinearity=lasagne.nonlinearities.identity,
                 hypotetical_fm_size=np.array((100.0, 100.0)),
                 # input ConvLayers
                 in_nfilters=None,
                 in_filters_size=((3, 3), (3, 3)),
                 in_filters_stride=((1, 1), (1, 1)),
                 in_W_init=lasagne.init.GlorotUniform(),
                 in_b_init=lasagne.init.Constant(0.),
                 in_nonlinearity=lasagne.nonlinearities.rectify,
                 in_vgg_layer='conv3_3',
                 # common recurrent layer params
                 RecurrentNet=lasagne.layers.GRULayer,
                 nonlinearity=lasagne.nonlinearities.rectify,
                 hid_init=lasagne.init.Constant(0.),
                 grad_clipping=0,
                 precompute_input=True,
                 mask_input=None,
                 # 1x1 Conv layer for dimensional reduction
                 conv_dim_red=False,
                 conv_dim_red_nonlinearity=lasagne.nonlinearities.identity,
                 # GRU specific params
                 gru_resetgate=lasagne.layers.Gate(W_cell=None),
                 gru_updategate=lasagne.layers.Gate(W_cell=None),
                 gru_hidden_update=lasagne.layers.Gate(
                     W_cell=None,
                     nonlinearity=lasagne.nonlinearities.tanh),
                 gru_hid_init=lasagne.init.Constant(0.),
                 # LSTM specific params
                 lstm_ingate=lasagne.layers.Gate(),
                 lstm_forgetgate=lasagne.layers.Gate(),
                 lstm_cell=lasagne.layers.Gate(
                     W_cell=None,
                     nonlinearity=lasagne.nonlinearities.tanh),
                 lstm_outgate=lasagne.layers.Gate(),
                 # RNN specific params
                 rnn_W_in_to_hid=lasagne.init.Uniform(),
                 rnn_W_hid_to_hid=lasagne.init.Uniform(),
                 rnn_b=lasagne.init.Constant(0.),
                 # Special layers
                 batch_norm=False,
                 name=''):
        """A ReSeg layer

        The ReSeg layer is composed of multiple ReNet layers and an
        upsampling layer

        Parameters
        ----------
        l_in : lasagne.layers.Layer
            The input layer, in bc01 format
        n_layers : int
            The number of layers
        pheight : tuple
            The height of the patches, for each layer
        pwidth : tuple
            The width of the patches, for each layer
        dim_proj : tuple
            The number of hidden units of each RNN, for each layer
        nclasses : int
            The number of classes of the data
        stack_sublayers : bool
            If True the bidirectional RNNs in the ReNet layers will be
            stacked one over the other. See ReNet for more details.
        out_upsampling_type : string
            The kind of upsampling to be used
        out_nfilters : int
            The number of hidden units of the upsampling layer
        out_filters_size : tuple
            The size of the upsampling filters, if any
        out_filters_stride : tuple
            The stride of the upsampling filters, if any
        out_W_init : Theano shared variable, numpy array or callable
            Initializer for W
        out_b_init : Theano shared variable, numpy array or callable
            Initializer for b
        out_nonlinearity : Theano shared variable, numpy array or callable
            The nonlinearity to be applied after the upsampling
        hypotetical_fm_size : float
            The hypothetical size of the feature map that this layer
            would receive as input if the input image of the whole
            network were of size (100, 100)
        RecurrentNet : lasagne.layers.Layer
            A recurrent layer class
        nonlinearity : callable or None
            The nonlinearity that is applied to the output. If
            None is provided, no nonlinearity will be applied.
        hid_init : callable, np.ndarray, theano.shared or
                   lasagne.layers.Layer
            Initializer for initial hidden state
        grad_clipping : float
            If nonzero, the gradient messages are clipped to the given value
            during the backward pass.
        precompute_input : bool
            If True, precompute input_to_hid before iterating through the
            sequence. This can result in a speedup at the expense of an
            increase in memory usage.
        mask_input : lasagne.layers.Layer
            Layer which allows for a sequence mask to be input, for when
            sequences are of variable length. Default None, which means no mask
            will be supplied (i.e. all sequences are of the same length).
        gru_resetgate : lasagne.layers.Gate
            Parameters for the reset gate, if RecurrentNet is GRU
        gru_updategate : lasagne.layers.Gate
            Parameters for the update gate, if RecurrentNet is GRU
        gru_hidden_update : lasagne.layers.Gate
            Parameters for the hidden update, if RecurrentNet is GRU
        gru_hid_init : callable, np.ndarray, theano.shared or
                       lasagne.layers.Layer
            Initializer for initial hidden state, if RecurrentNet is GRU
        lstm_ingate : lasagne.layers.Gate
            Parameters for the input gate, if RecurrentNet is LSTM
        lstm_forgetgate : lasagne.layers.Gate
            Parameters for the forget gate, if RecurrentNet is LSTM
        lstm_cell : lasagne.layers.Gate
            Parameters for the cell computation, if RecurrentNet is LSTM
        lstm_outgate : lasagne.layers.Gate
            Parameters for the output gate, if RecurrentNet is LSTM
        rnn_W_in_to_hid : Theano shared variable, numpy array or callable
            Initializer for input-to-hidden weight matrix, if
            RecurrentNet is RecurrentLayer
        rnn_W_hid_to_hid : Theano shared variable, numpy array or callable
            Initializer for hidden-to-hidden weight matrix, if
            RecurrentNet is RecurrentLayer
        rnn_b : Theano shared variable, numpy array, callable or None
            Initializer for bias vector, if RecurrentNet is
            RecurrentLayer. If None is provided there will be no bias
        batch_norm : bool
            If True, add a batch normalization layer right after each
            gradient upsampling layer at the end of the network
        name : string
            The name of the layer, optional
        """

        super(ReSegLayer, self).__init__(l_in, name)
        self.l_in = l_in
        self.n_layers = n_layers
        self.pheight = pheight
        self.pwidth = pwidth
        self.dim_proj = dim_proj
        self.nclasses = nclasses
        self.stack_sublayers = stack_sublayers
        # upsampling
        self.out_upsampling_type = out_upsampling_type
        self.out_nfilters = out_nfilters
        self.out_filters_size = out_filters_size
        self.out_filters_stride = out_filters_stride
        self.out_W_init = out_W_init
        self.out_b_init = out_b_init
        self.out_nonlinearity = out_nonlinearity
        self.hypotetical_fm_size = hypotetical_fm_size
        # input ConvLayers
        self.in_nfilters = in_nfilters
        self.in_filters_size = in_filters_size
        self.in_filters_stride = in_filters_stride
        self.in_W_init = in_W_init
        self.in_b_init = in_b_init
        self.in_nonlinearity = in_nonlinearity
        self.in_vgg_layer = in_vgg_layer
        # common recurrent layer params
        self.RecurrentNet = RecurrentNet
        self.nonlinearity = nonlinearity
        self.hid_init = hid_init
        self.grad_clipping = grad_clipping
        self.precompute_input = precompute_input
        self.mask_input = mask_input
        # GRU specific params
        self.gru_resetgate = gru_resetgate
        self.gru_updategate = gru_updategate
        self.gru_hidden_update = gru_hidden_update
        self.gru_hid_init = gru_hid_init
        # LSTM specific params
        self.lstm_ingate = lstm_ingate
        self.lstm_forgetgate = lstm_forgetgate
        self.lstm_cell = lstm_cell
        self.lstm_outgate = lstm_outgate
        # RNN specific params
        self.rnn_W_in_to_hid = rnn_W_in_to_hid
        self.rnn_W_hid_to_hid = rnn_W_hid_to_hid
        self.name = name
        self.sublayers = []

        expand_height = expand_width = 1

        # Input ConvLayers
        l_conv = l_in
        if isinstance(in_nfilters, Iterable) and not isinstance(in_nfilters,
                                                                str):
            for i, (nf, f_size, stride) in enumerate(
                    zip(in_nfilters, in_filters_size, in_filters_stride)):

                l_conv = ConvLayer(
                    l_conv,
                    num_filters=nf,
                    filter_size=f_size,
                    stride=stride,
                    W=in_W_init,
                    b=in_b_init,
                    pad='valid',
                    name=self.name + '_input_conv_layer' + str(i)
                )
                self.sublayers.append(l_conv)
                self.hypotetical_fm_size = (
                    (self.hypotetical_fm_size - 1) * stride + f_size)
                # TODO This is right only if stride == filter...
                expand_height *= f_size[0]
                expand_width *= f_size[1]

                # Print shape
                out_shape = get_output_shape(l_conv)
                print('ConvNet: After in-convnet: {}'.format(out_shape))

        # Pretrained vgg16
        elif type(in_nfilters) == str:
            from vgg16 import Vgg16Layer
            l_conv = Vgg16Layer(l_in, self.in_nfilters, False, False)
            hypotetical_fm_size /= 8
            expand_height = expand_width = 8
            self.sublayers.append(l_conv)
            # Print shape
            out_shape = get_output_shape(l_conv)
            print('Vgg: After vgg: {}'.format(out_shape))

        # ReNet layers
        l_renet = l_conv
        for lidx in xrange(n_layers):
            l_renet = ReNetLayer(l_renet,
                                 patch_size=(pwidth[lidx], pheight[lidx]),
                                 n_hidden=dim_proj[lidx],
                                 stack_sublayers=stack_sublayers[lidx],
                                 RecurrentNet=RecurrentNet,
                                 nonlinearity=nonlinearity,
                                 hid_init=hid_init,
                                 grad_clipping=grad_clipping,
                                 precompute_input=precompute_input,
                                 mask_input=mask_input,
                                 # GRU specific params
                                 gru_resetgate=gru_resetgate,
                                 gru_updategate=gru_updategate,
                                 gru_hidden_update=gru_hidden_update,
                                 gru_hid_init=gru_hid_init,
                                 # LSTM specific params
                                 lstm_ingate=lstm_ingate,
                                 lstm_forgetgate=lstm_forgetgate,
                                 lstm_cell=lstm_cell,
                                 lstm_outgate=lstm_outgate,
                                 # RNN specific params
                                 rnn_W_in_to_hid=rnn_W_in_to_hid,
                                 rnn_W_hid_to_hid=rnn_W_hid_to_hid,
                                 rnn_b=rnn_b,
                                 batch_norm=batch_norm,
                                 name=self.name + '_renet' + str(lidx))
            self.sublayers.append(l_renet)
            self.hypotetical_fm_size /= (pwidth[lidx], pheight[lidx])

            # Print shape
            out_shape = get_output_shape(l_renet)
            if stack_sublayers:
                msg = 'ReNet: After 2 rnns {}x{}@{} and 2 rnns 1x1@{}: {}'
                print(msg.format(pheight[lidx], pwidth[lidx], dim_proj[lidx],
                                 dim_proj[lidx], out_shape))
            else:
                print('ReNet: After 4 rnns {}x{}@{}: {}'.format(
                    pheight[lidx], pwidth[lidx], dim_proj[lidx], out_shape))

            # 1x1 conv layer : dimensionality reduction layer
            if conv_dim_red:
                l_renet = lasagne.layers.Conv2DLayer(
                    l_renet,
                    num_filters=dim_proj[lidx],
                    filter_size=(1, 1),
                    W=lasagne.init.GlorotUniform(),
                    b=lasagne.init.Constant(0.),
                    pad='valid',
                    nonlinearity=conv_dim_red_nonlinearity,
                    name=self.name + '_1x1_conv_layer' + str(lidx)
                )

                # Print shape
                out_shape = get_output_shape(l_renet)
                print('Dim reduction: After 1x1 convnet: {}'.format(out_shape))

        # Upsampling
        if out_upsampling_type == 'autograd':
            raise NotImplementedError(
                'This will not work as the dynamic cropping will crop '
                'part of the image.')
            nlayers = len(out_nfilters)
            assert nlayers > 1

            # Compute the upsampling ratio and the corresponding params
            h2 = np.array((100., 100.))
            up_ratio = (h2 / self.hypotetical_fm_size) ** (1. / nlayers)
            h1 = h2 / up_ratio
            h0 = h1 / up_ratio
            stride = to_int(ceildiv(h2 - h1, h1 - h0))
            filter_size = to_int(ceildiv((h1 * (h1 - 1) + h2 - h2 * h0),
                                         (h1 - h0)))

            target_shape = get_output(l_renet).shape[2:]
            l_upsampling = l_renet
            for l in range(nlayers):
                target_shape = target_shape * up_ratio
                l_upsampling = TransposedConv2DLayer(
                    l_upsampling,
                    num_filters=out_nfilters[l],
                    filter_size=filter_size,
                    stride=stride,
                    W=out_W_init,
                    b=out_b_init,
                    nonlinearity=out_nonlinearity)
                self.sublayers.append(l_upsampling)
                up_shape = get_output(l_upsampling).shape[2:]

                # Print shape
                out_shape = get_output_shape(l_upsampling)
                print('Transposed autograd: {}x{} (str {}x{}) @ {}:{}'.format(
                    filter_size[0], filter_size[1], stride[0], stride[1],
                    out_nfilters[l], out_shape))

                # CROP
                # pad in TransposeConv2DLayer cannot be a tensor --> we cannot
                # crop unless we know in advance by how much!
                crop = T.max(T.stack([up_shape - target_shape, T.zeros(2)]),
                             axis=0)
                crop = crop.astype('uint8')  # round down
                l_upsampling = CropLayer(
                    l_upsampling,
                    crop,
                    data_format='bc01')
                self.sublayers.append(l_upsampling)

                # Print shape
                print('Dynamic cropping')

        elif out_upsampling_type == 'grad':
            l_upsampling = l_renet
            for i, (nf, f_size, stride) in enumerate(zip(
                    out_nfilters, out_filters_size, out_filters_stride)):
                l_upsampling = TransposedConv2DLayer(
                    l_upsampling,
                    num_filters=nf,
                    filter_size=f_size,
                    stride=stride,
                    crop=0,
                    W=out_W_init,
                    b=out_b_init,
                    nonlinearity=out_nonlinearity)
                self.sublayers.append(l_upsampling)

                if batch_norm:
                    l_upsampling = lasagne.layers.batch_norm(
                        l_upsampling,
                        axes='auto')
                    self.sublayers.append(l_upsampling)
                    print "Batch normalization after Grad layer "

                # Print shape
                out_shape = get_output_shape(l_upsampling)
                print('Transposed conv: {}x{} (str {}x{}) @ {}:{}'.format(
                    f_size[0], f_size[1], stride[0], stride[1], nf, out_shape))

        elif out_upsampling_type == 'linear':
            # Go to b01c
            l_upsampling = lasagne.layers.DimshuffleLayer(
                l_renet,
                (0, 2, 3, 1),
                name=self.name + '_grad_undimshuffle')
            self.sublayers.append(l_upsampling)

            expand_height *= np.prod(pheight)
            expand_width *= np.prod(pwidth)
            l_upsampling = LinearUpsamplingLayer(l_upsampling,
                                                 expand_height,
                                                 expand_width,
                                                 nclasses,
                                                 batch_norm=batch_norm,
                                                 name="linear_upsample_layer")
            self.sublayers.append(l_upsampling)
            print('Linear upsampling')

            if batch_norm:
                l_upsampling = lasagne.layers.batch_norm(
                    l_upsampling,
                    axes=(0, 1, 2))

                self.sublayers.append(l_upsampling)
                print "Batch normalization after Linear upsampling layer "

            # Go back to bc01
            l_upsampling = lasagne.layers.DimshuffleLayer(
                l_upsampling,
                (0, 3, 1, 2),
                name=self.name + '_grad_undimshuffle')
            self.sublayers.append(l_upsampling)

        self.l_out = l_upsampling

        # HACK LASAGNE
        # This will set `self.input_layer`, which is needed by Lasagne to find
        # the layers with the get_all_layers() helper function in the
        # case of a layer with sublayers
        if isinstance(self.l_out, tuple):
            self.input_layer = None
        else:
            self.input_layer = self.l_out
Example #12
    def fit_generator(self,
                      generator=None,
                      steps_per_epoch=None,
                      epochs=250,
                      verbose=1,
                      callbacks=None,
                      validation_data=None,
                      validation_steps=None,
                      lr=1e-4,
                      batch_size=32,
                      source='path',
                      **kwargs):
        self.freeze_top_layers(self.model, self.freeze_layers_num)
        assert source in {'path', 'tensor'}
        if generator is None:
            datagen_train = ImageDataGenerator(
                preprocessing_function=self.preprocess_fun,
                rotation_range=30.,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                horizontal_flip=True)
            x_train_path = config.get_x_train_path(self.base_model)
            y_train_path = config.y_train_path
            if (source == 'tensor' and os.path.exists(x_train_path)
                    and os.path.exists(y_train_path)):
                x_train = utils.load_h5file(x_train_path)
                y_train = utils.load_h5file(y_train_path)
                generator = datagen_train.flow(x_train, y_train, batch_size)
                n_train = len(x_train)
            else:
                generator = datagen_train.flow_from_directory(
                    config.train_dir,
                    target_size=self.image_size,
                    batch_size=batch_size)
                n_train = len(
                    utils.images_under_subdirs(config.train_dir,
                                               subdirs=self.classes))
            if not steps_per_epoch:
                steps_per_epoch = utils.ceildiv(n_train, batch_size)
        if steps_per_epoch is None:
            steps_per_epoch = 500
        if not callbacks:
            callbacks = self.get_callbacks(self.model_weights_path,
                                           patience=50)
        if validation_data is None:
            datagen_valid = ImageDataGenerator(
                preprocessing_function=self.preprocess_fun)
            x_valid_path = config.get_x_valid_path(self.base_model)
            y_valid_path = config.y_valid_path
            if (source == 'tensor' and os.path.exists(x_valid_path)
                    and os.path.exists(y_valid_path)):
                x_valid = utils.load_h5file(x_valid_path)
                y_valid = utils.load_h5file(y_valid_path)
                validation_data = datagen_valid.flow(x_valid, y_valid,
                                                     batch_size)
                n_valid = len(x_valid)
            else:
                validation_data = datagen_valid.flow_from_directory(
                    config.valid_dir,
                    target_size=self.image_size,
                    batch_size=batch_size)
                n_valid = len(
                    utils.images_under_subdirs(config.valid_dir,
                                               subdirs=self.classes))
            if not validation_steps:
                validation_steps = utils.ceildiv(n_valid, batch_size)
        if validation_steps is None:
            validation_steps = 100
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=SGD(lr=lr, momentum=0.9),
                           metrics=['accuracy'])
        self.model.fit_generator(generator,
                                 steps_per_epoch=steps_per_epoch,
                                 epochs=epochs,
                                 validation_data=validation_data,
                                 validation_steps=validation_steps,
                                 callbacks=callbacks,
                                 verbose=verbose,
                                 **kwargs)
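As in the earlier examples, ceildiv rounds the step counts up so that the final, partially filled batch is still visited once per epoch. With made-up counts:

def ceildiv(a, b):
    # stand-in for utils.ceildiv
    return -(-a // b)

n_train, n_valid, batch_size = 10000, 2500, 32      # made up
steps_per_epoch = ceildiv(n_train, batch_size)      # 313, not 312
validation_steps = ceildiv(n_valid, batch_size)     # 79
assert (steps_per_epoch, validation_steps) == (313, 79)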