Example #1
0
def _materialised_conv_layer_dual_objective(w, b, padding, strides, lam_in,
                                            mu_out, lb, ub):
    """Evaluates the conv-layer dual objective through a materialised layer.

    The convolution is turned into an explicit fully connected layer, every
    operand is flattened to match it, and the computation is delegated to
    `standard_layer_calcs.linear_dual_objective`.

    Args:
      w: Convolution kernel, forwarded to `layer_utils.materialise_conv`.
      b: Convolution bias, forwarded to `layer_utils.materialise_conv`.
      padding: Padding argument forwarded to `layer_utils.materialise_conv`.
      strides: Strides argument forwarded to `layer_utils.materialise_conv`.
      lam_in: Tensor flattened from its third dimension onwards and passed as
        the first argument of `linear_dual_objective`; may be None, in which
        case None is passed through.
      mu_out: Tensor flattened from its third dimension onwards and
        contracted with the materialised weights and bias.
      lb: Tensor whose static non-batch shape defines the convolution input
        shape; passed on batch-flattened.
      ub: Tensor passed on batch-flattened alongside `lb`.

    Returns:
      The result of `standard_layer_calcs.linear_dual_objective`.
    """
    # The materialised layer has no spatial structure, so collapse everything
    # after the first two dimensions of `mu_out`.
    flat_mu = snt.BatchFlatten(preserve_dims=2)(mu_out)

    # Dense equivalent of the convolution, built for the input shape given by
    # the non-batch dimensions of `lb` (read before `lb` is flattened).
    conv_input_shape = lb.shape[1:].as_list()
    dense_w, dense_b = layer_utils.materialise_conv(
        w, b, conv_input_shape, padding=padding, strides=strides)

    coeffs = -tf.tensordot(flat_mu, tf.transpose(dense_w), axes=1)
    bias_term = -tf.tensordot(flat_mu, dense_b, axes=1)

    # Remaining operands are flattened the same way; a missing `lam_in`
    # stays None.
    flat_lam = (None if lam_in is None
                else snt.FlattenTrailingDimensions(2)(lam_in))
    flat_lb = snt.BatchFlatten()(lb)
    flat_ub = snt.BatchFlatten()(ub)

    return standard_layer_calcs.linear_dual_objective(
        flat_lam, coeffs, bias_term, flat_lb, flat_ub)
Example #2
0
    def _build(self, inputs):
        """Looks up rows in memory.

        In the args list, we have the following conventions:
          B: batch size
          M: number of slots in a row of the memory matrix
          R: number of rows in the memory matrix
          H: number of read heads in the memory controller

        Args:
          inputs: A tuple of
            *  read_inputs, a tensor of shape [B, ...] that will be flattened
                 and passed through a linear layer to get read
                 keys/read_strengths for each head.
            *  mem_state, the primary memory tensor. Of shape [B, R, M].

        Returns:
          A tuple `(concatenated_reads, read_information)`: the read from the
          memory (concatenated across read heads) and a `ReadInformation`
          holding the read weights, indices, keys and strengths.
        """
        # Assert input shapes are compatible and separate inputs.
        _assert_compatible_memory_reader_input(inputs)
        read_inputs, mem_state = inputs

        # A single projection of the flattened read inputs yields, per batch
        # row, the flattened read keys followed by one raw strength per head.
        flat_outputs = self._keys_and_read_strengths_generator(
            snt.BatchFlatten()(read_inputs))

        # Separate the read strengths (last H columns) from the keys (all
        # preceding columns).
        h = self._num_read_heads
        flat_keys = flat_outputs[:, :-h]
        # softplus maps the raw strengths to positive values.
        read_strengths = tf.nn.softplus(flat_outputs[:, -h:])

        # Give every head its own key of `_memory_word_size` entries.
        read_shape = (self._num_read_heads, self._memory_word_size)
        read_keys = snt.BatchReshape(read_shape)(flat_keys)

        # Read from memory.
        memory_reads, read_weights, read_indices, read_strengths = (
            read_from_memory(read_keys, read_strengths, mem_state,
                             self._top_k))
        concatenated_reads = snt.BatchFlatten()(memory_reads)

        return concatenated_reads, ReadInformation(weights=read_weights,
                                                   indices=read_indices,
                                                   keys=read_keys,
                                                   strengths=read_strengths)
Example #3
0
    def _build(self, pixel_pos, embedding):
        """Computes the activated value for an embedding / pixel position pair.

        Both inputs are batch-flattened and concatenated (embedding first)
        along axis 1, then passed through `self._network`, `self._value` and
        finally `self._output_activation`.

        Args:
          pixel_pos: Tensor with a leading batch dimension.
          embedding: Tensor with a leading batch dimension.

        Returns:
          The output of `self._output_activation`.
        """
        flat_inputs = [
            snt.BatchFlatten()(embedding),
            snt.BatchFlatten()(pixel_pos),
        ]
        features = tf.concat(flat_inputs, 1)
        hidden = self._network(features)
        value = self._value(hidden)
        return self._output_activation(value)
Example #4
0
def train_and_eval(train_batch_size, test_batch_size, num_hidden,
                   learning_rate, num_train_steps, report_every, test_every):
    """Builds a Sonnet MLP for MNIST, then trains and evaluates it.

    Args:
      train_batch_size: Batch size passed to the dataset loader for training.
      test_batch_size: Batch size passed to the dataset loader for testing;
        also used as the denominator in the test log message.
      num_hidden: Size of the MLP's hidden layer.
      learning_rate: Learning rate of the gradient descent optimizer.
      num_train_steps: Number of optimizer steps to run.
      report_every: The training loss is logged on steps that are a multiple
        of this value.
      test_every: A test batch is evaluated on steps that are a multiple of
        this value.
    """
    datasets = dataset_mnist.get_data("mnist", train_batch_size,
                                      test_batch_size)
    train_iterator = datasets["train_iterator"]
    test_iterator = datasets["test_iterator"]

    # Constructing the module only configures it; ops are added to the graph
    # when the module is called below.
    mlp = snt.nets.MLP([num_hidden, datasets["num_classes"]])

    train_images, train_labels = train_iterator.get_next()
    test_images, test_labels = test_iterator.get_next()

    # The MLP expects one flat feature vector per example.
    flat_train_images = snt.BatchFlatten()(train_images)
    flat_test_images = snt.BatchFlatten()(test_images)

    # First connection of the module: the training tower.
    train_logits = mlp(flat_train_images)
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=train_labels, logits=train_logits)
    mean_loss = tf.reduce_mean(per_example_loss)
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        mean_loss)

    # Second connection of the same module instance: the test tower, which
    # reuses the variables created by the first connection.
    test_logits = mlp(flat_test_images)
    test_probabilities = tf.nn.softmax(test_logits)
    is_correct = tf.nn.in_top_k(test_probabilities, test_labels, k=1)

    with tf.train.SingularMonitoredSession() as sess:
        for step in range(num_train_steps):
            loss_value, _ = sess.run([mean_loss, train_op])

            if step % report_every == 0:
                tf.logging.info("Step: %4d of %d - loss: %.02f.", step + 1,
                                num_train_steps, loss_value)

            if step % test_every == 0:
                # Re-initialise the test iterator before drawing a batch.
                sess.run(test_iterator.initializer)
                num_correct = np.count_nonzero(sess.run(is_correct))
                tf.logging.info("Test: %d of %d correct.", num_correct,
                                test_batch_size)
Example #5
0
    def test_calc_conv_batchnorm(self):
        """Checks the dual objective of the 'conv_batchnorm' network.

        The objective returned by `self._build_objective` is compared with an
        expected objective that is assembled here layer by layer, using
        weights and biases obtained from `layer_utils.combine_with_batchnorm`.
        """
        image_data = self._image_data()
        net = self._network('conv_batchnorm')
        input_bounds = naive_bounds.input_bounds(image_data.image, delta=.1)
        dual_obj, dual_var_lists = self._build_objective(
            net, input_bounds, image_data.label)

        # Explicitly build the expected TensorFlow graph for calculating objective.
        (
            conv2d_0,
            relu_1,  # pylint:disable=unused-variable
            linear_2,
            relu_3,  # pylint:disable=unused-variable
            linear_obj) = self._verifiable_layer_builder(net).build_layers()
        # One single-element list of dual variables per layer; the final
        # entry is not needed here.
        (mu_0, ), (lam_1, ), (mu_2, ), _ = dual_var_lists

        # Expected input bounds for each layer.
        conv2d_0_lb, conv2d_0_ub = self._expected_input_bounds(
            image_data.image, .1)
        # Conv weights/bias combined with the layer's batch norm.
        conv2d_0_w, conv2d_0_b = layer_utils.combine_with_batchnorm(
            conv2d_0.module.w, None, conv2d_0.batch_norm)
        relu_1_lb, relu_1_ub = ibp.IntervalBounds(
            conv2d_0_lb, conv2d_0_ub).apply_conv2d(None, conv2d_0_w,
                                                   conv2d_0_b, 'VALID', (1, 1))
        # Bounds after the ReLU, flattened for the following linear layer.
        linear_2_lb = snt.BatchFlatten()(tf.nn.relu(relu_1_lb))
        linear_2_ub = snt.BatchFlatten()(tf.nn.relu(relu_1_ub))
        # Linear weights/bias combined with the layer's batch norm.
        linear_2_w, linear_2_b = layer_utils.combine_with_batchnorm(
            linear_2.module.w, None, linear_2.batch_norm)
        relu_3_lb, relu_3_ub = ibp.IntervalBounds(linear_2_lb,
                                                  linear_2_ub).apply_linear(
                                                      None, linear_2_w,
                                                      linear_2_b)

        # Expected objective value.
        objective = 0
        # Contribution of the convolution layer.
        act_coeffs_0 = -common.conv_transpose(
            mu_0, conv2d_0_w, conv2d_0.input_shape, 'VALID', (1, 1))
        obj_0 = -tf.reduce_sum(mu_0 * conv2d_0_b, axis=(2, 3, 4))
        objective += standard_layer_calcs.linear_dual_objective(
            None, act_coeffs_0, obj_0, conv2d_0_lb, conv2d_0_ub)
        # Contribution of the first ReLU.
        objective += standard_layer_calcs.activation_layer_dual_objective(
            tf.nn.relu, mu_0, lam_1, relu_1_lb, relu_1_ub)
        # Contribution of the linear layer; `lam_1` is flattened from its
        # third dimension onwards to match it.
        act_coeffs_2 = -tf.tensordot(mu_2, tf.transpose(linear_2_w), axes=1)
        obj_2 = -tf.tensordot(mu_2, linear_2_b, axes=1)
        objective += standard_layer_calcs.linear_dual_objective(
            snt.BatchFlatten(preserve_dims=2)(lam_1), act_coeffs_2, obj_2,
            linear_2_lb, linear_2_ub)
        # Contribution of the final ReLU, paired with the targeted objective
        # derived from the last layer's weights and the labels.
        objective_w, objective_b = common.targeted_objective(
            linear_obj.module.w, linear_obj.module.b, image_data.label)
        objective += standard_layer_calcs.activation_layer_dual_objective(
            tf.nn.relu, mu_2, -objective_w, relu_3_lb, relu_3_ub)
        objective += objective_b

        self._assert_dual_objective_close(objective, dual_obj, image_data)
Example #6
0
    def _build(self, state, action):
        """Computes a value from a state / action pair.

        Each input is batch-flattened, sent through its own layer followed by
        `self._layer_activations`, and the two results are concatenated
        (state first) along axis 1 before going through `self._network` and
        `self._value`. The value is returned as is; no squashing or rescaling
        is applied to it.

        Args:
          state: Tensor with a leading batch dimension.
          action: Tensor with a leading batch dimension.

        Returns:
          The output of `self._value`.
        """
        flat_state = snt.BatchFlatten()(state)
        flat_action = snt.BatchFlatten()(action)

        state_branch = self._layer_activations(self._state_layer(flat_state))
        action_branch = self._layer_activations(
            self._action_layer(flat_action))
        joint_features = tf.concat([state_branch, action_branch], 1)

        return self._value(self._network(joint_features))
Example #7
0
 def _initial_symbolic_bounds(lb, ub):
   """Builds the initial symbolic bounds for the given interval bounds.

   The linear expression uses an identity weight tensor of shape
   `[batch, input_size, *input_shape]`, a zero bias shaped like the
   unflattened `lb`, and the batch-flattened `lb` / `ub` as concrete bounds.

   Args:
     lb: Lower interval bound, with a leading batch dimension.
     ub: Upper interval bound, with a leading batch dimension.

   Returns:
     A pair containing the same `LinearExpression` twice.
   """
   num_examples = tf.shape(lb)[0]
   unflattened_shape = lb.shape[1:]
   # The bias keeps the original (unflattened) shape of `lb`.
   bias = tf.zeros_like(lb)

   flat_lb = snt.BatchFlatten()(lb)
   flat_ub = snt.BatchFlatten()(ub)
   num_inputs = tf.shape(flat_lb)[1]

   # Identity over the flattened inputs, with its second axis folded back
   # into the original input shape.
   eye_shape = tf.concat([[num_inputs], unflattened_shape], axis=0)
   eye = tf.reshape(tf.eye(num_inputs), eye_shape)

   # Replicate the identity once per batch element.
   batched_eye = tf.expand_dims(eye, 0)
   multiples = [num_examples] + [1] * (len(unflattened_shape) + 1)
   weights = tf.tile(batched_eye, multiples)

   bounds = LinearExpression(w=weights, b=bias,
                             lower=flat_lb, upper=flat_ub)
   return bounds, bounds
Example #8
0
    def connect(self, data, generator_inputs):
        """Connects the components and returns the losses, outputs and debug ops.

        Args:
          data: a `tf.Tensor`: `[batch_size, ...]`. There are no constraints
            on the rank of this tensor, but it has to be compatible with the
            shapes expected by the discriminator.
          generator_inputs: a `tf.Tensor`: `[g_in_batch_size, ...]`. It does
            not have to have the same batch size as the `data` tensor. There
            are no constraints on the rank of this tensor, but it has to be
            compatible with the shapes the generator network supports as
            inputs.

        Returns:
          A `ModelOutputs` instance.
        """
        samples, optimised_z = utils.optimise_and_sample(
            generator_inputs, self, data, is_training=True)
        optimisation_cost = utils.get_optimisation_cost(
            generator_inputs, optimised_z)

        # Samples generated from the inputs as given.
        initial_samples = self.generator(generator_inputs, is_training=True)
        generator_loss = tf.reduce_mean(self.gen_loss_fn(data, samples))

        # RIP loss, (\sqrt{F(x_1 - x_2)^2} - \sqrt{(x_1 - x_2)^2})^2,
        # averaged over the three pairings of optimised samples, initial
        # samples and data.
        rip_samples_vs_initial = self._get_rip_loss(samples, initial_samples)
        rip_samples_vs_data = self._get_rip_loss(samples, data)
        rip_initial_vs_data = self._get_rip_loss(initial_samples, data)
        rip_loss = tf.reduce_mean(
            (rip_samples_vs_initial + rip_samples_vs_data
             + rip_initial_vs_data) / 3.0)

        total_loss = generator_loss + rip_loss
        optimization_components = self._build_optimization_components(
            generator_loss=total_loss)

        # Mean L2 distance between the flattened samples and data.
        reconstruction_error = tf.reduce_mean(
            tf.norm(snt.BatchFlatten()(samples) - snt.BatchFlatten()(data),
                    axis=-1))

        debug_ops = {
            'rip_loss': rip_loss,
            'recons_loss': reconstruction_error,
            'z_step_size': self.z_step_size,
            'opt_cost': optimisation_cost,
            'gen_loss': generator_loss,
        }

        return utils.ModelOutputs(optimization_components, debug_ops)
Example #9
0
def load(config, **inputs):
    """Builds an MLP classifier and returns its loss, stats and artefacts.

    Args:
      config: Object exposing `n_hidden`, the size of the hidden layer.
      **inputs: Must contain `train_img` and `train_label`.

    Returns:
      A tuple `(loss, stats, artefacts)`: the mean cross-entropy loss to
      minimise, a dict of tensors to report during training, and a dict of
      objects kept for later use.
    """
    images = inputs['train_img']
    labels = inputs['train_label']

    flat_images = snt.BatchFlatten()(images)
    mlp = snt.nets.MLP([config.n_hidden, 10])
    logits = mlp(flat_images)
    int_labels = tf.cast(labels, tf.int32)

    per_example_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=int_labels)
    loss = tf.reduce_mean(per_example_xent)

    pred_class = tf.argmax(logits, -1)
    is_correct = tf.equal(tf.to_int32(pred_class), int_labels)
    accuracy = tf.reduce_mean(tf.to_float(is_correct))

    # Everything that might be needed later, e.g. when the model is loaded
    # in a notebook.
    artefacts = {
        'mlp': mlp,
        'logits': logits,
        'loss': loss,
        'pred_class': pred_class,
        'accuracy': accuracy
    }

    # Everything that should be reported every N training iterations, both
    # as tensorboard logs and on the command line.
    stats = {'crossentropy': loss, 'accuracy': accuracy}

    # The loss is minimised with respect to the model parameters.
    return loss, stats, artefacts
Example #10
0
    def _build(self, inputs):
        """Applies a three-layer `VarLinear` network to the flattened inputs.

        Two hidden layers of `self.units` units with ReLU activations are
        followed by a single-unit output layer. The three layers are stored,
        in order, in `self._layers`.

        Args:
          inputs: Tensor with a leading batch dimension.

        Returns:
          The output of the final layer (no activation applied).
        """
        hidden = snt.BatchFlatten()(inputs)

        # Each hidden layer is created right before it is applied.
        hidden_layers = []
        for _ in range(2):
            layer = VarLinear(output_size=self.units, prior=self.prior)
            hidden = tf.nn.relu(layer(hidden))
            hidden_layers.append(layer)

        output_layer = VarLinear(output_size=1, prior=self.prior)
        logits = output_layer(hidden)

        self._layers = hidden_layers + [output_layer]

        return logits
    def _build(self, inputs, training=True, cur_iter=None):
        """Runs the pooled conv blocks and the final sequential head.

        Every block pools its input, applies either the separable conv alone
        or (when `self._residual` is set) a conv followed by a separable conv
        with a skip connection around the latter, and then the activation.
        The flattened result is fed to `self._seq`, optionally concatenated
        with the gradient-stopped output of `self._som`.

        Args:
          inputs: Input tensor.
          training: Forwarded to `self._som`.
          cur_iter: Forwarded to `self._som`.

        Returns:
          The output of `self._seq`.
        """
        features = tf.identity(inputs)

        for block_idx in range(self._num_blocks):
            pooled = self._pool(features)
            if not self._residual:
                block_out = self._sepconvs[block_idx](pooled)
            else:
                shortcut = self._convs[block_idx](pooled)
                block_out = self._sepconvs[block_idx](shortcut) + shortcut
            features = self._act(block_out)
            # Batch-norm moving statistics are not registered as summaries.

        flat_features = snt.BatchFlatten()(features)

        if not self._with_memory:
            return self._seq(flat_features)

        # No gradient flows back through the memory output.
        memory_features = tf.stop_gradient(
            self._som(flat_features, training, cur_iter, self._log))
        return self._seq(
            tf.concat([flat_features, memory_features], axis=-1))
Example #12
0
  def _build(self, inputs):
    """Applies a small two-stage conv net followed by two linear layers.

    The inputs are reshaped to 28x28x1, passed through two
    conv / nonlinearity / 2x2-pool stages (2 then 4 output channels, kernel
    shape 5), flattened, and sent through linear layers of 32 and 10 units.
    When `FLAGS.l2_reg` is truthy, an L2 penalty scaled by it is attached to
    the weights and biases of every conv and linear layer.

    Args:
      inputs: Tensor that can be batch-reshaped to `[28, 28, 1]`.

    Returns:
      The output of the final 10-unit linear layer.
    """
    regularizers = None
    if FLAGS.l2_reg:
      def _l2_penalty(param):
        return FLAGS.l2_reg*tf.nn.l2_loss(param)
      regularizers = {'w': _l2_penalty, 'b': _l2_penalty}

    def _downsample(tensor):
      # 2x2 pooling with stride 2.
      return tf.nn.pool(tensor, window_shape=(2, 2), pooling_type=_POOL,
                        padding=snt.SAME, strides=(2, 2))

    to_image = snt.BatchReshape([28, 28, 1])

    first_conv = snt.Conv2D(2, 5, padding=snt.SAME, regularizers=regularizers)
    first_stage = _downsample(_NONLINEARITY(first_conv(to_image(inputs))))

    second_conv = snt.Conv2D(4, 5, padding=snt.SAME,
                             regularizers=regularizers)
    second_stage = _downsample(_NONLINEARITY(second_conv(first_stage)))

    flat_features = snt.BatchFlatten()(second_stage)

    # No nonlinearity is applied between the two linear layers.
    hidden = snt.Linear(32, regularizers=regularizers)(flat_features)

    return snt.Linear(10, regularizers=regularizers)(hidden)
Example #13
0
    def __init__(self,
                 sampling_rate,
                 filter_size=3,
                 num_filters=32,
                 pooling_stride=2,
                 pool='avg',
                 act='elu',
                 name="classifier"):
        """Creates the sub-modules of the two-class classifier.

        Args:
          sampling_rate: Accepted but not used by this constructor.
          filter_size: Base filter size; the first convolution uses
            `filter_size + 2` and the separable convolution `filter_size`.
          num_filters: Output channels of the first convolution; the
            separable convolution uses twice as many.
          pooling_stride: Accepted but not used by this constructor.
          pool: Accepted but not used by this constructor.
          act: Activation identifier forwarded to `Activation`.
          name: Module name.
        """
        super(Classifier, self).__init__(name=name)

        num_classes = 2

        self._act = Activation(act, verbose=True)
        self._pool = Downsample1D(2)
        self._bf = snt.BatchFlatten()

        # L2 penalty on the weights and biases of both linear layers.
        l2_regularizers = {
            param: tf.contrib.layers.l2_regularizer(scale=0.1)
            for param in ("w", "b")
        }

        with self._enter_variable_scope():
            self._l1_conv = snt.Conv1D(num_filters, filter_size + 2)
            self._l2_sepconv = snt.SeparableConv1D(num_filters << 1, 1,
                                                   filter_size)
            self._lin1 = snt.Linear(256, regularizers=l2_regularizers)
            self._lin2 = snt.Linear(num_classes,
                                    regularizers=l2_regularizers)
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Builds a conv stem, residual blocks and a two-layer MLP head.

        Only the image observation is used; no objective is consumed.

        Args:
          input_dict: Dict whose `["obs"]["image"]` entry is the input image.
          num_outputs: Size of the final linear layer.
          options: Dict providing `custom_options` (with `n_resblock`,
            `stem_config` and `resblock_config`) and `last_layer_hidden`.
            Note that `last_layer_hidden` is read from `options` itself, not
            from `custom_options`.

        Returns:
          A tuple `(output, last_hidden)`: the final linear layer's output
          and the ReLU-activated hidden layer that feeds it.
        """
        config = options["custom_options"]

        n_resblock = config["n_resblock"]
        initializers_mlp = get_init_mlp()
        initializers_conv = get_init_conv()

        features = input_dict["obs"]["image"]

        # Stem: plain convolutions. Channels and kernel sizes are listed per
        # layer, whereas the stride entry is used as is for every layer.
        stem_config = config["stem_config"]
        for layer_idx in range(stem_config["n_layer"]):
            stem_conv = snt.Conv2D(
                output_channels=stem_config["channel"][layer_idx],
                kernel_shape=stem_config["kernel_size"][layer_idx],
                stride=stem_config["stride"],
                padding=snt.VALID,
                initializers=initializers_conv)
            features = stem_conv(features)

        # Residual tower; each block is a freshly constructed module.
        for _ in range(n_resblock):
            features = ResBlock(config["resblock_config"])(features)

        flat_features = snt.BatchFlatten(preserve_dims=1)(features)

        hidden_layer = snt.Linear(options["last_layer_hidden"],
                                  initializers=initializers_mlp)
        last_hidden = tf.nn.relu(hidden_layer(flat_features))
        output_layer = snt.Linear(num_outputs, initializers=initializers_mlp)
        output = output_layer(last_hidden)

        return output, last_hidden
Example #15
0
    def _build(batch):
        """Builds the sonnet module.

        An MLP maps the flattened `batch["image"]` back to a vector of the
        same width, and the mean reconstruction loss over the batch is
        returned. `cfg`, `act_fn` and `init` are taken from the enclosing
        scope.

        Args:
          batch: Mapping with an `"image"` tensor that has a leading batch
            dimension.

        Returns:
          Scalar tensor: the mean over the batch of the per-example loss.

        Raises:
          ValueError: If `cfg["output_type"]` or `cfg["loss_type"]` is not
            supported.
        """
        flat_img = snt.BatchFlatten()(batch["image"])

        # Outputs of these types are compared against targets rescaled by
        # `x * 2 - 1`.
        if cfg["output_type"] in ["tanh", "linear_center"]:
            flat_img = flat_img * 2.0 - 1.0

        # The last layer has as many units as the flattened image so that
        # outputs and targets can be compared element-wise.
        hidden_units = cfg["hidden_units"] + [flat_img.shape.as_list()[1]]
        mod = snt.nets.MLP(hidden_units, activation=act_fn, initializers=init)
        outputs = mod(flat_img)

        if cfg["output_type"] == "sigmoid":
            outputs = tf.nn.sigmoid(outputs)
        elif cfg["output_type"] == "tanh":
            outputs = tf.tanh(outputs)
        elif cfg["output_type"] in ["linear", "linear_center"]:
            # nothing to be done to the outputs
            pass
        else:
            raise ValueError("Invalid output_type [%s]." % cfg["output_type"])

        # The per-example reduction is a `tf` function looked up by name.
        reduce_fn = getattr(tf, cfg["reduction_type"])
        if cfg["loss_type"] == "l2":
            loss_vec = reduce_fn(tf.square(outputs - flat_img), axis=1)
        elif cfg["loss_type"] == "l1":
            loss_vec = reduce_fn(tf.abs(outputs - flat_img), axis=1)
        else:
            # Report the offending loss type (not the reduction type).
            raise ValueError("Unsupported loss_type [%s]." %
                             cfg["loss_type"])

        return tf.reduce_mean(loss_vec)
    def _curiosty(self, input_):
        """Encodes the observed frame into a flat feature vector.

        The frame is converted to float, divided by 255, passed through four
        VALID-padded convolutions (each followed by a ReLU) inside the
        'convnet' variable scope, flattened, and passed through a final ReLU.
        The last action and the reward contained in `input_` are not used.

        Args:
          input_: Pair `(last_action, env_output)`, where `env_output` is a
            4-tuple whose first element is the reward and last element is
            the frame.

        Returns:
          The flattened, ReLU-activated convolutional features.
        """
        _, env_output = input_
        _, _, _, frame = env_output

        # Convert to floats and divide by 255.
        features = tf.to_float(frame) / 255.0

        # (output channels, kernel shape, stride) of each encoder layer.
        conv_specs = [(32, 8, 4), (32, 4, 2), (32, 3, 2), (32, 3, 1)]
        with tf.variable_scope('convnet'):
            for layer_idx, (channels, kernel, stride) in enumerate(
                    conv_specs):
                conv = snt.Conv2D(
                    channels,
                    kernel,
                    stride=stride,
                    padding='VALID',
                    initializers={
                        'b': tf.zeros_initializer(),
                        'w': tf.orthogonal_initializer()
                    },
                    name="icm_conv_2d_{}".format(layer_idx))
                features = tf.nn.relu(conv(features))

        flat_features = snt.BatchFlatten(name='icm_flatten')(features)
        return tf.nn.relu(flat_features)
  def test_train(self):
    """Trains an MLP on random data and checks the optimizer's counter.

    After every training step the optimizer's counter must equal the number
    of steps run so far.
    """
    # Random inputs and labels.
    image = tf.random_uniform(shape=(_BATCH_SIZE, 784), maxval=1.)
    labels = tf.random_uniform(shape=(_BATCH_SIZE,), maxval=10, dtype=tf.int32)
    one_hot_labels = tf.one_hot(labels, 10)

    model = snt.Sequential([snt.BatchFlatten(), snt.nets.MLP([128, 128, 10])])
    logits = model(image)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=one_hot_labels)
    loss = tf.reduce_mean(per_example_loss)

    layers = layer_collection.LayerCollection()
    optimizer = periodic_inv_cov_update_kfac_opt.PeriodicInvCovUpdateKfacOpt(
        invert_every=10,
        cov_update_every=1,
        learning_rate=0.03,
        cov_ema_decay=0.95,
        damping=100.,
        layer_collection=layers,
        momentum=0.9,
        num_burnin_steps=0,
        placement_strategy="round_robin")
    _construct_layer_collection(layers, [logits], tf.trainable_variables())

    train_step = optimizer.minimize(loss)
    counter = optimizer.counter
    max_iterations = 50

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      coord = tf.train.Coordinator()
      tf.train.start_queue_runners(sess=sess, coord=coord)
      for iteration in range(max_iterations):
        sess.run([loss, train_step])
        steps_done = sess.run(counter)
        self.assertEqual(steps_done, iteration + 1.0)
Example #18
0
    def _build(self, x, is_train):
        """Applies the exit block and returns flattened pooled features.

        A shortcut branch (conv + batch norm) is added to a main branch made
        of two ReLU / separable-conv / batch-norm steps and a strided max
        pool. Two separable-conv / batch-norm / ReLU steps, a 5x5 average
        pool and a flatten follow.

        Args:
          x: Input tensor.
          is_train: Forwarded to every batch-norm module.

        Returns:
          The flattened output of the average pool.
        """
        shortcut = self.resconv_ex1(x)
        shortcut = self.bn_resex1(shortcut, is_train)

        # Main branch, part 1: ReLU -> separable conv -> batch norm, twice.
        pre_activated_steps = (
            (self.sepconv_ex1, self.bn_sepex1, 'relu_ex1'),
            (self.sepconv_ex2, self.bn_sepex2, 'relu_ex2'),
        )
        main = x
        for sepconv, batch_norm, relu_name in pre_activated_steps:
            main = sepconv(tf.nn.relu(main, name=relu_name))
            main = batch_norm(main, is_train)

        main = tf.nn.max_pool(main,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name='maxpool_ex')
        main = tf.add(main, shortcut, name='add_ex2')

        # Main branch, part 2: separable conv -> batch norm -> ReLU, twice.
        post_activated_steps = (
            (self.sepconv_ex3, self.bn_sepex3, 'relu_ex3'),
            (self.sepconv_ex4, self.bn_sepex4, 'relu_ex4'),
        )
        for sepconv, batch_norm, relu_name in post_activated_steps:
            main = sepconv(main)
            main = tf.nn.relu(batch_norm(main, is_train), name=relu_name)

        # In the paper the global pooling kernel size is 10; here it is 5
        # because the input image length is 149.
        pooled = tf.nn.avg_pool(main,
                                ksize=[1, 5, 5, 1],
                                strides=[1, 1, 1, 1],
                                padding='VALID',
                                name='global_avg_pool')

        return snt.BatchFlatten(name='flatten')(pooled)
    def _build(self, input_image):
        """Classifies an image with a conv net and a linear layer.

        Args:
          input_image: Image tensor fed to the convolutional network.

        Returns:
          Logits over two classes, one for real and one for fake data.
        """

        def leaky_relu(tensor):
            # max(coeff * x, x)
            return tf.maximum(self._leaky_relu_coeff * tensor, tensor)

        initializers = {
            'w': tf.truncated_normal_initializer(seed=547, stddev=0.02),
            'b': tf.constant_initializer(0.3)
        }

        # Five 5x5 SAME-padded conv layers, without batch norm, with the
        # activation applied after the last layer as well.
        feature_extractor = snt.nets.ConvNet2D(
            output_channels=[8, 16, 32, 64, 128],
            kernel_shapes=[[5, 5]],
            strides=[2, 1, 2, 1, 2],
            paddings=[snt.SAME],
            activate_final=True,
            activation=leaky_relu,
            use_batch_norm=False,
            initializers=initializers)
        conv_features = feature_extractor(input_image)

        # The classification layer needs 2D (batch, features) input.
        flat_features = snt.BatchFlatten()(conv_features)

        classifier = snt.Linear(2, initializers=initializers)
        return classifier(flat_features)
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Builds a conv net with early text fusion and a two-layer MLP head.

        The mission text is embedded and encoded with an RNN; the resulting
        state is tiled over a 7x7 grid and concatenated to the visual
        features just before the configured conv layer.

        Args:
          input_dict: Dict whose `'obs'` entry provides `image`, `mission`,
            `sentence_length` and, optionally, `direction`.
          num_outputs: Size of the final linear layer.
          options: Dict whose `custom_options` entry provides
            `text_objective_config`, `fusing`, `vision` and
            `last_layer_hidden`.

        Returns:
          A tuple `(output, last_hidden)`: the final linear layer's output
          and the ReLU-activated hidden layer that feeds it.
        """
        config = options["custom_options"]

        initializers_conv = get_init_conv()
        initializers_mlp = get_init_mlp()

        obs = input_dict['obs']
        image = obs["image"]

        # Text processing
        # ===============
        mission = obs["mission"]
        text_config = config["text_objective_config"]

        # Embedding, then the recurrent encoder described by the config.
        embedded_mission = compute_embedding(objective=mission,
                                             config=text_config)
        text_state = compute_dynamic_rnn(
            inputs=embedded_mission,
            config=text_config,
            sequence_length=obs['sentence_length'])

        # Optionally project the text state to a smaller size before fusing.
        fusing_config = config["fusing"]
        reduced_size = fusing_config["reduce_text_before_fuse"]
        if reduced_size:
            text_state = snt.Linear(reduced_size)(text_state)

        # Insert two singleton spatial axes, then replicate the state over a
        # 7x7 grid so it can be appended to the image features.
        # TODO: don't hardcode 7x7.
        text_state = tf.expand_dims(tf.expand_dims(text_state, axis=1),
                                    axis=2)
        text_map = tf.tile(text_state, multiples=[1, 7, 7, 1])

        # Visual features
        # ===============
        features = image
        vision_config = config["vision"]
        for layer_idx in range(vision_config["n_layers"]):

            # Early fusion: append the text map on the channel axis.
            if layer_idx == fusing_config["layer_to_fuse"] - 1:
                features = tf.concat((features, text_map), axis=3)

            conv = snt.Conv2D(
                output_channels=vision_config["n_channels"][layer_idx],
                kernel_shape=vision_config["kernel"][layer_idx],
                stride=vision_config["stride"][layer_idx],
                initializers=initializers_conv)
            features = tf.nn.relu(conv(features))

        flat_features = snt.BatchFlatten(preserve_dims=1)(features)

        # Append the agent's direction when the observation provides one.
        if obs.get('direction', False) is not False:
            flat_features = tf.concat((flat_features, obs['direction']),
                                      axis=1)

        # MLP head. Both the output and the input of the last layer are
        # returned.
        hidden_layer = snt.Linear(config["last_layer_hidden"],
                                  initializers=initializers_mlp)
        last_hidden = tf.nn.relu(hidden_layer(flat_features))
        output = snt.Linear(num_outputs,
                            initializers=initializers_mlp)(last_hidden)

        return output, last_hidden
Example #21
0
def mnist(layers, activation="sigmoid", batch_size=128, mode="train"):
    """Returns a builder for an MNIST multi-layer perceptron loss.

    Args:
      layers: Iterable of hidden layer sizes; a 10-unit output layer is
        appended.
      activation: Either "sigmoid" or "relu".
      batch_size: Number of examples sampled for every batch.
      mode: Name of the dataset attribute to use, e.g. "train".

    Returns:
      A function without arguments which samples a random batch, applies the
      network and returns the result of `_xent_loss`.

    Raises:
      ValueError: If `activation` is not supported.
    """
    if activation not in ("sigmoid", "relu"):
        raise ValueError("{} activation not supported".format(activation))
    activation_op = tf.sigmoid if activation == "sigmoid" else tf.nn.relu

    # Data: the whole selected split is held in graph constants.
    split = getattr(mnist_dataset.load_mnist(), mode)
    flat_images = tf.constant(split.images, dtype=tf.float32,
                              name="MNIST_images")
    images = tf.reshape(flat_images, [-1, 28, 28, 1])
    labels = tf.constant(split.labels, dtype=tf.int64, name="MNIST_labels")

    # Network.
    mlp = snt.nets.MLP(list(layers) + [10],
                       activation=activation_op,
                       initializers=_nn_initializers)
    network = snt.Sequential([snt.BatchFlatten(), mlp])

    def build():
        # Sample example indices uniformly at random.
        batch_indices = tf.random_uniform([batch_size], 0,
                                          split.num_examples, tf.int64)
        sampled_images = tf.gather(images, batch_indices)
        sampled_labels = tf.gather(labels, batch_indices)
        return _xent_loss(network(sampled_images), sampled_labels)

    return build
Example #22
0
    def _build(self, inputs):
        """Applies a three-layer `VarLinear` network to selected inputs.

        The inputs are flattened and restricted to `self._input_indices`
        along axis 1. The two hidden layers take their sizes from the second
        dimension of the first two entries of `self._w_mus`; the output layer
        has 10 units. The three layers are stored, in order, in
        `self._layers`.

        Args:
          inputs: Tensor with a leading batch dimension.

        Returns:
          The output of the final layer (no activation applied).
        """
        layer_widths = [w_mu.shape[1] for w_mu in self._w_mus]

        # Flatten, then keep only the retained input features.
        flat_inputs = snt.BatchFlatten()(inputs)
        hidden = list_slice(flat_inputs, self._input_indices, axis=1)

        # Each hidden layer is created right before it is applied.
        hidden_layers = []
        for width_idx in (0, 1):
            layer = VarLinear(output_size=layer_widths[width_idx],
                              prior=self.prior)
            hidden = tf.nn.relu(layer(hidden))
            hidden_layers.append(layer)

        output_layer = VarLinear(output_size=10, prior=self.prior)
        logits = output_layer(hidden)

        self._layers = hidden_layers + [output_layer]

        return logits
Example #23
0
    def _torso(self, input_):
        """Encodes frame, last reward and last action into one feature vector.

        The frame is converted to float, divided by 255, passed through three
        VALID-padded convolutions (each followed by a ReLU) inside the
        'convnet' variable scope, flattened, and projected to 256 units with
        a ReLU.

        Args:
          input_: Pair `(last_action, env_output)`, where `env_output` is a
            4-tuple whose first element is the reward and last element is
            the frame.

        Returns:
          The concatenation along axis 1 of the frame features, the reward
          clipped to [-1, 1] and the one-hot encoded last action.
        """
        last_action, env_output = input_
        reward, _, _, frame = env_output

        # Convert to floats and divide by 255.
        features = tf.to_float(frame) / 255.0

        # (output channels, kernel shape, stride) of each conv layer.
        conv_specs = [(32, 8, 4), (64, 4, 2), (64, 3, 1)]
        with tf.variable_scope('convnet'):
            for channels, kernel, stride in conv_specs:
                conv = snt.Conv2D(channels,
                                  kernel,
                                  stride=stride,
                                  padding='VALID',
                                  initializers={
                                      'b': tf.zeros_initializer(),
                                      'w': tf.orthogonal_initializer()
                                  })
                features = tf.nn.relu(conv(features))

        flat_features = snt.BatchFlatten()(features)
        frame_features = tf.nn.relu(snt.Linear(256)(flat_features))

        # Append clipped last reward and one hot last action.
        clipped_reward = tf.expand_dims(tf.clip_by_value(reward, -1, 1), -1)
        one_hot_last_action = tf.one_hot(last_action, self._num_actions)
        return tf.concat(
            [frame_features, clipped_reward, one_hot_last_action], axis=1)
Example #24
0
def gfootball_impala_cnn_network_fn(frame):
    """Builds a residual conv encoder and returns 256 ReLU features.

    The frame is cast to float and divided by 255. It then passes through
    four stages; each stage is a 3x3 convolution, a 3x3 max-pool with stride
    2, and two residual blocks (ReLU -> conv -> ReLU -> conv, plus the block
    input). The result is passed through a ReLU, flattened, and projected by
    a 256-unit linear layer followed by a final ReLU.

    Args:
      frame: Input tensor of frames; cast to float inside this function.

    Returns:
      The output of the final ReLU applied to the 256-unit linear layer.
    """

    def _conv3x3(tensor, channels):
        # Fresh 3x3, stride-1, SAME-padded convolution applied to `tensor`.
        return snt.Conv2D(channels, 3, stride=1, padding='SAME')(tensor)

    net = tf.to_float(frame)
    net /= 255

    # (channels, number of residual blocks) for each stage.
    stages = [(16, 2), (32, 2), (32, 2), (32, 2)]
    with tf.variable_scope('convnet'):
        for stage_idx, (channels, n_blocks) in enumerate(stages):
            # Convolve, then halve the spatial resolution by max-pooling.
            net = _conv3x3(net, channels)
            net = tf.nn.pool(
                net,
                window_shape=[3, 3],
                pooling_type='MAX',
                padding='SAME',
                strides=[2, 2])

            for block_idx in range(n_blocks):
                scope_name = 'residual_%d_%d' % (stage_idx, block_idx)
                with tf.variable_scope(scope_name):
                    shortcut = net
                    net = _conv3x3(tf.nn.relu(net), channels)
                    net = _conv3x3(tf.nn.relu(net), channels)
                    net += shortcut

    net = tf.nn.relu(net)
    net = snt.BatchFlatten()(net)
    net = snt.Linear(256)(net)
    return tf.nn.relu(net)
Example #25
0
    def test_train(self):
        """Smoke-tests a training loop driven by `AsyncInvCovUpdateKfacOpt`.

        Builds a small MLP classifier on random images and random integer
        labels, minimises the mean softmax cross-entropy with the optimizer,
        and runs at most 500 training steps, stopping early if the loss
        drops below 0.05. The test makes no assertion on the final loss; it
        only checks that the loop runs without raising.
        """
        image = tf.random_uniform(shape=(_BATCH_SIZE, 784), maxval=1.)
        labels = tf.random_uniform(shape=(_BATCH_SIZE, ),
                                   maxval=10,
                                   dtype=tf.int32)
        labels_one_hot = tf.one_hot(labels, 10)

        model = snt.Sequential(
            [snt.BatchFlatten(),
             snt.nets.MLP([128, 128, 10])])
        logits = model(image)
        all_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=labels_one_hot)
        loss = tf.reduce_mean(all_losses)
        layers = lc.LayerCollection()
        optimizer = ak.AsyncInvCovUpdateKfacOpt(inv_devices=["/cpu:0"],
                                                cov_devices=["/cpu:0"],
                                                learning_rate=1e-4,
                                                cov_ema_decay=0.95,
                                                damping=1e+3,
                                                layer_collection=layers,
                                                momentum=0.9)
        _construct_layer_collection(layers, [logits], tf.trainable_variables())
        train_step = optimizer.minimize(loss)
        target_loss = 0.05
        max_iterations = 500

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            optimizer.run_cov_inv_ops(sess)
            try:
                for _ in range(max_iterations):
                    loss_value, _ = sess.run([loss, train_step])
                    if loss_value < target_loss:
                        break
            finally:
                # Pair every `run_cov_inv_ops` with `stop_cov_inv_ops`, even
                # when a training step raises, so whatever the optimizer
                # started (presumably background cov/inv update workers --
                # TODO confirm) is not left running after a failure.
                optimizer.stop_cov_inv_ops(sess)
Example #26
0
    def _build(self, inputs, prev_state):
        """Connects the DAM core into the graph.

        The flattened inputs and previous access output are fed to the
        controller. The (optionally clipped, layer-normalised) controller
        output drives the access module, dropout is applied to the
        controller output, and the result is concatenated with the flattened
        access output. That tensor goes through the configured dense
        projection layers and a final linear layer.

        Args:
          inputs: Tensor input.
          prev_state: A `DAMState` tuple containing the fields `access_output`,
              `access_state` and `controller_state`. `access_output` is a 3-D
              Tensor of shape `[batch_size, num_reads, word_size]` containing
              read words. `access_state` is a tuple of the access module's
              state, and `controller_state` is a tuple of controller module's
              state.

        Returns:
          A tuple `(output, next_state)` where `output` is a tensor and `next_state`
          is a `DAMState` tuple containing the fields `access_output`,
          `access_state`, and `controller_state`.
        """
        flatten = snt.BatchFlatten()

        # Controller step on [inputs, previous read words].
        controller_in = tf.concat(
            [flatten(inputs),
             flatten(prev_state.access_output)], 1)
        controller_out, next_controller_state = self._controller(
            controller_in, prev_state.controller_state)

        controller_out = self._clip_if_enabled(controller_out)
        next_controller_state = tf.contrib.framework.nest.map_structure(
            self._clip_if_enabled, next_controller_state)

        controller_out = layer_normalization(controller_out)

        # Memory access is driven by the normalised controller output,
        # before dropout is applied to it.
        next_access_output, next_access_state = self._access(
            controller_out, prev_state.access_state)

        controller_out = tf.nn.dropout(controller_out, self._keep_prob)

        projected = tf.concat(
            [controller_out,
             flatten(next_access_output)], 1)

        # Stack of dense projections, one per (activation, size) pair.
        projection_specs = zip(self._act_fn_list, self._layer_size_list)
        for index, (activation, units) in enumerate(projection_specs):
            projected = tf.layers.dense(
                projected,
                units,
                activation=activation,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                name='projection_' + str(index),
                reuse=tf.AUTO_REUSE)

        output_linear = snt.Linear(
            output_size=self._output_size.as_list()[0],
            name='output_linear')
        output = self._clip_if_enabled(output_linear(projected))

        next_state = DAMState(access_output=next_access_output,
                              access_state=next_access_state,
                              controller_state=next_controller_state)
        return output, next_state
Example #27
0
    def _torso(self, input_):
        """Encodes one observation into a flat feature vector.

        Args:
          input_: A `(last_action, env_output)` pair. `env_output` unpacks
            into four fields; only the first (reward) and the last (frame)
            are used here.

        Returns:
          The concatenation along axis 1 of: the 512-unit ReLU features
          computed from the frame, the last reward clipped to [-1, 1] (with
          a trailing unit dimension added), and the one-hot encoding of the
          last action over `self._num_actions` classes.
        """
        last_action, (reward, _, _, frame) = input_

        # Cast to float and rescale by 1/255 (presumably 8-bit pixel data).
        net = tf.to_float(frame)
        net /= 255

        # (output channels, kernel size, stride) for each conv layer.
        conv_specs = ((32, 8, 4), (64, 4, 2), (64, 3, 1))
        with tf.variable_scope('convnet'):
            for channels, kernel, step in conv_specs:
                net = snt.Conv2D(channels, kernel, stride=step)(net)
                net = tf.nn.relu(net)

        net = snt.BatchFlatten()(net)
        net = tf.nn.relu(snt.Linear(512)(net))

        # Extra inputs: clipped last reward and one-hot last action.
        reward_feature = tf.expand_dims(tf.clip_by_value(reward, -1, 1), -1)
        action_feature = tf.one_hot(last_action, self._num_actions)
        return tf.concat([net, reward_feature, action_feature], axis=1)
    def test_stack_with_tf_activation(self, activation_fn):
        """Checks the layers built for conv -> activation -> flatten -> linear.

        The wrapped `snt.Sequential` is expected to yield exactly three
        verifiable layers (Conv, Activation, Linear): the flatten step
        produces no layer of its own, the activation layer is named after
        `activation_fn.__name__`, and the layers are chained from the model
        input through to the network's output module.

        Args:
          activation_fn: The activation callable placed between the conv and
            the flatten; its `__name__` is compared against the built layer.
        """
        conv_module = snt.Conv2D(
            output_channels=5, kernel_shape=3, padding='VALID')
        linear_module = snt.Linear(23)
        stack = [conv_module, activation_fn, snt.BatchFlatten(), linear_module]

        network = ibp.VerifiableModelWrapper(snt.Sequential(stack))
        network(self._inputs)

        builder = auto_verifier.VerifiableLayerBuilder(network)
        v_layers = builder.build_layers()

        self.assertLen(v_layers, 3)
        conv_layer, activation_layer, linear_layer = v_layers

        # Conv layer wraps the original module and reads the model input.
        self.assertIsInstance(conv_layer, layers.Conv)
        self.assertIs(conv_module, conv_layer.module)
        self.assertIsInstance(conv_layer.input_node, ibp.ModelInputWrapper)

        # Activation layer is fed directly by the conv layer's output.
        self.assertIsInstance(activation_layer, layers.Activation)
        self.assertEqual(activation_fn.__name__, activation_layer.activation)
        self.assertIs(conv_layer.output_node, activation_layer.input_node)

        # Linear layer wraps the original module and is the network output.
        self.assertIsInstance(linear_layer, layers.Linear)
        self.assertIs(linear_module, linear_layer.module)

        self.assertIs(linear_layer.output_node, network.output_module)
Example #29
0
    def _build(self, x, is_training=True):
        """Encodes `x` with a conv stack followed by a one-layer MLP.

        One `tf.layers.Conv2D` is applied per `(filters, stride)` pair taken
        from `self._filters` and `self.strides`. The input shape and every
        conv output shape are recorded in `self.conv_shapes`. The final conv
        output is flattened and passed through an MLP with a single layer of
        `self._filters[-1]` units, with the activation applied to its output.

        Args:
          x: Input tensor; a rank-4 assertion is created for it.
          is_training: Unused in this method.

        Returns:
          The output tensor of the MLP.
        """
        # NOTE(review): only the two assignments below sit inside the
        # control-dependency context; the conv layers are created outside
        # of it -- confirm this is the intended scope of the rank assert.
        with tf.control_dependencies([tfc.assert_rank(x, 4)]):
            self.conv_shapes = [x.shape.as_list()]  # Needed by deconv module
            net = x

        # Layer names are 1-based: enc_conv_1, enc_conv_2, ...
        layer_specs = zip(self._filters, self.strides)
        for index, (n_filters, stride) in enumerate(layer_specs, 1):
            conv_layer = tf.layers.Conv2D(filters=n_filters,
                                          kernel_size=self._kernel_size,
                                          padding='same',
                                          activation=self._activation,
                                          strides=stride,
                                          name='enc_conv_%d' % index)
            net = conv_layer(net)
            self.conv_shapes.append(net.shape.as_list())

        flat = snt.BatchFlatten()(net)
        enc_mlp = snt.nets.MLP(name='enc_mlp',
                               output_sizes=[self._filters[-1]],
                               activation=self._activation,
                               activate_final=True)
        h = enc_mlp(flat)

        logging.info('Shared conv module layer shapes:')
        logging.info('\n'.join(str(shape) for shape in self.conv_shapes))
        logging.info(h.shape.as_list())

        return h
    def test_tolerates_identity(self):
        """Checks that `tf.identity` steps do not produce verifiable layers.

        A conv -> relu -> flatten -> linear stack is interleaved with
        `tf.identity` at every position; the builder is still expected to
        return exactly three layers (Conv, Activation, Linear).
        """
        conv = snt.Conv2D(output_channels=5, kernel_shape=3, padding='VALID')
        linear = snt.Linear(23)
        # `tf.identity` is placed before, between and after every real step.
        module = snt.Sequential([
            tf.identity,
            conv,
            tf.identity,
            tf.nn.relu,
            tf.identity,
            snt.BatchFlatten(),
            tf.identity,
            linear,
            tf.identity,
        ])

        # Connect the wrapped module to the test inputs before building layers.
        network = ibp.VerifiableModelWrapper(module)
        network(self._inputs)

        v_layers = auto_verifier.VerifiableLayerBuilder(network).build_layers()

        # Only the conv, the relu and the linear are expected to show up.
        self.assertLen(v_layers, 3)

        self.assertIsInstance(v_layers[0], layers.Conv)
        self.assertIs(conv, v_layers[0].module)

        self.assertIsInstance(v_layers[1], layers.Activation)
        self.assertEqual('relu', v_layers[1].activation)

        self.assertIsInstance(v_layers[2], layers.Linear)
        self.assertIs(linear, v_layers[2].module)