def build_model():
    model = Sequential()
    model.add(
        layers.Dense(5,
                     activation='sigmoid',
                     input_shape=(train_data.shape[1],)))

    model.add(
        tfp.layers.VariationalGaussianProcess(
            num_inducing_points=num_inducing_points,
            kernel_provider=RBFKernelFn(),
            event_shape=[2],  # output shape
            # One row of inducing index points per output dimension.
            inducing_index_points_initializer=tf.constant_initializer(
                [np.linspace(*x_range, num_inducing_points, dtype='float32'),
                 np.linspace(*x_range, num_inducing_points, dtype='float32')]),
            unconstrained_observation_noise_variance_initializer=(
                tf.constant_initializer(noise))))

    # The VGP head returns a distribution; its variational_loss is the usual
    # objective, but plain MSE on the coerced output also trains.
    model.compile(optimizer=optimizers.Adam(learning_rate=0.01),
                  loss='mse',
                  metrics=['mae', 'mse'])
    return model
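# Minimal usage sketch for the model above (assumes train_data / train_targets are
# the arrays referenced elsewhere and that train_targets has two columns to match
# event_shape=[2]):
model = build_model()
model.fit(train_data, train_targets, epochs=100, batch_size=32, verbose=0)
predictive_dist = model(train_data[:5])  # the VGP head returns a distribution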
Example #2
  def __init__(self,
               num_classes,
               per_class_kernel,
               initial_linear_bias,
               initial_linear_slope,
               name='vgp_kernel',
               **kwargs):
    super(LinearKernelFn, self).__init__(**kwargs)
    self._per_class_kernel = per_class_kernel
    self._initial_linear_bias = initial_linear_bias
    self._initial_linear_slope = initial_linear_slope

    with tf.compat.v1.variable_scope(name):
      if self._per_class_kernel and num_classes > 1:
        shape = (num_classes,)
      else:
        shape = ()

      self._linear_bias = self.add_variable(
          initializer=tf.constant_initializer(self._initial_linear_bias),
          shape=shape,
          name='linear_bias')
      self._linear_slope = self.add_variable(
          initializer=tf.constant_initializer(self._initial_linear_slope),
          shape=shape,
          name='linear_slope')
    def __init__(self,
                 num_classes,
                 degree,
                 per_class_kernel,
                 feature_size,
                 initial_amplitude,
                 initial_length_scale,
                 initial_linear_bias,
                 initial_linear_slope,
                 add_linear=False,
                 name='vgp_kernel',
                 **kwargs):
        super(MaternKernelFn, self).__init__(**kwargs)
        self._per_class_kernel = per_class_kernel
        self._initial_linear_bias = initial_linear_bias
        self._initial_linear_slope = initial_linear_slope
        self._add_linear = add_linear

        if degree not in [1, 3, 5]:
            raise ValueError(
                'Matern degree must be one of [1, 3, 5]: {}'.format(degree))

        self._degree = degree

        with tf.compat.v1.variable_scope(name):
            if self._per_class_kernel and num_classes > 1:
                amplitude_shape = (num_classes, )
                length_scale_shape = (num_classes, feature_size)
            else:
                amplitude_shape = ()
                length_scale_shape = (feature_size, )

            self._amplitude = self.add_variable(
                initializer=tf.constant_initializer(initial_amplitude),
                shape=amplitude_shape,
                name='amplitude')

            self._length_scale = self.add_variable(
                initializer=tf.constant_initializer(initial_length_scale),
                shape=length_scale_shape,
                name='length_scale')

            if self._add_linear:
                self._linear_bias = self.add_variable(
                    initializer=tf.constant_initializer(
                        self._initial_linear_bias),
                    shape=amplitude_shape,
                    name='linear_bias')
                self._linear_slope = self.add_variable(
                    initializer=tf.constant_initializer(
                        self._initial_linear_slope),
                    shape=amplitude_shape,
                    name='linear_slope')
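    # A kernel_provider must also expose a `kernel` property; a minimal sketch,
    # assuming softplus transforms and per-feature length scales via FeatureScaled:
    @property
    def kernel(self):
        matern_cls = {
            1: tfp.math.psd_kernels.MaternOneHalf,
            3: tfp.math.psd_kernels.MaternThreeHalves,
            5: tfp.math.psd_kernels.MaternFiveHalves,
        }[self._degree]
        base = matern_cls(amplitude=tf.nn.softplus(self._amplitude))
        return tfp.math.psd_kernels.FeatureScaled(
            base, scale_diag=tf.nn.softplus(self._length_scale))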
  def __init__(self, **kwargs):
    super(RBFKernelFn, self).__init__(**kwargs)
    dtype = kwargs.get('dtype', None)

    self._amplitude = self.add_variable(
            initializer=tf.constant_initializer(0),
            dtype=dtype,
            name='amplitude')
    
    self._length_scale = self.add_variable(
            initializer=tf.constant_initializer(0),
            dtype=dtype,
            name='length_scale')
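  # The VGP layer reads the provider's `kernel` property; a minimal sketch that
  # follows the common TFP example (the 0.1 / 5.0 scaling factors are assumptions):
  @property
  def kernel(self):
    return tfp.math.psd_kernels.ExponentiatedQuadratic(
        amplitude=tf.nn.softplus(0.1 * self._amplitude),
        length_scale=tf.nn.softplus(5. * self._length_scale))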
Example #5
 def build(self, unused_input_shape):
     """Initialize impulse response."""
     if self.trainable:
         self._gain = self.add_weight(
             name='gain',
             shape=[1],
             dtype=tf.float32,
             initializer=tf.constant_initializer(2.0))
         self._decay = self.add_weight(
             name='decay',
             shape=[1],
             dtype=tf.float32,
             initializer=tf.constant_initializer(4.0))
     self.built = True
Example #6
 def build(self, input_shape):
     """Initialize impulse response."""
     super(ExpDecayReverb, self).build(input_shape)
     if self.trainable:
         self._gain = self.add_weight(
             name='gain',
             shape=[1],
             dtype=tf.float32,
             initializer=tf.constant_initializer(2.0))
         self._decay = self.add_weight(
             name='decay',
             shape=[1],
             dtype=tf.float32,
             initializer=tf.constant_initializer(4.0))
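# For reference, a (gain, decay) pair like the one initialized above is commonly
# turned into an exponentially decaying noise impulse response; a rough sketch
# (sample rate, length, and transforms are assumptions, not the library's exact math):
import numpy as np
t = np.linspace(0.0, 1.0, 16000, dtype=np.float32)           # 1 second at 16 kHz
noise = np.random.uniform(-1.0, 1.0, t.shape).astype(np.float32)
ir = 2.0 * noise * np.exp(-4.0 * t)                           # gain=2.0, decay=4.0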
Example #7
def MnistTeacher(input, keep_prob_conv, keep_prob_hidden, scope='Mnist', reuse=False):
  # Note: tf2 is assumed to alias the tf.compat.v1 API here (slim-style graph code),
  # so variable_scope is used instead of the misnamed variable_creator_scope.
  with tf2.variable_scope(scope, reuse=reuse) as sc:
    with slim.arg_scope([slim.conv2d], kernel_size=[3, 3], stride=[1, 1],
                        biases_initializer=tf2.constant_initializer(0.0),
                        activation_fn=tf2.nn.relu):

      net = slim.conv2d(input, 32, scope='conv1')
      net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
      net = tf2.nn.dropout(net, keep_prob_conv)

      net = slim.conv2d(net, 64, scope='conv2')
      net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
      net = tf2.nn.dropout(net, keep_prob_conv)

      net = slim.conv2d(net, 128, scope='conv3')
      net = slim.max_pool2d(net, [2, 2], 2, scope='pool3')
      net = tf2.nn.dropout(net, keep_prob_conv)

      net = slim.flatten(net)
    with slim.arg_scope([slim.fully_connected],
                        biases_initializer=tf2.constant_initializer(0.0),
                        activation_fn=tf2.nn.relu):
      net = slim.fully_connected(net, 625, scope='fc1')
      net = tf2.nn.dropout(net, keep_prob_hidden)
      net = slim.fully_connected(net, 10, activation_fn=None, scope='fc2')

      # Soften logits with the (assumed global) distillation temperature.
      net = tf2.nn.softmax(net / temperature)
      return net
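# Distillation sketch: a teacher's logits are usually softened with a temperature
# T > 1 before the student matches them (names here are illustrative only).
def soften(logits, temperature):
  return tf2.nn.softmax(logits / temperature)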
 def _build_class_net_layers(self, batch_norm_relu):
     """Build re-usable layers for class prediction network."""
     self._class_predict = tf.keras.layers.Conv2D(
         self._num_classes * self._anchors_per_location,
         kernel_size=(3, 3),
         bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) /
                                                          0.01)),
         kernel_initializer=tf.keras.initializers.RandomNormal(stddev=1e-5),
         padding='same',
         name='class-predict')
     self._class_conv = []
     self._class_batch_norm_relu = {}
     for i in range(self._num_convs):
         self._class_conv.append(
             tf.keras.layers.Conv2D(
                 self._num_filters,
                 kernel_size=(3, 3),
                 bias_initializer=tf.zeros_initializer(),
                 kernel_initializer=tf.keras.initializers.RandomNormal(
                     stddev=0.01),
                 activation=None,
                 padding='same',
                 name='class-' + str(i)))
         for level in range(self._min_level, self._max_level + 1):
             name = self._class_net_batch_norm_name(i, level)
             self._class_batch_norm_relu[name] = batch_norm_relu(name=name)
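# Why bias = -log((1 - 0.01) / 0.01): with that bias the class head starts out with
# sigmoid(bias) = 0.01 foreground probability, which keeps focal loss stable early on.
p = 0.01
bias_init = -np.log((1 - p) / p)                  # ~ -4.595
assert abs(1.0 / (1.0 + np.exp(-bias_init)) - p) < 1e-9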
Example #9
def MnistStudent(input, scope="Mnist", reuse=False):
  # As above, tf2 is assumed to alias tf.compat.v1 for slim-style scoping.
  with tf2.variable_scope(scope, reuse=reuse) as sc:
    with slim.arg_scope([slim.fully_connected],
                        biases_initializer=tf2.constant_initializer(0.0),
                        activation_fn=tf2.nn.sigmoid):

      net = slim.fully_connected(input, 1000, scope='fc1')
      net = slim.fully_connected(net, 10, activation_fn=None, scope='fc2')
      return net
    def __init__(self,
                 num_classes,
                 per_class_kernel,
                 feature_size,
                 initial_amplitude,
                 initial_length_scale,
                 initial_linear_bias,
                 initial_linear_slope,
                 add_linear=False,
                 name='vgp_kernel',
                 **kwargs):
        super(RBFKernelFn, self).__init__(**kwargs)
        self._per_class_kernel = per_class_kernel
        self._initial_linear_bias = initial_linear_bias
        self._initial_linear_slope = initial_linear_slope
        self._add_linear = add_linear

        with tf.compat.v1.variable_scope(name):
            if self._per_class_kernel and num_classes > 1:
                amplitude_shape = (num_classes, )
                length_scale_shape = (num_classes, feature_size)
            else:
                amplitude_shape = ()
                length_scale_shape = (feature_size, )

            self._amplitude = self.add_variable(
                initializer=tf.constant_initializer(initial_amplitude),
                shape=amplitude_shape,
                name='amplitude')

            self._length_scale = self.add_variable(
                initializer=tf.constant_initializer(initial_length_scale),
                shape=length_scale_shape,
                name='length_scale')

            if self._add_linear:
                self._linear_bias = self.add_variable(
                    initializer=tf.constant_initializer(
                        self._initial_linear_bias),
                    shape=amplitude_shape,
                    name='linear_bias')
                self._linear_slope = self.add_variable(
                    initializer=tf.constant_initializer(
                        self._initial_linear_slope),
                    shape=amplitude_shape,
                    name='linear_slope')
    def __init__(self,
                 num_classes,
                 num_downsample_channels,
                 mask_crop_size,
                 num_convs,
                 coarse_mask_thr,
                 gt_upsample_scale,
                 batch_norm_relu=nn_ops.BatchNormRelu):
        """Initialize params to build ShapeMask coarse and fine prediction head.

    Args:
      num_classes: `int` number of mask classification categories.
      num_downsample_channels: `int` number of filters at mask head.
      mask_crop_size: feature crop size.
      num_convs: `int` number of stacked convolution before the last prediction
        layer.
      coarse_mask_thr: the threshold for suppressing noisy coarse prediction.
      gt_upsample_scale: scale for upsampling groundtruths.
      batch_norm_relu: an operation that includes a batch normalization layer
        followed by a relu layer (optional).
    """
        self._mask_num_classes = num_classes
        self._num_downsample_channels = num_downsample_channels
        self._mask_crop_size = mask_crop_size
        self._num_convs = num_convs
        self._coarse_mask_thr = coarse_mask_thr
        self._gt_upsample_scale = gt_upsample_scale

        self._class_predict_conv = tf.keras.layers.Conv2D(
            self._mask_num_classes,
            kernel_size=(1, 1),
            # Focal loss bias initialization to have foreground 0.01 probability.
            bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) /
                                                             0.01)),
            kernel_initializer=tf.keras.initializers.RandomNormal(mean=0,
                                                                  stddev=0.01),
            padding='same',
            name='affinity-class-predict')
        self._upsample_conv = tf.keras.layers.Conv2DTranspose(
            self._num_downsample_channels // 2,
            (self._gt_upsample_scale, self._gt_upsample_scale),
            (self._gt_upsample_scale, self._gt_upsample_scale))
        self._fine_class_conv = []
        self._fine_class_bn = []
        for i in range(self._num_convs):
            self._fine_class_conv.append(
                tf.keras.layers.Conv2D(
                    self._num_downsample_channels,
                    kernel_size=(3, 3),
                    bias_initializer=tf.zeros_initializer(),
                    kernel_initializer=tf.keras.initializers.RandomNormal(
                        stddev=0.01),
                    activation=None,
                    padding='same',
                    name='fine-class-%d' % i))
            self._fine_class_bn.append(
                batch_norm_relu(name='fine-class-%d-bn' % i))
Example #12
    def set_initial_weights(self, mean1, mean2, rot1, rot2):
        if not isinstance(mean1, np.ndarray) and mean1.shape[0] == 1:  # pytype: disable=attribute-error
            raise TypeError('mean1 matrix has the wrong size (%s)' %
                            mean1.shape)  # pytype: disable=attribute-error
        if not isinstance(mean2, np.ndarray) and mean2.shape[0] == 1:  # pytype: disable=attribute-error
            raise TypeError('mean2 matrix has the wrong size (%s)' %
                            mean2.shape)  # pytype: disable=attribute-error
        if not isinstance(rot1,
                          np.ndarray) and rot1.shape[1] == self.output_dims:
            raise TypeError('rot1 matrix has the wrong size (%s not %s)' %
                            (rot1.shape, self.output_dims))
        if not isinstance(rot2,
                          np.ndarray) and rot2.shape[1] == self.output_dims:
            raise TypeError('rot2 matrix has the wrong size (%s)' % rot2.shape)

        self._mean1_init = tf.constant_initializer(mean1)
        self._mean2_init = tf.constant_initializer(mean2)
        self._rot1_init = tf.constant_initializer(rot1)
        self._rot2_init = tf.constant_initializer(rot2)

        self.set_weights([mean1, mean2, rot1, rot2])
Example #13
 def build(self, unused_input_shape):
     """Initialize impulse response."""
     if self.trainable:
         initializer = tf.random_normal_initializer(mean=0, stddev=1e-2)
         self._magnitudes = self.add_weight(name='magnitudes',
                                            shape=[1, self._n_filter_banks],
                                            dtype=tf.float32,
                                            initializer=initializer)
         self._decay = self.add_weight(
             name='decay',
             shape=[1],
             dtype=tf.float32,
             initializer=tf.constant_initializer(4.0))
     self.built = True
Example #14
def build_dummy_sequential_net(fc_layer_params, action_spec):
    """Build a dummy sequential network."""
    num_actions = action_spec.maximum - action_spec.minimum + 1

    logits = functools.partial(
        tf.keras.layers.Dense,
        activation=None,
        kernel_initializer=tf.random_uniform_initializer(minval=-0.03,
                                                         maxval=0.03),
        bias_initializer=tf.constant_initializer(-0.2))

    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.keras.activations.relu,
        kernel_initializer=tf.compat.v1.variance_scaling_initializer(
            scale=2.0, mode='fan_in', distribution='truncated_normal'))

    return sequential.Sequential(
        [dense(num_units)
         for num_units in fc_layer_params] + [logits(num_actions)])
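# Usage sketch (the spec values are arbitrary; tf_agents is assumed available):
from tf_agents.specs import tensor_spec
action_spec = tensor_spec.BoundedTensorSpec((), tf.int32, minimum=0, maximum=3)
q_net = build_dummy_sequential_net(fc_layer_params=(64, 64), action_spec=action_spec)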
Example #15
    def _get_embedding_layer(self, pretrained_embed_path, oov_buckets_size,
                             vocab_size, embed_dim):
        """Get word embedding layer.

    Args:
      pretrained_embed_path: Pretrained glove embedding path.
      oov_buckets_size: Out-of-vocabularies bucket size.
      vocab_size: vocabulary size (used if pretrained_embed_path is None).
      embed_dim: the dimension of word embeddings (used if
        pretrained_embed_path is None).

    Returns:
      A tf.keras.layers.Embedding instance.
    """
        if pretrained_embed_path:
            with tf.io.gfile.GFile(pretrained_embed_path, 'rb') as f:
                floats_np = np.load(f)
            vocab_size = floats_np.shape[0]
            embed_dim = floats_np.shape[1]
            # Initialize word embeddings
            init_tensor = tf.constant(floats_np)
            oov_init = tf.compat.v1.truncated_normal_initializer(stddev=0.01)(
                shape=(oov_buckets_size, embed_dim), dtype=tf.float32)
            init_tensor = tf.concat([init_tensor, oov_init], axis=0)
        else:
            init_tensor = tf.compat.v1.truncated_normal_initializer(
                stddev=0.01)(shape=(vocab_size + oov_buckets_size, embed_dim),
                             dtype=tf.float32)

        embeddings_initializer = tf.constant_initializer(init_tensor.numpy())
        # Now the init_tensor should have shape
        # [vocab_size+_OOV_BUCKETS_SIZE, embed_dim]
        return tf.keras.layers.Embedding(
            vocab_size + oov_buckets_size,
            embed_dim,
            embeddings_initializer=embeddings_initializer,
            mask_zero=True,
            name='embedding')
tf.keras.backend.set_floatx('float64')

# Build model.
# Number of inducing points used to cover the data range.
num_inducing_points = 40

model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(train_data.shape[1],), dtype='float32'),
    tf.keras.layers.Dense(12, kernel_initializer='zeros', use_bias=False),
    tfp.layers.VariationalGaussianProcess(
        num_inducing_points=num_inducing_points,
        kernel_provider=RBFKernelFn(),
        event_shape=[2],
        # One row of inducing index points per output dimension.
        inducing_index_points_initializer=tf.constant_initializer(
            [np.linspace(*x_range, num_inducing_points, dtype='float32'),
             np.linspace(*x_range, num_inducing_points, dtype='float32')]),
        unconstrained_observation_noise_variance_initializer=(
            tf.constant_initializer(0.1)),
    ),
])


batch_size = 264

# Weight the KL term by batch_size / n_train so minibatch losses sum to the ELBO.
loss = lambda y, rv_y: rv_y.variational_loss(
    y, kl_weight=np.array(batch_size) / train_data.shape[0])

model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001),
              loss=loss,
              metrics=['mae', 'mse'])

# For numeric stability, set the default floating-point dtype to float64
tf.keras.backend.set_floatx('float64')

# Build model.
num_inducing_points = 40
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=[1]),
    tf.keras.layers.Dense(1, kernel_initializer='ones', use_bias=False),
    tfp.layers.VariationalGaussianProcess(
        num_inducing_points=num_inducing_points,
        kernel_provider=RBFKernelFn(),
        event_shape=[1],
        inducing_index_points_initializer=tf.constant_initializer(
            np.linspace(*x_range, num=num_inducing_points,
                        dtype=x.dtype)[..., np.newaxis]),
        unconstrained_observation_noise_variance_initializer=(
            tf.constant_initializer(np.array(0.54).astype(x.dtype))),
    ),
])

# Do inference.
batch_size = 32
loss = lambda y, rv_y: rv_y.variational_loss(
    y, kl_weight=np.array(batch_size, x.dtype) / x.shape[0])
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=loss)
model.fit(x, y, batch_size=batch_size, epochs=1000, verbose=False)

# Profit.
yhat = model(x_tst)
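# yhat is a distribution object, so predictive statistics come straight from it
# (a sketch over the x_tst grid above):
mean = yhat.mean()
stddev = yhat.stddev()
samples = yhat.sample(7)  # a few posterior-predictive function draws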
Example #18
def main(argv):
    del argv  # unused arg
    np.random.seed(FLAGS.seed)
    tf.random.set_seed(FLAGS.seed)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf1.disable_v2_behavior()

    session = tf1.Session()
    with session.as_default():
        x_train, y_train, x_test, y_test = datasets.load(session)
        n_train = x_train.shape[0]

        num_classes = int(np.amax(y_train)) + 1
        if not FLAGS.resnet:
            model = lenet5(n_train, x_train.shape[1:], num_classes)
        else:
            datagen = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=90,
                width_shift_range=0.1,
                height_shift_range=0.1,
                horizontal_flip=True)
            datagen.fit(x_train)
            model = res_net(n_train,
                            x_train.shape[1:],
                            num_classes,
                            batchnorm=FLAGS.batchnorm,
                            variational='hybrid' if FLAGS.hybrid else 'full')

            def schedule_fn(epoch):
                """Learning rate schedule function."""
                rate = FLAGS.learning_rate
                if epoch > 180:
                    rate *= 0.5e-3
                elif epoch > 160:
                    rate *= 1e-3
                elif epoch > 120:
                    rate *= 1e-2
                elif epoch > 80:
                    rate *= 1e-1
                return float(rate)

            lr_callback = tf.keras.callbacks.LearningRateScheduler(schedule_fn)

        for l in model.layers:
            l.kl_cost_weight = l.add_weight(
                name='kl_cost_weight',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)
            l.kl_cost_bias = l.add_variable(
                name='kl_cost_bias',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)

        [negative_log_likelihood, accuracy, log_likelihood, kl,
         elbo] = get_losses_and_metrics(model, n_train)

        metrics = [elbo, log_likelihood, kl, accuracy]

        tensorboard = tf1.keras.callbacks.TensorBoard(
            log_dir=FLAGS.output_dir,
            update_freq=FLAGS.batch_size * FLAGS.validation_freq)
        if FLAGS.resnet:
            callbacks = [tensorboard, lr_callback]
        else:
            callbacks = [tensorboard]

        if not FLAGS.resnet or not FLAGS.data_augmentation:

            def fit_fn(model,
                       steps,
                       initial_epoch=0,
                       with_lr_schedule=FLAGS.resnet):
                return model.fit(
                    x=x_train,
                    y=y_train,
                    batch_size=FLAGS.batch_size,
                    epochs=initial_epoch +
                    (FLAGS.batch_size * steps) // n_train,
                    initial_epoch=initial_epoch,
                    validation_data=(x_test, y_test),
                    validation_freq=(
                        (FLAGS.validation_freq * FLAGS.batch_size) // n_train),
                    verbose=1,
                    callbacks=callbacks if with_lr_schedule else [tensorboard])
        else:

            def fit_fn(model,
                       steps,
                       initial_epoch=0,
                       with_lr_schedule=FLAGS.resnet):
                return model.fit_generator(
                    datagen.flow(x_train, y_train,
                                 batch_size=FLAGS.batch_size),
                    epochs=initial_epoch +
                    (FLAGS.batch_size * steps) // n_train,
                    initial_epoch=initial_epoch,
                    steps_per_epoch=n_train // FLAGS.batch_size,
                    validation_data=(x_test, y_test),
                    validation_freq=max(
                        (FLAGS.validation_freq * FLAGS.batch_size) // n_train,
                        1),
                    verbose=1,
                    callbacks=callbacks if with_lr_schedule else [tensorboard])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(lr=float(FLAGS.learning_rate)),
            loss=negative_log_likelihood,
            metrics=metrics)
        session.run(tf1.initialize_all_variables())

        train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
        fit_fn(model, FLAGS.training_steps)

        labels = tf.keras.layers.Input(shape=y_train.shape[1:])
        ll = tf.keras.backend.function([model.input, labels], [
            model.output.distribution.log_prob(tf.squeeze(labels)),
            model.output.distribution.logits
        ])

        base_metrics = [
            ensemble_metrics(x_train, y_train, model, ll),
            ensemble_metrics(x_test, y_test, model, ll)
        ]
        model_dir = os.path.join(FLAGS.output_dir, 'models')
        tf.io.gfile.makedirs(model_dir)
        base_model_filename = os.path.join(model_dir, 'base_model.weights')
        model.save_weights(base_model_filename)

        # Train base model further for comparison.
        fit_fn(model,
               FLAGS.n_auxiliary_variables *
               FLAGS.auxiliary_sampling_frequency * FLAGS.ensemble_size,
               initial_epoch=train_epochs)

        overtrained_metrics = [
            ensemble_metrics(x_train, y_train, model, ll),
            ensemble_metrics(x_test, y_test, model, ll)
        ]

        # Perform refined VI.
        sample_op = []
        for l in model.layers:
            if isinstance(
                    l, tfp.layers.DenseLocalReparameterization) or isinstance(
                        l, tfp.layers.Convolution2DFlipout):
                weight_op, weight_cost = sample_auxiliary_op(
                    l.kernel_prior.distribution,
                    l.kernel_posterior.distribution,
                    FLAGS.auxiliary_variance_ratio)
                sample_op.append(weight_op)
                sample_op.append(l.kl_cost_weight.assign_add(weight_cost))
                # Fix the variance of the prior
                session.run(l.kernel_prior.distribution.istrainable.assign(0.))
                if hasattr(l.bias_prior, 'distribution'):
                    bias_op, bias_cost = sample_auxiliary_op(
                        l.bias_prior.distribution,
                        l.bias_posterior.distribution,
                        FLAGS.auxiliary_variance_ratio)
                    sample_op.append(bias_op)
                    sample_op.append(l.kl_cost_bias.assign_add(bias_cost))
                    # Fix the variance of the prior
                    session.run(
                        l.bias_prior.distribution.istrainable.assign(0.))

        ensemble_filenames = []
        for i in range(FLAGS.ensemble_size):
            model.load_weights(base_model_filename)
            for j in range(FLAGS.n_auxiliary_variables):
                session.run(sample_op)
                model.compile(
                    optimizer=tf.keras.optimizers.Adam(
                        # The learning rate is proportional to the scale of the prior.
                        lr=float(FLAGS.learning_rate_for_sampling *
                                 np.sqrt(1. -
                                         FLAGS.auxiliary_variance_ratio)**j)),
                    loss=negative_log_likelihood,
                    metrics=metrics)
                fit_fn(model,
                       FLAGS.auxiliary_sampling_frequency,
                       initial_epoch=train_epochs,
                       with_lr_schedule=False)
            ensemble_filename = os.path.join(
                model_dir, 'ensemble_component_' + str(i) + '.weights')
            ensemble_filenames.append(ensemble_filename)
            model.save_weights(ensemble_filename)

        auxiliary_metrics = [
            ensemble_metrics(x_train,
                             y_train,
                             model,
                             ll,
                             weight_files=ensemble_filenames),
            ensemble_metrics(x_test,
                             y_test,
                             model,
                             ll,
                             weight_files=ensemble_filenames)
        ]

        for metrics, name in [(base_metrics, 'Base model'),
                              (overtrained_metrics, 'Overtrained model'),
                              (auxiliary_metrics, 'Auxiliary sampling')]:
            logging.info(name)
            for metrics_dict, split in [(metrics[0], 'Training'),
                                        (metrics[1], 'Testing')]:
                logging.info(split)
                for metric_name in metrics_dict:
                    logging.info('%s: %s', metric_name,
                                 metrics_dict[metric_name])
Example #19
def EfficientNetV2(
    width_coefficient,
    depth_coefficient,
    default_size,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    depth_divisor=8,
    min_depth=8,
    bn_momentum=0.9,
    activation="swish",
    blocks_args="default",
    model_name="efficientnetv2",
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    """Instantiates the EfficientNetV2 architecture using given scaling coefficients.

  Args:
    width_coefficient: float, scaling coefficient for network width.
    depth_coefficient: float, scaling coefficient for network depth.
    default_size: integer, default input image size.
    dropout_rate: float, dropout rate before final classifier layer.
    drop_connect_rate: float, dropout rate at skip connections.
    depth_divisor: integer, a unit of network width.
    min_depth: integer, minimum number of filters.
    bn_momentum: float. Momentum parameter for Batch Normalization layers.
    activation: activation function.
    blocks_args: list of dicts, parameters to construct block modules.
    model_name: string, model name.
    include_top: whether to include the fully-connected layer at the top of the
      network.
    weights: one of `None` (random initialization), `"imagenet"` (pre-training
      on ImageNet), or the path to the weights file to be loaded.
    input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) or
      numpy array to use as image input for the model.
    input_shape: optional shape tuple, only to be specified if `include_top` is
      False. It should have exactly 3 input channels.
    pooling: optional pooling mode for feature extraction when `include_top` is
      `False`. - `None` means that the output of the model will be the 4D tensor
      output of the last convolutional layer. - "avg" means that global average
      pooling will be applied to the output of the last convolutional layer, and
      thus the output of the model will be a 2D tensor. - `"max"` means that
      global max pooling will be applied.
    classes: optional number of classes to classify images into, only to be
      specified if `include_top` is True, and if no `weights` argument is
      specified.
    classifier_activation: A string or callable. The activation function to use
      on the `"top"` layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the `"top"` layer.
    include_preprocessing: Boolean, whether to include the preprocessing layer
      (`Rescaling`) at the bottom of the network. Defaults to `True`.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `"softmax"` or `None` when
      using a pretrained top layer.
  """

    if blocks_args == "default":
        blocks_args = DEFAULT_BLOCKS_ARGS[model_name]

    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError("The `weights` argument should be either "
                         "`None` (random initialization), `imagenet` "
                         "(pre-training on ImageNet), "
                         "or the path to the weights file to be loaded."
                         f"Received: weights={weights}")

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights` as `'imagenet'` with `include_top`"
            " as true, `classes` should be 1000"
            f"Received: classes={classes}")

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        # Apply original V1 preprocessing for Bx variants
        # if number of channels allows it
        num_channels = input_shape[bn_axis - 1]
        if model_name.split("-")[-1].startswith("b") and num_channels == 3:
            x = layers.Rescaling(scale=1. / 255)(x)
            x = layers.Normalization(
                mean=[0.485, 0.456, 0.406],
                variance=[0.229**2, 0.224**2, 0.225**2],
                axis=bn_axis,
            )(x)
        else:
            x = layers.Rescaling(scale=1. / 128.0, offset=-1)(x)

    # Build stem
    stem_filters = round_filters(
        filters=blocks_args[0]["input_filters"],
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=stem_filters,
        kernel_size=3,
        strides=2,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        use_bias=False,
        name="stem_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="stem_bn",
    )(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # Build blocks
    blocks_args = copy.deepcopy(blocks_args)
    b = 0
    blocks = float(sum(args["num_repeat"] for args in blocks_args))

    for (i, args) in enumerate(blocks_args):
        assert args["num_repeat"] > 0

        # Update block input and output filters based on depth multiplier.
        args["input_filters"] = round_filters(
            filters=args["input_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)
        args["output_filters"] = round_filters(
            filters=args["output_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)

        # Determine which conv type to use:
        block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")]
        repeats = round_repeats(repeats=args.pop("num_repeat"),
                                depth_coefficient=depth_coefficient)
        for j in range(repeats):
            # The first block needs to take care of stride and filter size increase.
            if j > 0:
                args["strides"] = 1
                args["input_filters"] = args["output_filters"]

            x = block(
                activation=activation,
                bn_momentum=bn_momentum,
                survival_probability=drop_connect_rate * b / blocks,
                name="block{}{}_".format(i + 1, chr(j + 97)),
                **args,
            )(x)
            b += 1

    # Build top
    top_filters = round_filters(filters=1280,
                                width_coefficient=width_coefficient,
                                min_depth=min_depth,
                                depth_divisor=depth_divisor)
    x = layers.Conv2D(
        filters=top_filters,
        kernel_size=1,
        strides=1,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        data_format="channels_last",
        use_bias=False,
        name="top_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="top_bn",
    )(x)
    x = layers.Activation(activation=activation, name="top_activation")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         kernel_initializer=DENSE_KERNEL_INITIALIZER,
                         bias_initializer=tf.constant_initializer(0),
                         name="predictions")(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights.
    if weights == "imagenet":
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suffix
        weights_path = data_utils.get_file(file_name,
                                           BASE_WEIGHTS_PATH + file_name,
                                           cache_subdir="models",
                                           file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
Example #20
def main(argv):
    del argv  # unused arg
    np.random.seed(FLAGS.seed)
    tf.random.set_seed(FLAGS.seed)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf1.disable_v2_behavior()

    session = tf1.Session()
    with session.as_default():
        x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset)
        n_train = x_train.shape[0]

        model = multilayer_perceptron(
            n_train, x_train.shape[1:],
            np.std(y_train) + tf.keras.backend.epsilon())
        for l in model.layers:
            l.kl_cost_weight = l.add_weight(
                name='kl_cost_weight',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)
            l.kl_cost_bias = l.add_variable(
                name='kl_cost_bias',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)

        [negative_log_likelihood, mse, log_likelihood, kl,
         elbo] = get_losses_and_metrics(model, n_train)
        metrics = [elbo, log_likelihood, kl, mse]

        tensorboard = tf1.keras.callbacks.TensorBoard(
            log_dir=FLAGS.output_dir,
            update_freq=FLAGS.batch_size * FLAGS.validation_freq)

        def fit_fn(model, steps, initial_epoch):
            return model.fit(
                x=x_train,
                y=y_train,
                batch_size=FLAGS.batch_size,
                epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
                initial_epoch=initial_epoch,
                validation_data=(x_test, y_test),
                validation_freq=max(
                    (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
                verbose=1,
                callbacks=[tensorboard])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(lr=float(FLAGS.learning_rate)),
            loss=negative_log_likelihood,
            metrics=metrics)
        session.run(tf1.initialize_all_variables())

        train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
        fit_fn(model, FLAGS.training_steps, initial_epoch=0)

        labels = tf.keras.layers.Input(shape=y_train.shape[1:])
        ll = tf.keras.backend.function([model.input, labels], [
            model.output.distribution.log_prob(labels),
            model.output.distribution.loc - labels
        ])

        base_metrics = [
            utils.ensemble_metrics(x_train, y_train, model, ll),
            utils.ensemble_metrics(x_test, y_test, model, ll),
        ]
        model_dir = os.path.join(FLAGS.output_dir, 'models')
        tf.io.gfile.makedirs(model_dir)
        base_model_filename = os.path.join(model_dir, 'base_model.weights')
        model.save_weights(base_model_filename)

        # Train base model further for comparison.
        fit_fn(model,
               FLAGS.n_auxiliary_variables *
               FLAGS.auxiliary_sampling_frequency * FLAGS.ensemble_size,
               initial_epoch=train_epochs)

        overtrained_metrics = [
            utils.ensemble_metrics(x_train, y_train, model, ll),
            utils.ensemble_metrics(x_test, y_test, model, ll),
        ]

        # Perform refined VI.
        sample_op = []
        for l in model.layers:
            if hasattr(l, 'kernel_prior'):
                weight_op, weight_cost = sample_auxiliary_op(
                    l.kernel_prior.distribution,
                    l.kernel_posterior.distribution,
                    FLAGS.auxiliary_variance_ratio)
                sample_op.append(weight_op)
                sample_op.append(l.kl_cost_weight.assign_add(weight_cost))
                # Fix the variance of the prior
                session.run(l.kernel_prior.distribution.istrainable.assign(0.))
                if hasattr(l.bias_prior, 'distribution'):
                    bias_op, bias_cost = sample_auxiliary_op(
                        l.bias_prior.distribution,
                        l.bias_posterior.distribution,
                        FLAGS.auxiliary_variance_ratio)
                    sample_op.append(bias_op)
                    sample_op.append(l.kl_cost_bias.assign_add(bias_cost))
                    # Fix the variance of the prior
                    session.run(
                        l.bias_prior.distribution.istrainable.assign(0.))

        ensemble_filenames = []
        for i in range(FLAGS.ensemble_size):
            model.load_weights(base_model_filename)
            for j in range(FLAGS.n_auxiliary_variables):
                session.run(sample_op)
                model.compile(
                    optimizer=tf.keras.optimizers.Adam(
                        # The learning rate is proportional to the scale of the prior.
                        lr=float(FLAGS.learning_rate_for_sampling *
                                 np.sqrt(1. -
                                         FLAGS.auxiliary_variance_ratio)**j)),
                    loss=negative_log_likelihood,
                    metrics=metrics)
                fit_fn(model,
                       FLAGS.auxiliary_sampling_frequency,
                       initial_epoch=train_epochs)
            ensemble_filename = os.path.join(
                model_dir, 'ensemble_component_' + str(i) + '.weights')
            ensemble_filenames.append(ensemble_filename)
            model.save_weights(ensemble_filename)

        auxiliary_metrics = [
            utils.ensemble_metrics(x_train,
                                   y_train,
                                   model,
                                   ll,
                                   weight_files=ensemble_filenames),
            utils.ensemble_metrics(x_test,
                                   y_test,
                                   model,
                                   ll,
                                   weight_files=ensemble_filenames),
        ]

        for metrics, name in [(base_metrics, 'Base model'),
                              (overtrained_metrics, 'Overtrained model'),
                              (auxiliary_metrics, 'Auxiliary sampling')]:
            logging.info(name)
            for metrics_dict, split in [(metrics[0], 'train'),
                                        (metrics[1], 'test')]:
                logging.info(split)
                for metric_name in metrics_dict:
                    logging.info('%s: %s', metric_name,
                                 metrics_dict[metric_name])
    def __call__(self,
                 crop_features,
                 detection_priors,
                 inst_classes,
                 is_training=None):
        """Generate instance masks from FPN features and detection priors.

    This corresponds to the Fig. 5-6 of the ShapeMask paper at
    https://arxiv.org/pdf/1904.03239.pdf

    Args:
      crop_features: a float Tensor of shape [batch_size * num_instances,
        mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
        instance feature crop.
      detection_priors: a float Tensor of shape [batch_size * num_instances,
        mask_crop_size, mask_crop_size, 1]. This is the detection prior for
        the instance.
      inst_classes: an int Tensor of shape [batch_size, num_instances]
        of instance classes.
      is_training: a bool indicating whether in training mode.

    Returns:
      mask_outputs: instance mask prediction as a float Tensor of shape
        [batch_size * num_instances, mask_size, mask_size, num_classes].
    """
        # Embed the anchor map into some feature space for anchor conditioning.
        detection_prior_features = tf.keras.layers.Conv2D(
            self._num_downsample_channels,
            kernel_size=(1, 1),
            bias_initializer=tf.zeros_initializer(),
            kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.,
                                                                  stddev=0.01),
            padding='same',
            name='anchor-conv')(detection_priors)

        prior_conditioned_features = crop_features + detection_prior_features
        coarse_output_features = self.coarsemask_decoder_net(
            prior_conditioned_features, is_training)

        coarse_mask_classes = tf.keras.layers.Conv2D(
            self._mask_num_classes,
            kernel_size=(1, 1),
            # Focal loss bias initialization to have foreground 0.01 probability.
            bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) /
                                                             0.01)),
            kernel_initializer=tf.keras.initializers.RandomNormal(mean=0,
                                                                  stddev=0.01),
            padding='same',
            name='class-predict')(coarse_output_features)

        if self._use_category_for_mask:
            inst_classes = tf.cast(tf.reshape(inst_classes, [-1]), tf.int32)
            coarse_mask_classes_t = tf.transpose(a=coarse_mask_classes,
                                                 perm=(0, 3, 1, 2))
            # pylint: disable=g-long-lambda
            coarse_mask_logits = tf.cond(
                pred=tf.size(input=inst_classes) > 0,
                true_fn=lambda: tf.gather_nd(
                    coarse_mask_classes_t,
                    tf.stack([
                        tf.range(tf.size(input=inst_classes)), inst_classes - 1
                    ],
                             axis=1)),
                false_fn=lambda: coarse_mask_classes_t[:, 0, :, :])
            # pylint: enable=g-long-lambda
            coarse_mask_logits = tf.expand_dims(coarse_mask_logits, -1)
        else:
            coarse_mask_logits = coarse_mask_classes

        coarse_class_probs = tf.nn.sigmoid(coarse_mask_logits)
        class_probs = tf.cast(coarse_class_probs,
                              prior_conditioned_features.dtype)

        return coarse_mask_classes, class_probs, prior_conditioned_features
Example #22
        collect_actor.run()
        dqn_learner.run(iterations=1)

        if eval_interval and dqn_learner.train_step_numpy % eval_interval == 0:
            logging.info('Evaluating.')
            eval_actor.run_and_log()

    rb_observer.close()
    reverb_server.stop()


logits = functools.partial(tf.keras.layers.Dense,
                           activation=None,
                           kernel_initializer=tf.random_uniform_initializer(
                               minval=-0.03, maxval=0.03),
                           bias_initializer=tf.constant_initializer(-0.2))

dense = functools.partial(
    tf.keras.layers.Dense,
    activation=tf.keras.activations.relu,
    kernel_initializer=tf.compat.v1.variance_scaling_initializer(
        scale=2.0, mode='fan_in', distribution='truncated_normal'))


def main(_):
    logging.set_verbosity(logging.INFO)
    tf.enable_v2_behavior()

    gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_bindings)

    train_eval(FLAGS.root_dir,
Example #23
 def std_layers():
   # TODO(b/179510447): align these parameters with Schulman 17.
   std_bias_initializer_value = np.log(np.exp(0.35) - 1)
   return bias_layer.BiasLayer(
       bias_initializer=tf.constant_initializer(
           value=std_bias_initializer_value))
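# Why log(exp(0.35) - 1): assuming the bias is later passed through a softplus to
# produce the policy's standard deviation, this value is the inverse softplus of
# 0.35, so the initial std comes out at 0.35 (a sketch of the check):
import numpy as np
raw = np.log(np.exp(0.35) - 1.0)
assert abs(np.log1p(np.exp(raw)) - 0.35) < 1e-7   # softplus(raw) == 0.35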