Example #1
0
def make_encoder(params, is_training):
    """Build the encoder network and wrap it as an exportable TF-Hub module.

    Args:
        params: dict-like configuration. ``params['network_type']`` selects
            the architecture and ``params['full_size']`` is used as the shape
            of the input placeholder; the whole object is also forwarded to
            the selected encoder builder.
        is_training: forwarded unchanged to the selected encoder builder.

    Returns:
        The ``hub.Module`` named ``'encoder'``, registered for export under
        the key ``"encoder"``. Its signature maps input ``'x'`` to output
        ``'z'``.

    Raises:
        NotImplementedError: if ``params['network_type']`` is not one of
            ``'fully_connected'``, ``'conv'``, ``'infoGAN'`` or ``'resnet'``.
    """
    kind = params['network_type']

    # Pick the builder first, then call it once below.
    if kind == 'fully_connected':
        build_encoder = fully_connected_encoder
    elif kind == 'conv':
        build_encoder = conv_encoder
    elif kind == 'infoGAN':
        build_encoder = infoGAN_encoder
    elif kind == 'resnet':
        build_encoder = resnet_encoder
    else:
        raise NotImplementedError("Network type not implemented.")

    encode = build_encoder(params, is_training)

    def _signature_fn():
        # Executed by TF-Hub while it builds the module graph.
        inputs = tf.compat.v1.placeholder(tf.float32,
                                          shape=params['full_size'])
        code = encode(inputs)
        hub.add_signature(inputs={'x': inputs}, outputs={'z': code})

    module = hub.Module(hub.create_module_spec(_signature_fn),
                        name='encoder',
                        trainable=True)
    hub.register_module_for_export(module, "encoder")
    return module
Example #2
0
def make_decoder(params, is_training):
    """Build the decoder network and wrap it as an exportable TF-Hub module.

    Args:
        params: dict-like configuration. ``params['network_type']`` selects
            the architecture and ``params['latent_size']`` is the width of
            the ``[None, latent_size]`` input placeholder; the whole object
            is also forwarded to the selected decoder builder.
        is_training: forwarded unchanged to the selected decoder builder.

    Returns:
        The ``hub.Module`` named ``'decoder'``, registered for export under
        the key ``"decoder"``. Its signature maps input ``'z'`` to output
        ``'x'``.

    Raises:
        NotImplementedError: if ``params['network_type']`` is not one of
            ``'fully_connected'``, ``'conv'``, ``'infoGAN'`` or ``'resnet'``.
    """
    kind = params['network_type']

    # Pick the builder first, then call it once below.
    if kind == 'fully_connected':
        build_decoder = fully_connected_decoder
    elif kind == 'conv':
        build_decoder = conv_decoder
    elif kind == 'infoGAN':
        build_decoder = infoGAN_decoder
    elif kind == 'resnet':
        build_decoder = resnet_decoder
    else:
        raise NotImplementedError("Network type not implemented.")

    decode = build_decoder(params, is_training)

    def _signature_fn():
        # Executed by TF-Hub while it builds the module graph.
        latent = tf.compat.v1.placeholder(
            tf.float32, shape=[None, params['latent_size']])
        reconstruction = decode(latent)
        hub.add_signature(inputs={'z': latent},
                          outputs={'x': reconstruction})

    module = hub.Module(hub.create_module_spec(_signature_fn),
                        name='decoder',
                        trainable=True)
    hub.register_module_for_export(module, "decoder")
    return module
Example #3
0
def model_fn(features, labels, mode, params):
    """Estimator ``model_fn`` that runs ``module_fn`` through a TF-Hub module.

    The module spec is created with two graph variants (tagged ``{'train'}``
    and untagged). The variant matching ``mode`` is instantiated and
    registered for export under the name ``'doodle'``.

    Args:
        features: mapping holding the input batch under ``'image'``; its
            static shape must be compatible with ``[None, 28, 28, 1]``.
        labels: class labels passed to the sparse softmax cross-entropy loss
            and to ``metrics.calculate``; not used in PREDICT mode.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: hyper-parameters; ``'num_classes'`` and ``'learning_rate'``
            are read here and the whole object is forwarded to ``module_fn``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for PREDICT, EVAL or TRAIN mode.
        Any other mode falls through and returns ``None``.
    """
    mode_keys = tf.estimator.ModeKeys
    is_train = mode == mode_keys.TRAIN

    # One graph variant per training flag; the training variant is tagged.
    variants = [
        ({'train'} if flag else set(), {'training': flag, 'params': params})
        for flag in (True, False)
    ]
    module_spec = tfhub.create_module_spec(module_fn, tags_and_args=variants)

    doodle = tfhub.Module(module_spec,
                          trainable=is_train,
                          tags={'train'} if is_train else None)
    tfhub.register_module_for_export(doodle, 'doodle')

    image = features['image']
    image.shape.assert_is_compatible_with([None, 28, 28, 1])
    predictions = doodle(image, as_dict=True)

    if mode == mode_keys.PREDICT:
        serving_key = (tf.saved_model.signature_constants.
                       DEFAULT_SERVING_SIGNATURE_DEF_KEY)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                serving_key: tf.estimator.export.PredictOutput(predictions),
            })

    with tf.variable_scope('losses'):
        # Only the side effect matters here: the loss is picked up by
        # get_total_loss() below, so the returned tensor is not kept.
        tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                               logits=predictions['logits'])
        loss = tf.losses.get_total_loss()

    tf.summary.image('image', image)
    tf.summary.scalar('total_loss', loss)

    eval_metrics = metrics.calculate(labels, predictions['classes'],
                                     params['num_classes'])

    if mode == mode_keys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metrics)

    if mode == mode_keys.TRAIN:
        step = tf.train.get_or_create_global_step()
        pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.variable_scope('optimizer'):
            adam = tf.train.AdamOptimizer(params['learning_rate'])
            # Run collected update ops before each optimisation step.
            with tf.control_dependencies(pending_updates):
                train_op = adam.minimize(loss, step)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          eval_metric_ops=eval_metrics)
    def body(self, features):
        """Build the PixelCNN graph and return its logits and training loss.

        In PREDICT mode the network is wrapped in a TF-Hub module (registered
        for export as ``"pixelcnn"``); otherwise it is built directly under
        the ``'pixelcnn_module'`` variable scope.

        Args:
            features: mapping whose ``"inputs"`` entry is the input tensor.

        Returns:
            A pair ``(logits, losses)`` where ``losses`` is
            ``{"training": -log_prob}``.
        """
        hparams = self.hparams
        pixelcnn_kwargs = dict(
            nr_resnet=hparams.nr_resnet,
            nr_filters=hparams.hidden_size,
            nr_logistic_mix=1,
            resnet_nonlinearity='concat_elu',
            energy_distance=False,
            dropout_p=hparams.dropout,
        )

        def _build_pixelcnn(net_input):
            # Network output -> 2 channels, interpreted as Normal loc/scale.
            template = tf.make_template('model', model_spec)
            logits = template(net_input, None, ema=None, **pixelcnn_kwargs)
            logits = tf.layers.dense(logits, 2, activation=None)
            mean, raw_scale = tf.split(logits, num_or_size_splits=2, axis=-1)
            # Softplus plus a small floor keeps the scale strictly positive.
            stddev = tf.nn.softplus(raw_scale) + 1e-4
            dist = tfp.distributions.Independent(
                tfp.distributions.Normal(loc=mean, scale=stddev))
            drawn = dist.sample()
            log_likelihood = dist.log_prob(net_input)
            input_grads = tf.gradients(log_likelihood, net_input)[0]
            print(input_grads)
            return {
                'sample': drawn,
                'log_prob': log_likelihood,
                'logits': logits,
                'mu': mean,
                'scale': stddev,
                'grads': input_grads
            }

        # During predict, we use a tf hub module, otherwise we stick to normal
        # behavior to allow for multi gpu training
        if hparams.mode == tf.estimator.ModeKeys.PREDICT:

            def _module_graph():
                placeholder = tf.placeholder(
                    tf.float32, shape=features["inputs"].get_shape())
                hub.add_signature(inputs=placeholder,
                                  outputs=_build_pixelcnn(placeholder))

            module_spec = hub.create_module_spec(
                _module_graph, drop_collections=['checkpoints'])
            module = hub.Module(module_spec,
                                name="pixelcnn_module",
                                trainable=True)
            hub.register_module_for_export(module, "pixelcnn")
            result = module(features["inputs"], as_dict=True)
        else:
            with tf.variable_scope('pixelcnn_module'):
                result = _build_pixelcnn(features["inputs"])

        self.image_summary("inputs", features["inputs"])
        self.image_summary("samples", result["sample"])

        losses = {"training": -result["log_prob"]}
        return result["logits"], losses
Example #5
0
def flow_model_fn(features, labels, mode, params, config):
    """Estimator ``model_fn`` for the conditional flow built by ``params['flow_fn']``.

    In PREDICT mode ``features`` itself is the conditioning tensor; the flow
    is wrapped in a TF-Hub module (exported as ``"code_sampler"``) that draws
    one sample per conditioning row. In the other modes ``features['x']`` is
    scored under the flow conditioned on ``features['y']`` and the negative
    mean log-likelihood is minimised with Adam on a cosine-decayed rate.

    Args:
        features: conditioning tensor (PREDICT) or a mapping with ``'x'`` and
            ``'y'`` entries (TRAIN / EVAL).
        labels: not used.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: mapping providing ``'flow_fn'``, ``'learning_rate'`` and
            ``'max_steps'``.
        config: not used.

    Returns:
        A ``tf.estimator.EstimatorSpec``.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    if mode == tf.estimator.ModeKeys.PREDICT:

        def _sampler_graph():
            # NOTE(review): `n_cond` is not defined in this function; it is
            # presumably a module-level name - confirm.
            cond = tf.placeholder(tf.float32, shape=[None, n_cond])
            dist = params['flow_fn'](cond, is_training)
            hub.add_signature(inputs=cond,
                              outputs=dist.sample(tf.shape(cond)[0]))

        sampler = hub.Module(hub.create_module_spec(_sampler_graph),
                             name='flow_module')
        hub.register_module_for_export(sampler, "code_sampler")
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={'code': sampler(features)})

    code = features['x']
    condition = features['y']

    # Same scope name as the hub module used in PREDICT mode.
    with tf.variable_scope("flow_module"):
        dist = params['flow_fn'](condition, is_training)
        log_lik = dist.log_prob(code)

    # Batch-averaged log-likelihood, logged and negated to form the loss.
    tf.summary.scalar('loglikelihood', tf.reduce_mean(log_lik))
    loss = -tf.reduce_mean(log_lik)

    step = tf.train.get_or_create_global_step()
    lr = tf.train.cosine_decay(params["learning_rate"], step,
                               params["max_steps"])
    tf.summary.scalar("learning_rate", lr)
    adam = tf.train.AdamOptimizer(lr)

    # Run collected update ops before each optimisation step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = adam.minimize(loss, global_step=step)

    metric_ops = {
        "loglikelihood": tf.metrics.mean(tf.reduce_mean(log_lik))
    }

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=metric_ops)
Example #6
0
def create_ict_module(params, mode):
    """Create the ICT TF-Hub module and register it for export as ``"ict"``.

    Args:
        params: bound to ``module_fn`` as its ``params`` keyword argument.
        mode: a ``tf.estimator.ModeKeys`` value; TRAIN selects the graph
            variant tagged ``{"train"}``, anything else the untagged one.

    Returns:
        The trainable ``hub.Module``.
    """
    # Two graph variants: ({"train"}, is_training=True) and
    # (set(), is_training=False).
    variants = [
        ({"train"} if training_flag else set(),
         {"is_training": training_flag})
        for training_flag in (True, False)
    ]
    spec = hub.create_module_spec(functools.partial(module_fn, params=params),
                                  tags_and_args=variants)

    if mode == tf.estimator.ModeKeys.TRAIN:
        active_tags = {"train"}
    else:
        active_tags = {}

    module = hub.Module(spec, tags=active_tags, trainable=True)
    hub.register_module_for_export(module, "ict")
    return module
Example #7
0
    def _model_fn(features, labels, mode):
        """Mock ``model_fn`` that embeds the text feature with a TF-Hub module.

        The module is registered for export only when the enclosing
        ``register_module`` flag is truthy. The loss is a constant zero and
        the train op just increments the global step.
        """
        del labels

        text_module = hub.Module(hub.create_module_spec(text_module_fn))
        if register_module:
            hub.register_module_for_export(text_module, _EXPORT_MODULE_NAME)

        spec_kwargs = {
            'mode': mode,
            'predictions': text_module(features[_TEXT_FEATURE_NAME]),
            'loss': tf.constant(0.0),
            'train_op': tf.assign_add(tf.train.get_global_step(), 1),
        }
        return tf.estimator.EstimatorSpec(**spec_kwargs)
Example #8
0
def make_hub_predictor(model_fn):
    """Create a TF-Hub module for export (in PREDICT mode).

    Args:
        model_fn: callable invoked as ``model_fn(img, is_training)`` with the
            placeholder input. It must return ``(end_points, predictions)``:
            ``predictions`` becomes the module's default signature output and
            ``end_points`` the output of the ``'representation'`` signature.

    Returns:
        The non-trainable, untagged ``hub.Module``, registered for export
        under the name ``'module'``.
    """

    # TF-Hub calls this in a new/empty tf.Graph, so the input placeholder
    # has to be created here. The parameter name must stay `is_training`
    # because it is supplied by keyword from the tags_and_args below.
    def _build_graph(is_training):  # pylint: disable=missing-docstring
        serving_shape = utils.str2intlist(FLAGS.serving_input_shape)
        image = tf.placeholder(shape=serving_shape, dtype=tf.float32)

        end_points, predictions = model_fn(image, is_training)

        # Default signature: class output. Named signature: all endpoints.
        hub.add_signature(inputs={'image': image}, outputs=predictions)
        hub.add_signature(inputs={'image': image},
                          outputs=end_points,
                          name='representation')

    graph_variants = [
        (["is_training"], {'is_training': True}),
        (set(), {'is_training': False}),
    ]
    # For some not understood reason, dropping this collection is necessary
    # when the model uses cross_replica_batch_norm. We verified that moving
    # averages are still being stored in the hub module just fine.
    spec = hub.create_module_spec(
        _build_graph,
        graph_variants,
        drop_collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES])

    predictor = hub.Module(spec, trainable=False, tags=set())
    hub.register_module_for_export(predictor, export_name='module')
    return predictor
Example #9
0
  def _model_fn(features, labels, mode):
    """Mock ``model_fn`` that embeds the text feature with a TF-Hub module.

    The module is registered for export only when the enclosing
    ``register_module`` flag is truthy. The loss is a constant zero and the
    train op just increments the global step.
    """
    del labels

    text_module = hub.Module(hub.create_module_spec(text_module_fn))
    if register_module:
      hub.register_module_for_export(text_module, _EXPORT_MODULE_NAME)

    spec_kwargs = {
        'mode': mode,
        'predictions': text_module(features[_TEXT_FEATURE_NAME]),
        'loss': tf.constant(0.0),
        'train_op': tf.assign_add(tf.train.get_global_step(), 1),
    }
    return tf.estimator.EstimatorSpec(**spec_kwargs)
Example #10
0
def model_fn(data, mode):
    """Produce a loss for the rotation task.

    TRAIN / EVAL build the network directly under the ``'module'`` variable
    scope. Any other mode builds it inside a TF-Hub module (exported as
    ``'module'``) fed from a serving placeholder and returns predictions only.

    Args:
        data: dict of inputs containing, among others, ``"image"`` and
            ``"label"``.
        mode: model's mode: training, eval or prediction.

    Returns:
        Whatever ``trainer.make_estimator`` returns for the given mode.
    """
    num_angles = 4
    images = data['image']

    if mode not in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL]:
        # Serving path: the hub module owns its own input placeholder.
        serving_shape = utils.str2intlist(
            FLAGS.get_flag_value('serving_input_shape', 'None,None,None,3'))

        def _serving_placeholder():
            return tf.placeholder(shape=serving_shape, dtype=tf.float32)

        build_graph = functools.partial(apply_model,
                                        image_fn=_serving_placeholder,
                                        num_outputs=num_angles,
                                        make_signature=True)
        graph_variants = [
            (utils.TAGS_IS_TRAINING, {'is_training': True}),
            (set(), {'is_training': False}),
        ]
        module_spec = hub.create_module_spec(build_graph, graph_variants)
        rotation_module = hub.Module(module_spec,
                                     trainable=False,
                                     tags=set())
        hub.register_module_for_export(rotation_module, export_name='module')
        return trainer.make_estimator(mode,
                                      predictions=rotation_module(images))

    # Collapse every leading dimension into one batch axis, keeping the
    # last three dimensions of each image.
    images = tf.reshape(images, [-1] + images.get_shape().as_list()[-3:])

    def _batch_images():
        return images

    with tf.variable_scope('module'):
        logits = apply_model(
            image_fn=_batch_images,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN),
            num_outputs=num_angles,
            make_signature=False)

    labels = tf.reshape(data['label'], [-1])

    # build loss and accuracy
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    loss = tf.reduce_mean(per_example)

    def _accuracy_metric(labels, logits):
        return {
            'accuracy':
            tf.metrics.accuracy(labels=labels,
                                predictions=tf.argmax(logits, axis=-1))
        }

    eval_metrics = (_accuracy_metric, [labels, logits])
    return trainer.make_estimator(mode, loss, eval_metrics, logits)
Example #11
0
def _mdn_inv_model_fn(features,
                      labels,
                      nchannels,
                      n_y,
                      n_mixture,
                      dropout,
                      optimizer,
                      mode,
                      logoffset=1e-3,
                      log=True):
    """Train the inverse model, i.e. go from the halo field to matter overdensity.

    A 3-D convolutional network predicts, per voxel and channel, a mixture of
    ``n_mixture`` Normal components. The network lives in a TF-Hub module
    with inputs ``'features'`` / ``'labels'`` and outputs ``'sample'``,
    ``'loglikelihood'``, ``'sigma'``, ``'mean'`` and ``'logits'``.

    Args:
        features: input tensor, or a dict with ``'features'`` and ``'labels'``
            entries that is fed to the module as-is.
        labels: observation tensor; used only when ``features`` is not a dict.
        nchannels: size of the last axis of the feature placeholder.
        n_y: size of the last axis of the observation placeholder.
        n_mixture: number of mixture components.
        dropout: passed to ``wide_resnet`` as ``keep_prob``.
        optimizer: callable invoked as ``optimizer(learning_rate=...)`` that
            returns an object with a ``minimize`` method.
        mode: a ``tf.estimator.ModeKeys`` value.
        logoffset: offset added to the observations before taking the log
            (and subtracted again from the exponentiated samples).
        log: when true the mixture models ``log(logoffset + obs)`` instead of
            the raw observations.

    Returns:
        A ``tf.estimator.EstimatorSpec``. In PREDICT mode the module is also
        registered for export under ``"likelihood"``.
    """
    # Check for training mode
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    def _module_fn():
        """Build the module graph and declare its signature."""
        feature_layer = tf.placeholder(
            tf.float32,
            shape=[None, None, None, None, nchannels],
            name='input')
        obs_layer = tf.placeholder(tf.float32,
                                   shape=[None, None, None, None, n_y],
                                   name='observations')

        # Builds the neural network
        net = slim.conv3d(feature_layer,
                          16,
                          5,
                          activation_fn=tf.nn.leaky_relu,
                          padding='same')
        for width in (16, 32, 32):
            net = wide_resnet(net,
                              width,
                              activation_fn=tf.nn.leaky_relu,
                              keep_prob=dropout,
                              is_training=is_training)
        net = slim.conv3d(net, 32, 3, activation_fn=tf.nn.leaky_relu)

        # Probabilistic layer: 3 parameter groups (logits, loc, scale) per
        # mixture component and channel.
        net = slim.conv3d(net,
                          3 * n_mixture * nchannels,
                          1,
                          activation_fn=None)
        cube_size = tf.shape(obs_layer)[1]
        net = tf.reshape(
            net,
            [-1, cube_size, cube_size, cube_size, nchannels, n_mixture * 3])

        logits, loc, unconstrained_scale = tf.split(net,
                                                    num_or_size_splits=3,
                                                    axis=-1)
        print('\nloc :\n', loc)
        # Softplus plus a small floor keeps the scale strictly positive.
        scale = tf.nn.softplus(unconstrained_scale[...]) + 1e-3

        distribution = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(logits=logits[...]),
            components_distribution=tfd.Normal(loc=loc[...], scale=scale))
        print('\ngmm\n', distribution)

        # Define the sample and the log likelihood outputs
        if log:
            print('Logged it')
            sample = tf.exp(distribution.sample()) - logoffset
            print('\ninf dist sample :\n', distribution.sample())
            logfeature = tf.log(tf.add(logoffset, obs_layer), 'logfeature')
            print('\nlogfeature :\n', logfeature)
            loglik = distribution.log_prob(logfeature[...])
        else:
            print('UnLogged it')
            sample = distribution.sample()
            print('\ninf dist sample :\n', distribution.sample())
            loglik = distribution.log_prob(obs_layer[...])

        hub.add_signature(inputs={
            'features': feature_layer,
            'labels': obs_layer
        },
                          outputs={
                              'sample': sample,
                              'loglikelihood': loglik,
                              'sigma': scale,
                              'mean': loc,
                              'logits': logits
                          })

    # Create model and register module if necessary
    spec = hub.create_module_spec(_module_fn)
    module = hub.Module(spec, trainable=True)

    if isinstance(features, dict):
        predictions = module(features, as_dict=True)
    else:
        predictions = module({
            'features': features,
            'labels': labels
        },
                             as_dict=True)

    samples = predictions['sample']
    print('\nsamples :\n', samples)

    if mode == tf.estimator.ModeKeys.PREDICT:
        hub.register_module_for_export(module, "likelihood")
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loglik = predictions['loglikelihood']
    print('\nloglik :\n', loglik)
    # Compute and register loss function
    neg_log_likelihood = -tf.reduce_sum(loglik, axis=-1)
    neg_log_likelihood = tf.reduce_mean(neg_log_likelihood)

    tf.losses.add_loss(neg_log_likelihood)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    train_op = None
    eval_metric_ops = None

    # Define optimizer
    if mode == tf.estimator.ModeKeys.TRAIN:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.train.get_global_step()
            boundaries = list(np.array([1e4, 2e4, 4e4, 5e4]).astype(int))
            values = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5]
            learning_rate = tf.train.piecewise_constant(
                global_step, boundaries, values)
            train_op = optimizer(learning_rate=learning_rate).minimize(
                loss=total_loss, global_step=global_step)
            tf.summary.scalar('rate', learning_rate)
        tf.summary.scalar('total_loss', total_loss)
        tf.summary.scalar('loss', neg_log_likelihood)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # EstimatorSpec requires (value, update_op) metric pairs; a bare
        # tensor is rejected, so wrap the scalar in a streaming mean.
        eval_metric_ops = {"log_p": tf.metrics.mean(neg_log_likelihood)}

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=total_loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)
Example #12
0
def _mdn_model_fn(features, labels, n_y, n_mixture, dropout, optimizer, mode):
    """Estimator ``model_fn`` for a mixture-density network on 3-D fields.

    A 3-D convolutional network predicts, per voxel and output channel, a
    mixture of ``n_mixture`` discretized logistic components quantized to
    ``[0, 2**4 - 1]``. The network lives in a TF-Hub module with inputs
    ``'features'`` / ``'labels'`` and outputs ``'sample'`` and
    ``'loglikelihood'``.

    Args:
        features: input tensor, or a dict with ``'features'`` and ``'labels'``
            entries that is fed to the module as-is.
        labels: observation tensor; used only when ``features`` is not a dict.
        n_y: size of the last axis of the observation placeholder.
        n_mixture: number of mixture components.
        dropout: passed to ``wide_resnet`` as ``keep_prob``.
        optimizer: callable invoked as ``optimizer(learning_rate=...)`` that
            returns an object with a ``minimize`` method.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        A ``tf.estimator.EstimatorSpec``. In PREDICT mode the module is also
        registered for export under ``"likelihood"``.
    """
    # Check for training mode
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    def _module_fn():
        """Build the module graph and declare its signature."""
        # NOTE(review): `nchannels` is not a parameter of this function; it
        # is presumably a module-level name - confirm it is defined.
        feature_layer = tf.placeholder(
            tf.float32,
            shape=[None, None, None, None, nchannels],
            name='input')
        obs_layer = tf.placeholder(tf.float32,
                                   shape=[None, None, None, None, n_y],
                                   name='observations')

        # Builds the neural network
        net = slim.conv3d(feature_layer,
                          16,
                          5,
                          activation_fn=tf.nn.leaky_relu,
                          padding='valid')
        for width in (16, 32, 32):
            net = wide_resnet(net,
                              width,
                              activation_fn=tf.nn.leaky_relu,
                              keep_prob=dropout,
                              is_training=is_training)
        net = slim.conv3d(net, 32, 3, activation_fn=tf.nn.tanh)

        # Probabilistic layer: 3 parameter groups (loc, scale, logits) per
        # mixture component and output channel.
        net = slim.conv3d(net, n_mixture * 3 * n_y, 1, activation_fn=None)
        cube_size = tf.shape(obs_layer)[1]
        net = tf.reshape(
            net, [-1, cube_size, cube_size, cube_size, n_y, n_mixture * 3])
        loc, unconstrained_scale, logits = tf.split(net,
                                                    num_or_size_splits=3,
                                                    axis=-1)
        scale = tf.nn.softplus(unconstrained_scale)

        # Form mixture of discretized logistic distributions. Note we shift the
        # logistic distribution by -0.5. This lets the quantization capture "rounding"
        # intervals, `(x-0.5, x+0.5]`, and not "ceiling" intervals, `(x-1, x]`.
        discretized_logistic_dist = tfd.QuantizedDistribution(
            distribution=tfd.TransformedDistribution(
                distribution=tfd.Logistic(loc=loc, scale=scale),
                bijector=tfb.AffineScalar(shift=-0.5)),
            low=0.,
            high=2.**4 - 1)

        mixture_dist = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(logits=logits),
            components_distribution=discretized_logistic_dist)

        # Define the sample and the log likelihood outputs
        sample = tf.squeeze(mixture_dist.sample())
        loglik = mixture_dist.log_prob(obs_layer)
        hub.add_signature(inputs={
            'features': feature_layer,
            'labels': obs_layer
        },
                          outputs={
                              'sample': sample,
                              'loglikelihood': loglik
                          })

    # Create model and register module if necessary
    spec = hub.create_module_spec(_module_fn)
    module = hub.Module(spec, trainable=True)
    if isinstance(features, dict):
        predictions = module(features, as_dict=True)
    else:
        predictions = module({
            'features': features,
            'labels': labels
        },
                             as_dict=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        hub.register_module_for_export(module, "likelihood")
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loglik = predictions['loglikelihood']
    # Compute and register loss function
    neg_log_likelihood = -tf.reduce_sum(loglik, axis=-1)
    neg_log_likelihood = tf.reduce_mean(neg_log_likelihood)

    tf.losses.add_loss(neg_log_likelihood)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    train_op = None
    eval_metric_ops = None

    # Define optimizer
    if mode == tf.estimator.ModeKeys.TRAIN:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.train.get_global_step()
            boundaries = [15000, 30000, 45000, 60000]
            values = [0.00001, 0.000005, 0.000001, 0.0000005, 0.0000001]
            learning_rate = tf.train.piecewise_constant(
                global_step, boundaries, values)
            train_op = optimizer(learning_rate=learning_rate).minimize(
                loss=total_loss, global_step=global_step)

        tf.summary.scalar('loss', neg_log_likelihood)
        tf.summary.scalar('rate', learning_rate)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # EstimatorSpec requires (value, update_op) metric pairs; a bare
        # tensor is rejected, so wrap the scalar in a streaming mean.
        eval_metric_ops = {"log_p": tf.metrics.mean(neg_log_likelihood)}

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=total_loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)
Example #13
0
def _mdn_nozero_model_fn(features,
                         labels,
                         nchannels,
                         n_y,
                         n_mixture,
                         dropout,
                         optimizer,
                         mode,
                         pad,
                         lr0=1e-3,
                         dsitribution='logistic',
                         distribution=None):
    """Estimator ``model_fn`` for a mixture-density network on 3-D fields.

    A small valid-padded 3-D convolutional network predicts, per voxel and
    output channel, a mixture of ``n_mixture`` components. The network lives
    in a TF-Hub module with inputs ``'features'`` / ``'labels'`` and outputs
    ``'sample'``, ``'loglikelihood'``, ``'loc'``, ``'scale'`` and ``'logits'``.

    Args:
        features: input tensor, or a dict with ``'features'`` and ``'labels'``
            entries that is fed to the module as-is.
        labels: observation tensor; used only when ``features`` is not a dict.
        nchannels: size of the last axis of the feature placeholder.
        n_y: size of the last axis of the observation placeholder.
        n_mixture: number of mixture components.
        dropout: passed to ``valid_resnet`` as ``keep_prob``.
        optimizer: callable invoked as ``optimizer(learning_rate=...)`` that
            returns an object with a ``minimize`` method.
        mode: a ``tf.estimator.ModeKeys`` value.
        pad: not used by this function.
        lr0: initial learning rate of the piecewise-constant schedule.
        dsitribution: historical (misspelled) name of ``distribution``; kept
            so existing keyword callers continue to work.
        distribution: component family, ``'logistic'`` (discretized logistic
            quantized to ``[0, 2**3 - 1]``) or ``'normal'``. When ``None``
            the value of ``dsitribution`` is used.

    Returns:
        A ``tf.estimator.EstimatorSpec``. In PREDICT mode the module is also
        registered for export under ``"likelihood"``.

    Raises:
        ValueError: if the selected distribution is neither ``'logistic'``
            nor ``'normal'``.
    """
    # The body always read `distribution`, but the parameter was spelled
    # `dsitribution`; accept both and resolve to one name up front.
    if distribution is None:
        distribution = dsitribution
    if distribution not in ('logistic', 'normal'):
        raise ValueError(
            "distribution must be 'logistic' or 'normal', got %r" %
            (distribution,))

    # Check for training mode
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    def _module_fn():
        """Build the module graph and declare its signature."""
        feature_layer = tf.placeholder(
            tf.float32,
            shape=[None, None, None, None, nchannels],
            name='input')
        obs_layer = tf.placeholder(tf.float32,
                                   shape=[None, None, None, None, n_y],
                                   name='observations')

        # Builds the neural network
        net = slim.conv3d(feature_layer,
                          16,
                          3,
                          activation_fn=tf.nn.leaky_relu,
                          padding='valid')
        # Cropped copy of the first layer, added back as a skip connection
        # after the valid-padded layers below.
        subnet = tf.identity(net[:, 3:-3, 3:-3, 3:-3, :])
        net = valid_resnet(net,
                           16,
                           activation_fn=tf.nn.leaky_relu,
                           keep_prob=dropout,
                           is_training=is_training)
        net = slim.conv3d(net,
                          16,
                          3,
                          activation_fn=tf.nn.leaky_relu,
                          padding='valid')
        net = net + subnet

        # Probabilistic layer: 3 parameter groups (loc, scale, logits) per
        # mixture component and output channel.
        likenet = slim.conv3d(net, 64, 1, activation_fn=tf.nn.leaky_relu)
        net = slim.conv3d(likenet, n_mixture * 3 * n_y, 1, activation_fn=None)

        cube_size = tf.shape(obs_layer)[1]
        net = tf.reshape(
            net, [-1, cube_size, cube_size, cube_size, n_y, n_mixture * 3])
        loc, unconstrained_scale, logits = tf.split(net,
                                                    num_or_size_splits=3,
                                                    axis=-1)
        # Softplus plus a small floor keeps the scale strictly positive.
        scale = tf.nn.softplus(unconstrained_scale) + 1e-3

        if distribution == 'logistic':
            # Form mixture of discretized logistic distributions. Note we shift the
            # logistic distribution by -0.5. This lets the quantization capture "rounding"
            # intervals, `(x-0.5, x+0.5]`, and not "ceiling" intervals, `(x-1, x]`.
            components = tfd.QuantizedDistribution(
                distribution=tfd.TransformedDistribution(
                    distribution=tfd.Logistic(loc=loc, scale=scale),
                    bijector=tfb.AffineScalar(shift=-0.5)),
                low=0.,
                high=2.**3 - 1)
        else:  # 'normal' (validated at function entry)
            components = tfd.Normal(loc=loc, scale=scale)

        mixture_dist = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(logits=logits),
            components_distribution=components)

        # Define the sample and the log likelihood outputs
        sample = mixture_dist.sample()
        loglik = mixture_dist.log_prob(obs_layer)

        hub.add_signature(inputs={
            'features': feature_layer,
            'labels': obs_layer
        },
                          outputs={
                              'sample': sample,
                              'loglikelihood': loglik,
                              'loc': loc,
                              'scale': scale,
                              'logits': logits
                          })

    # Create model and register module if necessary
    spec = hub.create_module_spec(_module_fn)
    module = hub.Module(spec, trainable=True)
    if isinstance(features, dict):
        predictions = module(features, as_dict=True)
    else:
        predictions = module({
            'features': features,
            'labels': labels
        },
                             as_dict=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        hub.register_module_for_export(module, "likelihood")
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Compute and register loss function
    loss = tf.reduce_mean(-predictions['loglikelihood'])
    tf.losses.add_loss(loss)

    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    train_op = None
    eval_metric_ops = None
    # Stays empty outside TRAIN mode so the return below never touches an
    # undefined hook.
    training_hooks = []

    # Define optimizer
    if mode == tf.estimator.ModeKeys.TRAIN:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.train.get_global_step()
            boundaries = list(np.array([1e4, 2e4, 4e4, 5e4, 6e4]).astype(int))
            values = [lr0, lr0 / 2, lr0 / 10, lr0 / 20, lr0 / 100, lr0 / 1000]
            learning_rate = tf.train.piecewise_constant(
                global_step, boundaries, values)
            train_op = optimizer(learning_rate=learning_rate).minimize(
                loss=total_loss, global_step=global_step)
            tf.summary.scalar('rate', learning_rate)
            training_hooks.append(
                tf.train.LoggingTensorHook(
                    {
                        "iter": global_step,
                        "loss": loss
                    }, every_n_iter=50))

        tf.summary.scalar('loss', loss)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # EstimatorSpec requires (value, update_op) metric pairs; the old
        # code also referenced an undefined `neg_log_likelihood` here.
        eval_metric_ops = {"log_p": tf.metrics.mean(loss)}

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=total_loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops,
                                      training_hooks=training_hooks)
Example #14
0
def _mdn_unetmodel_fn(features,
                      labels,
                      nchannels,
                      n_y,
                      n_mixture,
                      dropout,
                      optimizer,
                      mode,
                      pad,
                      fsize=8,
                      nsub=2,
                      distribution='logistic'):
    """Estimator model function: 3D U-Net with a mixture density output.

    The network is wrapped in a TensorFlow Hub module whose signature takes
    ``features`` and ``labels`` and returns ``sample``, ``loglikelihood``,
    ``loc``, ``scale`` and ``logits``.

    Args:
        features: Either a dict passed straight to the module, or the input
            tensor fed as the module's ``features`` input.
        labels: Tensor fed as the module's ``labels`` input when ``features``
            is not a dict.
        nchannels: Size of the last axis of the input placeholder.
        n_y: Size of the last axis of the observation placeholder.
        n_mixture: Number of mixture components per output channel.
        dropout: Not used by this function.
        optimizer: Callable invoked as ``optimizer(learning_rate=...)``; the
            returned object must provide ``minimize``.
        mode: A ``tf.estimator.ModeKeys`` value.
        pad: Input stem variant. 0 uses one 'same' convolution, 2 uses one
            'valid' convolution, 4 uses two 'valid' convolutions.
        fsize: Number of filters at the first resolution level.
        nsub: Number of downsampling (and matching upsampling) levels.
        distribution: ``'logistic'`` for quantized logistic components or
            ``'normal'`` for Gaussian components.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.

    Raises:
        ValueError: If ``pad`` or ``distribution`` is not a supported value.
    """
    # Fail early with a clear message instead of a NameError deep inside the
    # graph construction when an unsupported option is given.
    if pad not in (0, 2, 4):
        raise ValueError("pad must be 0, 2 or 4, got %r" % (pad,))
    if distribution not in ('logistic', 'normal'):
        raise ValueError("distribution must be 'logistic' or 'normal', "
                         "got %r" % (distribution,))

    def _module_fn():
        """
        Function building the module
        """

        feature_layer = tf.placeholder(
            tf.float32,
            shape=[None, None, None, None, nchannels],
            name='input')
        obs_layer = tf.placeholder(tf.float32,
                                   shape=[None, None, None, None, n_y],
                                   name='observations')

        # Builds the neural network

        # Input stem: the padding mode (and number of convolutions) depends
        # on `pad`.
        if pad == 0:
            d00 = slim.conv3d(feature_layer,
                              fsize,
                              5,
                              activation_fn=tf.nn.leaky_relu,
                              padding='same')
        elif pad == 2:
            d00 = slim.conv3d(feature_layer,
                              fsize,
                              5,
                              activation_fn=tf.nn.leaky_relu,
                              padding='valid')
        else:  # pad == 4, validated above
            d00 = slim.conv3d(feature_layer,
                              fsize,
                              5,
                              activation_fn=tf.nn.leaky_relu,
                              padding='valid')
            d00 = slim.conv3d(d00,
                              fsize * 2,
                              5,
                              activation_fn=tf.nn.leaky_relu,
                              padding='valid')

        # Downsampling path: the filter count doubles at every level and the
        # last activation of each level is kept for the skip connections.
        cfsize = fsize
        d1 = wide_resnet(d00, cfsize, activation_fn=tf.nn.leaky_relu)
        d2 = wide_resnet(d1, cfsize, activation_fn=tf.nn.leaky_relu)
        dd = [d2]
        for i in range(nsub):
            cfsize *= 2
            print(i, cfsize)
            dsub = slim.max_pool3d(dd[-1],
                                   kernel_size=3,
                                   stride=2,
                                   padding='SAME')
            d1 = wide_resnet(dsub, cfsize, activation_fn=tf.nn.leaky_relu)
            d2 = wide_resnet(d1, cfsize, activation_fn=tf.nn.leaky_relu)
            dd.append(d2)

        print(len(dd))
        # Upsampling path: start from the deepest level, and at each step
        # upsample, halve the filter count with a kernel-size-1 convolution
        # and concatenate the matching skip connection.
        usub = dd.pop()
        for i in range(nsub):
            u0 = dynamic_deconv3d('up%d' % i,
                                  usub,
                                  shape=[3, 3, 3, cfsize],
                                  activation=tf.identity)
            cfsize = cfsize // 2
            print(i, cfsize)
            u0 = slim.conv3d(u0,
                             cfsize,
                             1,
                             activation_fn=tf.identity,
                             padding='same')
            uc = tf.concat([u0, dd.pop()], axis=-1)
            u1 = wide_resnet(uc, cfsize, activation_fn=tf.nn.leaky_relu)
            u2 = wide_resnet(u1, cfsize, activation_fn=tf.nn.leaky_relu)
            usub = u2

        print(len(dd))
        net = slim.conv3d(usub, 1, 3, activation_fn=tf.nn.tanh)

        # Define the probabilistic layer: three parameters (loc, scale,
        # mixture logit) per mixture component and output channel.
        net = slim.conv3d(net, n_mixture * 3 * n_y, 1, activation_fn=None)
        cube_size = tf.shape(obs_layer)[1]
        net = tf.reshape(
            net, [-1, cube_size, cube_size, cube_size, n_y, n_mixture * 3])
        loc, unconstrained_scale, logits = tf.split(net,
                                                    num_or_size_splits=3,
                                                    axis=-1)
        # Softplus plus a small offset keeps the scale strictly positive.
        scale = tf.nn.softplus(unconstrained_scale) + 1e-3

        if distribution == 'logistic':
            # Form mixture of discretized logistic distributions. Note we
            # shift the logistic distribution by -0.5. This lets the
            # quantization capture "rounding" intervals, `(x-0.5, x+0.5]`,
            # and not "ceiling" intervals, `(x-1, x]`.
            discretized_logistic_dist = tfd.QuantizedDistribution(
                distribution=tfd.TransformedDistribution(
                    distribution=tfd.Logistic(loc=loc, scale=scale),
                    bijector=tfb.AffineScalar(shift=-0.5)),
                low=0.,
                high=2.**3 - 1)

            mixture_dist = tfd.MixtureSameFamily(
                mixture_distribution=tfd.Categorical(logits=logits),
                components_distribution=discretized_logistic_dist)
        else:  # distribution == 'normal', validated above
            mixture_dist = tfd.MixtureSameFamily(
                mixture_distribution=tfd.Categorical(logits=logits),
                components_distribution=tfd.Normal(loc=loc, scale=scale))

        # Expose a sample and the log likelihood of the observations
        sample = mixture_dist.sample()
        loglik = mixture_dist.log_prob(obs_layer)
        hub.add_signature(inputs={
            'features': feature_layer,
            'labels': obs_layer
        },
                          outputs={
                              'sample': sample,
                              'loglikelihood': loglik,
                              'loc': loc,
                              'scale': scale,
                              'logits': logits
                          })

    # Create model and register module if necessary
    spec = hub.create_module_spec(_module_fn)
    module = hub.Module(spec, trainable=True)
    if isinstance(features, dict):
        predictions = module(features, as_dict=True)
    else:
        predictions = module({
            'features': features,
            'labels': labels
        },
                             as_dict=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        hub.register_module_for_export(module, "likelihood")
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loglik = predictions['loglikelihood']
    # Compute and register loss function: sum over the last axis, then
    # average over everything else.
    neg_log_likelihood = -tf.reduce_sum(loglik, axis=-1)
    neg_log_likelihood = tf.reduce_mean(neg_log_likelihood)

    tf.losses.add_loss(neg_log_likelihood)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    train_op = None
    eval_metric_ops = None

    # Define optimizer
    if mode == tf.estimator.ModeKeys.TRAIN:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.train.get_global_step()
            # Step-wise learning-rate decay schedule.
            boundaries = list(np.array([1e4, 2e4, 4e4, 5e4, 6e4]).astype(int))
            values = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 1e-6]
            learning_rate = tf.train.piecewise_constant(
                global_step, boundaries, values)
            train_op = optimizer(learning_rate=learning_rate).minimize(
                loss=total_loss, global_step=global_step)
            tf.summary.scalar('rate', learning_rate)
        tf.summary.scalar('loss', neg_log_likelihood)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # EstimatorSpec requires (value_op, update_op) tuples, not bare
        # tensors, so wrap the scalar in a streaming mean.
        eval_metric_ops = {"log_p": tf.metrics.mean(neg_log_likelihood)}

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=total_loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)
Example #15
0
def _mdn_pixmodel_fn(features,
                     labels,
                     nchannels,
                     n_y,
                     n_mixture,
                     dropout,
                     optimizer,
                     mode,
                     pad,
                     cfilter_size=None,
                     f_map=8):
    """Estimator model function: conditional 3D PixelCNN with a Gaussian
    mixture density output.

    The network is wrapped in a TensorFlow Hub module whose signature takes
    ``features`` and ``labels`` and returns ``sample``, ``loglikelihood``,
    ``loc``, ``scale`` and ``logits``.

    Args:
        features: Either a dict passed straight to the module, or the input
            tensor fed as the module's ``features`` input.
        labels: Tensor fed as the module's ``labels`` input when ``features``
            is not a dict.
        nchannels: Size of the last axis of the input placeholder.
        n_y: Size of the last axis of the observation placeholder.
        n_mixture: Number of mixture components per output channel.
        dropout: Forwarded to ``wide_resnet`` as ``keep_prob``.
        optimizer: Callable invoked as ``optimizer(learning_rate=...)``; the
            returned object must provide ``minimize``.
        mode: A ``tf.estimator.ModeKeys`` value.
        pad: Not used by this function.
        cfilter_size: Forwarded to ``PixelCNN3Dlayer``.
        f_map: Forwarded to ``PixelCNN3Dlayer``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    # Check for training mode
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    def _module_fn():
        """
        Function building the module
        """

        feature_layer = tf.placeholder(
            tf.float32,
            shape=[None, None, None, None, nchannels],
            name='input')
        obs_layer = tf.placeholder(tf.float32,
                                   shape=[None, None, None, None, n_y],
                                   name='observations')

        # Conditioning image: three residual blocks on the input features,
        # concatenated with the raw features along the channel axis.
        conditional_im = wide_resnet(feature_layer,
                                     16,
                                     activation_fn=tf.nn.leaky_relu,
                                     keep_prob=dropout,
                                     is_training=is_training)
        conditional_im = wide_resnet(conditional_im,
                                     16,
                                     activation_fn=tf.nn.leaky_relu,
                                     keep_prob=dropout,
                                     is_training=is_training)
        conditional_im = wide_resnet(conditional_im,
                                     1,
                                     activation_fn=tf.nn.leaky_relu,
                                     keep_prob=dropout,
                                     is_training=is_training)
        conditional_im = tf.concat((feature_layer, conditional_im), -1)

        # Builds the neural network: a stack of 10 PixelCNN layers, each fed
        # with the output of the previous one.
        ul = [[obs_layer]]
        for i in range(10):
            ul.append(
                PixelCNN3Dlayer(i,
                                ul[i],
                                f_map=f_map,
                                full_horizontal=True,
                                h=None,
                                conditional_im=conditional_im,
                                cfilter_size=cfilter_size,
                                gatedact='sigmoid'))

        # Last element of the last layer's output feeds the output head.
        h_stack_in = ul[-1][-1]

        with tf.variable_scope("fc_1"):
            fc1 = GatedCNN([1, 1, 1, 1],
                           h_stack_in,
                           orientation=None,
                           gated=False,
                           mask='b').output()

        with tf.variable_scope("fc_2"):
            fc2 = GatedCNN([1, 1, 1, n_mixture * 3 * n_y],
                           fc1,
                           orientation=None,
                           gated=False,
                           mask='b',
                           activation=False).output()

        # Three parameters (loc, scale, mixture logit) per mixture component
        # and output channel.
        cube_size = tf.shape(obs_layer)[1]
        net = tf.reshape(
            fc2, [-1, cube_size, cube_size, cube_size, n_y, n_mixture * 3])

        loc, unconstrained_scale, logits = tf.split(net,
                                                    num_or_size_splits=3,
                                                    axis=-1)
        # Softplus plus a small offset keeps the scale strictly positive.
        scale = tf.nn.softplus(unconstrained_scale) + 1e-3

        # Form a mixture of Normal distributions.
        mixture_dist = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(logits=logits),
            components_distribution=tfd.Normal(loc, scale))

        # Expose a sample and the log likelihood of the observations
        sample = mixture_dist.sample()
        loglik = mixture_dist.log_prob(obs_layer)
        hub.add_signature(inputs={
            'features': feature_layer,
            'labels': obs_layer
        },
                          outputs={
                              'sample': sample,
                              'loglikelihood': loglik,
                              'loc': loc,
                              'scale': scale,
                              'logits': logits
                          })

    # Create model and register module if necessary
    spec = hub.create_module_spec(_module_fn)
    module = hub.Module(spec, trainable=True)
    if isinstance(features, dict):
        predictions = module(features, as_dict=True)
    else:
        predictions = module({
            'features': features,
            'labels': labels
        },
                             as_dict=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        hub.register_module_for_export(module, "likelihood")
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loglik = predictions['loglikelihood']
    # Compute and register loss function: sum over the last axis, then
    # average over everything else.
    neg_log_likelihood = -tf.reduce_sum(loglik, axis=-1)
    neg_log_likelihood = tf.reduce_mean(neg_log_likelihood)

    tf.losses.add_loss(neg_log_likelihood)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    train_op = None
    eval_metric_ops = None

    # Define optimizer
    if mode == tf.estimator.ModeKeys.TRAIN:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.train.get_global_step()
            # Step-wise learning-rate decay schedule.
            boundaries = list(
                np.array([500, 1e3, 3e3, 1e4, 2e4, 3e4, 4e4]).astype(int))
            values = [1e-3, 1e-3, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 1e-6]
            learning_rate = tf.train.piecewise_constant(
                global_step, boundaries, values)
            train_op = optimizer(learning_rate=learning_rate).minimize(
                loss=total_loss, global_step=global_step)
            tf.summary.scalar('rate', learning_rate)
        tf.summary.scalar('loss', neg_log_likelihood)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # EstimatorSpec requires (value_op, update_op) tuples, not bare
        # tensors, so wrap the scalar in a streaming mean.
        eval_metric_ops = {"log_p": tf.metrics.mean(neg_log_likelihood)}

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=total_loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)
Example #16
0
def model_fn(features, labels, mode, params):
  """Model function: retrieve candidates, read them, and train on the
  marginal masked-LM log-likelihood.

  Args:
    features: Dict providing "query_inputs", "candidate_inputs",
      "joint_inputs", "mlm_targets", "mlm_positions" and "mlm_mask". Outside
      PREDICT mode "export_timestamp" is read as well.
    labels: Ignored.
    mode: A `tf.estimator.ModeKeys` value.
    params: Dict providing "bert_hub_module_handle",
      "embedder_hub_module_handle", "share_embedders", "num_candidates",
      "num_train_steps", "learning_rate" and "use_tpu".

  Returns:
    A `TPUEstimatorSpec` when `params["use_tpu"]` is truthy, otherwise an
    `EstimatorSpec` that also carries the eval metrics.
  """
  del labels

  # Hub graph variant to load: the "train" tagged graph only when training.
  module_tags = {"train"} if mode == tf.estimator.ModeKeys.TRAIN else {}

  # ==============================
  # Input features
  # ==============================
  # [batch_size, query_seq_len]
  query_inputs = features["query_inputs"]

  # [batch_size, num_candidates, candidate_seq_len]
  candidate_inputs = features["candidate_inputs"]

  # [batch_size, num_candidates, query_seq_len + candidate_seq_len]
  joint_inputs = features["joint_inputs"]

  # [batch_size, num_masks]
  mlm_targets = features["mlm_targets"]
  mlm_positions = features["mlm_positions"]
  mlm_mask = features["mlm_mask"]

  # ==============================
  # Create modules.
  # ==============================
  bert_module = hub.Module(
      spec=params["bert_hub_module_handle"],
      name="locbert",
      tags=module_tags,
      trainable=True)
  hub.register_module_for_export(bert_module, "locbert")

  embedder_module = hub.Module(
      spec=params["embedder_hub_module_handle"],
      name="embedder",
      tags=module_tags,
      trainable=True)
  hub.register_module_for_export(embedder_module, "embedder")

  if params["share_embedders"]:
    query_embedder_module = embedder_module
  else:
    query_embedder_module = hub.Module(
        spec=params["embedder_hub_module_handle"],
        name="embedder",
        tags=module_tags,
        trainable=True)
    # Export the separate query tower itself, not a second copy of the
    # candidate embedder.
    hub.register_module_for_export(query_embedder_module, "query_embedder")

  # ==============================
  # Retrieve.
  # ==============================
  # [batch_size, projected_size]
  query_emb = query_embedder_module(
      inputs=dict(
          input_ids=query_inputs.token_ids,
          input_mask=query_inputs.mask,
          segment_ids=query_inputs.segment_ids),
      signature="projected")

  # [batch_size * num_candidates, candidate_seq_len]
  flat_candidate_inputs, unflatten = flatten_bert_inputs(
      candidate_inputs)

  # [batch_size * num_candidates, projected_size]
  flat_candidate_emb = embedder_module(
      inputs=dict(
          input_ids=flat_candidate_inputs.token_ids,
          input_mask=flat_candidate_inputs.mask,
          segment_ids=flat_candidate_inputs.segment_ids),
      signature="projected")

  # [batch_size, num_candidates, projected_size]
  unflattened_candidate_emb = unflatten(flat_candidate_emb)

  # [batch_size, num_candidates]
  retrieval_score = tf.einsum("BD,BND->BN", query_emb,
                              unflattened_candidate_emb)

  # ==============================
  # Read.
  # ==============================
  # [batch_size * num_candidates, query_seq_len + candidate_seq_len]
  flat_joint_inputs, unflatten = flatten_bert_inputs(joint_inputs)

  # [batch_size * num_candidates, num_masks]
  flat_mlm_positions, _ = tensor_utils.flatten(
      tf.tile(
          tf.expand_dims(mlm_positions, 1), [1, params["num_candidates"], 1]))

  batch_size, num_masks = tensor_utils.shape(mlm_targets)

  # [batch_size * num_candidates, query_seq_len + candidates_seq_len]
  flat_joint_bert_outputs = bert_module(
      inputs=dict(
          input_ids=flat_joint_inputs.token_ids,
          input_mask=flat_joint_inputs.mask,
          segment_ids=flat_joint_inputs.segment_ids,
          mlm_positions=flat_mlm_positions),
      signature="mlm",
      as_dict=True)

  # [batch_size, num_candidates]
  candidate_score = retrieval_score

  # [batch_size, num_candidates]
  candidate_log_probs = tf.math.log_softmax(candidate_score)

  # ==============================
  # Compute marginal log-likelihood.
  # ==============================
  # [batch_size * num_candidates, num_masks]
  flat_mlm_logits = flat_joint_bert_outputs["mlm_logits"]

  # [batch_size, num_candidates, num_masks, vocab_size]
  mlm_logits = tf.reshape(
      flat_mlm_logits, [batch_size, params["num_candidates"], num_masks, -1])
  mlm_log_probs = tf.math.log_softmax(mlm_logits)

  # [batch_size, num_candidates, num_masks]
  tiled_mlm_targets = tf.tile(
      tf.expand_dims(mlm_targets, 1), [1, params["num_candidates"], 1])

  # [batch_size, num_candidates, num_masks, 1]
  tiled_mlm_targets = tf.expand_dims(tiled_mlm_targets, -1)

  # [batch_size, num_candidates, num_masks, 1]
  gold_log_probs = tf.batch_gather(mlm_log_probs, tiled_mlm_targets)

  # [batch_size, num_candidates, num_masks]
  gold_log_probs = tf.squeeze(gold_log_probs, -1)

  # [batch_size, num_candidates, num_masks]
  joint_gold_log_probs = (
      tf.expand_dims(candidate_log_probs, -1) + gold_log_probs)

  # Marginalize over the candidates.
  # [batch_size, num_masks]
  marginal_gold_log_probs = tf.reduce_logsumexp(joint_gold_log_probs, 1)

  # [batch_size, num_masks]
  float_mlm_mask = tf.cast(mlm_mask, tf.float32)

  # Mask-weighted mean; div_no_nan yields 0 when no position is masked.
  # []
  loss = -tf.div_no_nan(
      tf.reduce_sum(marginal_gold_log_probs * float_mlm_mask),
      tf.reduce_sum(float_mlm_mask))

  # ==============================
  # Optimization
  # ==============================
  # Warm up for a tenth of training, clamped to [100, 10000] steps.
  num_warmup_steps = min(10000, max(100, int(params["num_train_steps"] / 10)))
  train_op = optimization.create_optimizer(
      loss=loss,
      init_lr=params["learning_rate"],
      num_train_steps=params["num_train_steps"],
      num_warmup_steps=num_warmup_steps,
      use_tpu=params["use_tpu"])

  # ==============================
  # Evaluation
  # ==============================
  eval_metric_ops = None if params["use_tpu"] else dict()
  if mode != tf.estimator.ModeKeys.PREDICT:
    # Gain of the marginal over the candidate at index 0.
    # [batch_size, num_masks]
    retrieval_utility = marginal_gold_log_probs - gold_log_probs[:, 0]
    retrieval_utility *= float_mlm_mask

    # []
    retrieval_utility = tf.div_no_nan(
        tf.reduce_sum(retrieval_utility), tf.reduce_sum(float_mlm_mask))
    add_mean_metric("retrieval_utility", retrieval_utility, eval_metric_ops)

    # Elapsed minutes since "export_timestamp"; zeroed where the timestamp
    # is not positive.
    has_timestamp = tf.cast(
        tf.greater(features["export_timestamp"], 0), tf.float64)
    off_policy_delay_secs = (
        tf.timestamp() - tf.cast(features["export_timestamp"], tf.float64))
    off_policy_delay_mins = off_policy_delay_secs / 60.0
    off_policy_delay_mins *= has_timestamp

    add_mean_metric("off_policy_delay_mins", off_policy_delay_mins,
                    eval_metric_ops)

  # Create empty predictions to avoid errors when running in prediction mode.
  predictions = dict()

  if params["use_tpu"]:
    return tf.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        predictions=predictions)
  else:
    if eval_metric_ops is not None:
      # Make sure the eval metrics are updated during training so that we get
      # quick feedback from tensorboard summaries when debugging locally.
      with tf.control_dependencies([u for _, u in eval_metric_ops.values()]):
        loss = tf.identity(loss)
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        predictions=predictions)
def vae_model_fn(features, labels, mode, params, config):
    """
    Model function to create a VAE estimator

    Args:
        features: Dict with the input images under 'x'. An optional 'psf'
            entry is multiplied with the reconstruction in Fourier space
            before the image summaries are written.
        labels: Not used by this function.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict providing 'encoder_fn', 'decoder_fn', 'latent_size',
            'iaf_size', 'loglikelihood_fn', 'kl_weight', 'learning_rate',
            'max_steps', 'adam_epsilon' and 'gradient_clipping'.
        config: Not used by this function.

    Returns:
        A ``tf.estimator.EstimatorSpec``. In PREDICT mode it carries the
        predictions 'code', 'reconstruction', 'log_prob' and 'input';
        otherwise it carries the loss, train op and eval metrics.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Extract input images
    x = features['x']

    # Build model functions
    encoder_model = make_encoder_fn(params['encoder_fn'],
                                    params['latent_size'],
                                    params['iaf_size'],
                                    is_training=is_training)
    decoder_model = partial(params['decoder_fn'], is_training=is_training)

    # Define latent prior
    prior = tfd.MultivariateNormalDiag(loc=tf.zeros([params['latent_size']]),
                                       scale_identity_multiplier=1.0)

    # In predict mode, we encapsulate the model inside a module for exporting
    # This is because of a weird bug that makes training MAF unstable inside a
    # module
    if mode == tf.estimator.ModeKeys.PREDICT:
        image_size = x.shape[-2]
        n_channels = x.shape[-1]
        latent_size = params['latent_size']

        def make_encoder_spec():
            input_layer = tf.placeholder(
                tf.float32, shape=[None, image_size, image_size, n_channels])
            encoding = encoder_model(input_layer)
            sample = encoding.sample()
            log_prob = encoding.log_prob(sample)
            hub.add_signature(inputs=input_layer,
                              outputs={
                                  'sample': sample,
                                  'log_prob': log_prob
                              })

        encoder_spec = hub.create_module_spec(make_encoder_spec)
        encoder = hub.Module(encoder_spec, name="encoder_module")

        def make_decoder_spec():
            code = tf.placeholder(tf.float32, shape=[None, latent_size])
            output = decoder_model(code)
            # A non-tensor decoder output is treated as a distribution and
            # sampled.
            if not tf.contrib.framework.is_tensor(output):
                output = output.sample()
            hub.add_signature(inputs=code, outputs=output)

        decoder_spec = hub.create_module_spec(make_decoder_spec)
        decoder = hub.Module(decoder_spec, name="decoder_module")

        # Register and export encoder and decoder modules
        hub.register_module_for_export(encoder, "encoder")
        hub.register_module_for_export(decoder, "decoder")

        code = encoder(x, as_dict=True)
        recon = decoder(code['sample'])
        predictions = {
            'code': code['sample'],
            'reconstruction': recon,
            'log_prob': code['log_prob'],
            'input': x
        }
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # The scope names below are the same strings as the hub module names used
    # in PREDICT mode.
    with tf.variable_scope("encoder_module"):
        encoding = encoder_model(x)
        code = encoding.sample()
        log_prob = encoding.log_prob(code)

    with tf.variable_scope("decoder_module"):
        decoder_output = decoder_model(code)

    if tf.contrib.framework.is_tensor(decoder_output):
        recon = decoder_output
        loglikelihood = params['loglikelihood_fn'](x, recon, features)
    else:
        # In this case, the decoder is actually returning a distribution
        # which we can use to sample from and estimate the likelihood function
        recon = decoder_output.sample()
        loglikelihood = decoder_output.log_prob(x)

    image_tile_summary("image", tf.to_float(x[:16]), rows=4, cols=4)
    if 'psf' in features.keys():
        # Multiply the first reconstruction channel by the PSF in Fourier
        # space before displaying it.
        r = tf.expand_dims(tf.spectral.irfft2d(
            tf.spectral.rfft2d(recon[:, :, :, 0]) * features['psf']),
                           axis=-1)
    else:
        r = recon
    image_tile_summary("recon", tf.to_float(r[:16]), rows=4, cols=4)
    image_tile_summary("diff", tf.to_float(x[:16] - r[:16]), rows=4, cols=4)

    tf.summary.scalar('loglikelihood', tf.reduce_mean(loglikelihood))

    # Single-sample estimate of the KL term: log q(z|x) - log p(z)
    kl = log_prob - prior.log_prob(code)
    tf.summary.scalar('kl', tf.reduce_mean(kl))

    elbo = loglikelihood - kl * params['kl_weight']

    loss = -tf.reduce_mean(elbo)
    tf.summary.scalar("elbo", tf.reduce_mean(elbo))

    # Training of the model
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.cosine_decay(params["learning_rate"], global_step,
                                          params["max_steps"])
    tf.summary.scalar("learning_rate", learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate,
                                       epsilon=params['adam_epsilon'])
    grads_and_vars = optimizer.compute_gradients(loss)
    # compute_gradients yields None for variables the loss does not depend
    # on; clip_by_norm cannot handle None, so pass those through untouched.
    clipped_grads_and_vars = [
        (grad if grad is None else tf.clip_by_norm(
            grad, params["gradient_clipping"]), var)
        for grad, var in grads_and_vars
    ]

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(clipped_grads_and_vars,
                                             global_step=global_step)

    eval_metric_ops = {
        "elbo": tf.metrics.mean(tf.reduce_mean(elbo)),
        "kl": tf.metrics.mean(tf.reduce_mean(kl)),
        "loglikelihood": tf.metrics.mean(tf.reduce_mean(loglikelihood))
    }

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)
def autoencoder_body(self, features):
  """Customized body function for autoencoders acting on continuous images.

  This is based on tensor2tensor.models.research.AutoencoderBasic.body
  and should be compatible with most derived classes.

  The original autoencoder class relies on embedding the channels to a discrete
  vocabulary and defines the loss on that vocab. Here the reconstruction loss
  is instead expressed as a continuous likelihood, evaluated by
  `loglikelihood_fn`.

  In PREDICT mode the encoder and the decoder are each wrapped in a TensorFlow
  Hub module, registered for export under the names "encoder" and "decoder",
  and the function returns early. In every other mode the same layers are
  built directly under the 'encoder_module' and 'decoder_module' variable
  scopes (the names given to the hub modules) and the losses are assembled.

  Args:
    features: dict of tensors. Reads "inputs" (for its shape, and as the
      encoder input in PREDICT mode), "targets_raw" (outside PREDICT mode)
      and, when present, "psf".

  Returns:
    A tuple `(output, losses)`. In PREDICT mode `output` is the decoder module
    output and `losses` is `{"bottleneck_loss": 0.0}`. Otherwise `output` is
    the reconstruction reshaped to the shape of `features["targets_raw"]` and
    `losses` holds the individual regularization terms plus their sum with the
    negative log-likelihood under the "training" key.
  """
  hparams = self.hparams
  is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN

  # Any value other than 'softplus' leaves the final dense layer linear.
  output_activation = tf.nn.softplus if hparams.output_activation == 'softplus' else None
  # Placeholder shape for the hub modules: input shape with a free batch axis.
  input_shape =  [None, ] + common_layers.shape_list(features["inputs"])[1:]

  if hparams.mode == tf.estimator.ModeKeys.PREDICT:
    # In predict mode, we also define TensorFlow Hub modules for all pieces of
    # the autoencoder
    # NOTE(review): psf_shape is only defined when 'psf' is in features, but
    # make_model_spec_psf below is selected on hparams.encode_psf alone.
    if hparams.encode_psf and 'psf' in features:
      psf_shape =  [None, ] + common_layers.shape_list(features["psf"])[1:]
    # First build encoder spec
    def make_model_spec():
      # Encoder without PSF input: embed -> encoder -> bottleneck.
      input_layer = tf.placeholder(tf.float32, shape=input_shape)
      x = self.embed(tf.expand_dims(input_layer, -1))
      x, encoder_layers = self.encoder(x)
      b, b_loss = self.bottleneck(x)
      hub.add_signature(inputs=input_layer, outputs=b)

    def make_model_spec_psf():
      # Encoder taking both the image ('input') and the PSF ('psf').
      input_layer = tf.placeholder(tf.float32, shape=input_shape)
      psf_layer = tf.placeholder(tf.float32, shape=psf_shape)
      x = self.embed(tf.expand_dims(input_layer, -1))

      # If we have access to the PSF, we add this information to the encoder
      if hparams.encode_psf and 'psf' in features:
        # Inverse real FFT of the first PSF channel gives a PSF image.
        psf_image = tf.expand_dims(tf.signal.irfft2d(tf.cast(psf_layer[...,0], tf.complex64)), axis=-1)
        # Roll the image to undo the fftshift, assuming x1 zero padding and x2 subsampling
        psf_image = tf.roll(psf_image, shift=[input_shape[1], input_shape[2]], axis=[1,2])
        psf_image = tf.image.resize_with_crop_or_pad(psf_image, input_shape[1], input_shape[2])
        net_psf = tf.layers.conv2d(psf_image,
                                   hparams.hidden_size // 4, 5,
                                   padding='same', name="psf_embed_1")
        net_psf = common_layers.layer_norm(net_psf, name="psf_norm")
        # The PSF embedding is concatenated to the image embedding channels.
        x, encoder_layers = self.encoder(tf.concat([x, net_psf], axis=-1))
      else:
        x, encoder_layers = self.encoder(x)
      b, b_loss = self.bottleneck(x)
      hub.add_signature(inputs={'input':input_layer, 'psf':psf_layer}, outputs=b)

    spec = hub.create_module_spec(make_model_spec_psf if hparams.encode_psf else make_model_spec, drop_collections=['checkpoints'])
    encoder = hub.Module(spec, name="encoder_module")
    hub.register_module_for_export(encoder, "encoder")

    if hparams.encode_psf:
      code = encoder({'input':features["inputs"], 'psf': features['psf']})
    else:
      code = encoder(features["inputs"])
    # Placeholder shape for the decoder module: code shape with a free batch axis.
    b_shape = [None, ] + common_layers.shape_list(code)[1:]
    # Width passed to unbottleneck: hidden_size * 2**num_hidden_layers, capped
    # at max_hidden_size.
    res_size = self.hparams.hidden_size * 2**self.hparams.num_hidden_layers
    res_size = min(res_size, hparams.max_hidden_size)

    # Second build decoder spec
    # (this rebinds the name make_model_spec used for the encoder above)
    def make_model_spec():
      input_layer = tf.placeholder(tf.float32, shape=b_shape)
      x = self.unbottleneck(input_layer, res_size)
      # No encoder skip layers are passed to the decoder here.
      x = self.decoder(x, None)
      reconstr = tf.layers.dense(x, input_shape[-1], name="autoencoder_final",
                                 activation=output_activation)
      hub.add_signature(inputs=input_layer, outputs=reconstr)
      # Attach image metadata from the problem hparams to the exported module.
      hub.attach_message("stamp_size", tf.train.Int64List(value=[hparams.problem_hparams.img_len]))
      try:
        hub.attach_message("pixel_size", tf.train.FloatList(value=[hparams.problem_hparams.pixel_scale[res] for res in hparams.problem_hparams.resolutions]))
      except AttributeError:
        # Fall back to a single pixel_scale value when the per-resolution
        # lookup raises AttributeError.
        hub.attach_message("pixel_size", tf.train.FloatList(value=[hparams.problem_hparams.pixel_scale]))
    spec = hub.create_module_spec(make_model_spec, drop_collections=['checkpoints'])
    decoder = hub.Module(spec, name="decoder_module")
    hub.register_module_for_export(decoder, "decoder")

    reconstr = decoder(code)
    # Early return: none of the loss machinery below is built in PREDICT mode.
    return reconstr , {"bottleneck_loss": 0.0}

  encoder_layers = None
  self.is1d = hparams.sample_width == 1
  if (hparams.mode != tf.estimator.ModeKeys.PREDICT
      or self._encode_on_predict):
    # The autoencoder target is the raw (continuous) image.
    labels = features["targets_raw"]
    labels_shape = common_layers.shape_list(labels)

    shape = common_layers.shape_list(labels)
    with tf.variable_scope('encoder_module'):
      x = self.embed(tf.expand_dims(labels, -1))

    if shape[2] == 1:
      self.is1d = True

    # Run encoder.
    with tf.variable_scope('encoder_module'):
      # If we have access to the PSF, we add this information to the encoder
      # Note that we only support single band images so far...
      if hparams.encode_psf and 'psf' in features:
        psf_image = tf.expand_dims(tf.signal.irfft2d(tf.cast(features['psf'][...,0], tf.complex64)), axis=-1)
        # Roll the image to undo the fftshift, assuming x1 zero padding and x2 subsampling
        psf_image = tf.roll(psf_image, shift=[input_shape[1], input_shape[2]], axis=[1,2])
        psf_image = tf.image.resize_with_crop_or_pad(psf_image, input_shape[1], input_shape[2])
        net_psf = tf.layers.conv2d(psf_image,
                                   hparams.hidden_size // 4, 5,
                                   padding='same', name="psf_embed_1")
        net_psf = common_layers.layer_norm(net_psf, name="psf_norm")
        x, encoder_layers = self.encoder(tf.concat([x, net_psf], axis=-1))
      else:
        x, encoder_layers = self.encoder(x)

    # Bottleneck.
    with tf.variable_scope('encoder_module'):
      b, b_loss = self.bottleneck(x)

    xb_loss = 0.0
    b_shape = common_layers.shape_list(b)
    self._cur_bottleneck_tensor = b
    # Unbottleneck back to the channel width of the encoder output.
    res_size = common_layers.shape_list(x)[-1]
    with tf.variable_scope('decoder_module'):
      b = self.unbottleneck(b, res_size)
    if not is_training:
      # Outside training the decoder only sees the unbottlenecked code.
      x = b
    else:
      l = 2**hparams.num_hidden_layers
      warm_step = int(hparams.bottleneck_warmup_steps * 0.25 * l)
      nomix_p = common_layers.inverse_lin_decay(warm_step) + 0.01
      if common_layers.should_generate_summaries():
        tf.summary.scalar("nomix_p_bottleneck", nomix_p)
      rand = tf.random_uniform(common_layers.shape_list(x))
      # This is the distance between b and x. Having this as loss helps learn
      # the bottleneck function, but if we back-propagated to x it would be
      # minimized by just setting x=0 and b=0 -- so we don't want too much
      # of the influence of this, and we stop-gradient to not zero-out x.
      x_stop = tf.stop_gradient(x)
      xb_loss = tf.reduce_mean(tf.reduce_sum(
          tf.squared_difference(x_stop, b), axis=-1))
      # To prevent this loss from exploding we clip at 1, but anneal clipping.
      clip_max = 1.0 / common_layers.inverse_exp_decay(
          warm_step, min_value=0.001)
      xb_clip = tf.maximum(tf.stop_gradient(xb_loss), clip_max)
      xb_loss *= clip_max / xb_clip
      # Elementwise mix: take b where rand < nomix_p, the encoder output x
      # elsewhere.
      x = tf.where(tf.less(rand, nomix_p), b, x)
  else:
    # NOTE(review): PREDICT mode already returned above, so the condition of
    # the `if` always holds here and this branch looks unreachable -- confirm.
    if self._cur_bottleneck_tensor is None:
      b = self.sample()
    else:
      b = self._cur_bottleneck_tensor
    self._cur_bottleneck_tensor = b
    res_size = self.hparams.hidden_size * 2**self.hparams.num_hidden_layers
    res_size = min(res_size, hparams.max_hidden_size)

    with tf.variable_scope('decoder_module'):
      x = self.unbottleneck(b, res_size)

  # Run decoder.
  with tf.variable_scope('decoder_module'):
    x = self.decoder(x, encoder_layers)

  # Cut to the right size and mix before returning.
  res = x
  if hparams.mode != tf.estimator.ModeKeys.PREDICT:
    res = x[:, :shape[1], :shape[2], :]

  # Final dense layer maps the decoder features to the target channel count.
  with tf.variable_scope('decoder_module'):
    reconstr = tf.layers.dense(res, shape[-1], name="autoencoder_final",
                               activation=output_activation)

  # We apply an optional apodization of the output before applying the PSF and
  # evaluating the likelihood
  if hparams.output_apodization > 0:
    nx = reconstr.get_shape().as_list()[1]
    alpha = 2 * hparams.output_apodization / nx
    from scipy.signal.windows import tukey
    # Create a tukey window
    w = tukey(nx, alpha)
    # Outer product gives a 2D (nx, nx) window, broadcast over batch and channels.
    w = np.outer(w,w).reshape((1, nx, nx,1)).astype('float32')
    # And penalize non zero things at the border
    apo_loss = tf.reduce_mean(tf.reduce_sum(((1.- w)*reconstr)**2, axis=[1,2,3]))
  else:
    # No apodization: identity window and no penalty.
    w = 1.0
    apo_loss = 0.

  # We apply the window
  reconstr = reconstr * w

  # Optionally regularizes further the output
  # Anisotropic TV:
  tv = tf.reduce_mean(tf.image.total_variation(reconstr))
  # Smoothed Isotropic TV:
  #im_dx, im_dy = tf.image.image_gradients(reconstr)
  #tv = tf.reduce_sum(tf.sqrt(im_dx**2 + im_dy**2 + 1e-6), axis=[1,2,3])
  #tv = tf.reduce_mean(tv)

  # Summary of the reconstruction before any PSF convolution.
  image_summary("without_psf",tf.reshape(reconstr, labels_shape))
  # Apply channel-wise convolution with the PSF if requested
  if hparams.apply_psf and 'psf' in features:
    output_list = []
    for i in range(shape[3]):
      # Channel i of the reconstruction is convolved with channel i of the PSF.
      output_list.append(tf.squeeze(convolve(tf.expand_dims(reconstr[...,i],-1), tf.cast(features['psf'][...,i], tf.complex64),
                          zero_padding_factor=1)))
    reconstr = tf.stack(output_list,axis=-1)
    reconstr = tf.reshape(reconstr,shape)

  # Losses.
  # Individual regularization terms, each already scaled by its hparams weight
  # (b_loss is used as returned by the bottleneck).
  losses = {
      "bottleneck_extra": b_loss,
      "bottleneck_l2": hparams.bottleneck_l2_factor * xb_loss,
      "total_variation": hparams.total_variation_loss * tv,
      "apodization_loss": hparams.apodization_loss * apo_loss,
  }

  # Reconstruction term: mean negative log-likelihood of the targets.
  loglik = loglikelihood_fn(labels, reconstr, features, hparams)
  targets_loss = tf.reduce_mean(- loglik)

  tf.summary.scalar("negloglik", targets_loss)
  tf.summary.scalar("bottleneck_loss", b_loss)

  # Compute final loss
  losses["training"] = targets_loss + b_loss + hparams.bottleneck_l2_factor * xb_loss + hparams.total_variation_loss * tv +  hparams.apodization_loss * apo_loss
  logits = tf.reshape(reconstr, labels_shape)

  image_summary("ae", reconstr)
  image_summary("input", labels)

  return logits, losses
Example #19
0
    def estimator_model_fn(cls,
                           hparams,
                           features,
                           labels,
                           mode,
                           config=None,
                           params=None,
                           decode_hparams=None,
                           use_tpu=False):
        """Build the `EstimatorSpec` of the GAN for the requested mode.

        The generator and the discriminator are each wrapped in a TensorFlow
        Hub module so that they can be exported in PREDICT mode, then handed
        to the TF-GAN helpers through thin wrapper functions.

        Args:
          cls: model class; it is instantiated here as `self`.
          hparams: model hyper-parameters (copied before use).
          features: dict of tensors. Reads 'inputs' and 'ps', and 'psf' when
            `hparams.apply_psf` is set.
          labels: unused.
          mode: one of the `ModeKeys` values TRAIN, EVAL or PREDICT.
          config: optional run config; `config.data_parallelism` is forwarded
            to the model when not running on TPU.
          params: unused.
          decode_hparams: forwarded to the model constructor.
          use_tpu: when True, `config.data_parallelism` is ignored.

        Returns:
          The estimator spec produced by the train / eval / predict helper
          matching `mode`.

        Raises:
          ValueError: if `mode` is not TRAIN, EVAL or PREDICT.
        """
        if mode not in [
                model_fn_lib.ModeKeys.TRAIN, model_fn_lib.ModeKeys.EVAL,
                model_fn_lib.ModeKeys.PREDICT
        ]:
            raise ValueError('Mode not recognized: %s' % mode)

        hparams = hparams_lib.copy_hparams(hparams)

        # Instantiate model
        data_parallelism = None
        if not use_tpu and config:
            data_parallelism = config.data_parallelism
        reuse = tf.get_variable_scope().reuse

        self = cls(hparams,
                   mode,
                   data_parallelism=data_parallelism,
                   decode_hparams=decode_hparams,
                   _reuse=reuse)

        generator_inputs = self.sample_noise()
        # rename inputs for clarity
        real_data = features['inputs']
        img_shape = common_layers.shape_list(real_data)[1:4]
        # Pin the static batch dimension to the configured batch size.
        real_data.set_shape([hparams.batch_size] + img_shape)

        # To satisfy the TFGAN API, real data is set to None on predict
        if mode == tf.estimator.ModeKeys.PREDICT:
            real_data = None

        # One Adam optimizer per network, sharing beta1.
        optimizers = Optimizers(
            tf.compat.v1.train.AdamOptimizer(hparams.generator_lr,
                                             hparams.beta1),
            tf.compat.v1.train.AdamOptimizer(hparams.discriminator_lr,
                                             hparams.beta1))

        # Creates tfhub modules for both generator and discriminator
        def make_discriminator_spec():
            input_layer = tf.placeholder(tf.float32, shape=[None] + img_shape)
            disc_output = self.discriminator(input_layer, None, mode)
            hub.add_signature(inputs=input_layer, outputs=disc_output)

        disc_spec = hub.create_module_spec(make_discriminator_spec)

        def make_generator_spec():
            input_layer = tf.placeholder(
                tf.float32,
                shape=[None] + common_layers.shape_list(generator_inputs)[1:])
            gen_output = self.generator(input_layer, mode)
            hub.add_signature(inputs=input_layer, outputs=gen_output)

        gen_spec = hub.create_module_spec(make_generator_spec)

        # Create the modules
        discriminator_module = hub.Module(disc_spec,
                                          name="Discriminator_Module",
                                          trainable=True)
        generator_module = hub.Module(gen_spec,
                                      name="Generator_Module",
                                      trainable=True)

        # Wraps the modules into functions expected by TF-GAN
        def generator(code, mode):
            """Generate images, then apply the PSF and add correlated noise."""
            out = generator_module(code)
            # Applying convolution by PSF convolution
            if hparams.apply_psf and 'psf' in features:
                out = convolve(out,
                               tf.cast(features['psf'][..., 0], tf.complex64))

            # Adds noise according to the provided power spectrum
            noise = tf.spectral.rfft2d(tf.random_normal(out.get_shape()[:3]))
            # Entries of 'ps' >= 9 are zeroed out; the others are mapped
            # through sqrt(exp(.)) -- presumably 'ps' is a log power
            # spectrum, TODO confirm.
            thresholded_ps = tf.where(features['ps'] >= 9,
                                      tf.zeros_like(features['ps']),
                                      tf.sqrt(tf.exp(features['ps'])))
            noise = noise * tf.cast(thresholded_ps, tf.complex64)
            out = out + tf.expand_dims(tf.spectral.irfft2d(noise), axis=-1)
            return out

        def discriminator(image, conditioning, mode):
            """Score images; `conditioning` and `mode` are ignored."""
            return discriminator_module(image)

        # Make GANModel, which encapsulates the GAN model architectures.
        gan_model = get_gan_model(mode,
                                  generator,
                                  discriminator,
                                  real_data,
                                  generator_inputs,
                                  add_summaries=self.summaries)

        # Make GANLoss, which encapsulates the losses.
        if mode in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL]:
            gan_loss = tfgan_train.gan_loss(gan_model,
                                            self.generator_loss,
                                            self.discriminator_loss,
                                            add_summaries=True)

        # Make the EstimatorSpec, which incorporates the GANModel, losses, eval
        # metrics, and optimizers (if required).
        if mode == tf.estimator.ModeKeys.TRAIN:
            get_hooks_fn = tfgan_train.get_sequential_train_hooks(
                namedtuples.GANTrainSteps(hparams.gen_steps,
                                          hparams.disc_steps))
            estimator_spec = get_train_estimator_spec(gan_model,
                                                      gan_loss,
                                                      optimizers,
                                                      get_hooks_fn,
                                                      is_chief=True)
        elif mode == tf.estimator.ModeKeys.EVAL:
            estimator_spec = get_eval_estimator_spec(gan_model, gan_loss)
        else:  # tf.estimator.ModeKeys.PREDICT
            # Register hub modules for export
            hub.register_module_for_export(generator_module, "generator")
            hub.register_module_for_export(discriminator_module,
                                           "discriminator")
            estimator_spec = get_predict_estimator_spec(gan_model)
        return estimator_spec
Example #20
0
def _mdn_model_fn(features,
                  labels,
                  nchannels,
                  n_y,
                  dropout,
                  optimizer,
                  mode,
                  loss,
                  softplus=False):
    """Estimator model function for a flow-based Poisson likelihood model.

    The network is built inside a TensorFlow Hub module: a chain of
    bijectors maps the input cube to a rate `lambda`, which parameterizes a
    Poisson distribution over the observations.

    Args:
      features: either a dict with keys 'features' and 'labels' that is fed
        to the module as is, or the input tensor itself.
      labels: observations; only used when `features` is not a dict.
      nchannels: size of the last axis of the input placeholder.
      n_y: size of the last axis of the observation placeholder.
      dropout: currently unused by this model.
      optimizer: callable invoked as `optimizer(learning_rate=...)` that
        returns an object with a `minimize` method.
      mode: a `tf.estimator.ModeKeys` value.
      loss: which module output is minimized: 'loglikelihood', 'l2' or 'l1'.
      softplus: if True, a softplus is applied to the bijector output.

    Returns:
      A `tf.estimator.EstimatorSpec`. In PREDICT mode it only carries the
      module outputs as predictions, and the module is registered for export
      under the name "likelihood".

    Raises:
      ValueError: if `loss` is not one of the supported names (outside
        PREDICT mode).
    """

    def _module_fn():
        """
        Function building the module
        """

        feature_layer = tf.placeholder(
            tf.float32,
            shape=[None, None, None, None, nchannels],
            name='input')
        obs_layer = tf.placeholder(tf.float32,
                                   shape=[None, None, None, None, n_y],
                                   name='observations')
        shift = tf.Variable(1., dtype=tf.float32, name='shift')
        scale = tf.Variable(1., dtype=tf.float32, name='scale')

        # Builds the neural network
        # ! ny and nchannel need to be the same
        # NOTE(review): `h` is not defined in this function; it has to be
        # provided by the enclosing module -- confirm.

        cube_size = tf.shape(feature_layer)[1]
        chain = tfb.Chain([
            tfp.bijectors.Affine(shift=shift, scale_identity_multiplier=scale),
            tfb.Invert(
                Squeeze3d(event_shape_in=[
                    cube_size, cube_size, cube_size, nchannels
                ])),
            iRevNetsimple(name='layer1', h=h),
            iRevNetsimple(name='layer1b', h=h),
            iRevNetsimple(name='layer2', h=h),
            iRevNetsimple(name='layer2b', h=h),
            tfb.Permute(np.arange(8)[::-1], axis=-1),
            iRevNetsimple(name='layer3', h=h),
            iRevNetsimple(name='layer3b', h=h),
            iRevNetsimple(name='layer4', h=h),
            iRevNetsimple(name='layer4b', h=h),
            tfb.Invert(
                Squeeze3d(event_shape_in=[
                    cube_size // 2, cube_size // 2, cube_size // 2, nchannels *
                    8
                ])),
            iRevNetsimple(name='layer5', h=h),
            iRevNetsimple(name='layer5b', h=h),
            iRevNetsimple(name='layer6', h=h),
            iRevNetsimple(name='layer6b', h=h),
            tfb.Permute(np.arange(64)[::-1], axis=-1),
            iRevNetsimple(name='layer7', h=h),
            iRevNetsimple(name='layer7b', h=h),
            iRevNetsimple(name='layer8', h=h),
            iRevNetsimple(name='layer8b', h=h),
            tfb.Invert(
                Squeeze3d(event_shape_in=[
                    cube_size // 4, cube_size // 4, cube_size // 4, nchannels *
                    64
                ])),
            iRevNetsimple(name='layer9', h=h, kernel_size=1),
            iRevNetsimple(name='layer9b', h=h, kernel_size=1),
            iRevNetsimple(name='layer10', h=h, kernel_size=1),
            iRevNetsimple(name='layer10b', h=h, kernel_size=1),
            tfb.Permute(np.arange(64 * 8)[::-1], axis=-1),
            iRevNetsimple(name='layer11', h=h, kernel_size=1),
            iRevNetsimple(name='layer11b', h=h, kernel_size=1),
            iRevNetsimple(name='layer12', h=h, kernel_size=1),
            iRevNetsimple(name='layer12b', h=h, kernel_size=1),
            Squeeze3d(event_shape_in=[
                cube_size // 4, cube_size // 4, cube_size // 4, nchannels * 64
            ]),
            iRevNetsimple(name='layer13', h=h),
            iRevNetsimple(name='layer13b', h=h),
            iRevNetsimple(name='layer14', h=h),
            iRevNetsimple(name='layer14b', h=h),
            tfb.Permute(np.arange(64)[::-1], axis=-1),
            iRevNetsimple(name='layer15', h=h),
            iRevNetsimple(name='layer15b', h=h),
            iRevNetsimple(name='layer16', h=h),
            iRevNetsimple(name='layer16b', h=h),
            Squeeze3d(event_shape_in=[
                cube_size // 2, cube_size // 2, cube_size // 2, nchannels * 8
            ]),
            iRevNetsimple(name='layer17', h=h),
            iRevNetsimple(name='layer17b', h=h),
            iRevNetsimple(name='layer18', h=h),
            iRevNetsimple(name='layer18b', h=h),
            tfb.Permute(np.arange(8)[::-1], axis=-1),
            iRevNetsimple(name='layer19', h=h),
            iRevNetsimple(name='layer19b', h=h),
            iRevNetsimple(name='layer20', h=h),
            iRevNetsimple(name='layer20b', h=h),
            Squeeze3d(
                event_shape_in=[cube_size, cube_size, cube_size, nchannels])
        ])

        # Define the probabilistic layer
        net = chain.forward(feature_layer, name='lambda')
        if softplus:
            net = tf.nn.softplus(net, name='lambda')
        # Small offset added to the Poisson rate.
        dist = tfd.Poisson(net + 1e-3)

        sample = tf.squeeze(dist.sample())
        loglik = dist.log_prob(obs_layer)

        # Element-wise squared and absolute errors between observations and
        # the predicted rate; reduced outside the module.
        l2 = (tf.square(tf.subtract(obs_layer, net)))
        l1 = (tf.abs(tf.subtract(obs_layer, net)))

        hub.add_signature(inputs={
            'features': feature_layer,
            'labels': obs_layer
        },
                          outputs={
                              'sample': sample,
                              'loglikelihood': loglik,
                              'lambda': net,
                              'l2': l2,
                              'l1': l1
                          })

    # Create model and register module if necessary
    spec = hub.create_module_spec(_module_fn)
    module = hub.Module(spec, trainable=True)
    if isinstance(features, dict):
        predictions = module(features, as_dict=True)
    else:
        predictions = module({
            'features': features,
            'labels': labels
        },
                             as_dict=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        hub.register_module_for_export(module, "likelihood")
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Select the per-element quantity to minimize. An unknown name used to
    # only print a message and then crash on the undefined variable below.
    if loss == 'loglikelihood':
        neg_log_likelihood = -predictions['loglikelihood']
    elif loss in ('l2', 'l1'):
        neg_log_likelihood = predictions[loss]
    else:
        raise ValueError(
            "Unknown loss %r; expected 'loglikelihood', 'l2' or 'l1'." %
            (loss, ))

    # Sum over the last axis, then average over everything else.
    neg_log_likelihood = tf.reduce_sum(neg_log_likelihood, axis=-1)
    neg_log_likelihood = tf.reduce_mean(neg_log_likelihood)

    tf.losses.add_loss(neg_log_likelihood)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    train_op = None
    eval_metric_ops = None

    # Define optimizer
    if mode == tf.estimator.ModeKeys.TRAIN:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.train.get_global_step()
            # Step-wise learning-rate schedule.
            boundaries = [10000, 20000, 40000, 50000, 60000]
            values = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 1e-6]
            learning_rate = tf.train.piecewise_constant(
                global_step, boundaries, values)
            train_op = optimizer(learning_rate=learning_rate).minimize(
                loss=total_loss, global_step=global_step)
            tf.summary.scalar('rate', learning_rate)
        tf.summary.scalar('loss', neg_log_likelihood)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # EstimatorSpec requires (value, update_op) pairs, not raw tensors.
        eval_metric_ops = {"log_p": tf.metrics.mean(neg_log_likelihood)}

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=total_loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)
Example #21
0
    def body(self, features):
        """Fit a conditional normalizing flow on the codes of a fixed encoder.

        The encoder is loaded from `hparams.encoder_module` as a
        non-trainable TensorFlow Hub module. A flow conditioned on the
        problem attributes is trained to maximize the likelihood of the
        flattened encoder output.

        In PREDICT mode only the flow is built, wrapped in its own hub module
        and registered for export as "code_sampler"; it maps a standard
        normal draw plus the conditioning attributes to a code tensor.

        Args:
          features: dict of tensors. Reads 'inputs', every key listed in the
            problem hparams `attributes`, and 'psf' when
            `hparams.encode_psf` is set.

        Returns:
          A tuple `(output, losses)`: in PREDICT mode the sampled codes and
          `{'loglikelihood': 0}`; otherwise the encoder code and
          `{'training': loss}` where `loss` is the mean negative
          log-likelihood.
        """
        hparams = self.hparams
        hparamsp = hparams.problem.get_hparams()

        x = features['inputs']
        cond = {k: features[k] for k in hparamsp.attributes}

        # Load the encoder and decoder modules
        encoder = hub.Module(hparams.encoder_module, trainable=False)

        # Static shape of the encoder output; axis 0 is treated as the batch
        # axis and the next three axes are flattened for the flow.
        code_dims = encoder.get_output_info_dict()['default'].get_shape()
        height, width, channels = (code_dims[i].value for i in (1, 2, 3))
        latent_size = height * width * channels
        code_shape = [-1, height, width, channels]

        def get_flow(inputs, is_training=True):
            """Build the flow conditioned on the stacked attributes."""
            y = tf.concat([
                tf.expand_dims(inputs[k], axis=1) for k in hparamsp.attributes
            ],
                          axis=1)
            y = tf.layers.batch_normalization(y,
                                              name="y_norm",
                                              training=is_training)
            flow = self.normalizing_flow(y, latent_size)
            return flow

        if hparams.mode == tf.estimator.ModeKeys.PREDICT:
            # Export the latent flow alone
            def flow_module_spec():
                inputs = {
                    k: tf.placeholder(tf.float32, shape=[None])
                    for k in hparamsp.attributes
                }
                random_normal = tf.placeholder(tf.float32,
                                               shape=[None, latent_size])
                flow = get_flow(inputs, is_training=False)
                # Push the base-distribution draw through the flow bijector.
                samples = flow._bijector.forward(random_normal)
                samples = tf.reshape(samples, code_shape)
                hub.add_signature(inputs={
                    **inputs, 'random_normal': random_normal
                },
                                  outputs=samples)

            flow_spec = hub.create_module_spec(flow_module_spec)
            flow = hub.Module(flow_spec, name='flow_module')
            hub.register_module_for_export(flow, "code_sampler")
            # One latent draw per entry of the first conditioning attribute.
            cond['random_normal'] = tf.random_normal(
                shape=[tf.shape(cond[hparamsp.attributes[0]])[0], latent_size])
            samples = flow(cond)
            return samples, {'loglikelihood': 0}

        # Encode the input image
        if hparams.encode_psf and 'psf' in features:
            code = encoder({'input': x, 'psf': features['psf']})
        else:
            code = encoder(x)

        # Same scope name as the exported hub module above.
        with tf.variable_scope("flow_module"):
            # NOTE(review): is_training defaults to True here, so batch
            # normalization also uses batch statistics in EVAL mode --
            # confirm this is intended.
            flow = get_flow(cond)
            loglikelihood = flow.log_prob(tf.layers.flatten(code))

        # This is the loglikelihood of a batch of images
        tf.summary.scalar('loglikelihood', tf.reduce_mean(loglikelihood))
        loss = -tf.reduce_mean(loglikelihood)
        return code, {'training': loss}
Example #22
0
def _mdn_model_fn(features, labels, n_y, n_mixture, dropout, optimizer, mode):
    """Estimator model function for a 3D convolutional Poisson model.

    The network is built inside a TensorFlow Hub module: a stack of 3D
    convolutions and wide-resnet blocks outputs a Poisson rate per element
    of the observation cube.

    Args:
      features: either a dict with keys 'features' and 'labels' that is fed
        to the module as is, or the input tensor itself.
      labels: observations; only used when `features` is not a dict.
      n_y: size of the last axis of the observation placeholder.
      n_mixture: multiplier on the number of output channels of the last
        convolution (`n_mixture * n_y`).
      dropout: passed to the wide-resnet blocks as `keep_prob`.
      optimizer: callable invoked as `optimizer(learning_rate=...)` that
        returns an object with a `minimize` method.
      mode: a `tf.estimator.ModeKeys` value.

    Returns:
      A `tf.estimator.EstimatorSpec`. In PREDICT mode it only carries the
      module outputs as predictions, and the module is registered for export
      under the name "likelihood".
    """

    # Check for training mode
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    def _module_fn():
        """
        Function building the module
        """

        # NOTE(review): `nchannels` is neither a parameter nor a local of
        # this function; it has to be provided by the enclosing module --
        # confirm.
        feature_layer = tf.placeholder(
            tf.float32,
            shape=[None, None, None, None, nchannels],
            name='input')
        obs_layer = tf.placeholder(tf.float32,
                                   shape=[None, None, None, None, n_y],
                                   name='observations')

        # Builds the neural network
        net = slim.conv3d(feature_layer,
                          16,
                          5,
                          activation_fn=tf.nn.leaky_relu,
                          padding='valid')
        net = wide_resnet(net,
                          16,
                          activation_fn=tf.nn.leaky_relu,
                          keep_prob=dropout,
                          is_training=is_training)
        net = wide_resnet(net,
                          32,
                          activation_fn=tf.nn.leaky_relu,
                          keep_prob=dropout,
                          is_training=is_training)
        net = wide_resnet(net,
                          32,
                          activation_fn=tf.nn.leaky_relu,
                          keep_prob=dropout,
                          is_training=is_training)
        net = slim.conv3d(net, 32, 3, activation_fn=tf.nn.tanh)

        # Define the probabilistic layer
        net = slim.conv3d(net, n_mixture * n_y, 1, activation_fn=tf.nn.relu)
        # The output is reshaped to the spatial size of the observations.
        cube_size = tf.shape(obs_layer)[1]
        out_rate = tf.reshape(net, [-1, cube_size, cube_size, cube_size, n_y])
        # Small offset keeps the rate strictly positive after the ReLU.
        out_rate = tf.math.add(out_rate, 1e-6, name='rate')
        pdf = tfd.Poisson(rate=out_rate)

        # Define a function for sampling, and a function for estimating the log likelihood
        sample = tf.squeeze(pdf.sample())
        loglik = pdf.log_prob(obs_layer)
        hub.add_signature(inputs={
            'features': feature_layer,
            'labels': obs_layer
        },
                          outputs={
                              'sample': sample,
                              'loglikelihood': loglik
                          })

    # Create model and register module if necessary
    spec = hub.create_module_spec(_module_fn)
    module = hub.Module(spec, trainable=True)
    if isinstance(features, dict):
        predictions = module(features, as_dict=True)
    else:
        predictions = module({
            'features': features,
            'labels': labels
        },
                             as_dict=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        hub.register_module_for_export(module, "likelihood")
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loglik = predictions['loglikelihood']
    # Compute and register loss function
    # Sum over the last axis, then average over everything else.
    neg_log_likelihood = -tf.reduce_sum(loglik, axis=-1)
    neg_log_likelihood = tf.reduce_mean(neg_log_likelihood)

    tf.losses.add_loss(neg_log_likelihood)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    train_op = None
    eval_metric_ops = None

    # Define optimizer
    if mode == tf.estimator.ModeKeys.TRAIN:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.train.get_global_step()
            # Step-wise learning-rate schedule.
            boundaries = [10000, 20000, 30000, 40000]
            values = [0.00001, 0.000005, 0.000001, 0.0000005, 0.0000001]
            learning_rate = tf.train.piecewise_constant(
                global_step, boundaries, values)
            train_op = optimizer(learning_rate=learning_rate).minimize(
                loss=total_loss, global_step=global_step)

        tf.summary.scalar('loss', neg_log_likelihood)
        tf.summary.scalar('rate', learning_rate)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # EstimatorSpec requires (value, update_op) pairs, not raw tensors.
        eval_metric_ops = {"log_p": tf.metrics.mean(neg_log_likelihood)}

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=total_loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)
Example #23
0
def autoencoder_body(self, features):
    """Customized body function for autoencoders acting on continuous images.

    This is based on tensor2tensor.models.research.AutoencoderBasic.body
    and should be compatible with most derived classes.

    The original autoencoder class relies on embedding the channels to a
    discrete vocabulary and defines the loss on that vocab. Here the
    reconstruction loss is instead expressed as an actual continuous
    likelihood, computed by `loglikelihood_fn`.

    In PREDICT mode the encoder and the decoder are each wrapped in a
    TensorFlow Hub module, registered for export as "encoder" and "decoder",
    and the function returns early with the decoded output.

    Side effects outside PREDICT mode: sets `self.is1d` and
    `self._cur_bottleneck_tensor`, and emits scalar/image summaries.

    Args:
      features: Mapping of feature tensors. `features["inputs"]` is always
        read; `features["targets_raw"]` is read outside PREDICT mode;
        `features["psf"]` is read when present and enabled through
        `hparams.encode_psf` / `hparams.apply_psf`.

    Returns:
      A 2-tuple. In PREDICT mode: `(reconstr, {"bottleneck_loss": 0.0})`.
      Otherwise: `(logits, losses)` where `logits` is the reconstruction
      reshaped to the shape of `features["targets_raw"]` and `losses` is a
      dict with keys "bottleneck_extra", "bottleneck_l2" and "training".

    Raises:
      NotImplementedError: if PSF convolution is requested
        (`hparams.apply_psf` and a "psf" feature) while
        `self.num_channels > 1`.
    """
    hparams = self.hparams
    is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN

    # Activation of the final dense layer: softplus only when explicitly
    # requested through hparams, otherwise no activation (None).
    output_activation = tf.nn.softplus if hparams.output_activation == 'softplus' else None
    # Shape of the input feature with its first dimension replaced by None,
    # used for the placeholders of the exported modules.
    input_shape = [
        None,
    ] + common_layers.shape_list(features["inputs"])[1:]

    if hparams.mode == tf.estimator.ModeKeys.PREDICT:
        # In predict mode, we also define TensorFlow Hub modules for all pieces of
        # the autoencoder
        # First build encoder spec
        def make_model_spec():
            # Encoder without PSF: placeholder -> embed -> encoder -> bottleneck.
            # Only the bottleneck output `b` is exposed in the signature;
            # `encoder_layers` and `b_loss` are not used here.
            input_layer = tf.placeholder(tf.float32, shape=input_shape)
            x = self.embed(tf.expand_dims(input_layer, -1))
            x, encoder_layers = self.encoder(x)
            b, b_loss = self.bottleneck(x)
            hub.add_signature(inputs=input_layer, outputs=b)

        def make_model_spec_psf():
            # Encoder variant whose signature takes both the image and a PSF
            # placeholder; both placeholders share `input_shape`.
            input_layer = tf.placeholder(tf.float32, shape=input_shape)
            psf_layer = tf.placeholder(tf.float32, shape=input_shape)
            x = self.embed(tf.expand_dims(input_layer, -1))

            # If we have access to the PSF, we add this information to the encoder
            if hparams.encode_psf and 'psf' in features:
                net_psf = tf.layers.conv2d(psf_layer,
                                           hparams.hidden_size // 4,
                                           5,
                                           padding='same',
                                           name="psf_embed_1")
                net_psf = common_layers.layer_norm(net_psf, name="psf_norm")
                # PSF embedding is concatenated to the image embedding along
                # the last axis before encoding.
                x, encoder_layers = self.encoder(
                    tf.concat([x, net_psf], axis=-1))
            else:
                x, encoder_layers = self.encoder(x)
            b, b_loss = self.bottleneck(x)
            hub.add_signature(inputs={
                'input': input_layer,
                'psf': psf_layer
            },
                              outputs=b)

        # Pick the PSF-aware spec when hparams.encode_psf is set; the
        # 'checkpoints' collection is dropped from the module spec.
        spec = hub.create_module_spec(
            make_model_spec_psf if hparams.encode_psf else make_model_spec,
            drop_collections=['checkpoints'])
        encoder = hub.Module(spec, name="encoder_module")
        hub.register_module_for_export(encoder, "encoder")

        # Apply the encoder module to the actual features to get the code.
        # NOTE(review): with encode_psf set this indexes features['psf']
        # unconditionally, whereas the spec above also checks
        # `'psf' in features` -- confirm 'psf' is always provided here.
        if hparams.encode_psf:
            code = encoder({
                'input': features["inputs"],
                'psf': features['psf']
            })
        else:
            code = encoder(features["inputs"])
        # Shape of the code with its first dimension replaced by None, used
        # for the decoder module's placeholder.
        b_shape = [
            None,
        ] + common_layers.shape_list(code)[1:]
        # Size passed to unbottleneck: hidden_size * 2**num_hidden_layers,
        # capped at max_hidden_size.
        res_size = self.hparams.hidden_size * 2**self.hparams.num_hidden_layers
        res_size = min(res_size, hparams.max_hidden_size)

        # Second build decoder spec
        # (this rebinds the name `make_model_spec`; the encoder spec above has
        # already been created at this point)
        def make_model_spec():
            # Decoder: placeholder -> unbottleneck -> decoder -> final dense.
            # The decoder is called with None in place of encoder layers.
            input_layer = tf.placeholder(tf.float32, shape=b_shape)
            x = self.unbottleneck(input_layer, res_size)
            x = self.decoder(x, None)
            reconstr = tf.layers.dense(x,
                                       self.num_channels,
                                       name="autoencoder_final",
                                       activation=output_activation)
            hub.add_signature(inputs=input_layer, outputs=reconstr)
            # Attach problem metadata (img_len and pixel_scale) to the
            # exported module as messages.
            hub.attach_message(
                "stamp_size",
                tf.train.Int64List(value=[hparams.problem_hparams.img_len]))
            hub.attach_message(
                "pixel_size",
                tf.train.FloatList(
                    value=[hparams.problem_hparams.pixel_scale]))

        spec = hub.create_module_spec(make_model_spec,
                                      drop_collections=['checkpoints'])
        decoder = hub.Module(spec, name="decoder_module")
        hub.register_module_for_export(decoder, "decoder")

        # Decode the code and return early; no real losses in PREDICT mode.
        reconstr = decoder(code)
        return reconstr, {"bottleneck_loss": 0.0}

    encoder_layers = None
    self.is1d = hparams.sample_width == 1
    # NOTE(review): the PREDICT branch above always returns, so unless
    # hparams.mode changes in between, this condition looks always true and
    # the `else` branch below unreachable -- confirm before relying on it.
    if (hparams.mode != tf.estimator.ModeKeys.PREDICT
            or self._encode_on_predict):
        labels = features["targets_raw"]
        # `labels_shape` and `shape` hold the same value; `labels_shape` is
        # used for the final reshape, `shape` for cropping and padding.
        labels_shape = common_layers.shape_list(labels)

        shape = common_layers.shape_list(labels)
        # The 'encoder_module' / 'decoder_module' scope names are the same
        # strings used as hub.Module names in the PREDICT branch
        # (presumably so that variable names line up -- TODO confirm).
        with tf.variable_scope('encoder_module'):
            x = self.embed(tf.expand_dims(labels, -1))

        if shape[2] == 1:
            self.is1d = True

        # Run encoder.
        with tf.variable_scope('encoder_module'):
            # If we have access to the PSF, we add this information to the encoder
            if hparams.encode_psf and 'psf' in features:
                net_psf = tf.layers.conv2d(features['psf'],
                                           hparams.hidden_size // 4,
                                           5,
                                           padding='same',
                                           name="psf_embed_1")
                net_psf = common_layers.layer_norm(net_psf, name="psf_norm")
                x, encoder_layers = self.encoder(
                    tf.concat([x, net_psf], axis=-1))
            else:
                x, encoder_layers = self.encoder(x)

        # Bottleneck.
        with tf.variable_scope('encoder_module'):
            b, b_loss = self.bottleneck(x)

        xb_loss = 0.0
        # NOTE(review): `b_shape` is assigned here but not read again in this
        # function.
        b_shape = common_layers.shape_list(b)
        self._cur_bottleneck_tensor = b
        # Unbottleneck back to the size of the encoder output's last axis.
        res_size = common_layers.shape_list(x)[-1]
        with tf.variable_scope('decoder_module'):
            b = self.unbottleneck(b, res_size)
        if not is_training:
            # Outside training, the decoder is fed the unbottlenecked code.
            x = b
        else:
            l = 2**hparams.num_hidden_layers
            warm_step = int(hparams.bottleneck_warmup_steps * 0.25 * l)
            nomix_p = common_layers.inverse_lin_decay(warm_step) + 0.01
            if common_layers.should_generate_summaries():
                tf.summary.scalar("nomix_p_bottleneck", nomix_p)
            rand = tf.random_uniform(common_layers.shape_list(x))
            # This is the distance between b and x. Having this as loss helps learn
            # the bottleneck function, but if we back-propagated to x it would be
            # minimized by just setting x=0 and b=0 -- so we don't want too much
            # of the influence of this, and we stop-gradient to not zero-out x.
            x_stop = tf.stop_gradient(x)
            xb_loss = tf.reduce_mean(
                tf.reduce_sum(tf.squared_difference(x_stop, b), axis=-1))
            # To prevent this loss from exploding we clip at 1, but anneal clipping.
            clip_max = 1.0 / common_layers.inverse_exp_decay(warm_step,
                                                             min_value=0.001)
            xb_clip = tf.maximum(tf.stop_gradient(xb_loss), clip_max)
            xb_loss *= clip_max / xb_clip
            # Element-wise mix: take b where rand < nomix_p, else keep x.
            x = tf.where(tf.less(rand, nomix_p), b, x)
    else:
        # Decode-only path: reuse the stored bottleneck tensor if there is
        # one, otherwise draw one with self.sample().
        if self._cur_bottleneck_tensor is None:
            b = self.sample()
        else:
            b = self._cur_bottleneck_tensor
        self._cur_bottleneck_tensor = b
        res_size = self.hparams.hidden_size * 2**self.hparams.num_hidden_layers
        res_size = min(res_size, hparams.max_hidden_size)

        with tf.variable_scope('decoder_module'):
            x = self.unbottleneck(b, res_size)

    # Run decoder.
    with tf.variable_scope('decoder_module'):
        x = self.decoder(x, encoder_layers)

    # Cut to the right size and mix before returning.
    res = x
    if hparams.mode != tf.estimator.ModeKeys.PREDICT:
        # Crop axes 1 and 2 of the decoder output to the label dimensions.
        res = x[:, :shape[1], :shape[2], :]

    with tf.variable_scope('decoder_module'):
        reconstr = tf.layers.dense(res,
                                   self.num_channels,
                                   name="autoencoder_final",
                                   activation=output_activation)

    # Apply channel-wise convolution with the PSF if requested
    # TODO: Handle multiple bands
    if hparams.apply_psf and 'psf' in features:
        if self.num_channels > 1:
            raise NotImplementedError
        # Pad the trailing end of axes 1 and 2 of both the reconstruction
        # (channel 0) and the PSF (index 0 of its last axis) by
        # int(psf_convolution_pad_factor * size) before the FFT.
        rec_padded = tf.pad(
            reconstr[:, :, :, 0],
            [[0, 0], [0, int(hparams.psf_convolution_pad_factor * shape[1])],
             [0, int(hparams.psf_convolution_pad_factor * shape[2])]])
        psf_padded = tf.pad(
            features['psf'][..., 0],
            [[0, 0], [0, int(hparams.psf_convolution_pad_factor * shape[1])],
             [0, int(hparams.psf_convolution_pad_factor * shape[2])]])
        # Multiply the reconstruction's real FFT by the magnitude (tf.abs) of
        # the PSF's real FFT, inverse-transform, and restore the channel axis.
        reconstr = tf.expand_dims(tf.spectral.irfft2d(
            tf.spectral.rfft2d(rec_padded) *
            tf.cast(tf.abs(tf.spectral.rfft2d(psf_padded)), tf.complex64)),
                                  axis=-1)
        # Crop the padding away again.
        reconstr = reconstr[:, :shape[1], :shape[2], :]

    # Losses.
    losses = {
        "bottleneck_extra": b_loss,
        "bottleneck_l2": hparams.bottleneck_l2_factor * xb_loss
    }

    # Reconstruction loss: mean negative log-likelihood as returned by
    # loglikelihood_fn for (labels, reconstr).
    loglik = loglikelihood_fn(labels, reconstr, features, hparams)
    targets_loss = tf.reduce_mean(-loglik)

    tf.summary.scalar("negloglik", targets_loss)
    tf.summary.scalar("bottleneck_loss", b_loss)

    losses["training"] = targets_loss
    # The returned "logits" are the reconstruction reshaped to the label shape.
    logits = tf.reshape(reconstr, labels_shape)

    image_summary("ae", reconstr)
    image_summary("input", labels)

    return logits, losses
def creates_estimator_model(images, labels, perms, num_classes, mode):
    """Build the EstimatorSpec of a patch-based self-supervised model.

    For TRAIN and EVAL the network is built directly under the 'module'
    variable scope and a softmax cross-entropy loss plus an accuracy metric
    are attached. For any other mode the network is wrapped in a TF-Hub
    module (registered for export as 'module') and only predictions are
    returned.

    Args:
      images: Image tensor; it is reshaped so that only its last three
        dimensions are kept and all leading ones are merged.
      labels: Self-supervised labels (class indices).
      perms: Patch permutations, forwarded to `apply_model`.
      num_classes: Number of different permutations (number of outputs).
      mode: One of the `tf.estimator.ModeKeys` values.

    Returns:
      The EstimatorSpec produced by `make_estimator`.
    """
    print('   +++ Mode: %s, images: %s, labels: %s' % (mode, images, labels))

    # Merge every leading dimension into one, keeping the last three.
    trailing_dims = images.get_shape().as_list()[-3:]
    images = tf.reshape(images, shape=[-1] + trailing_dims)

    train_or_eval = (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL)
    if mode not in train_or_eval:
        # Export / prediction path: build the model inside a hub module that
        # reads its input from a placeholder.
        input_shape = utils.str2intlist(
            FLAGS.get_flag_value('serving_input_shape', 'None,None,None,3'))

        def placeholder_fn():
            return tf.placeholder(shape=input_shape, dtype=tf.float32)

        apply_model_function = functools.partial(apply_model,
                                                 image_fn=placeholder_fn,
                                                 num_outputs=num_classes,
                                                 perms=perms,
                                                 make_signature=True)

        # One graph variant per tag set: training and inference.
        tags_and_args = [
            (utils.TAGS_IS_TRAINING, {'is_training': True}),
            (set(), {'is_training': False}),
        ]
        tf_hub_module_spec = hub.create_module_spec(
            apply_model_function,
            tags_and_args,
            drop_collections=['summaries'])
        tf_hub_module = hub.Module(tf_hub_module_spec,
                                   trainable=False,
                                   tags=set())
        hub.register_module_for_export(tf_hub_module, export_name='module')
        return make_estimator(mode, predictions=tf_hub_module(images))

    # Training / evaluation path: build the network directly in the graph.
    with tf.variable_scope('module'):
        logits = apply_model(
            image_fn=lambda: images,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN),
            num_outputs=num_classes,
            perms=perms,
            make_signature=False)

    # Mean cross-entropy over the batch.
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    loss = tf.reduce_mean(per_example_loss)

    def metric_fn(labels, logits):
        predicted = tf.argmax(logits, axis=-1)
        return {
            'accuracy': tf.metrics.accuracy(labels=labels,
                                            predictions=predicted)
        }

    eval_metrics = (metric_fn, [labels, logits])
    return make_estimator(mode, loss, eval_metrics, logits)