Beispiel #1
0
# Apply the same limits to both axes and draw the diagonal through them.
# `lims` is defined earlier, outside this excerpt.
plt.xlim(lims)
plt.ylim(lims)
plt.plot(lims, lims)
plt.show()




# Save model to SavedModel format
tf.saved_model.save(model, "./models_mpg")


# Convert Keras model to ConcreteFunction
# The model call is wrapped in a tf.function and then traced with the shape
# and dtype of the model's first input.
full_model = tf.function(lambda x: model(x))
full_model = full_model.get_concrete_function(
    tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype))

# Get frozen ConcreteFunction
frozen_func = convert_variables_to_constants_v2(full_model)
# NOTE(review): the GraphDef returned by this call is discarded here, so the
# statement has no visible effect within this excerpt.
frozen_func.graph.as_graph_def()

# Despite the name, `layers` holds the name of every operation in the frozen
# graph, not only Keras layers.
layers = [op.name for op in frozen_func.graph.get_operations()]
print("-" * 50)
print("Frozen model layers: ")
for layer in layers:
    print(layer)

print("-" * 50)
print("Frozen model inputs: ")
print(frozen_func.inputs)
# NOTE(review): only the label is printed here; the statement that prints the
# outputs themselves is not part of this excerpt.
print("Frozen model outputs: ")
Beispiel #2
0
class BMSHJ2018Model(tf.keras.Model):
    """Image compression model trained on a rate-distortion objective.

    The model owns an analysis/synthesis transform pair for the image, a
    hyper-analysis/hyper-synthesis pair for the side information, and a
    `tfc.NoisyDeepFactorized` prior for the hyper-latents.  Training uses
    entropy models built with ``compression=False``; `fit` afterwards builds
    the ``compression=True`` entropy models that `compress` and `decompress`
    rely on.
    """

    def __init__(self, lmbda, num_filters, num_scales, scale_min, scale_max):
        """Create the transforms, the hyperprior and the scale table function.

        Args:
            lmbda: Weight of the MSE term in the loss.
            num_filters: Passed to every transform; also the batch shape of
                the hyperprior.
            num_scales: Number of entries in the scale table.
            scale_min: Scale produced for index 0.
            scale_max: Scale produced for index ``num_scales - 1``.
        """
        super().__init__()
        self.lmbda = lmbda
        self.num_scales = num_scales

        # Scales are spaced evenly in the log domain between the two bounds.
        log_min = tf.math.log(scale_min)
        log_max = tf.math.log(scale_max)
        log_step = (log_max - log_min) / (num_scales - 1.)
        self.scale_fn = lambda i: tf.math.exp(log_min + log_step * i)

        self.analysis_transform = AnalysisTransform(num_filters)
        self.synthesis_transform = SynthesisTransform(num_filters)
        self.hyper_analysis_transform = HyperAnalysisTransform(num_filters)
        self.hyper_synthesis_transform = HyperSynthesisTransform(num_filters)
        self.hyperprior = tfc.NoisyDeepFactorized(batch_shape=(num_filters, ))
        self.build((None, None, None, 3))

    def call(self, x, training):
        """Compute the loss, the bits per pixel and the MSE for a batch.

        Returns:
            A ``(loss, bpp, mse)`` tuple where ``loss = bpp + lmbda * mse``.
        """
        latent_model = tfc.LocationScaleIndexedEntropyModel(
            tfc.NoisyNormal,
            self.num_scales,
            self.scale_fn,
            coding_rank=3,
            compression=False,
        )
        hyper_model = tfc.ContinuousBatchedEntropyModel(
            self.hyperprior,
            coding_rank=3,
            compression=False,
        )

        latents = self.analysis_transform(x)
        hyper_latents = self.hyper_analysis_transform(abs(latents))
        hyper_hat, side_bits = hyper_model(hyper_latents, training=training)
        indexes = self.hyper_synthesis_transform(hyper_hat)
        latents_hat, bits = latent_model(latents, indexes, training=training)
        reconstruction = self.synthesis_transform(latents_hat)

        # Rate: all bits (latents + side information) over all pixels, where
        # the pixel count is the product of every dimension but the last.
        pixel_count = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), bits.dtype)
        total_bits = tf.reduce_sum(bits) + tf.reduce_sum(side_bits)
        bpp = total_bits / pixel_count
        # Distortion: mean squared error between input and reconstruction.
        mse = tf.reduce_mean(tf.math.squared_difference(x, reconstruction))
        # Rate-distortion Lagrangian.
        loss = bpp + self.lmbda * mse
        return loss, bpp, mse

    def _update_metrics(self, loss, bpp, mse):
        """Feed the three running-mean metrics and return their results by name."""
        self.loss.update_state(loss)
        self.bpp.update_state(bpp)
        self.mse.update_state(mse)
        tracked = (self.loss, self.bpp, self.mse)
        return {metric.name: metric.result() for metric in tracked}

    def train_step(self, x):
        """Run one optimisation step on `x` and report the running metrics."""
        with tf.GradientTape() as tape:
            loss, bpp, mse = self(x, training=True)
        weights = self.trainable_variables
        grads = tape.gradient(loss, weights)
        self.optimizer.apply_gradients(zip(grads, weights))
        return self._update_metrics(loss, bpp, mse)

    def test_step(self, x):
        """Evaluate `x` without training and report the running metrics."""
        loss, bpp, mse = self(x, training=False)
        return self._update_metrics(loss, bpp, mse)

    def predict_step(self, x):
        """Always raises: the Keras prediction API is not supported."""
        raise NotImplementedError("Prediction API is not supported.")

    def compile(self, **kwargs):
        """Compile without Keras-managed losses/metrics and create the Mean trackers."""
        super().compile(
            loss=None,
            metrics=None,
            loss_weights=None,
            weighted_metrics=None,
            **kwargs,
        )
        self.loss = tf.keras.metrics.Mean(name="loss")
        self.bpp = tf.keras.metrics.Mean(name="bpp")
        self.mse = tf.keras.metrics.Mean(name="mse")

    def fit(self, *args, **kwargs):
        """Train via the base class, then build the compression entropy models.

        Returns:
            Whatever the base-class ``fit`` returned.
        """
        history = super().fit(*args, **kwargs)
        # Training is over, so the range coding tables can be fixed now.
        self.entropy_model = tfc.LocationScaleIndexedEntropyModel(
            tfc.NoisyNormal,
            self.num_scales,
            self.scale_fn,
            coding_rank=3,
            compression=True,
        )
        self.side_entropy_model = tfc.ContinuousBatchedEntropyModel(
            self.hyperprior,
            coding_rank=3,
            compression=True,
        )
        return history

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8),
    ])
    def compress(self, x):
        """Compress one uint8 image of shape (height, width, 3).

        Returns:
            ``(string, side_string, x_shape, y_shape, z_shape)``: the two
            compressed strings plus the spatial shapes of the image, the
            latents and the hyper-latents.
        """
        # Add a leading batch dimension, then switch to float.
        image = tf.cast(tf.expand_dims(x, 0), dtype=tf.float32)
        latents = self.analysis_transform(image)
        hyper_latents = self.hyper_analysis_transform(abs(latents))

        # Keep only the spatial dimensions (drop batch and channel axes).
        x_shape = tf.shape(image)[1:-1]
        y_shape = tf.shape(latents)[1:-1]
        z_shape = tf.shape(hyper_latents)[1:-1]

        hyper_hat = self.side_entropy_model.quantize(hyper_latents)
        indexes = self.hyper_synthesis_transform(hyper_hat)
        # Crop the indexes to the spatial size of the latents.
        indexes = indexes[:, :y_shape[0], :y_shape[1], :]

        side_string = self.side_entropy_model.compress(hyper_latents)
        string = self.entropy_model.compress(latents, indexes)
        return string, side_string, x_shape, y_shape, z_shape

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(1, ), dtype=tf.string),
        tf.TensorSpec(shape=(1, ), dtype=tf.string),
        tf.TensorSpec(shape=(2, ), dtype=tf.int32),
        tf.TensorSpec(shape=(2, ), dtype=tf.int32),
        tf.TensorSpec(shape=(2, ), dtype=tf.int32),
    ])
    def decompress(self, string, side_string, x_shape, y_shape, z_shape):
        """Rebuild a uint8 image from the values returned by `compress`."""
        hyper_hat = self.side_entropy_model.decompress(side_string, z_shape)
        indexes = self.hyper_synthesis_transform(hyper_hat)
        indexes = indexes[:, :y_shape[0], :y_shape[1], :]
        latents_hat = self.entropy_model.decompress(string, indexes)
        reconstruction = self.synthesis_transform(latents_hat)
        # Drop the batch dimension and crop to the original spatial size.
        reconstruction = reconstruction[0, :x_shape[0], :x_shape[1], :]
        # Round and clamp into the 8-bit integer range.
        return tf.saturate_cast(tf.round(reconstruction), tf.uint8)
Beispiel #3
0
        [0, 4530, 489, 1862, 78, 4956, 1]
        -> [4530, 489, 1862, 78, 4956]
    '''
    source = input_tokens[1:-1]

    return source, label_tokens


def tf_encode(data, label=None):
    """Run the Python-side `encode` function as a TensorFlow op.

    Two calling conventions are supported:

    * ``tf_encode(data)`` - `data` is indexable and holds the input sentence
      at index 0 and the label sentence at index 1 (the original convention).
    * ``tf_encode(source, label)`` - the two sentences are passed separately.
      This is how ``tf.data.Dataset.map`` calls its function when the dataset
      elements are tuples, because it unpacks tuple components into
      positional arguments.

    Args:
        data: Either the packed pair or, when `label` is given, the input
            sentence alone.
        label: The label sentence, or None when `data` is the packed pair.

    Returns:
        A ``(result_input, result_label)`` pair of int64 tensors produced by
        `encode` through ``tf.py_function``.
    """
    if label is None:
        source, target = data[0], data[1]
    else:
        source, target = data, label
    result_input, result_label = tf.py_function(encode, [source, target],
                                                [tf.int64, tf.int64])
    return result_input, result_label


# Tensor specs for two int64 arguments of rank 2 whose dimensions are both
# left unspecified (None).
# NOTE(review): presumably meant as the `input_signature` of a
# tf.function-decorated train step; no module-level use is visible here.
train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
]


def create_padding_mask(seq):
    """Return a float32 mask that is 1.0 wherever `seq` equals 0, else 0.0.

    Two singleton axes are inserted after the first axis so the result can be
    added to the attention logits.
    """
    is_zero = tf.math.equal(seq, 0)
    mask = tf.cast(is_zero, tf.float32)
    return mask[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)


def create_look_ahead_mask(size):
    """Return a (size, size) mask with 1 strictly above the diagonal, 0 elsewhere."""
    # band_part(..., -1, 0) keeps the lower triangle including the diagonal;
    # subtracting it from 1 leaves only the strictly upper triangle set.
    lower_triangle = tf.linalg.band_part(tf.ones((size, size)), -1, 0)
    return 1 - lower_triangle  # (seq_len, seq_len)
def main():
    """Train the Japanese-to-English Transformer and log progress per epoch.

    Steps, in order:

    1. Load both corpora and derive the positional-encoding lengths from the
       longest sentence on each side (plus 2 for [BOS] and [EOS]).
    2. Build the shuffled, padded and prefetched training dataset.
    3. Create the Transformer, the Adam optimizer with `CustomSchedule`, the
       masked loss and the running metrics.
    4. Restore the latest checkpoint when one exists.
    5. Train for `EPOCHS` epochs.  After each epoch, `test_translate` is run
       on a fixed list of Japanese sentences and loss, accuracy and that
       result are written as TensorBoard summaries.  A checkpoint is saved
       whenever the epoch counter is a multiple of 5.

    The epoch counter lives in the checkpointed variable ``ckpt.epoch``.  When
    a checkpoint is restored the counter continues from the restored value and
    `EPOCHS` further epochs are run.
    """
    # load data from the data files
    jpn_data = get_data(jpn_txt_path)
    en_data = get_data(en_txt_path)
    #train_jpn, val_jpn, train_en, val_en = train_test_split(jpn_data,
    #                                                        en_data,
    #                                                        test_size=TR_TE_RATIO)
    JPN_MAX_LEN = get_max_len(jpn_data)
    EN_MAX_LEN = get_max_len(en_data)
    # include [BOS] and [EOS] to each max len above
    JPN_MAX_LEN += 2
    EN_MAX_LEN += 2

    test_jpn_data = [
        "今日は夜ごはん何にしようかな?", "ここ最近暑い日がずっと続きますね。", "来年は本当にオリンピックが開催されるでしょうか?",
        "将来の夢はエンジニアになることです。", "子供のころはあの公園でたくさん遊んだなー。", "今日は早く帰りたいな。",
        "明日は父の日だ。", "試験勉強はなかなか大変です。", "来年はおいしいお店に行きたいです。",
        "あそこの家にはまだ誰か住んでいますか?"
    ]

    #test_en_data = [[""],
    #                [""],
    #                [""],
    #                [""],
    #                [""]]

    # preprocess for the train dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((jpn_data, en_data))
    train_dataset = train_dataset.map(tf_encode)
    train_dataset = train_dataset.cache()
    train_dataset = train_dataset.shuffle(
        len(jpn_data)).padded_batch(BATCH_SIZE)
    train_dataset = train_dataset.prefetch(AUTOTUNE)
    ## preprocess for the validation dataset
    #val_dataset = tf.data.Dataset.from_tensor_slices((val_jpn, val_en))
    #val_dataset = val_dataset.map(tf_encode)
    #val_dataset = val_dataset.padded_batch(BATCH_SIZE)
    # preprocess for the test data
    #test_dataset = tf.data.Dataset.from_tensor_slices((test_jpn_data, test_en_data))
    #test_dataset = test_dataset.map(tf_encode)
    #test_dataset = test_dataset.cache()
    #test_dataset = test_dataset.padded_batch(len(test_jpn_data))
    #test_dataset = test_dataset.prefetch(AUTOTUNE)

    # instantiate the Transformer model
    transformer = Transformer(num_layers=num_layers,
                              d_model=d_model,
                              num_heads=num_heads,
                              dff=dff,
                              input_vocab_size=jpn_vocab_size,
                              target_vocab_size=en_vocab_size,
                              pe_input=JPN_MAX_LEN,
                              pe_target=EN_MAX_LEN)
    # set learning rate, optimizer, loss and metrics
    learning_rate = CustomSchedule(d_model)
    optimizer = Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    loss_object = SparseCategoricalCrossentropy(from_logits=True,
                                                reduction="none")

    def loss_function(label, pred):
        """Cross-entropy averaged over the positions whose label is not 0."""
        mask = tf.math.logical_not(tf.math.equal(label, 0))
        loss_ = loss_object(label, pred)
        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask
        return tf.reduce_sum(loss_) / tf.reduce_sum(mask)

    train_loss = Mean(name="train_loss")
    train_accuracy = SparseCategoricalAccuracy(name="train_accuracy")

    # The @tf.function trace-compiles train_step into a TF graph for faster
    # execution. The function specializes to the precise shape of the argument
    # tensors. To avoid re-tracing due to the variable sequence lengths or
    # variable batch sizes (usually the last batch is smaller), use
    # input_signature to specify more generic shapes.
    train_step_signature = [
        tf.TensorSpec(shape=(None, None), dtype=tf.int64),
        tf.TensorSpec(shape=(None, None), dtype=tf.int64)
    ]

    @tf.function(input_signature=train_step_signature)
    def train_step(inp, tar):
        """Run one optimisation step and update the running loss/accuracy."""
        # The decoder input drops the last token and the label drops the
        # first, so each position is trained to predict the following token.
        tar_inp = tar[:, :-1]
        tar_label = tar[:, 1:]
        training = True

        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            inp, tar_inp)
        with tf.GradientTape() as tape:
            predictions, _ = transformer(inp, tar_inp, training,
                                         enc_padding_mask, combined_mask,
                                         dec_padding_mask)
            loss = loss_function(tar_label, predictions)

        gradients = tape.gradient(loss, transformer.trainable_variables)
        optimizer.apply_gradients(
            zip(gradients, transformer.trainable_variables))

        train_loss(loss)
        train_accuracy(tar_label, predictions)

    # set the checkpoint and the checkpoint manager
    ckpt = tf.train.Checkpoint(epoch=tf.Variable(0),
                               transformer=transformer,
                               optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_path, max_to_keep=5)
    # if a checkpoint exists, restore the latest checkpoint.
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print("Latest checkpoint restored.")

    # set up summary writers
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = os.path.join(log_path, current_time, "train")
    #test_log_dir = os.path.join(log_path, current_time, "validation")
    summary_writer = tf.summary.create_file_writer(log_dir)
    #test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    # `ckpt.epoch` must not be the loop target: that would rebind the
    # attribute to a plain int, which has no `assign_add` and is not saved
    # with the checkpoint.
    for _ in range(EPOCHS):
        start = time.time()
        ckpt.epoch.assign_add(1)
        # Plain Python int, used for printing, summary steps and the modulo.
        epoch = int(ckpt.epoch.numpy())
        train_loss.reset_states()
        train_accuracy.reset_states()

        # inp: Japanese, tar: English
        for (batch, (inp, tar)) in enumerate(train_dataset):
            train_step(inp, tar)

            if batch % 100 == 0:
                print("Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}".format(
                    epoch, batch, train_loss.result(),
                    train_accuracy.result()))

        # output the training log for every epoch
        print("Epoch {} Loss {:.4f} Accuracy {:.4f}".format(
            epoch, train_loss.result(), train_accuracy.result()))
        print("Time taken for 1 epoch: {:.3f} secs\n".format(time.time() -
                                                             start))

        # check how the model performs for every epoch
        test_summary_log = test_translate(test_jpn_data, EN_MAX_LEN,
                                          transformer)

        with summary_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), step=epoch)
            tf.summary.scalar("accuracy",
                              train_accuracy.result(),
                              step=epoch)
            tf.summary.text("test_text", test_summary_log, step=epoch)

        if epoch % 5 == 0:
            ckpt_save_path = ckpt_manager.save()
            print("Saving checkpoint for epoch {} at {}".format(
                epoch, ckpt_save_path))
 def input_spec(self):
   """Return an OrderedDict with one entry, 'temp': a float32 spec of shape [None]."""
   temp_spec = tf.TensorSpec([None], tf.float32)
   return collections.OrderedDict(temp=temp_spec)
def _create_input_spec():
  """Return a `_Batch` of specs: float32 `x` of [None, 784], int64 `y` of [None, 1]."""
  features = tf.TensorSpec(shape=[None, 784], dtype=tf.float32)
  labels = tf.TensorSpec(shape=[None, 1], dtype=tf.int64)
  return _Batch(x=features, y=labels)
Beispiel #7
0
    def load_img_and_make_heatmaps(filename, corners):
        """Load a JPEG and build the heatmap target for its corner points.

        One Gaussian-like heatmap is rendered per corner at the target
        resolution and a polygon channel drawn by `draw_poly` is appended as
        the last channel.  The stack is divided by its global maximum and
        resized to half the target resolution, and only channel index 4 is
        returned.
        NOTE(review): channel 4 is the polygon channel only if `corners` holds
        exactly four points - confirm against the caller.

        Args:
            filename: Path of the JPEG file to read.
            corners: One row per corner with two coordinate columns; the
                columns are swapped before use.
                NOTE(review): presumably (x, y) on input - confirm.

        Returns:
            ``(image, heatmaps)``: the image resized with padding to
            ``(target_height, target_width)`` and the selected heatmap channel
            at ``(target_height // 2, target_width // 2)``.
        """
        image = tf.io.read_file(filename)
        image = tf.io.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.uint8)

        image_shape = tf.shape(image)

        # Size of the image after resizing, and the padding offsets.
        resized_height, resized_width, padding_y, padding_x = get_padding_params(
            image_shape[0], image_shape[1], target_height, target_width)

        # Swap the two coordinate columns and switch to float for scaling.
        corners = tf.reverse(corners, axis=[1])
        corners = tf.cast(corners, tf.float32)

        # Map the corner coordinates into the resized-and-padded image.
        scale_y = tf.cast((resized_height / image_shape[0]), tf.float32)
        scale_x = tf.cast((resized_width / image_shape[1]), tf.float32)
        corners_y = tf.cast(
            corners[:, 0] * scale_y + tf.cast(padding_y, tf.float32),
            tf.int32)
        corners_x = tf.cast(
            corners[:, 1] * scale_x + tf.cast(padding_x, tf.float32),
            tf.int32)
        corners = tf.stack([corners_y, corners_x], axis=1)

        def create_heatmap(corner):
            """Return exp(-d / sigma**2), d = squared pixel distance to `corner`."""
            rows = tf.range(0, target_height)
            cols = tf.range(0, target_width)

            # Grid of (y, x) indices of shape (target_height, target_width, 2).
            grid = tf.stack(tf.meshgrid(rows, cols, indexing='ij'), axis=2)

            offset = tf.cast(grid - corner, tf.float32)
            squared_distance = tf.norm(offset, axis=2)**2
            return tf.exp(-squared_distance /
                          (tf.cast(sigma, tf.float32)**2))

        heatmaps = tf.map_fn(create_heatmap,
                             corners,
                             fn_output_signature=tf.TensorSpec(
                                 shape=(target_height, target_width),
                                 dtype=tf.float32))

        # Move the per-corner axis from the front to the channel position.
        heatmaps = tf.stack(tf.unstack(heatmaps, axis=0), axis=2)

        # Polygon channel: reorder the corners, swap the coordinate columns
        # back and let `draw_poly` draw onto an all-zero canvas.
        corners_reordered = tf.gather(corners, [0, 1, 3, 2])
        corners_reordered = tf.reverse(corners_reordered, axis=[1])
        blank_canvas = np.zeros((target_height, target_width), np.float32)
        [
            poly,
        ] = tf.py_function(draw_poly, [blank_canvas, corners_reordered],
                           [tf.float32])

        poly = tf.convert_to_tensor(poly)
        poly = tf.expand_dims(poly, axis=2)
        heatmaps = tf.concat([heatmaps, poly], axis=2)

        # Divide by the global maximum, then downsample to half resolution.
        heatmaps = heatmaps / tf.reduce_max(heatmaps)
        heatmaps = tf.image.resize(heatmaps,
                                   (target_height // 2, target_width // 2))

        # Keep only channel index 4.
        heatmaps = tf.gather(heatmaps, [4], axis=2)

        image = tf.image.resize_with_pad(image, target_height, target_width)

        return image, heatmaps
Beispiel #8
0
 def add_batch_size(ts):
     """Return a TensorSpec like `ts` with FLAGS.inference_batch_size prepended to its shape."""
     batched_shape = [FLAGS.inference_batch_size, *ts.shape]
     return tf.TensorSpec(batched_shape, ts.dtype, ts.name)
Beispiel #9
0
 def inputs(self):
     """Return the two input specs: 'input' (float32 image batch) and 'label' (int32)."""
     image_spec = tf.TensorSpec([None, self.size, self.size, 3], tf.float32,
                                'input')
     label_spec = tf.TensorSpec([None], tf.int32, 'label')
     return [image_spec, label_spec]
Beispiel #10
0
class ActivityTracker(tf.Module):
    """Stateful LSTM classifier over six activities, with learn/predict functions.

    `learn` and `predict` are ``tf.function``s with fixed input signatures.
    The LSTM is stateful, and `learn` resets its state whenever the activity
    label differs from the one seen in the previous call.
    """

    def __init__(self):
        super().__init__()

        self.num_classes = 6  # activities in the training set
        activity_names = [
            "Walking",
            "Jogging",
            "Upstairs",
            "Downstairs",
            "Sitting",
            "Standing",
        ]
        # Maps class id -> activity name; ids not in the table give "Unknown".
        table_initializer = tf.lookup.KeyValueTensorInitializer(
            keys=tf.range(self.num_classes, dtype=tf.int32),
            values=activity_names,
        )
        self.mapping = tf.lookup.StaticHashTable(table_initializer, "Unknown")

        self.num_features = 3  # sensor (x,y,z)
        self.batch_size = 32

        # Example record:
        # 33,Jogging,49106062271000,5.012288,11.264028,0.95342433;
        model_input = k.layers.Input(
            shape=(1, self.num_features),
            batch_size=self.batch_size,
        )
        # stateful=True: the LSTM keeps its state between calls.
        recurrent = k.layers.LSTM(64, stateful=True)
        classifier = k.layers.Dense(self.num_classes)
        self._model = k.Sequential([model_input, recurrent, classifier])

        self._global_step = tf.Variable(0, dtype=tf.int32, trainable=False)
        self._optimizer = k.optimizers.SGD(learning_rate=1e-4)
        # Sparse: labels are fed as integer class ids.
        # from_logits: the Dense layer has a linear activation, so its
        # unscaled output goes straight into the loss.
        self._loss = k.losses.SparseCategoricalCrossentropy(from_logits=True)

        self._last_tracked_activity = tf.Variable(-1, dtype=tf.int32, trainable=False)

    @tf.function(
        input_signature=[
            tf.TensorSpec(shape=(None, 1, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(None,), dtype=tf.int32),
        ]
    )
    def learn(self, sensor_data, labels):
        """Run one training step on a batch that shares a single activity label.

        Returns:
            ``{"loss": loss}`` for this step.
        """
        current_activity = labels[0]

        # Every label in the batch must equal the first one.
        tf.assert_equal(labels, tf.zeros_like(labels) + current_activity)

        # A change of activity means this update is unrelated to the previous
        # one, so the recurrent state has to be reset.
        if tf.not_equal(self._last_tracked_activity, current_activity):
            tf.print(
                "Resetting states. Was: ",
                self._last_tracked_activity,
                " is ",
                current_activity,
            )
            self._last_tracked_activity.assign(current_activity)
            self._model.reset_states()

        self._global_step.assign_add(1)
        with tf.GradientTape() as tape:
            logits = self._model(sensor_data)
            loss = self._loss(labels, logits)
            tf.print(self._global_step, ": loss: ", loss)

        weights = self._model.trainable_variables
        gradient = tape.gradient(loss, weights)
        self._optimizer.apply_gradients(zip(gradient, weights))
        return {"loss": loss}

    @tf.function(input_signature=[tf.TensorSpec(shape=(None, 1, 3), dtype=tf.float32)])
    def predict(self, sensor_data):
        """Print the predicted activity names and return the predicted class ids.

        Returns:
            ``{"predictions": predicted}`` with int32 class ids.
        """
        logits = self._model(sensor_data)
        predicted = tf.cast(tf.argmax(logits, axis=-1), tf.int32)
        tf.print(self.mapping.lookup(predicted))
        return {"predictions": predicted}
Beispiel #11
0
def learner_loop(create_env_fn, create_agent_fn, create_optimizer_fn):
    """Main learner loop.

  Args:
    create_env_fn: Callable that must return a newly created environment. The
      callable takes the task ID as argument - an arbitrary task ID of 0 will be
      passed by the learner. The returned environment should follow GYM's API.
      It is only used for inferring tensor shapes. This environment will not be
      used to generate experience.
    create_agent_fn: Function that must create a new tf.Module with the neural
      network that outputs actions and new agent state given the environment
      observations and previous agent state. See dmlab.agents.ImpalaDeep for an
      example. The factory function takes as input the environment output specs
      and the number of possible actions in the env.
    create_optimizer_fn: Function that takes the final iteration as argument
      and must return a tf.keras.optimizers.Optimizer and a
      tf.keras.optimizers.schedules.LearningRateSchedule.
  """
    logging.info('Starting learner loop')
    validate_config()
    settings = utils.init_learner(FLAGS.num_training_tpus)
    strategy, inference_devices, training_strategy, encode, decode = settings
    env = create_env_fn(0)
    env_output_specs = utils.EnvOutput(
        tf.TensorSpec([], tf.float32, 'reward'),
        tf.TensorSpec([], tf.bool, 'done'),
        tf.TensorSpec(env.observation_space.shape, env.observation_space.dtype,
                      'observation'),
    )
    action_specs = tf.TensorSpec([], tf.int32, 'action')
    num_actions = env.action_space.n
    agent_input_specs = (action_specs, env_output_specs)

    # Initialize agent and variables.
    agent = create_agent_fn(env_output_specs, num_actions)
    target_agent = create_agent_fn(env_output_specs, num_actions)
    initial_agent_state = agent.initial_state(1)
    agent_state_specs = tf.nest.map_structure(
        lambda t: tf.TensorSpec(t.shape[1:], t.dtype), initial_agent_state)
    input_ = tf.nest.map_structure(
        lambda s: tf.zeros([1] + list(s.shape), s.dtype), agent_input_specs)
    input_ = encode(input_)

    with strategy.scope():

        @tf.function
        def create_variables(*args):
            return agent(*decode(args))

        @tf.function
        def create_target_agent_variables(*args):
            return target_agent(*decode(args))

        # The first call to Keras models to create variables for agent and target.
        initial_agent_output, _ = create_variables(input_, initial_agent_state)
        create_target_agent_variables(input_, initial_agent_state)

        @tf.function
        def update_target_agent():
            """Synchronizes training and target agent variables."""
            variables = agent.trainable_variables
            target_variables = target_agent.trainable_variables
            assert len(target_variables) == len(variables), (
                'Mismatch in number of net tensors: {} != {}'.format(
                    len(target_variables), len(variables)))
            for target_var, source_var in zip(target_variables, variables):
                target_var.assign(source_var)

        # Create optimizer.
        iter_frame_ratio = (get_replay_insertion_batch_size() *
                            FLAGS.unroll_length * FLAGS.num_action_repeats)
        final_iteration = int(
            math.ceil(FLAGS.total_environment_frames / iter_frame_ratio))
        optimizer, learning_rate_fn = create_optimizer_fn(final_iteration)

        iterations = optimizer.iterations
        optimizer._create_hypers()
        optimizer._create_slots(agent.trainable_variables)

        # ON_READ causes the replicated variable to act as independent variables for
        # each replica.
        temp_grads = [
            tf.Variable(tf.zeros_like(v),
                        trainable=False,
                        synchronization=tf.VariableSynchronization.ON_READ)
            for v in agent.trainable_variables
        ]

    @tf.function
    def minimize(iterator):
        """Computes and applies gradients.

    Args:
      iterator: An iterator of distributed dataset that produces `PerReplica`.

    Returns:
      A tuple:
        - priorities, the new priorities. Shape <float32>[batch_size].
        - indices, the indices for updating priorities. Shape
        <int32>[batch_size].
        - gradient_norm_before_clip, a scalar.
    """
        data = next(iterator)

        def compute_gradients(args):
            """A function to pass to `Strategy` for gradient computation."""
            args = decode(args, data)
            args = tf.nest.pack_sequence_as(SampledUnrolls(unroll_specs, 0, 0),
                                            args)
            with tf.GradientTape() as tape:
                # loss: [batch_size]
                # priorities: [batch_size]
                loss, priorities = compute_loss_and_priorities(
                    agent,
                    target_agent,
                    args.unrolls.agent_state,
                    args.unrolls.prev_actions,
                    args.unrolls.env_outputs,
                    args.unrolls.agent_outputs,
                    gamma=FLAGS.discounting,
                    burn_in=FLAGS.burn_in)
                loss = tf.reduce_mean(loss * args.importance_weights)
            grads = tape.gradient(loss, agent.trainable_variables)
            gradient_norm_before_clip = tf.linalg.global_norm(grads)
            if FLAGS.clip_norm:
                grads, _ = tf.clip_by_global_norm(
                    grads, FLAGS.clip_norm, use_norm=gradient_norm_before_clip)

            for t, g in zip(temp_grads, grads):
                t.assign(g)

            return loss, priorities, args.indices, gradient_norm_before_clip

        loss, priorities, indices, gradient_norm_before_clip = (
            training_strategy.experimental_run_v2(compute_gradients, (data, )))
        loss = training_strategy.experimental_local_results(loss)[0]

        def apply_gradients(loss):
            optimizer.apply_gradients(
                zip(temp_grads, agent.trainable_variables))
            return loss

        loss = strategy.experimental_run_v2(apply_gradients, (loss, ))

        # convert PerReplica to a Tensor
        if not isinstance(priorities, tf.Tensor):

            priorities = tf.reshape(tf.stack(priorities.values), [-1])
            indices = tf.reshape(tf.stack(indices.values), [-1])
            gradient_norm_before_clip = tf.reshape(
                tf.stack(gradient_norm_before_clip.values), [-1])
            gradient_norm_before_clip = tf.reduce_max(
                gradient_norm_before_clip)

        return loss, priorities, indices, gradient_norm_before_clip

    agent_output_specs = tf.nest.map_structure(
        lambda t: tf.TensorSpec(t.shape[1:], t.dtype), initial_agent_output)
    # Logging.
    summary_writer = tf.summary.create_file_writer(FLAGS.logdir,
                                                   flush_millis=20000,
                                                   max_queue=1000)

    # Setup checkpointing and restore checkpoint.

    ckpt = tf.train.Checkpoint(agent=agent,
                               target_agent=target_agent,
                               optimizer=optimizer)
    manager = tf.train.CheckpointManager(ckpt,
                                         FLAGS.logdir,
                                         max_to_keep=1,
                                         keep_checkpoint_every_n_hours=6)
    last_ckpt_time = 0  # Force checkpointing of the initial model.
    if manager.latest_checkpoint:
        logging.info('Restoring checkpoint: %s', manager.latest_checkpoint)
        ckpt.restore(manager.latest_checkpoint).assert_consumed()
        last_ckpt_time = time.time()

    server = grpc.Server([FLAGS.server_address])

    # Buffer of incomplete unrolls. Filled during inference with new transitions.
    # This only contains data from training actors.
    store = utils.UnrollStore(
        get_num_training_actors(),
        FLAGS.unroll_length,
        (action_specs, env_output_specs, agent_output_specs),
        num_overlapping_steps=FLAGS.burn_in)
    actor_run_ids = utils.Aggregator(FLAGS.num_actors,
                                     tf.TensorSpec([], tf.int64, 'run_ids'))
    info_specs = (
        tf.TensorSpec([], tf.int64, 'episode_num_frames'),
        tf.TensorSpec([], tf.float32, 'episode_returns'),
        tf.TensorSpec([], tf.float32, 'episode_raw_returns'),
    )
    actor_infos = utils.Aggregator(FLAGS.num_actors, info_specs)

    # First agent state in an unroll.
    first_agent_states = utils.Aggregator(FLAGS.num_actors, agent_state_specs)

    # Current agent state and action.
    agent_states = utils.Aggregator(FLAGS.num_actors, agent_state_specs)
    actions = utils.Aggregator(FLAGS.num_actors, action_specs)

    unroll_specs = Unroll(agent_state_specs,
                          tf.TensorSpec([], tf.float32, 'priority'),
                          *store.unroll_specs)
    # Queue of complete unrolls. Filled by the inference threads, and consumed by
    # the tf.data.Dataset thread.
    unroll_queue = utils.StructuredFIFOQueue(FLAGS.unroll_queue_max_size,
                                             unroll_specs)
    episode_info_specs = EpisodeInfo(
        *(info_specs + (tf.TensorSpec([], tf.int32, 'actor_ids'), )))
    info_queue = utils.StructuredFIFOQueue(-1, episode_info_specs)

    replay_buffer = utils.PrioritizedReplay(FLAGS.replay_buffer_size,
                                            unroll_specs,
                                            FLAGS.importance_sampling_exponent)

    def add_batch_size(ts):
        return tf.TensorSpec([FLAGS.inference_batch_size] + list(ts.shape),
                             ts.dtype, ts.name)

    inference_iteration = tf.Variable(-1)
    inference_specs = (
        tf.TensorSpec([], tf.int32, 'actor_id'),
        tf.TensorSpec([], tf.int64, 'run_id'),
        env_output_specs,
        tf.TensorSpec([], tf.float32, 'raw_reward'),
    )
    inference_specs = tf.nest.map_structure(add_batch_size, inference_specs)

    @tf.function(input_signature=inference_specs)
    def inference(actor_ids, run_ids, env_outputs, raw_rewards):
        """Agent inference.

        This evaluates the agent policy on the provided environment data
        (reward, done, observation), and stores appropriate data to feed the
        main training loop.

        Args:
          actor_ids: <int32>[inference_batch_size], the actor task IDs (in
            range [0, num_tasks)).
          run_ids: <int64>[inference_batch_size], the actor run IDs. Actors
            generate a random int64 run id at startup, so this can be used to
            detect the actor jobs that restarted.
          env_outputs: Follows env_output_specs, but with the
            inference_batch_size added as first dimension. These are the
            actual environment outputs (reward, done, observation).
          raw_rewards: <float32>[inference_batch_size], representing the raw
            reward of each step.

        Returns:
          A tensor <int32>[inference_batch_size] with one action for each
          actor.
        """
        # Reset the actors that had their first run or crashed.
        previous_run_ids = actor_run_ids.read(actor_ids)
        actor_run_ids.replace(actor_ids, run_ids)
        # Batch positions whose stored run id differs from the incoming one.
        reset_indices = tf.where(tf.not_equal(previous_run_ids, run_ids))[:, 0]
        actors_needing_reset = tf.gather(actor_ids, reset_indices)
        # Only print when at least one actor actually needs a reset.
        if tf.not_equal(tf.shape(actors_needing_reset)[0], 0):
            tf.print('Actors needing reset:', actors_needing_reset)
        actor_infos.reset(actors_needing_reset)
        # The unroll store is only reset for the training actors among those
        # needing a reset.
        store.reset(
            tf.gather(actors_needing_reset,
                      tf.where(is_training_actor(actors_needing_reset))[:, 0]))
        initial_agent_states = agent.initial_state(
            tf.shape(actors_needing_reset)[0])
        first_agent_states.replace(actors_needing_reset, initial_agent_states)
        agent_states.replace(actors_needing_reset, initial_agent_states)
        actions.reset(actors_needing_reset)

        # Update steps and return.
        # Rewards are accumulated first, so the info read for a finished
        # episode below includes this step's reward. NOTE(review): the three
        # accumulated fields are presumably (frames, return, raw return) --
        # confirm against the specs used to build `actor_infos`.
        actor_infos.add(actor_ids, (0, env_outputs.reward, raw_rewards))
        done_ids = tf.gather(actor_ids, tf.where(env_outputs.done)[:, 0])
        done_episodes_info = actor_infos.read(done_ids)
        info_queue.enqueue_many(
            EpisodeInfo(*(done_episodes_info + (done_ids, ))))
        actor_infos.reset(done_ids)
        # The first field is advanced only after finished episodes were read
        # and reset, so this increment is not part of the info enqueued above.
        actor_infos.add(actor_ids, (FLAGS.num_action_repeats, 0., 0.))

        # Inference.
        prev_actions = actions.read(actor_ids)
        input_ = encode((prev_actions, env_outputs))
        prev_agent_states = agent_states.read(actor_ids)

        # Builds, for one device, the zero-argument branch function that
        # `tf.switch_case` below expects; the branch runs the agent under
        # `tf.device(inference_device)`.
        def make_inference_fn(inference_device):
            def device_specific_inference_fn():
                with tf.device(inference_device):

                    @tf.function
                    def agent_inference(*args):
                        return agent(*decode(args))

                    return agent_inference(input_, prev_agent_states)

            return device_specific_inference_fn

        # Distribute the inference calls among the inference cores.
        # The counter is incremented on every call and taken modulo the number
        # of devices, so consecutive calls cycle through `inference_devices`.
        branch_index = inference_iteration.assign_add(1) % len(
            inference_devices)
        agent_outputs, curr_agent_states = tf.switch_case(
            branch_index, {
                i: make_inference_fn(inference_device)
                for i, inference_device in enumerate(inference_devices)
            })

        # Replace the agent's action with the result of
        # `apply_epsilon_greedy` (defined outside this function).
        agent_outputs = agent_outputs._replace(action=apply_epsilon_greedy(
            agent_outputs.action, actor_ids, get_num_training_actors(),
            FLAGS.num_eval_actors, FLAGS.eval_epsilon, num_actions))

        # Append the latest outputs to the unroll, only for experience coming from
        # training actors (IDs < num_training_actors), and insert completed unrolls
        # in queue.
        # <int64>[num_training_actors]
        training_indices = tf.where(is_training_actor(actor_ids))[:, 0]
        training_actor_ids = tf.gather(actor_ids, training_indices)
        training_prev_actions, training_env_outputs, training_agent_outputs = (
            tf.nest.map_structure(lambda s: tf.gather(s, training_indices),
                                  (prev_actions, env_outputs, agent_outputs)))

        append_to_store = (training_prev_actions, training_env_outputs,
                           training_agent_outputs)
        completed_ids, completed_unrolls = store.append(
            training_actor_ids, append_to_store)
        _, unrolled_env_outputs, unrolled_agent_outputs = completed_unrolls
        # Agent state recorded for the start of each completed unroll.
        unrolled_agent_states = first_agent_states.read(completed_ids)

        # Only use the suffix of the unrolls that is actually used for training. The
        # prefix is only used for burn-in of agent state at training time.
        _, agent_outputs_suffix = utils.split_structure(
            utils.make_time_major(unrolled_agent_outputs), FLAGS.burn_in)
        _, env_outputs_suffix = utils.split_structure(
            utils.make_time_major(unrolled_env_outputs), FLAGS.burn_in)
        _, initial_priorities = compute_loss_and_priorities_from_agent_outputs(
            # We don't use the outputs from a separated target network for computing
            # initial priorities.
            agent_outputs_suffix,
            agent_outputs_suffix,
            env_outputs_suffix,
            agent_outputs_suffix,
            gamma=FLAGS.discounting)

        unrolls = Unroll(unrolled_agent_states, initial_priorities,
                         *completed_unrolls)
        unroll_queue.enqueue_many(unrolls)
        # `agent_states` has not yet been overwritten with this call's new
        # states (that happens just below), so the state stored as the start
        # of the next unroll is the one the agent had before this step.
        first_agent_states.replace(completed_ids,
                                   agent_states.read(completed_ids))

        # Update current state.
        agent_states.replace(actor_ids, curr_agent_states)
        actions.replace(actor_ids, agent_outputs.action)

        # Return environment actions to actors.
        return agent_outputs.action

    with strategy.scope():
        server.bind(inference, batched=True)
    server.start()

    # Execute learning and track performance.
    with summary_writer.as_default(), \
      concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        log_future = executor.submit(lambda: None)  # No-op future.
        tf.summary.experimental.set_step(iterations * iter_frame_ratio)
        dataset = create_dataset(unroll_queue, replay_buffer,
                                 training_strategy, FLAGS.batch_size,
                                 FLAGS.priority_exponent, encode)
        it = iter(dataset)

        last_num_env_frames = iterations * iter_frame_ratio
        last_log_time = time.time()
        max_gradient_norm_before_clip = 0.
        while iterations < final_iteration:
            num_env_frames = iterations * iter_frame_ratio
            tf.summary.experimental.set_step(num_env_frames)

            if iterations.numpy() % FLAGS.update_target_every_n_step == 0:
                update_target_agent()

            # Save checkpoint.
            current_time = time.time()
            if current_time - last_ckpt_time >= FLAGS.save_checkpoint_secs:
                manager.save()
                last_ckpt_time = current_time

            def log(num_env_frames):
                """Drain the episode-info queue and report each finished episode.

                Every dequeued episode is written to the log; episodes that do
                not come from a training actor are additionally exported as
                `eval/*` scalar summaries at step `num_env_frames`.
                """
                summary_writer.set_as_default()
                tf.summary.experimental.set_step(num_env_frames)
                pending = info_queue.size()
                finished = info_queue.dequeue_many(pending)
                for frames, episode_return, _, actor_id in zip(*finished):
                    from_training_actor = is_training_actor(actor_id)
                    if from_training_actor:
                        actor_kind = 'training'
                    else:
                        actor_kind = 'eval'
                    logging.info(
                        'Return: %f Frames: %i Actor id: %i (%s) Iteration: %i',
                        episode_return, frames, actor_id, actor_kind,
                        iterations.numpy())
                    if from_training_actor:
                        continue
                    tf.summary.scalar('eval/episode_return', episode_return)
                    tf.summary.scalar('eval/episode_frames', frames)

            log_future.result()  # Raise exception if any occurred in logging.
            log_future = executor.submit(log, num_env_frames)

            _, priorities, indices, gradient_norm = minimize(it)

            replay_buffer.update_priorities(indices, priorities)
            # Max of gradient norms (before clipping) since last tf.summary export.
            max_gradient_norm_before_clip = max(gradient_norm.numpy(),
                                                max_gradient_norm_before_clip)
            if current_time - last_log_time >= 120:
                df = tf.cast(num_env_frames - last_num_env_frames, tf.float32)
                dt = time.time() - last_log_time
                tf.summary.scalar('num_environment_frames/sec (actors)',
                                  df / dt)
                tf.summary.scalar('num_environment_frames/sec (learner)',
                                  df / dt * FLAGS.replay_ratio)

                tf.summary.scalar('learning_rate',
                                  learning_rate_fn(iterations))
                tf.summary.scalar('replay_buffer_num_inserted',
                                  replay_buffer.num_inserted)
                tf.summary.scalar('unroll_queue_size', unroll_queue.size())

                last_num_env_frames, last_log_time = num_env_frames, time.time(
                )
                tf.summary.histogram('updated_priorities', priorities)
                tf.summary.scalar('max_gradient_norm_before_clip',
                                  max_gradient_norm_before_clip)
                max_gradient_norm_before_clip = 0.

    manager.save()
    server.shutdown()
    unroll_queue.close()
Beispiel #12
0
class CtcModel(tf.keras.Model):
    """Keras model that stacks a per-timestep dense layer on a base model.

    The output of `base_model` is passed through a `TimeDistributed` `Dense`
    layer with `num_classes` linear units. The `recognize*` methods turn those
    logits into transcripts using greedy or beam-search CTC decoding; they
    require `add_featurizers` to have been called first because they read
    `self.text_featurizer` (and, for the `*_tflite` variants,
    `self.speech_featurizer`).
    """

    def __init__(self,
                 base_model: tf.keras.Model,
                 num_classes: int,
                 name="ctc_model",
                 **kwargs):
        """Store the base model and create the output projection layer.

        Args:
            base_model: model whose output is fed to the dense layer.
            num_classes: number of units of the dense output layer.
            name: Keras model name.
            **kwargs: forwarded to `tf.keras.Model.__init__`.
        """
        super(CtcModel, self).__init__(name=name, **kwargs)
        self.base_model = base_model
        # Fully connected layer
        self.fc = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(
            units=num_classes, activation="linear", use_bias=True),
                                                  name="fully_connected")

    def _build(self, sample_shape):
        """Run one non-training forward pass on random features of `sample_shape`.

        NOTE(review): presumably used to force Keras to create the model's
        variables -- confirm with callers.
        """
        features = tf.random.normal(shape=sample_shape)
        self(features, training=False)

    def summary(self, line_length=None, **kwargs):
        """Print the base model's summary, then this model's own summary."""
        self.base_model.summary(line_length=line_length, **kwargs)
        super(CtcModel, self).summary(line_length, **kwargs)

    def add_featurizers(self, speech_featurizer: TFSpeechFeaturizer,
                        text_featurizer: TextFeaturizer):
        """Attach the featurizers that the `recognize*` methods read."""
        self.speech_featurizer = speech_featurizer
        self.text_featurizer = text_featurizer

    @tf.function(experimental_relax_shapes=True)
    def call(self, inputs, training=False, **kwargs):
        """Return the dense layer's output for `inputs` (logits; no softmax here).

        Extra `**kwargs` are accepted but not used.
        """
        outputs = self.base_model(inputs, training=training)
        outputs = self.fc(outputs, training=training)
        return outputs

    @tf.function(experimental_relax_shapes=True,
                 input_signature=[
                     tf.TensorSpec([None, None, None, None], dtype=tf.float32)
                 ])
    def recognize(self, features):
        """Greedy-decode a batch of rank-4 float32 features into strings.

        The softmax of the logits is split along its first axis and every
        slice is handed to `perform_greedy` through `tf.numpy_function`.

        Returns:
            A `tf.string` tensor with one entry per slice.
        """
        logits = self.call(features, training=False)
        probs = tf.nn.softmax(logits)

        def map_fn(prob):
            return tf.numpy_function(self.perform_greedy,
                                     inp=[prob],
                                     Tout=tf.string)

        return tf.map_fn(map_fn, probs, dtype=tf.string)

    def perform_greedy(self, probs: np.ndarray):
        """Decode one probability array with `ctc_greedy_decoder`.

        Uses `self.text_featurizer.vocab_array` as the vocabulary and returns
        the decoder's result converted to a `tf.string` tensor.
        """
        decoded = ctc_greedy_decoder(
            probs, vocabulary=self.text_featurizer.vocab_array)
        return tf.convert_to_tensor(decoded, dtype=tf.string)

    @tf.function(experimental_relax_shapes=True,
                 input_signature=[tf.TensorSpec([None], dtype=tf.float32)])
    def recognize_tflite(self, signal):
        """
        Function to convert to tflite using greedy decoding
        Args:
            signal: tf.Tensor with shape [None] indicating a single audio signal

        Return:
            transcript: tf.Tensor of Unicode Code Points with shape [None] and dtype tf.int32
        """
        # Add a batch axis so the single signal forms a batch of one.
        signal = tf.expand_dims(signal, axis=0)
        features = self.speech_featurizer.tf_extract(signal)
        # Length passed to the CTC decoder: axis 1 of the features divided by
        # the base model's time reduction factor.
        input_length = shape_list(features)[1]
        input_length = input_length // self.base_model.time_reduction_factor
        input_length = tf.expand_dims(input_length, axis=0)
        logits = self.call(features, training=False)
        probs = tf.nn.softmax(logits)
        decoded = tf.keras.backend.ctc_decode(y_pred=probs,
                                              input_length=input_length,
                                              greedy=True)
        transcript = self.text_featurizer.index2upoints(
            tf.cast(decoded[0][0], dtype=tf.int32))
        # Drop the batch axis that was added at the top.
        return tf.squeeze(transcript, axis=0)

    @tf.function(experimental_relax_shapes=True,
                 input_signature=[
                     tf.TensorSpec([None, None, None, None], dtype=tf.float32),
                     tf.TensorSpec([], dtype=tf.bool)
                 ])
    def recognize_beam(self, features, lm=False):
        """Beam-search-decode a batch of rank-4 float32 features into strings.

        Args:
            features: float32 tensor of rank 4.
            lm: scalar bool forwarded to `perform_beam_search`, where it
                selects whether `self.text_featurizer.scorer` is used.

        Returns:
            A `tf.string` tensor with one entry per slice along the first
            axis of the softmax output.
        """
        logits = self.call(features, training=False)
        probs = tf.nn.softmax(logits)

        def map_fn(prob):
            return tf.numpy_function(self.perform_beam_search,
                                     inp=[prob, lm],
                                     Tout=tf.string)

        return tf.map_fn(map_fn, probs, dtype=tf.string)

    def perform_beam_search(self, probs: np.ndarray, lm: bool = False):
        """Decode one probability array with `ctc_beam_search_decoder`.

        The beam size comes from `text_featurizer.decoder_config["beam_width"]`;
        the external scoring function is `text_featurizer.scorer` when `lm` is
        truthy and `None` otherwise.
        """
        decoded = ctc_beam_search_decoder(
            probs_seq=probs,
            vocabulary=self.text_featurizer.vocab_array,
            beam_size=self.text_featurizer.decoder_config["beam_width"],
            ext_scoring_func=self.text_featurizer.scorer if lm else None)
        # NOTE(review): presumably the text of the first (best) result --
        # confirm against ctc_beam_search_decoder's return format.
        decoded = decoded[0][-1]

        return tf.convert_to_tensor(decoded, dtype=tf.string)

    @tf.function(experimental_relax_shapes=True,
                 input_signature=[tf.TensorSpec([None], dtype=tf.float32)])
    def recognize_beam_tflite(self, signal):
        """Beam-search counterpart of `recognize_tflite` for a single signal.

        Follows the same steps as `recognize_tflite`, but calls `ctc_decode`
        with `greedy=False` and the beam width taken from
        `text_featurizer.decoder_config["beam_width"]`.

        Args:
            signal: float32 tensor with shape [None].

        Returns:
            The result of `text_featurizer.index2upoints` with its leading
            axis squeezed out.
        """
        signal = tf.expand_dims(signal, axis=0)
        features = self.speech_featurizer.tf_extract(signal)
        input_length = shape_list(features)[1]
        input_length = input_length // self.base_model.time_reduction_factor
        input_length = tf.expand_dims(input_length, axis=0)
        logits = self.call(features, training=False)
        probs = tf.nn.softmax(logits)
        decoded = tf.keras.backend.ctc_decode(
            y_pred=probs,
            input_length=input_length,
            greedy=False,
            beam_width=self.text_featurizer.decoder_config["beam_width"])
        transcript = self.text_featurizer.index2upoints(
            tf.cast(decoded[0][0], dtype=tf.int32))
        return tf.squeeze(transcript, axis=0)

    def get_config(self):
        """Return the base model's config updated with the dense layer's config.

        NOTE(review): `update` lets the dense layer's entries replace any
        base-model entries stored under the same key -- confirm this is the
        intended merge.
        """
        config = self.base_model.get_config()
        config.update(self.fc.get_config())
        return config
Beispiel #13
0
 def test_tf_tensorspec(self):
     """`to_type` turns a `tf.TensorSpec` into the matching `TensorType`."""
     spec = tf.TensorSpec(shape=[None, 3], dtype=tf.float32)
     converted = computation_types.to_type(spec)
     self.assertIsInstance(converted, computation_types.TensorType)
     self.assertEqual(str(converted), 'float32[?,3]')
Beispiel #14
0
# Install 'mixed_float16' as the mixed-precision policy before anything that
# depends on it (the optimizer below) is created.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)
# Turn JIT compilation on or off according to the project configuration.
tf.config.optimizer.set_jit(config.enable_jit)

# Only the first element of the returned triple is used here.
(train_output_sequence_writer, _, _) = create_tensorboard_parms()
train_loss, train_accuracy = get_loss_and_accuracy()
optimizer = get_optimizer()
# Wrap the optimizer with dynamic loss scaling.
optimizer = mixed_precision.LossScaleOptimizer(optimizer, loss_scale='dynamic')
avg_task_score = tf.keras.metrics.Mean(name='avg_task_score')
avg_bert_score = tf.keras.metrics.Mean(name='bert_f1_mean')
# Message templates, filled in later with `str.format`.
batch_zero = 'Time taken to feed the input data to the model {} seconds'
batch_run_details = 'Train_Loss {:.4f} Train_Accuracy {:.4f}'
gradient_accumulators = []

# Input signature of `train_step`: (input_ids, target_ids, grad_accum_flag).
# NOTE(review): `(None)` is just `None` (parentheses without a trailing comma
# do not build a tuple), so the last spec is created with `shape=None`, not
# `(None,)` or `()`. Confirm which shape was intended before changing it.
train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int32),
    tf.TensorSpec(shape=(None, None), dtype=tf.int32),
    tf.TensorSpec(shape=(None), dtype=tf.bool)
]

# Same layout with an additional string input before the bool input; the
# `(None)` note above applies to the last two specs here as well.
val_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int32),
    tf.TensorSpec(shape=(None, None), dtype=tf.int32),
    tf.TensorSpec(shape=(None), dtype=tf.string),
    tf.TensorSpec(shape=(None), dtype=tf.bool)
]


@tf.function(input_signature=train_step_signature)
def train_step(input_ids, target_ids, grad_accum_flag):
Beispiel #15
0
    def test_mi_estimator(self,
                          estimator='DV',
                          sampler='buffer',
                          rho=0.9,
                          eps=1000.0,
                          buffer_size=65536,
                          dim=20):
        """Train an `MIEstimator` on correlated Gaussians and check its estimate.

        `x` and `y` are built from `dim` independent pairs of Gaussian
        variables with correlation `rho`, so the reference mutual information
        is `dim * (H(x) + H(y) - H(x, y))` for a single pair. After 5000
        training steps the estimate on a fresh batch of 16384 samples is
        compared against that reference: within `eps` for the 'JSD' estimator,
        otherwise strictly below it and no more than `eps` below.

        Returns:
            A tuple `(mi, estimated_mi)` of Python floats.
        """
        x_cut = dim // 3
        y_cut = dim // 2
        mi_estimator = MIEstimator(
            x_spec=[
                tf.TensorSpec(shape=(x_cut, ), dtype=tf.float32),
                tf.TensorSpec(shape=(dim - x_cut, ), dtype=tf.float32)
            ],
            y_spec=[
                tf.TensorSpec(shape=(y_cut, ), dtype=tf.float32),
                tf.TensorSpec(shape=(y_cut, ), dtype=tf.float32)
            ],
            fc_layers=(512, ),
            buffer_size=buffer_size,
            estimator_type=estimator,
            sampler=sampler,
            averager=ScalarAdaptiveAverager(),
            optimizer=tf.optimizers.Adam(learning_rate=1e-4))

        root_plus = math.sqrt(1 + rho)
        root_minus = math.sqrt(1 - rho)
        a = 0.5 * (root_plus + root_minus)
        b = 0.5 * (root_plus - root_minus)
        # `mixing` maps a standard Gaussian to a Gaussian whose covariance is
        # [[1, rho], [rho, 1]].
        mixing = tf.constant([[a, b], [b, a]], dtype=tf.float32)
        cov = tf.matmul(mixing, mixing)
        two_pi_e = 2 * math.pi * math.e
        joint_entropy = 0.5 * tf.math.log(tf.linalg.det(two_pi_e * cov))
        entropy_x = 0.5 * tf.math.log(two_pi_e * cov[0, 0])
        entropy_y = 0.5 * tf.math.log(two_pi_e * cov[1, 1])
        mi = float(dim * (entropy_x + entropy_y - joint_entropy))

        def _get_batch(batch_size):
            # Draw independent pairs, correlate them, then regroup into rows
            # of `dim` values and split to match x_spec / y_spec.
            pairs = tf.matmul(
                tf.random.normal(shape=(batch_size * dim, 2)), mixing)
            xs = tf.reshape(pairs[:, 0], (-1, dim))
            ys = tf.reshape(pairs[:, 1], (-1, dim))
            x_parts = [xs[..., :dim // 3], xs[..., dim // 3:]]
            y_parts = [ys[..., :dim // 2], ys[..., dim // 2:]]
            return x_parts, y_parts

        def _calc_estimated_mi(tag, mi_samples):
            mean, variance = tf.nn.moments(mi_samples, axes=(0, ))
            mean = float(mean)
            # For DV estimator, the following std is an approximated std.
            std_of_mean = math.sqrt(variance / mi_samples.shape[0])
            logging.info(
                "%s estimated mi=%s std=%s" % (tag, mean, std_of_mean))
            return mean

        batch_size = 512
        info = "mi=%s estimator=%s buffer_size=%s sampler=%s dim=%s" % (
            float(mi), estimator, buffer_size, sampler, dim)

        @tf.function
        def _train():
            batch_x, batch_y = _get_batch(batch_size)
            with tf.GradientTape() as tape:
                step = mi_estimator.train_step((batch_x, batch_y))
            mi_estimator.train_complete(tape, step.info)
            return step

        for iteration in range(5000):
            step = _train()
            if iteration % 1000 != 0:
                continue
            _calc_estimated_mi(iteration, step.outputs)

        eval_x, eval_y = _get_batch(16384)
        log_ratio = mi_estimator.calc_pmi(eval_x, eval_y)
        estimated_mi = _calc_estimated_mi(info, log_ratio)
        if estimator != 'JSD':
            self.assertLess(estimated_mi, mi)
            self.assertGreater(estimated_mi, mi - eps)
        else:
            self.assertAlmostEqual(estimated_mi, mi, delta=eps)
        return mi, estimated_mi
Beispiel #16
0
    def get_feature_signatures(self):
        """Return a dict mapping each feature key to its `tf.TensorSpec`.

        Shapes come from `get_feature_shapes()`, dtypes from
        `get_feature_types()`, and each spec is named `self.key_map[key]`.
        """
        feature_shapes = self.get_feature_shapes()
        feature_types = self.get_feature_types()
        signatures = {}
        for key, shape in feature_shapes.items():
            signatures[key] = tf.TensorSpec(shape=shape,
                                            dtype=feature_types[key],
                                            name=self.key_map[key])
        return signatures
Beispiel #17
0
class GGNN(tf.keras.layers.Layer):
    """Gated graph neural network layer with typed edges and GRU updates.

    Messages are passed along edges of `num_edge_types` different types, each
    with its own weight matrix (and optional bias) per layer, and every layer
    updates the node states with a `GRUCell`. The layer also owns (or shares)
    an embedding matrix exposed through `embed_inputs`.
    """

    def __init__(self, model_config, shared_embedding=None, vocab_dim=None):
        """Read the configuration and set up the embedding variable.

        Args:
            model_config: mapping providing 'num_edge_types', 'time_steps',
                'residuals', 'hidden_dim', 'add_type_bias' and 'dropout_rate'.
            shared_embedding: embedding to reuse; takes precedence over
                `vocab_dim` when given.
            vocab_dim: number of rows of a newly created embedding matrix;
                only used when `shared_embedding` is None.

        Raises:
            ValueError: if both `shared_embedding` and `vocab_dim` are None.
        """
        super(GGNN, self).__init__()
        self.num_edge_types = model_config['num_edge_types']
        # The main GGNN configuration is provided as a list of 'time-steps', which describes how often each layer is
        # repeated.
        # E.g., an 8-step GGNN with 4 distinct layers repeated 3 and 1 times alternatingly can represented as [3, 1,
        # 3, 1]
        self.time_steps = model_config['time_steps']
        self.num_layers = len(self.time_steps)
        # The residuals index in the time-steps above offset by one (index 0 refers to the node embeddings).
        # They describe short-cuts formatted as receiving layer: [sending layer] entries, e.g., {1: [0], 3: [0, 1]}
        # Lookups below use `str(layer_index)`, so the keys are expected to be strings.
        self.residuals = model_config['residuals']
        self.hidden_dim = model_config['hidden_dim']
        self.add_type_bias = model_config['add_type_bias']
        self.dropout_rate = model_config['dropout_rate']

        # Initialize embedding variable in constructor to allow reuse by other models
        if shared_embedding is not None:
            self.embed = shared_embedding
        elif vocab_dim is None:
            raise ValueError(
                'Pass either a vocabulary dimension or an embedding Variable')
        else:
            random_init = tf.random_normal_initializer(
                stddev=self.hidden_dim**-0.5)
            self.embed = tf.Variable(random_init([vocab_dim, self.hidden_dim]),
                                     dtype=tf.float32)

    def build(self, _):
        """Create the per-layer, per-edge-type weights, biases and GRU cells.

        The input-shape argument is ignored; all sizes come from the
        configuration read in `__init__`.
        """
        # Small util functions
        random_init = tf.random_normal_initializer(
            stddev=self.hidden_dim**-0.5)

        def make_weight(name=None):
            return tf.Variable(random_init([self.hidden_dim, self.hidden_dim]),
                               name=name)

        def make_bias(name=None):
            return tf.Variable(random_init([self.hidden_dim]), name=name)

        # Set up type-transforms and GRUs
        # Both nested lists are indexed as [layer][edge_type].
        self.type_weights = [[
            make_weight('type-' + str(j) + '-' + str(i))
            for i in range(self.num_edge_types)
        ] for j in range(self.num_layers)]
        self.type_biases = [[
            make_bias('bias-' + str(j) + '-' + str(i))
            for i in range(self.num_edge_types)
        ] for j in range(self.num_layers)]
        self.rnns = [
            tf.keras.layers.GRUCell(self.hidden_dim)
            for _ in range(self.num_layers)
        ]
        for ix, rnn in enumerate(self.rnns):
            # Initialize the GRUs input dimension based on whether any residuals will be passed in.
            if str(ix) in self.residuals:
                rnn.build(self.hidden_dim * (1 + len(self.residuals[str(ix)])))
            else:
                rnn.build(self.hidden_dim)

    # Assume 'inputs' is an embedded batched sequence, 'edge_ids' is a sparse list of indices formatted as: [
    # edge_type, batch_index, source_index, target_index].
    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, None, None), dtype=tf.float32),
        tf.TensorSpec(shape=(None, 4), dtype=tf.int32),
        tf.TensorSpec(shape=(), dtype=tf.bool)
    ])
    def call(self, states, edge_ids, training):
        """Propagate node states through all configured layers and steps.

        Args:
            states: float32 tensor of rank 3 holding the initial node states.
            edge_ids: int32 tensor of shape [num_edges, 4]; see the comment
                above the decorator for the column layout.
            training: scalar bool tensor; dropout is applied after every
                propagation step when it is true.

        Returns:
            The node states produced by the last layer.
        """
        # Collect some basic details about the graphs in the batch.
        # Split the (batch, source, target) columns into one tensor per edge type.
        edge_type_ids = tf.dynamic_partition(edge_ids[:, 1:], edge_ids[:, 0],
                                             self.num_edge_types)
        # (batch_index, source_index) pairs used to gather sender states.
        message_sources = [type_ids[:, 0:2] for type_ids in edge_type_ids]
        # (batch_index, target_index) pairs used to scatter messages to receivers.
        message_targets = [
            tf.stack([type_ids[:, 0], type_ids[:, 2]], axis=1)
            for type_ids in edge_type_ids
        ]

        # Initialize the node_states with embeddings; then, propagate through layers and number of time steps for
        # each layer.
        layer_states = [states]
        for layer_no, steps in enumerate(self.time_steps):
            for step in range(steps):
                if str(layer_no) in self.residuals:
                    residuals = [
                        layer_states[ix]
                        for ix in self.residuals[str(layer_no)]
                    ]
                else:
                    residuals = None
                new_states = self.propagate(layer_states[-1],
                                            layer_no,
                                            edge_type_ids,
                                            message_sources,
                                            message_targets,
                                            residuals=residuals)
                if training:
                    new_states = tf.nn.dropout(new_states,
                                               rate=self.dropout_rate)
                # Add or overwrite states for this layer number, depending on the step.
                if step == 0:
                    layer_states.append(new_states)
                else:
                    layer_states[-1] = new_states
        # Return the final layer state.
        return layer_states[-1]

    def propagate(self,
                  in_states,
                  layer_no,
                  edge_type_ids,
                  message_sources,
                  message_targets,
                  residuals=None):
        """Run one message-passing step of layer `layer_no`.

        Args:
            in_states: current node states.
            layer_no: index of the layer whose weights and GRU cell are used.
            edge_type_ids: per-edge-type index tensors; only used to skip
                edge types that have no edges.
            message_sources: per-edge-type indices for gathering sender states.
            message_targets: per-edge-type indices for scattering messages.
            residuals: optional list of earlier layer states that are
                concatenated in front of the messages along axis 2.

        Returns:
            The updated node states.
        """
        # Collect messages across all edge types.
        messages = tf.zeros_like(in_states)
        for type_index in range(self.num_edge_types):
            type_ids = edge_type_ids[type_index]
            # Nothing to send for an edge type without edges.
            if tf.shape(type_ids)[0] == 0:
                continue
            # Retrieve source states and compute type-transformation.
            edge_source_states = tf.gather_nd(in_states,
                                              message_sources[type_index])
            type_messages = tf.matmul(edge_source_states,
                                      self.type_weights[layer_no][type_index])
            if self.add_type_bias:
                type_messages += self.type_biases[layer_no][type_index]
            # Messages that share a target index are summed.
            messages = tf.tensor_scatter_nd_add(messages,
                                                message_targets[type_index],
                                                type_messages)

        # Concatenate residual messages, if applicable.
        if residuals is not None:
            messages = tf.concat(residuals + [messages], axis=2)

        # Run GRU for each node.
        # NOTE(review): the previous states are passed with an extra leading
        # axis and `[0]` is taken from the cell output; presumably the `[0]`
        # removes that axis again -- confirm against the GRUCell version in use.
        new_states, _ = self.rnns[layer_no](messages,
                                            tf.expand_dims(in_states, 0))
        return new_states[0]

    # Embed inputs. Note, does not add positional encoding.
    @tf.function(
        input_signature=[tf.TensorSpec(shape=(None, None), dtype=tf.int32)])
    def embed_inputs(self, inputs):
        """Look up embeddings for int32 `inputs` of rank 2 and scale them.

        The looked-up vectors are multiplied by the square root of their last
        dimension.
        """
        states = tf.nn.embedding_lookup(self.embed, inputs)
        states *= tf.math.sqrt(tf.cast(tf.shape(states)[-1], 'float32'))
        return states
Beispiel #18
0
 def get_target_signature(self):
     """Return a `tf.TensorSpec` built from the target's shape, type and name."""
     return tf.TensorSpec(
         shape=self.get_target_shape(),
         dtype=self.get_target_type(),
         name=self.get_target_name(),
     )
Beispiel #19
0
# Pool the node features of each graph (graph membership given by I_in) and
# map the pooled vector to `n_out` sigmoid outputs.
X_3 = GlobalSumPool()([X_2, I_in])
output = Dense(n_out, activation="sigmoid")(X_3)

# Build model
model = Model(inputs=[X_in, A_in, E_in, I_in], outputs=output)
# Use the documented `learning_rate` argument; `lr` is only a deprecated alias
# that newer Keras releases no longer accept.
opt = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()


################################################################################
# FIT MODEL
################################################################################
@tf.function(
    input_signature=(
        (
            tf.TensorSpec((None, F), dtype=tf.float64),
            tf.SparseTensorSpec((None, None), dtype=tf.int64),
            tf.TensorSpec((None, S), dtype=tf.float64),
            tf.TensorSpec((None,), dtype=tf.int64),
        ),
        tf.TensorSpec((None, n_out), dtype=tf.float64),
    ),
    experimental_relax_shapes=True,
)
def train_step(inputs, target):
    """Run one optimisation step of `model` on a single batch.

    The loss is `loss_fn(target, predictions)` plus the sum of the model's
    own `losses`; its gradients with respect to the trainable variables are
    applied with `opt`.
    """
    with tf.GradientTape() as tape:
        outputs = model(inputs, training=True)
        total_loss = loss_fn(target, outputs) + sum(model.losses)
    weights = model.trainable_variables
    grads = tape.gradient(total_loss, weights)
    opt.apply_gradients(zip(grads, weights))
    def test_parse_tflite_graph(self):
        """Convert a small TF function to TFLite and check the parsed graph.

        The function computes `1.1 * matmul(a, b) + 2.3 * c`. It is converted
        to a TFLite flatbuffer, written to the test data directory, read back
        with `read_tflite_model` and parsed with `parse_tflite_graph`; the
        test then checks operator counts, attribute counts, input/output
        names, and the recorded shapes and dtypes of the inputs.
        """
        def func(a, b, c):
            alpha = tf.constant(1.1, dtype=tf.float32)
            beta = tf.constant(2.3, dtype=tf.float32)
            mul1 = tf.multiply(alpha, tf.matmul(a, b))
            mul2 = tf.multiply(beta, c)
            x_ = mul1 + mul2
            return tf.identity(x_, name="output")

        inp_shapes = [[2, 3], [3, 1], [2, 1]]
        inp_dtypes = [tf.float32, tf.float32, tf.float32]
        names = ['a', 'b', 'c']
        names_with_port = ['a:0', 'b:0', 'c:0']
        output_names = ['output']
        output_names_with_port = ['output:0']

        input_tensors = [
            tf.TensorSpec(shape=s, dtype=d, name=n)
            for s, d, n in zip(inp_shapes, inp_dtypes, names)
        ]

        concrete_func = tf.function(func, input_signature=tuple(input_tensors))
        concrete_func = concrete_func.get_concrete_function()
        graph_def = from_function(concrete_func,
                                  input_names=names_with_port,
                                  output_names=output_names_with_port)
        with tf_session() as sess:
            tf.import_graph_def(graph_def, name='')
            sess_inputs = [
                sess.graph.get_tensor_by_name(k) for k in names_with_port
            ]
            sess_outputs = [
                sess.graph.get_tensor_by_name(n)
                for n in output_names_with_port
            ]
            converter = tf.compat.v1.lite.TFLiteConverter.from_session(
                sess, sess_inputs, sess_outputs)

        # Convert exactly once; a second conversion would only repeat the
        # work and overwrite an identical result.
        tflite_model = converter.convert()
        tflite_path = os.path.join(self.test_data_directory,
                                   self._testMethodName + ".tflite")
        os.makedirs(os.path.dirname(tflite_path), exist_ok=True)
        with open(tflite_path, 'wb') as f:
            f.write(tflite_model)

        tflite_graphs, opcodes_map, model = read_tflite_model(tflite_path)
        self.assertEqual(1, len(tflite_graphs))
        onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes, inputs, outputs, _ = \
            parse_tflite_graph(tflite_graphs[0], opcodes_map, model)
        self.assertEqual(2, op_cnt['MUL'])
        self.assertEqual(1, op_cnt['ADD'])
        self.assertEqual(1, op_cnt['FULLY_CONNECTED'])

        self.assertEqual(1, attr_cnt['WeightsFormat'])
        self.assertEqual(names, inputs)
        self.assertEqual(output_names, outputs)

        for name, shape, dtype in zip(names, inp_shapes, inp_dtypes):
            self.assertEqual(shape, output_shapes[name])
            self.assertEqual(dtype, dtypes[name])

        # assertGreaterEqual reports both values on failure, unlike assertTrue.
        self.assertGreaterEqual(len(onnx_nodes), 4)
Beispiel #21
0
 def input_signature(self):
   """Return the specs of the "tensor" and "length" inputs as a dict."""
   tensor_spec = tf.TensorSpec([None, None, self.input_depth], self.dtype)
   length_spec = tf.TensorSpec([None], tf.int32)
   return {"tensor": tensor_spec, "length": length_spec}
Beispiel #22
0
    return grads, loss, predictions


@tf.function
def train_step(images, labels):
    """Apply one optimizer update for a batch and return the mean loss.

    Gradients and loss come from `get_grads`; every gradient output is
    combined with `accumulate()` before being applied, and the loss output is
    averaged with `reduce_mean()`.
    """
    step_grads, step_loss, _ = get_grads(images, labels)

    combined = [grad.accumulate() for grad in step_grads]
    optimizer.apply_gradients(zip(combined, model.trainable_variables))

    return step_loss.reduce_mean()


@smp.step(input_signature=[
    tf.TensorSpec(shape=[8, 1], dtype=tf.float32),
    tf.TensorSpec(shape=[8, 1], dtype=tf.float32),
])
def test_step(images, labels):
    """Return the mean evaluation loss of `model` on one batch."""
    outputs = model(images, training=False)
    return tf.reduce_mean(loss_object(labels, outputs))


test_variable = tf.Variable(0.0)

checkpoint_directory = "/tmp/tf2_ckpt_test/"

checkpoint = tf.train.Checkpoint(optimizer=optimizer,
Beispiel #23
0
import typing as _ty

import tensorflow as _tf
import tf_agents as _tfa
from tf_agents.typing import types as _types

# Type aliases re-exported from TensorFlow and TF-Agents so that the rest of
# the package can import them from one place.

# used when scalar-tensor or plain scalars are allowed
Int = _types.Int
Float = _types.Float

Seed = _types.Seed

# Plain TensorFlow tensor / spec classes.
Tensor = _tf.Tensor
TensorSpec = _tf.TensorSpec

# Nested-structure aliases taken from tf_agents.typing.types.
Nested = _types.Nested
NestedTensor = _types.NestedTensor
NestedTensorSpec = _types.NestedTensorSpec
NestedText = Nested[_ty.Text, 'NestedText']

# Trajectory and agent types from TF-Agents.
TimeStep = _tfa.trajectories.time_step.TimeStep
StepType = _tfa.trajectories.time_step.StepType
PolicyStep = _tfa.trajectories.policy_step.PolicyStep
LossInfo = _tfa.agents.tf_agent.LossInfo  # namedtuple(["loss", "extra"])

# Spec of a scalar (shape ``()``); no dtype is passed, so TensorSpec's default
# dtype applies.
scalar_tensor_spec = _tf.TensorSpec(shape=(), )

# to represent rectangular shape boundaries
Shape = _ty.Union[Tensor, TensorSpec, _tf.TensorShape]
Beispiel #24
0
 def inputs(self):
     """Return the specs of the image batch ('input') and its labels ('label')."""
     image_spec = tf.TensorSpec(
         [args.batch, INPUT_SHAPE, INPUT_SHAPE, 3], IMAGE_DTYPE, 'input')
     label_spec = tf.TensorSpec([args.batch], tf.int32, 'label')
     return [image_spec, label_spec]
def _create_input_spec():
    """Return an OrderedDict of input specs with keys ``x`` then ``y``.

    ``x`` is float32 with shape [None, 784]; ``y`` is int64 with shape
    [None, 1].
    """
    spec = collections.OrderedDict()
    spec["x"] = tf.TensorSpec(shape=[None, 784], dtype=tf.float32)
    spec["y"] = tf.TensorSpec(shape=[None, 1], dtype=tf.int64)
    return spec
Beispiel #26
0
    def build(self, tensor_shapes: GNNInput):
        """Build the various layers in the model.

        Args:
            tensor_shapes: A GNNInput of tensor shapes.

        Returns:
            Nothing, but initialises the layers in the model based on the tensor shapes given.

        Raises:
            ValueError: If a global exchange layer has to be built and
                `self._global_exchange_mode` is not one of "mean", "gru" or "mlp"
                (compared case-insensitively).
        """
        # First, we go through the input shapes and make sure that anything which might vary batch
        # to batch (number of nodes / number of edges) is set to None.
        initial_node_features_shape: tf.TensorShape = tensor_shapes.node_features
        variable_node_features_shape = tf.TensorShape(
            (None, initial_node_features_shape[1]))
        adjacency_list_shapes = tensor_shapes.adjacency_lists
        embedded_shape = tf.TensorShape((None, self._hidden_dim))

        with tf.name_scope(f"{self._message_passing_class.__name__}_GNN"):
            # Then we construct the layers themselves:
            with tf.name_scope("gnn_initial_node_projection"):
                self._initial_projection_layer = tf.keras.layers.Dense(
                    units=self._hidden_dim,
                    use_bias=False,
                    activation=self._initial_node_representation_activation_fn,
                )
                self._initial_projection_layer.build(
                    variable_node_features_shape)

            # Construct the graph message passing layers.
            for layer_idx in range(self._num_layers):
                with tf.name_scope(f"Layer_{layer_idx}"):
                    with tf.name_scope("MessagePassing"):
                        self._mp_layers.append(
                            self._message_passing_class(self._params))
                        self._mp_layers[-1].build(
                            MessagePassingInput(embedded_shape,
                                                adjacency_list_shapes))

                    # If required, prepare for a LayerNorm:
                    if self._use_inter_layer_layernorm:
                        with tf.name_scope("LayerNorm"):
                            self._inter_layer_layernorms.append(
                                tf.keras.layers.LayerNormalization())
                            self._inter_layer_layernorms[-1].build(
                                embedded_shape)

                    # Construct the per-node dense layers.
                    if layer_idx % self._dense_every_num_layers == 0:
                        with tf.name_scope("Dense"):
                            dense_layer = tf.keras.layers.Dense(
                                units=self._hidden_dim,
                                use_bias=False,
                                activation=self.
                                _dense_intermediate_layer_activation_fn,
                            )
                            # Assign before building, as the original code did, so
                            # that attribute tracking sees the layer first.
                            self._dense_layers[str(layer_idx)] = dense_layer
                            self._dense_layers[str(layer_idx)].build(
                                embedded_shape)

                    # Global exchange layers are never placed at layer 0.
                    if (layer_idx and layer_idx %
                            self._global_exchange_every_num_layers == 0):
                        with tf.name_scope("Global_Exchange"):
                            exchange_mode = self._global_exchange_mode.lower()
                            if exchange_mode == "mean":
                                exchange_layer_class = GraphGlobalMeanExchange
                            elif exchange_mode == "gru":
                                exchange_layer_class = GraphGlobalGRUExchange
                            elif exchange_mode == "mlp":
                                exchange_layer_class = GraphGlobalMLPExchange
                            else:
                                # Without this branch an unknown mode would leave
                                # `exchange_layer_class` unbound and fail below with
                                # an uninformative UnboundLocalError.
                                raise ValueError(
                                    f"Unknown global exchange mode "
                                    f"{self._global_exchange_mode!r}; expected one "
                                    f"of 'mean', 'gru' or 'mlp'.")
                            exchange_layer = exchange_layer_class(
                                hidden_dim=self._hidden_dim,
                                weighting_fun=self.
                                _global_exchange_weighting_fun,
                                num_heads=self._global_exchange_num_heads,
                                dropout_rate=self.
                                _global_exchange_dropout_rate,
                            )
                            exchange_layer.build(
                                GraphGlobalExchangeInput(
                                    node_embeddings=tf.TensorShape(
                                        (None, self._hidden_dim)),
                                    node_to_graph_map=tf.TensorShape((None, )),
                                    num_graphs=tf.TensorShape(()),
                                ))
                            self._global_exchange_layers[str(
                                layer_idx)] = exchange_layer

        super().build(tensor_shapes)

        # The following is needed to work around a limitation in the @tf.function annotation.
        # (See https://github.com/tensorflow/tensorflow/issues/32457 for a related issue,
        #  though there are many more).
        # Our aim is to trace the `call` function once and for all. However, as the first
        # dimension of node features and adjacency lists keeps changing between batches (with
        # the number of nodes/edges in the batch), generalisation doesn't work automatically.
        # Instead, we have to specify the input spec explicitly; but as this depends on a
        # build-time constant (the number of edges), we cannot do that by just using @tf.function.
        # Instead, we construct the TensorSpec explicitly, and then use setattr to wrap
        # our function using tf.function.
        #
        # Finally, the `return_all_representations` option changes the shape of the return values,
        # but a tf.function-traced function must return the same shape on all code paths. To
        # handle this, we let the core function _always_ return all representations (and trace
        # that for performance reasons), and then use a thin wrapper `call` function to drop
        # the unneeded return value if needed.
        internal_call_input_spec = (GNNInput(
            node_features=tf.TensorSpec(shape=variable_node_features_shape,
                                        dtype=tf.float32),
            adjacency_lists=tuple(
                tf.TensorSpec(shape=(None, 2), dtype=tf.int32)
                for _ in range(len(adjacency_list_shapes))),
            node_to_graph_map=tf.TensorSpec(shape=(None, ), dtype=tf.int32),
            num_graphs=tf.TensorSpec(shape=(), dtype=tf.int32),
        ), tf.TensorSpec(shape=(), dtype=tf.bool))
        setattr(
            self, "_internal_call",
            tf.function(func=self._internal_call,
                        input_signature=internal_call_input_spec))
Beispiel #27
0
import researchflow as rf
import tensorflow as tf
import tensorflow.keras.layers as tfkl

from models import infomax as InfoMax

# One-element signature: a single float32 rank-2 tensor with both dimensions
# left unspecified. Where it is consumed is outside this view.
input_signature = (tf.TensorSpec(shape=[None, None], dtype=tf.float32), )


def shifted_tanh(x):
    """Return ``sigmoid(2 * x)``.

    Mathematically this equals ``(tanh(x) + 1) / 2``, i.e. tanh shifted and
    rescaled into the (0, 1) range.
    """
    doubled = 2.0 * x
    return tf.math.sigmoid(doubled)


@rf.export
class InfoMaxGAN(InfoMax):
    """Jensen-Shannon-divergence-based MI maximizer."""
    @staticmethod
    def hparams(hp):
        """Declare this model's hyperparameters on ``hp``.

        Delegates to ``InfoMax.hparams(hp)`` first, then adds the
        discriminator (``disc_*``) entries and the ``use_nlog`` flag.

        Args:
            hp: Hyperparameter container exposing ``Fixed``, ``Choice``,
                ``Boolean`` and ``Float`` methods (its concrete type is not
                visible in this file section).
        """
        InfoMax.hparams(hp)
        # Discriminator architecture: fixed width/depth, selectable activation.
        hp.Fixed("disc_hidden_size", 1024)
        hp.Fixed("disc_num_hidden", 2)
        hp.Choice("disc_activation", ["tanh", "relu"], default="tanh")
        # Boolean switches, both off by default.
        hp.Boolean("use_nlog", default=False)
        hp.Boolean("disc_iter", default=False)
        # Discriminator learning rate, log-sampled with default 0.1 —
        # presumably over the range [1e-3, 0.5]; confirm the argument order
        # against the hp API.
        hp.Float("disc_lr", 1e-3, 0.5, default=0.1, sampling="log")
        hp.Fixed("disc_clipnorm", 10.0)

    def __init__(self, **kw):
        super().__init__(**kw)

        # Discriminator neural network.
Beispiel #28
0
    def test_conditional_mi_estimator(self,
                                      estimator='ML',
                                      switch_xy=False,
                                      use_default_model=True,
                                      eps=0.02,
                                      dim=2):
        """Estimate the conditional mutual information MI(X;Y|Z)

        X, Y and Z are generated by the following procedure:
            Z ~ N(0, 1)
            X|z ~ N(z, 1)
            if z >= 0:
                Y|x,z ~ N(z + xz, e^2)
            else:
                Y|x,z ~ N(0, 1)
        When z>0,
            [X, Y] ~ N([z, z+z^2], [[1, z], [z, e^2+z^2]])
            MI(X;Y|z) = 0.5 * log(1+z^2/e^2)

        Args:
            estimator: Passed to ``MIEstimator`` as ``estimator_type``. When a
                custom model is requested, 'ML' selects ``NetML`` and any
                other value selects ``NetJSD``.
            switch_xy: If False the estimator sees X=[z, x], Y=y; if True it
                sees X=[z, y], Y=x.
            use_default_model: If True, ``model=None`` is passed to
                ``MIEstimator``; otherwise a ``NetML``/``NetJSD`` is built.
            eps: Absolute tolerance (``delta``) for the final assertion.
            dim: Size of the last axis of each of x, y and z.

        Returns:
            Tuple ``(mi, estimated_mi)``: the reference value and the value
            estimated on the final evaluation batch.
        """
        # Fixed seed so the sampled data and the training run are repeatable.
        tf.random.set_seed(123)
        # The estimator's "x" input is a pair of tensors (see X in _get_batch).
        x_spec = [
            tf.TensorSpec(shape=(dim, ), dtype=tf.float32),
            tf.TensorSpec(shape=(dim, ), dtype=tf.float32)
        ]
        y_spec = tf.TensorSpec(shape=(dim, ), dtype=tf.float32)
        if use_default_model:
            model = None
        elif estimator == 'ML':
            model = NetML(x_spec)
        else:
            model = NetJSD([x_spec, y_spec])
        mi_estimator = MIEstimator(
            x_spec=x_spec,
            y_spec=y_spec,
            fc_layers=(256, 256),
            model=model,
            estimator_type=estimator,
            optimizer=tf.optimizers.Adam(learning_rate=2e-4))

        # Monte-Carlo reference value for MI(X;Y|Z) from 10000 samples of z.
        # The 0.25 factor is presumably 0.5 (from the MI formula above) times
        # 0.5 (the information is only non-zero for z > 0, i.e. about half of
        # the samples), summed over `dim` components — TODO confirm.
        z = tf.random.normal((10000, ))
        e = 0.5
        mi = 0.25 * dim * tf.reduce_mean(tf.math.log(1 + (z / e)**2))

        def _get_batch(batch_size, z=None):
            """Sample a batch following the procedure in the outer docstring.

            If `z` is None it is drawn from a standard normal of shape
            (batch_size, dim); otherwise the given `z` is used as is.
            """
            if z is None:
                z = tf.random.normal(shape=(batch_size, dim))
            x_dist = tfp.distributions.Normal(loc=z, scale=tf.ones_like(z))
            # 1.0 where z > 0, else 0.0.
            mask = tf.cast(z > 0, tf.float32)
            # Distribution of y given z only: N(z + z^2, e^2 + z^2) where
            # z > 0 and N(0, 1) elsewhere.
            y_dist = tfp.distributions.Normal(
                loc=(z + z * z) * mask,
                scale=1 - mask + mask * tf.sqrt(e * e + z * z))
            x = x_dist.sample()
            # y given x and z: mean z + x*z with std e where z > 0,
            # standard normal noise elsewhere.
            y = (z + x * z) * mask + (1 - mask + e * mask) * tf.random.normal(
                shape=(batch_size, dim))
            if not switch_xy:
                X = [z, x]
                Y = y
                Y_dist = y_dist
            else:
                X = [z, y]
                Y = x
                Y_dist = x_dist
            return dict(x=x, y=y, z=z, X=X, Y=Y, Y_dist=Y_dist)

        def _estimate_mi(i, batch):
            """Log and return the mean of the estimator's PMI on `batch`.

            `i` is only used as a label in the log line.
            """
            estimated_pmi = mi_estimator.calc_pmi(batch['X'], batch['Y'],
                                                  batch['Y_dist'])
            batch_size = estimated_pmi.shape[0]
            x, y, z = batch['x'], batch['y'], batch['z']
            # Reference pointwise value computed in closed form from the
            # sampled x, y and z.
            pmi = 0.5 * (tf.square(y - z - z * z) /
                         (e * e + z * z) - tf.square(y - z - x * z) /
                         (e * e) + tf.math.log(1 + (z / e)**2))
            # Zeroed wherever z <= 0.
            pmi = pmi * tf.cast(z > 0, tf.float32)
            # Sum over the last axis to get one value per sample.
            pmi = tf.reduce_sum(pmi, axis=-1)
            pmi_rmse = tf.sqrt(tf.reduce_mean(tf.square(pmi - estimated_pmi)))
            estimated_mi, var = tf.nn.moments(estimated_pmi, axes=(0, ))
            estimated_mi = float(estimated_mi)
            # The logged "std" is sqrt(var / batch_size), i.e. the standard
            # error of the batch mean.
            logging.info("%s estimated_mi=%s std=%s pmi_rmse=%s" % (
                i, estimated_mi, math.sqrt(var / batch_size), float(pmi_rmse)))
            return estimated_mi

        batch_size = 512

        # Label used for the final log line.
        info = "mi=%s estimator=%s use_default_model=%s switch_xy=%s dim=%s" % (
            float(mi), estimator, use_default_model, switch_xy, dim)

        @tf.function
        def _train():
            """Run one training step of the estimator on a fresh batch."""
            batch = _get_batch(batch_size)
            with tf.GradientTape() as tape:
                alg_step = mi_estimator.train_step(
                    (batch['X'], batch['Y']), y_distribution=batch['Y_dist'])
            mi_estimator.train_complete(tape, alg_step.info)
            return alg_step

        # Train for 20000 steps, logging an estimate every 1000 steps.
        for i in range(20000):
            _train()
            if i % 1000 == 0:
                batch = _get_batch(batch_size)
                _estimate_mi(i, batch)

        # Final evaluation on a much larger batch.
        batch_size = 16384
        batch = _get_batch(batch_size)
        estimated_mi = _estimate_mi(info, batch)
        self.assertAlmostEqual(estimated_mi, mi, delta=eps)

        # Set detail_result=True to show the conditional mutual information for
        # different values of z
        detail_result = False
        if detail_result:
            for z in tf.range(-2., 2.001, 0.125):
                batch = _get_batch(batch_size, z * tf.ones((batch_size, dim)))
                info = "z={z} mi={mi}".format(
                    z=float(z),
                    mi=float(
                        0.5 * tf.math.log(1 + tf.square(tf.nn.relu(z / e)))))
                _estimate_mi(info, batch)

        return mi, estimated_mi
Beispiel #29
0
        for y in range(1, size_y):
            if seq1[x - 1] == seq2[y - 1]:
                matrix[x, y] = min(matrix[x - 1, y] + 1, matrix[x - 1, y - 1],
                                   matrix[x, y - 1] + 1)
            else:
                matrix[x,
                       y] = min(matrix[x - 1, y] + 1, matrix[x - 1, y - 1] + 1,
                                matrix[x, y - 1] + 1)
    return matrix[size_x - 1, size_y - 1]


# Print NumPy arrays in full: an infinite threshold means arrays are never
# summarised with "..." regardless of their size.
np.set_printoptions(threshold=np.inf)

coco_batch_spec = [{
    'image_indicators':
    tf.TensorSpec(shape=[None, None], dtype=tf.float32),
    'image_path':
    tf.TensorSpec(shape=[None], dtype=tf.string),
    'tags':
    tf.TensorSpec(shape=[None, None], dtype=tf.int32),
    'words':
    tf.TensorSpec(shape=[None, None], dtype=tf.int32),
    'token_indicators':
    tf.TensorSpec(shape=[None, None], dtype=tf.float32),
    'global_features':
    tf.TensorSpec(shape=[None, None], dtype=tf.float32),
    'scores':
    tf.TensorSpec(shape=[None, None], dtype=tf.float32),
    'boxes':
    tf.TensorSpec(shape=[None, None, None], dtype=tf.float32),
    'labels':
Beispiel #30
0
 def in_shape_to_tensorspec(in_shape: str) -> tf.TensorSpec:
     """Convert a shape specification string into a float32 TensorSpec.

     The string is parsed with ``masking.parse_spec``; each dimension that
     reports ``is_constant`` becomes its ``int`` value, every other
     dimension becomes ``None``.
     """
     dims = []
     for dim_spec in masking.parse_spec(in_shape):
         if dim_spec.is_constant:
             dims.append(int(dim_spec))
         else:
             dims.append(None)
     return tf.TensorSpec(tuple(dims), dtype=tf.float32)