Esempio n. 1
0
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 rate=0.1,
                 return_sequences=True,
                 output_size=0,
                 config_loss=None,
                 run_mode=C.RUN_STATIC,
                 scale=False,
                 pos_encoding_len=200,
                 autoregressive=False,
                 **kwargs):
        """Creates the input projection, encoder stack and optional output head.

        Args:
          num_layers: forwarded to TransformerEncoder.
          d_model: width of the input Dense projection; also forwarded to
            TransformerEncoder and positional_encoding.
          num_heads: forwarded to TransformerEncoder.
          dff: forwarded to TransformerEncoder.
          rate: forwarded to TransformerEncoder (presumably the dropout
            rate -- confirm against TransformerEncoder).
          return_sequences: stored on the instance; not used here.
          output_size: an output head is created only when this is > 0.
          config_loss: optional mapping; its "loss_type" entry selects the
            head class and "num_components" is read for the GMM head.
          run_mode: forwarded to the base-class constructor.
          scale: stored on the instance; not used here.
          pos_encoding_len: when > 0 a positional encoding of this length is
            precomputed, otherwise pos_encoding stays None.
          autoregressive: stored on the instance; not used here.
          **kwargs: forwarded to the base-class constructor.
        """
        super(TransformerEmbedding, self).__init__(config_loss=config_loss,
                                                   run_mode=run_mode,
                                                   **kwargs)
        # Plain settings remembered for later use.
        self.d_model = d_model
        self.output_size = output_size
        self.scale = scale
        self.pos_encoding_len = pos_encoding_len
        self.return_sequences = return_sequences
        self.autoregressive = autoregressive
        self.pen_threshold = 0.5

        # Sub-layers (creation order kept: projection first, then encoder).
        self.init_embedding = tf.keras.layers.Dense(units=d_model,
                                                    name="stroke")
        self.encoder = TransformerEncoder(num_layers, d_model, num_heads, dff,
                                          rate)

        self.pos_encoding = (positional_encoding(pos_encoding_len, d_model)
                             if pos_encoding_len > 0 else None)

        # Output head: none, deterministic, or one of the stochastic models.
        self.output_layer = None
        wants_head = self.output_size > 0
        if wants_head and config_loss is None:
            self.output_layer = OutputModelDeterministic(
                self.output_size, 0, 0)
        elif wants_head:
            loss_type = config_loss["loss_type"]
            if loss_type == C.NLL_NORMAL:
                self.output_layer = OutputModelNormal(self.output_size,
                                                      logvar=True)
            elif loss_type == C.NLL_BINORMAL:
                self.output_layer = OutputModelNormal2DDense(
                    sigma_activation=tf.keras.activations.exponential)
            elif loss_type == C.NLL_GMM:
                self.output_layer = OutputModelGMMDense(
                    out_units=self.output_size,
                    num_components=config_loss["num_components"],
                    sigma_activation=tf.keras.activations.exponential)
            else:
                # Any other loss type falls back to a deterministic head.
                self.output_layer = OutputModelDeterministic(
                    self.output_size, 0, 0)
Esempio n. 2
0
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 rate=0.1,
                 output_size=0,
                 config_loss=None,
                 run_mode=C.RUN_ESTIMATOR,
                 return_sequence=True,
                 scale=False,
                 pos_encoding_len=200,
                 **kwargs):
        """Creates the decoder stack, input projection and optional output head.

        Args:
          num_layers: forwarded to TransformerDecoder.
          d_model: forwarded to TransformerDecoder and positional_encoding;
            also the size given to the input OutputModelDeterministic.
          num_heads: forwarded to TransformerDecoder.
          dff: forwarded to TransformerDecoder.
          rate: forwarded to TransformerDecoder (presumably the dropout
            rate -- confirm against TransformerDecoder).
          output_size: an output head is created only when this is > 0.
          config_loss: optional mapping; its "loss_type" entry selects the
            head class and "num_components" is read for the GMM head.
          run_mode: forwarded to the base-class constructor.
          return_sequence: stored on the instance; not used here.
          scale: stored on the instance; not used here.
          pos_encoding_len: when > 0 a positional encoding of this length is
            precomputed, otherwise pos_encoding stays None.
          **kwargs: forwarded to the base-class constructor.
        """
        super(TransformerAR, self).__init__(config_loss=config_loss,
                                            run_mode=run_mode,
                                            **kwargs)
        # Plain settings remembered for later use.
        self.d_model = d_model
        self.output_size = output_size
        self.scale = scale
        self.return_sequence = return_sequence
        self.pos_encoding_len = pos_encoding_len

        self.decoder = TransformerDecoder(num_layers, d_model, num_heads, dff,
                                          rate)

        self.pos_encoding = (positional_encoding(pos_encoding_len, d_model)
                             if pos_encoding_len > 0 else None)

        self.input_layer = OutputModelDeterministic(d_model, 0, 0)

        # Output head: none, deterministic, or one of the stochastic models.
        self.output_layer = None
        wants_head = self.output_size > 0
        if wants_head and config_loss is None:
            self.output_layer = OutputModelDeterministic(
                self.output_size, 0, 0)
        elif wants_head:
            loss_type = config_loss["loss_type"]
            if loss_type == C.NLL_NORMAL:
                self.output_layer = OutputModelNormal(self.output_size,
                                                      logvar=True)
            elif loss_type == C.NLL_BINORMAL:
                self.output_layer = OutputModelNormal2DDense(
                    sigma_activation=tf.keras.activations.exponential)
            elif loss_type == C.NLL_GMM:
                self.output_layer = OutputModelGMMDense(
                    out_units=self.output_size,
                    num_components=config_loss["num_components"],
                    sigma_activation=tf.keras.activations.exponential)
            elif loss_type == C.KLD:
                # KLD uses the same head configuration as NLL_NORMAL.
                self.output_layer = OutputModelNormal(self.output_size,
                                                      logvar=True)
            else:
                # Any other loss type falls back to a deterministic head.
                self.output_layer = OutputModelDeterministic(
                    self.output_size, 0, 0)
Esempio n. 3
0
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 rate=0.1,
                 config_loss=None,
                 output_model=C.OUT_DETERMINISTIC,
                 run_mode=C.RUN_ESTIMATOR,
                 scale=False,
                 pos_encoding_len=0,
                 **kwargs):
        """Creates the encoder stack, decoder stack and output head.

        Args:
          num_layers: forwarded to both TransformerEncoder and
            TransformerDecoder.
          d_model: forwarded to both stacks and to positional_encoding; also
            the size given to the output head.
          num_heads: forwarded to both stacks.
          dff: forwarded to both stacks.
          rate: forwarded to both stacks (presumably the dropout rate --
            confirm against the stack implementations).
          config_loss: forwarded to the base-class constructor.
          output_model: C.OUT_NORMAL selects OutputModelNormal; any other
            value selects OutputModelDeterministic.
          run_mode: forwarded to the base-class constructor.
          scale: stored on the instance; not used here.
          pos_encoding_len: when > 0 a positional encoding of this length is
            precomputed, otherwise pos_encoding stays None.
          **kwargs: forwarded to the base-class constructor.
        """
        super(TransformerSeq2Seq, self).__init__(config_loss=config_loss,
                                                 run_mode=run_mode,
                                                 **kwargs)

        self.scale = scale
        self.output_model = output_model
        self.pos_encoding_len = pos_encoding_len

        # Encoder and decoder are configured with the same hyper-parameters.
        stack_args = (num_layers, d_model, num_heads, dff, rate)
        self.encoder = TransformerEncoder(*stack_args)
        self.decoder = TransformerDecoder(*stack_args)

        self.pos_encoding = (positional_encoding(pos_encoding_len, d_model)
                             if pos_encoding_len > 0 else None)

        use_normal_head = self.output_model == C.OUT_NORMAL
        self.output_layer = (OutputModelNormal(d_model, 0, 0, logvar=True)
                             if use_normal_head else
                             OutputModelDeterministic(d_model, 0, 0))
Esempio n. 4
0
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 rate=0.1,
                 config_loss=None,
                 output_model=C.OUT_DETERMINISTIC,
                 run_mode=C.RUN_ESTIMATOR,
                 scale=False,
                 pos_encoding_len=None,
                 **kwargs):
        """Creates the embeddings, decoder stack and pen/stroke output heads.

        Args:
          num_layers: forwarded to TransformerDecoder.
          d_model: forwarded to TransformerDecoder and positional_encoding;
            also the size of the encoder embedding (stored as
            embedding_size).
          num_heads: forwarded to TransformerDecoder.
          dff: forwarded to TransformerDecoder.
          rate: forwarded to TransformerDecoder (presumably the dropout
            rate -- confirm against TransformerDecoder).
          config_loss: forwarded to the base-class constructor. The stroke
            head is chosen from self.config_loss["stroke"]["loss_type"];
            self.config_loss is not assigned in this method, so it is
            presumably set by the base class -- confirm.
          output_model: stored on the instance; not used here.
          run_mode: forwarded to the base-class constructor.
          scale: stored on the instance; not used here.
          pos_encoding_len: required integer despite the None default. It is
            stored as seq_len, sizes the decoder embedding
            (seq_len * decoder_embedding_units) and, when > 0, is the length
            of the precomputed positional encoding.
          **kwargs: forwarded to the base-class constructor.

        Raises:
          TypeError: if pos_encoding_len is None.
        """
        # Fail fast with a readable message: the None default can never work
        # because the value is multiplied and compared with integers below.
        if pos_encoding_len is None:
            raise TypeError(
                "pos_encoding_len must be an integer (it is also used as the "
                "sequence length), got None.")

        super(TransformerPredictive, self).__init__(config_loss=config_loss,
                                                    run_mode=run_mode,
                                                    **kwargs)
        self.output_model = output_model
        self.pos_encoding_len = pos_encoding_len
        self.scale = scale
        self.seq_len = pos_encoding_len
        self.pen_threshold = 0.5
        self.decoder_embedding_units = 16

        self.embedding_size = d_model
        self.encoder_embedding = OutputModelDeterministic(d_model, 0, 0)
        # One flat vector per decoder step, sized seq_len * units.
        self.decoder_embedding = OutputModelDeterministic(
            self.seq_len * self.decoder_embedding_units, 0, 0)
        self.decoder = TransformerDecoder(num_layers, d_model, num_heads, dff,
                                          rate)

        self.pos_encoding = None
        if pos_encoding_len > 0:
            self.pos_encoding = positional_encoding(pos_encoding_len, d_model)

        self.decoder_out_pen = tf.keras.layers.Dense(1,
                                                     activation=None,
                                                     name="out_pen")

        # Build output model depending on the loss type.
        stroke_loss_type = self.config_loss["stroke"]["loss_type"]
        if stroke_loss_type == C.NLL_NORMAL:
            self.decoder_out_stroke = OutputModelNormal(out_units=2,
                                                        hidden_units=0,
                                                        hidden_layers=0)
        elif stroke_loss_type == C.NLL_BINORMAL:
            self.decoder_out_stroke = OutputModelNormal2DDense(
                sigma_activation=tf.keras.activations.exponential)
        elif stroke_loss_type == C.NLL_GMM:
            self.decoder_out_stroke = OutputModelGMMDense(
                out_units=2,
                num_components=self.config_loss["stroke"]["num_components"],
                sigma_activation=tf.keras.activations.exponential)
        else:
            # Any other loss type falls back to a deterministic head.
            self.decoder_out_stroke = OutputModelDeterministic(out_units=2,
                                                               hidden_units=0,
                                                               hidden_layers=0)
Esempio n. 5
0
    def __init__(self,
                 config_rnn,
                 config_loss=None,
                 name="ink_rnn",
                 run_mode=C.RUN_STATIC,
                 **kwargs):
        """Creates up to three RNN layers and the eos/pen/stroke heads.

        Args:
          config_rnn: mapping providing "cell_units", "cell_layers" and
            "cell_type"; these are forwarded to RNNUtils.get_rnn_layer.
          config_loss: loss configuration. Although it defaults to None, it
            is used with `in` and indexed by "stroke" below, so a mapping
            containing a "stroke" entry with a "loss_type" is required.
          name: name given to every RNN layer created here.
          run_mode: forwarded to the base-class constructor.
          **kwargs: forwarded to the base-class constructor.
        """
        super(InkRNN, self).__init__(config_loss=config_loss,
                                     run_mode=run_mode,
                                     **kwargs)

        self.config_rnn = config_rnn
        self.cell_units = self.config_rnn["cell_units"]
        self.cell_layers = self.config_rnn["cell_layers"]
        self.cell_type = self.config_rnn["cell_type"]
        self.config_loss = config_loss
        self.pen_threshold = 0.5

        def _new_rnn_layer():
            # All layers share the same configuration, including the name.
            return RNNUtils.get_rnn_layer(self.cell_type,
                                          self.cell_units,
                                          return_state=True,
                                          return_sequences=True,
                                          stateful=False,
                                          name=name)

        # The first layer always exists; layers 2 and 3 depend on cell_layers.
        self._rnn_layer = _new_rnn_layer()
        self._rnn_layer2 = _new_rnn_layer() if self.cell_layers > 1 else None
        self._rnn_layer3 = _new_rnn_layer() if self.cell_layers > 2 else None

        # Pen, stroke and end-of-sequence outputs.
        if "eos" in config_loss:
            self.out_eos = tf.keras.layers.Dense(
                1,
                name="out_eos",
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.kernel_regularizer)
        else:
            # Without an "eos" entry the attribute holds the literal True.
            self.out_eos = True

        self.out_pen = tf.keras.layers.Dense(1, name="out_pen")

        # Stroke head chosen from the configured stroke loss type.
        stroke_loss_type = self.config_loss["stroke"]["loss_type"]
        if stroke_loss_type == C.NLL_NORMAL:
            self.out_stroke = OutputModelNormal(out_units=2,
                                                hidden_units=0,
                                                hidden_layers=0)
        elif stroke_loss_type == C.NLL_BINORMAL:
            self.out_stroke = OutputModelNormal2DDense(
                sigma_activation=tf.keras.activations.exponential)
        elif stroke_loss_type == C.NLL_GMM:
            self.out_stroke = OutputModelGMMDense(
                out_units=2,
                num_components=self.config_loss["stroke"]["num_components"],
                sigma_activation=tf.keras.activations.exponential)
        else:
            self.out_stroke = OutputModelDeterministic(out_units=2,
                                                       hidden_units=0,
                                                       hidden_layers=0)
Esempio n. 6
0
class InkRNN(BaseModel):
    """Stacked RNN (up to three layers) with pen and stroke output heads."""

    def __init__(self,
                 config_rnn,
                 config_loss=None,
                 name="ink_rnn",
                 run_mode=C.RUN_STATIC,
                 **kwargs):
        """Creates up to three RNN layers and the eos/pen/stroke heads.

        Args:
          config_rnn: mapping providing "cell_units", "cell_layers" and
            "cell_type"; these are forwarded to RNNUtils.get_rnn_layer.
          config_loss: loss configuration. Although it defaults to None, it
            is used with `in` and indexed by "stroke" below, so a mapping
            containing a "stroke" entry with a "loss_type" is required.
          name: name given to every RNN layer created here.
          run_mode: forwarded to the base-class constructor.
          **kwargs: forwarded to the base-class constructor.
        """
        super(InkRNN, self).__init__(config_loss=config_loss,
                                     run_mode=run_mode,
                                     **kwargs)

        self.config_rnn = config_rnn
        self.cell_units = self.config_rnn["cell_units"]
        self.cell_layers = self.config_rnn["cell_layers"]
        self.cell_type = self.config_rnn["cell_type"]
        self.config_loss = config_loss
        self.pen_threshold = 0.5

        self._rnn_layer = RNNUtils.get_rnn_layer(self.cell_type,
                                                 self.cell_units,
                                                 return_state=True,
                                                 return_sequences=True,
                                                 stateful=False,
                                                 name=name)

        self._rnn_layer2 = None
        if self.cell_layers > 1:
            self._rnn_layer2 = RNNUtils.get_rnn_layer(self.cell_type,
                                                      self.cell_units,
                                                      return_state=True,
                                                      return_sequences=True,
                                                      stateful=False,
                                                      name=name)

        self._rnn_layer3 = None
        if self.cell_layers > 2:
            self._rnn_layer3 = RNNUtils.get_rnn_layer(self.cell_type,
                                                      self.cell_units,
                                                      return_state=True,
                                                      return_sequences=True,
                                                      stateful=False,
                                                      name=name)

        # Pen, stroke and end-of-sequence outputs.
        # Without an "eos" entry the attribute keeps the literal True.
        self.out_eos = True
        if "eos" in config_loss:
            self.out_eos = tf.keras.layers.Dense(
                1,
                name="out_eos",
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.kernel_regularizer)

        self.out_pen = tf.keras.layers.Dense(1, name="out_pen")

        # Build output model depending on the loss type.
        if self.config_loss["stroke"]["loss_type"] == C.NLL_NORMAL:
            self.out_stroke = OutputModelNormal(out_units=2,
                                                hidden_units=0,
                                                hidden_layers=0)
        elif self.config_loss["stroke"]["loss_type"] == C.NLL_BINORMAL:
            self.out_stroke = OutputModelNormal2DDense(
                sigma_activation=tf.keras.activations.exponential)
        elif self.config_loss["stroke"]["loss_type"] == C.NLL_GMM:
            self.out_stroke = OutputModelGMMDense(
                out_units=2,
                num_components=self.config_loss["stroke"]["num_components"],
                sigma_activation=tf.keras.activations.exponential)
        else:
            self.out_stroke = OutputModelDeterministic(out_units=2,
                                                       hidden_units=0,
                                                       hidden_layers=0)

    @staticmethod
    def _split_rnn_output(rnn_result):
        """Splits an RNN layer result into (sequence output, list of states).

        The number of returned states is not hard-coded, so a layer that
        yields two states and a layer that yields one are both handled.
        Assumes the layer returns a sequence whose first item is the output
        and whose remaining items are the states -- TODO confirm against
        RNNUtils.get_rnn_layer.
        """
        return rnn_result[0], list(rnn_result[1:])

    def call(self, inputs, training=None, **kwargs):
        """Runs the RNN stack on right-shifted inputs and applies the heads.

        Args:
          inputs: mapping with C.INP_ENC and C.INP_SEQ_LEN entries. Optional
            "rnn_state", "rnn_state2" and "rnn_state3" entries are passed as
            the initial state of the respective layer.
          training: forwarded to every RNN layer.
          **kwargs: unused.

        Returns:
          Dictionary with "stroke", "stroke_logits", "pen_logits",
          "pen_prob" and "pen", plus the final state of every active layer
          under "rnn_state", "rnn_state2" and "rnn_state3". Each state entry
          is the list of state tensors returned by that layer.
        """
        out_dict = dict()
        final_inputs = []
        # Shift the sequence one step to the right, padding with zeros.
        shifted_inputs = tf.concat([
            tf.zeros_like(inputs[C.INP_ENC][:, 0:1]), inputs[C.INP_ENC][:, :-1]
        ],
                                   axis=1)
        # The shift adds one step, capped at the longest length in the batch.
        shifted_len = tf.minimum(tf.reduce_max(inputs[C.INP_SEQ_LEN]),
                                 inputs[C.INP_SEQ_LEN] + 1)
        state = inputs.get("rnn_state", None)
        mask = tf.sequence_mask(shifted_len)
        rnn_out, out_state = self._split_rnn_output(
            self._rnn_layer(shifted_inputs,
                            mask=mask,
                            training=training,
                            initial_state=state))

        out_dict["rnn_state"] = out_state
        final_inputs.append(rnn_out)

        # NOTE(review): layers 2 and 3 are fed the *unshifted* inputs next to
        # the previous layer's output, while layer 1 only sees the shifted
        # sequence. If step-t outputs are trained against the step-t input
        # this exposes the target to the upper layers -- confirm intent.
        if self._rnn_layer2 is not None:
            state2 = inputs.get("rnn_state2", None)
            rnn_inp = tf.concat([rnn_out, inputs[C.INP_ENC]], axis=-1)
            rnn_out, out_state2 = self._split_rnn_output(
                self._rnn_layer2(rnn_inp,
                                 mask=mask,
                                 training=training,
                                 initial_state=state2))
            out_dict["rnn_state2"] = out_state2
            final_inputs.append(rnn_out)

        if self._rnn_layer3 is not None:
            state3 = inputs.get("rnn_state3", None)
            rnn_inp = tf.concat([rnn_out, inputs[C.INP_ENC]], axis=-1)
            rnn_out, out_state3 = self._split_rnn_output(
                self._rnn_layer3(rnn_inp,
                                 mask=mask,
                                 training=training,
                                 initial_state=state3))
            out_dict["rnn_state3"] = out_state3
            final_inputs.append(rnn_out)

        # The heads see the outputs of all active layers side by side.
        rnn_out = tf.concat(final_inputs, axis=-1)
        pen_logits = self.out_pen(rnn_out)
        stroke_logits = self.out_stroke(rnn_out)

        # Calculate pen-up probability from the logits.
        pen_prob = tf.nn.sigmoid(pen_logits)
        pen_binary = tf.compat.v1.where(
            tf.greater(pen_prob,
                       tf.fill(tf.shape(input=pen_prob), self.pen_threshold)),
            tf.fill(tf.shape(input=pen_prob), 1.0),
            tf.fill(tf.shape(input=pen_prob), 0.0))

        stroke_sample = self.out_stroke.draw_sample(stroke_logits, greedy=True)

        out_dict["stroke"] = stroke_sample
        out_dict["stroke_logits"] = stroke_logits
        out_dict["pen_logits"] = pen_logits
        out_dict["pen_prob"] = pen_prob
        out_dict["pen"] = pen_binary
        return out_dict

    @classmethod
    def get_model_tags(cls, config, config_loss=None):
        """Builds a dictionary of short tags describing the model setup.

        Args:
          config: object exposing cell_type, cell_layers and cell_units
            attributes.
          config_loss: mapping with a "stroke" entry holding "loss_type".
            Required despite the None default, since it is indexed directly.

        Returns:
          Dictionary with "encoder", "latent", "decoder", "output" and
          "model_name" entries.
        """
        if config_loss["stroke"]["loss_type"] == C.NLL_NORMAL:
            output = "normal"
        elif config_loss["stroke"]["loss_type"] == C.NLL_BINORMAL:
            output = "binormal"
        elif config_loss["stroke"]["loss_type"] == C.NLL_GMM:
            output = "gmm"
        else:
            # Unknown loss types are reported verbatim.
            output = config_loss["stroke"]["loss_type"]

        rnn = "{}_{}x{}".format(config.cell_type, config.cell_layers,
                                config.cell_units)
        return dict(encoder=rnn,
                    latent="",
                    decoder="",
                    output=output,
                    model_name="InkRNN")
Esempio n. 7
0
class TransformerPredictive(BaseModel):
    """Decoder-only transformer model with a stroke model on strokes.

    The inputs are first projected into a stroke space. It operates on
    the entire sequence to predict the next step. Look-ahead masks are used
    to ensure that no future information is leaked.
    The decoder uses the input steps until the current step as both of its
    sequence inputs.
    """
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 rate=0.1,
                 config_loss=None,
                 output_model=C.OUT_DETERMINISTIC,
                 run_mode=C.RUN_ESTIMATOR,
                 scale=False,
                 pos_encoding_len=None,
                 **kwargs):
        """Creates the embeddings, decoder stack and pen/stroke output heads.

        Args:
          num_layers: forwarded to TransformerDecoder.
          d_model: forwarded to TransformerDecoder and positional_encoding;
            also the size of the encoder embedding (stored as
            embedding_size).
          num_heads: forwarded to TransformerDecoder.
          dff: forwarded to TransformerDecoder.
          rate: forwarded to TransformerDecoder (presumably the dropout
            rate -- confirm against TransformerDecoder).
          config_loss: forwarded to the base-class constructor. The stroke
            head is chosen from self.config_loss["stroke"]["loss_type"];
            self.config_loss is not assigned in this method, so it is
            presumably set by the base class -- confirm.
          output_model: stored on the instance; not used in this class.
          run_mode: forwarded to the base-class constructor.
          scale: stored on the instance and passed to the decoder in call().
          pos_encoding_len: required integer despite the None default. It is
            stored as seq_len, sizes the decoder embedding
            (seq_len * decoder_embedding_units) and, when > 0, is the length
            of the precomputed positional encoding.
          **kwargs: forwarded to the base-class constructor.

        Raises:
          TypeError: if pos_encoding_len is None.
        """
        # Fail fast with a readable message: the None default can never work
        # because the value is multiplied and compared with integers below.
        if pos_encoding_len is None:
            raise TypeError(
                "pos_encoding_len must be an integer (it is also used as the "
                "sequence length), got None.")

        super(TransformerPredictive, self).__init__(config_loss=config_loss,
                                                    run_mode=run_mode,
                                                    **kwargs)
        self.output_model = output_model
        self.pos_encoding_len = pos_encoding_len
        self.scale = scale
        self.seq_len = pos_encoding_len
        self.pen_threshold = 0.5
        self.decoder_embedding_units = 16

        self.embedding_size = d_model
        self.encoder_embedding = OutputModelDeterministic(d_model, 0, 0)
        # One flat vector per decoder step, sized seq_len * units.
        self.decoder_embedding = OutputModelDeterministic(
            self.seq_len * self.decoder_embedding_units, 0, 0)
        self.decoder = TransformerDecoder(num_layers, d_model, num_heads, dff,
                                          rate)

        self.pos_encoding = None
        if pos_encoding_len > 0:
            self.pos_encoding = positional_encoding(pos_encoding_len, d_model)

        self.decoder_out_pen = tf.keras.layers.Dense(1,
                                                     activation=None,
                                                     name="out_pen")

        # Build output model depending on the loss type.
        if self.config_loss["stroke"]["loss_type"] == C.NLL_NORMAL:
            self.decoder_out_stroke = OutputModelNormal(out_units=2,
                                                        hidden_units=0,
                                                        hidden_layers=0)
        elif self.config_loss["stroke"]["loss_type"] == C.NLL_BINORMAL:
            self.decoder_out_stroke = OutputModelNormal2DDense(
                sigma_activation=tf.keras.activations.exponential)
        elif self.config_loss["stroke"]["loss_type"] == C.NLL_GMM:
            self.decoder_out_stroke = OutputModelGMMDense(
                out_units=2,
                num_components=self.config_loss["stroke"]["num_components"],
                sigma_activation=tf.keras.activations.exponential)
        else:
            self.decoder_out_stroke = OutputModelDeterministic(out_units=2,
                                                               hidden_units=0,
                                                               hidden_layers=0)

    def call(self, inputs, seq_len=None, training=None, **kwargs):
        """Embeds strokes, runs the masked decoder and applies the heads.

        Args:
          inputs: mapping with C.INP_ENC and C.INP_NUM_STROKE entries.
          seq_len: unused; the length fixed at construction (self.seq_len)
            is used instead.
          training: forwarded to the decoder.
          **kwargs: unused.

        Returns:
          Dictionary with "stroke", "stroke_logits", "pen_logits",
          "pen_prob" and "pen".
        """
        inp = inputs[C.INP_ENC]
        input_num_strokes = inputs[C.INP_NUM_STROKE]

        # Flatten every stroke to one vector of seq_len * 3 values
        # (presumably three channels per step -- confirm with the data
        # pipeline).
        flat_strokes = tf.reshape(inp,
                                  (tf.shape(input=inp)[0], self.seq_len * 3))
        embedding = self.encoder_embedding(flat_strokes)

        diagram_embedding = self.batch_stroke_to_diagram(
            embedding, input_num_strokes)

        inp = diagram_embedding["mu"]
        _, look_ahead_mask, _ = create_masks(inp[:, :, 0], inp[:, :, 0],
                                             input_num_strokes)
        # The look-ahead mask doubles as the decoder padding mask.
        dec_padding_mask = look_ahead_mask

        # Shift the stroke sequence one step to the right, padding with zeros.
        inp = tf.concat([tf.zeros_like(inp[:, 0:1]), inp[:, 0:-1]], axis=1)
        # The same shifted sequence is passed as both decoder inputs.
        dec_output, _ = self.decoder(inp,
                                     inp,
                                     training,
                                     look_ahead_mask,
                                     dec_padding_mask,
                                     pos_encoding=self.pos_encoding,
                                     scale=self.scale)

        # Expand each decoder step into seq_len vectors of
        # decoder_embedding_units values.
        dec_output = self.decoder_embedding(dec_output)["mu"]
        dec_output = tf.reshape(
            dec_output, (-1, self.seq_len, self.decoder_embedding_units))

        stroke_logits = self.decoder_out_stroke(dec_output)
        pen_logits = self.decoder_out_pen(dec_output)

        # Calculate pen-up probability from the logits.
        pen_prob = tf.nn.sigmoid(pen_logits)
        pen_binary = tf.compat.v1.where(
            tf.greater(pen_prob,
                       tf.fill(tf.shape(input=pen_prob), self.pen_threshold)),
            tf.fill(tf.shape(input=pen_prob), 1.0),
            tf.fill(tf.shape(input=pen_prob), 0.0))
        stroke_sample = self.decoder_out_stroke.draw_sample(stroke_logits,
                                                            greedy=True)
        return dict(stroke=stroke_sample,
                    stroke_logits=stroke_logits,
                    pen_logits=pen_logits,
                    pen_prob=pen_prob,
                    pen=pen_binary)

    @classmethod
    def get_experiment_name(cls, config):
        """Generates a string summarizing experiment parameters.

        Args:
          config: object exposing data.data_name, data.batch_size,
            experiment.learning_rate, experiment.tag and the model fields
            latent_units, layers, hidden_units, heads and dropout_rate.

        Returns:
          String of the form "{tag}EMB_{model}-{experiment}-{data}", where
          the tag prefix (tag plus "_") is omitted when the tag is falsy.
        """
        template = "{tag}EMB_{model}-{experiment}-{data}"

        data = config.data.data_name

        model = "TR_{}_{}x{}-head_{}-drop_{}".format(config.model.latent_units,
                                                     config.model.layers,
                                                     config.model.hidden_units,
                                                     config.model.heads,
                                                     config.model.dropout_rate)

        experiment = "B{}_LR{}".format(config.data.batch_size,
                                       config.experiment.learning_rate)

        return template.format(
            tag=config.experiment.tag + "_" if config.experiment.tag else "",
            experiment=experiment,
            model=model,
            data=data,
        )

    def batch_stroke_to_diagram(self, stroke_embedding, num_strokes):
        """Reshapes embeddings from batch of strokes to batch of diagrams.

        Args:
          stroke_embedding: Tensor of [num_diagrams x num_strokes,
            embedding_size] or dictionary of tensors with the same shape.
          num_strokes: [num_diagrams]

        Returns:
          Diagram as sequence of stroke embeddings [num_diagrams,
          num_strokes, embedding_size]; a dictionary input yields a
          dictionary with every value reshaped that way.
        """
        # Every diagram is laid out with the largest stroke count.
        padded_num_strokes = tf.reduce_max(input_tensor=num_strokes)
        num_diagrams = tf.shape(input=num_strokes)[0]
        diagram_shape = [num_diagrams, padded_num_strokes, self.embedding_size]

        if isinstance(stroke_embedding, tf.Tensor):
            return tf.reshape(stroke_embedding, diagram_shape)
        return {
            key_: tf.reshape(value_, diagram_shape)
            for key_, value_ in stroke_embedding.items()
        }

    def batch_diagram_to_stroke(self, diagram_embedding):
        """Reshapes embeddings from batch of diagrams to batch of strokes.

        Args:
          diagram_embedding: Tensor or dictionary of tensors with shape
            [num_diagrams, num_strokes, embedding_size]

        Returns:
          Batch of embeddings [num_diagrams x num_strokes, 1,
          embedding_size]; a dictionary input yields a dictionary with every
          value reshaped that way.
        """
        stroke_shape = [-1, 1, self.embedding_size]
        if isinstance(diagram_embedding, tf.Tensor):
            return tf.reshape(diagram_embedding, stroke_shape)
        return {
            key_: tf.reshape(value_, stroke_shape)
            for key_, value_ in diagram_embedding.items()
        }
Esempio n. 8
0
    def __init__(self,
                 output_size,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 rate=0.1,
                 config_loss=None,
                 run_mode=C.RUN_ESTIMATOR,
                 scale=False,
                 pos_encoding_len=0,
                 autoregressive=False,
                 pooling="last_step",
                 **kwargs):
        """Creates the input layer, attention stack, dense decoder and head.

        Args:
          output_size: an output head is created only when this is > 0.
          num_layers: forwarded to TransformerDecoder.
          d_model: width of the input DenseLayers; also forwarded to
            TransformerDecoder and positional_encoding.
          num_heads: forwarded to TransformerDecoder.
          dff: forwarded to TransformerDecoder.
          rate: forwarded to TransformerDecoder (presumably the dropout
            rate -- confirm against TransformerDecoder).
          config_loss: optional mapping; its "loss_type" entry selects the
            head class and "num_components" is read for the GMM head.
          run_mode: forwarded to the base-class constructor.
          scale: stored on the instance; not used here.
          pos_encoding_len: when > 0 a positional encoding of this length is
            precomputed, otherwise pos_encoding stays None.
          autoregressive: stored on the instance; not used here.
          pooling: "last_step" selects self.pool_last_step, "mean" selects
            self.pool_mean, anything else leaves pooling_layer as None.
          **kwargs: forwarded to the base-class constructor.
        """
        super(TransformerSeq2seqConditional,
              self).__init__(config_loss=config_loss,
                             run_mode=run_mode,
                             **kwargs)

        # Plain settings remembered for later use.
        self.scale = scale
        self.output_size = output_size
        self.autoregressive = autoregressive
        self.pos_encoding_len = pos_encoding_len

        self.input_layer = DenseLayers([d_model])
        # Note: the attribute is called "encoder" but holds a
        # TransformerDecoder stack.
        self.encoder = TransformerDecoder(num_layers, d_model, num_heads, dff,
                                          rate)
        self.decoder = DenseLayers([512, 256],
                                   output_activation=tf.keras.activations.relu)

        self.pos_encoding = (positional_encoding(pos_encoding_len, d_model)
                             if pos_encoding_len > 0 else None)

        # Pooling strategy; unknown names leave it unset.
        self.pooling_layer = None
        if pooling == "last_step":
            self.pooling_layer = self.pool_last_step
        elif pooling == "mean":
            self.pooling_layer = self.pool_mean

        # Output head: none, deterministic, or one of the stochastic models.
        self.output_layer = None
        wants_head = self.output_size > 0
        if wants_head and config_loss is None:
            self.output_layer = OutputModelDeterministic(
                self.output_size, 0, 0)
        elif wants_head:
            loss_type = config_loss["loss_type"]
            if loss_type == C.NLL_NORMAL:
                self.output_layer = OutputModelNormal(self.output_size,
                                                      logvar=True)
            elif loss_type == C.NLL_BINORMAL:
                self.output_layer = OutputModelNormal2DDense(
                    sigma_activation=tf.keras.activations.exponential)
            elif loss_type == C.NLL_GMM:
                self.output_layer = OutputModelGMMDense(
                    out_units=self.output_size,
                    num_components=config_loss["num_components"],
                    sigma_activation=tf.keras.activations.exponential)
            elif loss_type == C.KLD:
                # KLD uses the same head configuration as NLL_NORMAL.
                self.output_layer = OutputModelNormal(self.output_size,
                                                      logvar=True)
            else:
                # Any other loss type falls back to a deterministic head.
                self.output_layer = OutputModelDeterministic(
                    self.output_size, 0, 0)
Esempio n. 9
0
  def __init__(self,
               config_encoder,
               config_embedding,
               config_decoder,
               config_loss,
               run_mode=C.RUN_ESTIMATOR,
               **kwargs):
    """Builds the encoder, embedding and decoder layers of the model.

    Args:
      config_encoder: mapping with the keys "cell_units", "cell_layers",
        "cell_type" and "bidirectional_encoder". "rec_dropout_rate" is
        optional and defaults to 0.0.
      config_embedding: mapping with the keys "latent_units" and "use_vae".
      config_decoder: mapping read with `.get`. Optional keys (default):
        "dropout_rate" (0), "repeat_vae_sample" (False), "autoregressive"
        (False) and "dynamic_h0" (False).
      config_loss: nested mapping. ["pen"]["eval_only"] and
        ["stroke"]["loss_type"] are read here, as well as
        ["stroke"]["num_components"] when the loss type is C.NLL_GMM.
      run_mode: eager, static or estimator. Forwarded to the base class.
      **kwargs: forwarded to the base class constructor.

    Raises:
      ValueError: if run_mode is eager and tf.executing_eagerly() is False
        (presumably raised by the base class; not visible here).
      Exception: if dynamic_h0 is enabled and "cell_layers" is not 1.
    """
    super(InkSeq2Seq, self).__init__(
        config_loss=config_loss, run_mode=run_mode, **kwargs)

    # Pen probabilities above this value are binarized to 1 when decoding.
    self.pen_threshold = 0.5
    self.config_encoder = config_encoder
    self.config_embedding = config_embedding
    self.config_decoder = config_decoder

    # The decoder RNN layers below reuse the encoder's cell type, unit and
    # layer counts; there is no separate decoder setting for them.
    self.n_cell_units = self.config_encoder["cell_units"]
    self.n_cell_layers = self.config_encoder["cell_layers"]
    self.cell_type = self.config_encoder["cell_type"]
    self.bidirectional_encoder = self.config_encoder["bidirectional_encoder"]
    self.recurrent_dropout = self.config_encoder.get("rec_dropout_rate", 0.0)

    self.n_latent_units = self.config_embedding["latent_units"]
    self.use_vae = self.config_embedding["use_vae"]

    self.decoder_drop_rate = self.config_decoder.get("dropout_rate", 0)
    self.repeat_vae_sample = self.config_decoder.get("repeat_vae_sample", False)
    # A non-autoregressive decoder is conditioned on the embedding only.
    self.embedding_only = not self.config_decoder.get("autoregressive", False)
    self.dynamic_h0 = self.config_decoder.get("dynamic_h0", False)
    # Prefix of the keys of the latent distribution dictionary.
    self.latent_prefix = ""

    # The dynamic initial state is only ever written to the first RNN layer.
    if self.dynamic_h0 and self.n_cell_layers != 1:
      raise Exception("# rnn layers must be 1 for dynamic h0.")

    # RNN layer containers.
    self.encoder_rnn = list()
    self.decoder_rnn = list()

    # Encoder network:
    # A bidirectional encoder concatenates both directions, hence each
    # direction gets half of the units.
    enc_rnn_units = self.n_cell_units
    if self.bidirectional_encoder:
      enc_rnn_units = enc_rnn_units//2
    for idx in range(self.n_cell_layers):
      rnn_layer = RNNUtils.get_rnn_layer(
          self.cell_type,
          enc_rnn_units,
          return_state=True,
          return_sequences=True,
          stateful=False,
          name="encoder_rnn_" + str(idx + 1),
          recurrent_dropout=self.recurrent_dropout)

      if self.bidirectional_encoder:
        rnn_layer = tf.keras.layers.Bidirectional(
            rnn_layer, merge_mode="concat")
      self.encoder_rnn.append(rnn_layer)

    # Deterministic or stochastic embedding.
    if self.use_vae:
      self.net_embedding = OutputModelNormal(
          out_units=self.n_latent_units,
          prefix=self.latent_prefix,
          sigma_activation=None,
          logvar=True)
    else:
      self.net_embedding = OutputModelDeterministic(
          out_units=self.n_latent_units,
          hidden_units=self.n_latent_units*2,
          hidden_layers=0,
          prefix=self.latent_prefix)

    # Decoder network:
    # Embedding (+input) -> Dense -> RNN layers -> Reconstruction.
    # No layers are added to this Sequential container in the constructor.
    self.decoder_inp_dense = tf.keras.Sequential(name="decoder_inp")

    # RNN state is required for autoregressive prediction.
    for idx in range(self.n_cell_layers):
      rnn_layer = RNNUtils.get_rnn_layer(
          self.cell_type,
          self.n_cell_units,
          return_state=True,
          return_sequences=True,
          stateful=False,
          name="decoder_rnn_" + str(idx + 1))
      self.decoder_rnn.append(rnn_layer)

    # Pen and stroke outputs.
    # No pen output layer is created when the pen loss is evaluation-only.
    if config_loss["pen"]["eval_only"]:
      self.decoder_out_pen = None
    else:
      self.decoder_out_pen = tf.keras.layers.Dense(1, activation=None, name="out_pen")

    # Build output model depending on the loss type.
    # NOTE(review): self.config_loss is presumably assigned by the base class
    # constructor from the config_loss argument - confirm.
    if self.config_loss["stroke"]["loss_type"] == C.NLL_NORMAL:
      self.decoder_out_stroke = OutputModelNormal(
          out_units=2, hidden_units=0, hidden_layers=0)
    elif self.config_loss["stroke"]["loss_type"] == C.NLL_BINORMAL:
      self.decoder_out_stroke = OutputModelNormal2DDense(
          sigma_activation=tf.keras.activations.exponential)
    elif self.config_loss["stroke"]["loss_type"] == C.NLL_GMM:
      self.decoder_out_stroke = OutputModelGMMDense(
          out_units=2,
          num_components=self.config_loss["stroke"]["num_components"],
          sigma_activation=tf.keras.activations.exponential)
    else:
      self.decoder_out_stroke = OutputModelDeterministic(out_units=2,
                                                         hidden_units=0,
                                                         hidden_layers=0)
    # Optional dropout layer for the decoder inputs.
    self.decoder_inp_dropout = None
    if self.decoder_drop_rate > 0:
      self.decoder_inp_dropout = tf.keras.layers.Dropout(self.decoder_drop_rate)

    # Optional network predicting the initial state of the first decoder RNN
    # layer from the embedding. It outputs 2 * n_cell_units values.
    if self.dynamic_h0:
      init_ = tf.compat.v1.random_normal_initializer(stddev=0.001)
      dense_ = tf.keras.layers.Dense(
          self.n_cell_units*2,
          activation=tf.keras.activations.tanh,
          kernel_initializer=init_)
      self.decoder_state_nn = dense_
    else:
      self.decoder_state_nn = None

    # Variables for static mode. They are assigned in call method.
    # TODO We can get rid of them if autoregressive sampling is no
    #  longer required in static (graph) mode.
    self.op_encoder_inputs = None
    self.op_decoder_inputs = None
    self.op_input_seq_len = None
    self.op_embedding = None
    # self.op_decoder_initial_state = None
    self.op_embedding_sample = None
Esempio n. 10
0
class InkSeq2Seq(BaseModel):
  """A sequence to sequence model.

  The encoder and decoder networks are created by
  stacking RNN layers. Hence, it supports fast RNN
  layer implementations backed by cuDNN.
  The cuDNN variant is much faster, but it only works
  with a GPU. A model trained on GPU can't be restored
  on CPU.
  """
  
  def __init__(self,
               config_encoder,
               config_embedding,
               config_decoder,
               config_loss,
               run_mode=C.RUN_ESTIMATOR,
               **kwargs):
    """Builds the encoder, embedding and decoder layers of the model.

    Args:
      config_encoder: mapping with the keys "cell_units", "cell_layers",
        "cell_type" and "bidirectional_encoder". "rec_dropout_rate" is
        optional and defaults to 0.0.
      config_embedding: mapping with the keys "latent_units" and "use_vae".
      config_decoder: mapping read with `.get`. Optional keys (default):
        "dropout_rate" (0), "repeat_vae_sample" (False), "autoregressive"
        (False) and "dynamic_h0" (False).
      config_loss: nested mapping. ["pen"]["eval_only"] and
        ["stroke"]["loss_type"] are read here, as well as
        ["stroke"]["num_components"] when the loss type is C.NLL_GMM.
      run_mode: eager, static or estimator. Forwarded to the base class.
      **kwargs: forwarded to the base class constructor.

    Raises:
      ValueError: if run_mode is eager and tf.executing_eagerly() is False
        (presumably raised by the base class; not visible here).
      Exception: if dynamic_h0 is enabled and "cell_layers" is not 1.
    """
    super(InkSeq2Seq, self).__init__(
        config_loss=config_loss, run_mode=run_mode, **kwargs)

    # Pen probabilities above this value are binarized to 1 when decoding.
    self.pen_threshold = 0.5
    self.config_encoder = config_encoder
    self.config_embedding = config_embedding
    self.config_decoder = config_decoder

    # The decoder RNN layers below reuse the encoder's cell type, unit and
    # layer counts; there is no separate decoder setting for them.
    self.n_cell_units = self.config_encoder["cell_units"]
    self.n_cell_layers = self.config_encoder["cell_layers"]
    self.cell_type = self.config_encoder["cell_type"]
    self.bidirectional_encoder = self.config_encoder["bidirectional_encoder"]
    self.recurrent_dropout = self.config_encoder.get("rec_dropout_rate", 0.0)

    self.n_latent_units = self.config_embedding["latent_units"]
    self.use_vae = self.config_embedding["use_vae"]

    self.decoder_drop_rate = self.config_decoder.get("dropout_rate", 0)
    self.repeat_vae_sample = self.config_decoder.get("repeat_vae_sample", False)
    # A non-autoregressive decoder is conditioned on the embedding only.
    self.embedding_only = not self.config_decoder.get("autoregressive", False)
    self.dynamic_h0 = self.config_decoder.get("dynamic_h0", False)
    # Prefix of the keys of the latent distribution dictionary.
    self.latent_prefix = ""

    # The dynamic initial state is only ever written to the first RNN layer
    # (see call_decode and decode_sequence).
    if self.dynamic_h0 and self.n_cell_layers != 1:
      raise Exception("# rnn layers must be 1 for dynamic h0.")

    # RNN layer containers.
    self.encoder_rnn = list()
    self.decoder_rnn = list()

    # Encoder network:
    # A bidirectional encoder concatenates both directions, hence each
    # direction gets half of the units.
    enc_rnn_units = self.n_cell_units
    if self.bidirectional_encoder:
      enc_rnn_units = enc_rnn_units//2
    for idx in range(self.n_cell_layers):
      rnn_layer = RNNUtils.get_rnn_layer(
          self.cell_type,
          enc_rnn_units,
          return_state=True,
          return_sequences=True,
          stateful=False,
          name="encoder_rnn_" + str(idx + 1),
          recurrent_dropout=self.recurrent_dropout)

      if self.bidirectional_encoder:
        rnn_layer = tf.keras.layers.Bidirectional(
            rnn_layer, merge_mode="concat")
      self.encoder_rnn.append(rnn_layer)

    # Deterministic or stochastic embedding.
    if self.use_vae:
      self.net_embedding = OutputModelNormal(
          out_units=self.n_latent_units,
          prefix=self.latent_prefix,
          sigma_activation=None,
          logvar=True)
    else:
      self.net_embedding = OutputModelDeterministic(
          out_units=self.n_latent_units,
          hidden_units=self.n_latent_units*2,
          hidden_layers=0,
          prefix=self.latent_prefix)

    # Decoder network:
    # Embedding (+input) -> Dense -> RNN layers -> Reconstruction.
    # No layers are added to this Sequential container in the constructor.
    self.decoder_inp_dense = tf.keras.Sequential(name="decoder_inp")

    # RNN state is required for autoregressive prediction.
    for idx in range(self.n_cell_layers):
      rnn_layer = RNNUtils.get_rnn_layer(
          self.cell_type,
          self.n_cell_units,
          return_state=True,
          return_sequences=True,
          stateful=False,
          name="decoder_rnn_" + str(idx + 1))
      self.decoder_rnn.append(rnn_layer)

    # Pen and stroke outputs.
    # No pen output layer is created when the pen loss is evaluation-only;
    # call_decode then falls back to a random pen signal.
    if config_loss["pen"]["eval_only"]:
      self.decoder_out_pen = None
    else:
      self.decoder_out_pen = tf.keras.layers.Dense(1, activation=None, name="out_pen")

    # Build output model depending on the loss type.
    # NOTE(review): self.config_loss is presumably assigned by the base class
    # constructor from the config_loss argument - confirm.
    if self.config_loss["stroke"]["loss_type"] == C.NLL_NORMAL:
      self.decoder_out_stroke = OutputModelNormal(
          out_units=2, hidden_units=0, hidden_layers=0)
    elif self.config_loss["stroke"]["loss_type"] == C.NLL_BINORMAL:
      self.decoder_out_stroke = OutputModelNormal2DDense(
          sigma_activation=tf.keras.activations.exponential)
    elif self.config_loss["stroke"]["loss_type"] == C.NLL_GMM:
      self.decoder_out_stroke = OutputModelGMMDense(
          out_units=2,
          num_components=self.config_loss["stroke"]["num_components"],
          sigma_activation=tf.keras.activations.exponential)
    else:
      self.decoder_out_stroke = OutputModelDeterministic(out_units=2,
                                                         hidden_units=0,
                                                         hidden_layers=0)
    # Optional dropout layer for the decoder inputs.
    self.decoder_inp_dropout = None
    if self.decoder_drop_rate > 0:
      self.decoder_inp_dropout = tf.keras.layers.Dropout(self.decoder_drop_rate)

    # Optional network predicting the initial state of the first decoder RNN
    # layer from the embedding. It outputs 2 * n_cell_units values, which the
    # decoding methods split into two state tensors.
    if self.dynamic_h0:
      init_ = tf.compat.v1.random_normal_initializer(stddev=0.001)
      dense_ = tf.keras.layers.Dense(
          self.n_cell_units*2,
          activation=tf.keras.activations.tanh,
          kernel_initializer=init_)
      self.decoder_state_nn = dense_
    else:
      self.decoder_state_nn = None

    # Variables for static mode. They are assigned in call method.
    # TODO We can get rid of them if autoregressive sampling is no
    #  longer required in static (graph) mode.
    self.op_encoder_inputs = None
    self.op_decoder_inputs = None
    self.op_input_seq_len = None
    self.op_embedding = None
    # self.op_decoder_initial_state = None
    self.op_embedding_sample = None
  
  def call(self, inputs, output_len=None, training=None, **kwargs):
    """Runs the encoder and then the decoder on a batch.

    The encoder inputs and their sequence lengths are always read from
    `inputs`. The decoder inputs are read from `inputs` only when
    `output_len` is None; in that case the output length is derived from
    them inside call_decode. When `output_len` is passed, the decoder gets
    no inputs and produces `output_len` steps.

    The tensors read and computed here are also stored on the instance
    (op_encoder_inputs, op_decoder_inputs, op_input_seq_len, op_embedding).

    Args:
      inputs (dict): must contain C.INP_ENC and C.INP_SEQ_LEN, and C.INP_DEC
        when output_len is None.
      output_len (int): length of the output sequence. If None, it is
        determined from the decoder input sequence.
      training: whether in training mode or not.
      **kwargs: unused.

    Returns:
      The dictionary returned by call_decode, extended with the entries
      "embedding" and "embedding_sample".
    """
    self.op_encoder_inputs = inputs[C.INP_ENC]
    if output_len is None:
      self.op_decoder_inputs = inputs[C.INP_DEC]
    else:
      self.op_decoder_inputs = None
    self.op_input_seq_len = inputs[C.INP_SEQ_LEN]

    # At least one of the two must define how many steps are decoded.
    assert (self.op_decoder_inputs is not None or
            output_len is not None), "Output length is undetermined."

    self.op_embedding = self.call_encode(
        self.op_encoder_inputs, self.op_input_seq_len, training)

    # The embedding predictions (not a sample) are handed over, since the
    # decoder needs the embedding distribution in the VAE case.
    result = self.call_decode(self.op_embedding,
                              decoder_inputs=self.op_decoder_inputs,
                              output_len=output_len,
                              decoder_rnn_state=None,
                              training=training)

    result["embedding"] = self.op_embedding
    result["embedding_sample"] = self.op_embedding_sample
    return result
  
  def call_encode(self, inputs, input_seq_len, training):
    """Calculates the stroke embedding.

    Only the first layer in self.encoder_rnn is applied.

    Args:
      inputs: encoder input sequences.
      input_seq_len: length of every input sequence; zero lengths are
        treated as one when the mask is built.
      training: whether in training mode or not.

    Returns:
      The output of self.net_embedding for the final encoder state.
      NOTE(review): an earlier docstring stated the shape as
      [batch_size, 1, latent_size], while call_decode documents
      (batch_size, n_latent_units) - confirm.
    """
    first_layer = self.encoder_rnn[0]
    # Replace zero lengths by one so that every mask row keeps one valid step.
    # safe_seq_len = tf.where(input_seq_len == 0, 1, input_seq_len)
    safe_seq_len = tf.compat.v1.where(
        input_seq_len == 0, tf.ones_like(input_seq_len), input_seq_len)
    step_mask = tf.sequence_mask(safe_seq_len)
    rnn_outputs = first_layer(inputs, mask=step_mask, training=training)

    # Index 0 holds the output sequence; the returned states follow.
    last_step = rnn_outputs[1]
    if self.bidirectional_encoder:
      # Indices 1 and 3 are presumably the forward and backward hidden
      # states of an LSTM-style state layout - TODO confirm for other cells.
      last_step = tf.concat([last_step, rnn_outputs[3]], axis=-1)

    return self.net_embedding(last_step, training=training)
  
  def call_decode(self,
                  embedding,
                  decoder_inputs=None,
                  output_len=None,
                  decoder_rnn_state=None,
                  training=None):
    """Reconstructs a stroke sequence from an embedding in a single pass.

    If embedding_only is True, then decoder_inputs is only used to determine
    the output length. Otherwise the decoder inputs are concatenated with the
    embedding at every step. The embedding sample that is drawn (or passed) is
    stored in self.op_embedding_sample.

    Args:
      embedding: (batch_size, n_latent_units) tensor, or a dictionary of
        distribution parameters understood by self.net_embedding.draw_sample.
      decoder_inputs: decoder input sequence. When given, its second
        dimension overrides output_len.
      output_len: number of steps to decode if decoder_inputs is None.
      decoder_rnn_state: list with one initial state per decoder RNN layer.
        If None, the initial states are created here.
      training: whether in training mode or not.

    Returns:
      A dictionary with the entries "stroke", "stroke_logits", "pen_logits",
      "pen_prob", "pen" and "decoder_state" (final states of every layer).

    Raises:
      ValueError: if the decoder is autoregressive and decoder_inputs is None.
    """
    if isinstance(embedding, dict):
      self.op_embedding_sample = self.net_embedding.draw_sample(embedding)
    else:
      self.op_embedding_sample = embedding

    # Add the time axis: (batch, latent) -> (batch, 1, latent).
    embedding_sample = tf.expand_dims(self.op_embedding_sample, axis=1)

    if decoder_rnn_state is None:
      decoder_rnn_state = RNNUtils.get_initial_states_layer(
          self.decoder_rnn, embedding_sample)
      if self.dynamic_h0:
        # Predict the two state tensors of the first layer from the embedding.
        predicted_state = self.decoder_state_nn(embedding_sample[:, 0])
        decoder_rnn_state[0] = tf.split(predicted_state, 2, axis=-1)
    if decoder_inputs is not None:
      output_len = tf.shape(input=decoder_inputs)[1]

    if not self.embedding_only and decoder_inputs is None:
      # Fail early with a clear message instead of an obscure error raised
      # by the dropout layer or tf.concat below.
      raise ValueError(
          "decoder_inputs is required when the decoder is autoregressive.")

    # Prepare decoder input.
    if not self.use_vae or self.repeat_vae_sample or not isinstance(embedding, dict):
      # Use the same latent sample in all decoder steps.
      embedding_seq = tf.tile(embedding_sample, (1, output_len, 1))
    else:
      # Draw a new latent sample per decoding step. This branch is only
      # reached when the embedding is a distribution dictionary.
      embedding_seq = dict()
      mu_ = tf.expand_dims(embedding[self.latent_prefix + C.MU], axis=1)
      embedding_seq[self.latent_prefix + C.MU] = tf.tile(mu_,
                                                         (1, output_len, 1))
      # Sigma is optional; it must be checked before it is expanded since
      # tf.expand_dims cannot handle None.
      sigma_ = embedding.get(self.latent_prefix + C.SIGMA, None)
      if sigma_ is not None:
        sigma_ = tf.expand_dims(sigma_, axis=1)
        embedding_seq[self.latent_prefix + C.SIGMA] = tf.tile(sigma_,
                                                              (1, output_len, 1))
      embedding_seq = self.net_embedding.draw_sample(embedding_seq)

    if self.embedding_only:
      emb_dec_input = embedding_seq
    else:
      if self.decoder_inp_dropout is not None:
        decoder_inputs = self.decoder_inp_dropout(decoder_inputs,
                                                  training=training)
      emb_dec_input = tf.concat([embedding_seq, decoder_inputs], axis=-1)

    # Running decoder.
    decoder_state = list()
    decoder_hidden = [self.decoder_inp_dense(emb_dec_input)]
    for idx, rnn_layer in enumerate(self.decoder_rnn):
      # Every layer returns [output sequence, *states].
      decoder_hidden = rnn_layer(
          decoder_hidden[0],
          initial_state=decoder_rnn_state[idx],
          training=training)
      decoder_state.append(decoder_hidden[1:])

    stroke_logits = self.decoder_out_stroke(decoder_hidden[0])

    if self.decoder_out_pen is not None:
      pen_logits = self.decoder_out_pen(decoder_hidden[0])
      # Calculate pen-up probability from the logits.
      pen_prob = tf.nn.sigmoid(pen_logits)
      pen_binary = tf.compat.v1.where(
          tf.greater(pen_prob, tf.fill(tf.shape(input=pen_prob), self.pen_threshold)),
          tf.fill(tf.shape(input=pen_prob), 1.0), tf.fill(tf.shape(input=pen_prob), 0.0))
    else:
      # There is no pen layer (pen loss is evaluation-only): emit constant
      # logits and a uniformly random pen signal.
      pen_logits = tf.ones_like(stroke_logits["mu"][:, :, 0:1])
      pen_prob = tf.random.uniform(tf.shape(pen_logits))
      pen_binary = tf.cast(tf.greater(pen_prob, 0.5), dtype=tf.float32)

    stroke_sample = self.decoder_out_stroke.draw_sample(stroke_logits, greedy=True)

    return dict(
        stroke=stroke_sample,
        stroke_logits=stroke_logits,
        pen_logits=pen_logits,
        pen_prob=pen_prob,
        pen=pen_binary,
        decoder_state=decoder_state)
  
  def decode_sequence(self,
                      embedding,
                      seq_len,
                      decoder_input=None,
                      decoder_state=None):
    """Decodes a stroke sequence from an embedding at inference time.

    If the decoder is not autoregressive (embedding_only) or decoder_input is
    given, the whole sequence is decoded in one call_decode pass; in that
    case decoder_state is not used. Otherwise the decoder is unrolled step by
    step and fed with its own previous prediction, starting from zeros.

    The step-wise path uses Python control flow on tensor values and numpy
    conversions, i.e., it relies on eager execution.

    Args:
      embedding: embedding tensor or a dictionary of distribution parameters
        understood by self.net_embedding.draw_sample.
      seq_len: sequence lengths. Its maximum determines the number of decoded
        steps and it is the filler passed to estimate_seq_len.
      decoder_input: optional decoder input sequence.
      decoder_state: optional initial state of the first decoder RNN layer
        for the step-wise path.

    Returns:
      A dictionary with the entries "stroke", "stroke_logits", "pen_logits",
      "pen_prob", "pen" and "seq_len". The single-pass path additionally
      returns "decoder_state".
    """
    max_steps = tf.reduce_max(input_tensor=seq_len)

    if self.embedding_only or decoder_input is not None:
      decoded_seq = self.call_decode(embedding,
                                     decoder_inputs=decoder_input,
                                     output_len=max_steps,
                                     training=False)
      decoded_seq["seq_len"] = self.estimate_seq_len(decoded_seq, seq_len)
      return decoded_seq

    # From here on the decoder is autoregressive and no decoder input was
    # passed: unroll it step by step on its own predictions.
    # NOTE(review): the latent sample is drawn once and reused in all steps,
    # even if repeat_vae_sample is False. The earlier per-step re-sampling
    # branch was unreachable, so the effective behavior is kept - confirm.
    if isinstance(embedding, dict):
      embedding = self.net_embedding.draw_sample(embedding)
    embedding = tf.expand_dims(embedding, axis=1)

    # The first decoder input is all zeros; 3 = 2 stroke values + 1 pen value.
    decoder_input_t = tf.zeros((embedding.shape[0], 1, 3))

    state_t = RNNUtils.get_initial_states_layer(self.decoder_rnn,
                                                embedding)
    if decoder_state is None:
      if self.dynamic_h0:
        decoder_state = self.decoder_state_nn(embedding[:, 0])
        state_t[0] = tf.split(decoder_state, 2, axis=-1)
    else:
      state_t[0] = decoder_state

    stroke_logits = None
    pen, pen_prob, pen_logits, stroke_samples = list(), list(), list(), list()
    step = 1
    while True:
      if self.decoder_inp_dropout is not None:
        decoder_input_t = self.decoder_inp_dropout(decoder_input_t, training=False)
      emb_dec_input_t = tf.concat([embedding, decoder_input_t], axis=-1)

      decoder_hidden_t = self.decoder_inp_dense(emb_dec_input_t)
      for idx, rnn_layer in enumerate(self.decoder_rnn):
        decoder_rnn_t = rnn_layer(decoder_hidden_t, initial_state=state_t[idx])
        # Carry the states over to the next step.
        state_t[idx] = decoder_rnn_t[1:]
        decoder_hidden_t = decoder_rnn_t[0]

      stroke_t = self.decoder_out_stroke(decoder_hidden_t)

      if self.decoder_out_pen is not None:
        pen_logits_t = self.decoder_out_pen(decoder_hidden_t)
        # Calculate pen-up probability from the logits.
        pen_t_prob = tf.nn.sigmoid(pen_logits_t)
        pen_t_binary = tf.compat.v1.where(
            tf.greater(pen_t_prob,
                       tf.fill(tf.shape(input=pen_t_prob), self.pen_threshold)),
            tf.fill(tf.shape(input=pen_t_prob), 1.0),
            tf.fill(tf.shape(input=pen_t_prob), 0.0))
      else:
        # There is no pen layer (pen loss is evaluation-only). Use the same
        # fallback as call_decode: constant logits and a random pen signal.
        pen_logits_t = tf.ones_like(decoder_hidden_t[:, :, 0:1])
        pen_t_prob = tf.random.uniform(tf.shape(pen_logits_t))
        pen_t_binary = tf.cast(tf.greater(pen_t_prob, 0.5), dtype=tf.float32)

      # Deterministic or probabilistic.
      stroke_t_sample = self.decoder_out_stroke.draw_sample_np(
          stroke_t, greedy=True)

      # The prediction of this step is the decoder input of the next one.
      decoder_input_t = tf.concat((stroke_t_sample, pen_t_binary), axis=-1)

      stroke_logits = utils.dict_append(stroke_logits, stroke_t)
      stroke_samples.append(stroke_t_sample)
      pen_logits.append(pen_logits_t)
      pen.append(pen_t_binary)
      pen_prob.append(pen_t_prob)

      # ">=" instead of "==" so that the loop also terminates (after a single
      # step) when max_steps is smaller than one.
      if step >= max_steps:
        break
      step += 1

    out_dict = dict()
    out_dict["stroke_logits"] = utils.tf_dict_concatenate(stroke_logits, axis=1)
    out_dict["stroke"] = tf.concat(stroke_samples, axis=1)
    out_dict["pen_logits"] = tf.concat(pen_logits, axis=1)
    out_dict["pen_prob"] = tf.concat(pen_prob, axis=1)
    out_dict["pen"] = tf.concat(pen, axis=1)
    # out_dict["seq_len"] = seq_len
    out_dict["seq_len"] = self.estimate_seq_len(out_dict, seq_len)

    return out_dict

  def estimate_seq_len(self, sample_dict, filler=None):
    """Estimates sequence lengths from the first pen-up event.

    Args:
      sample_dict: dictionary whose "pen" entry provides `.numpy()` and is
        indexed as [:, :, 0].
      filler: optional replacement for entries that evaluate to 0.

    Returns:
      numpy array holding, per sequence, the index of the first step whose
      pen value equals 1. The index is 0 both when no pen-up occurs and when
      it occurs at the very first step; such entries are replaced by `filler`
      if it is given.
    """
    is_pen_up = sample_dict["pen"][:, :, 0].numpy() == 1
    # argmax returns the first True per row, or 0 if the row has none.
    first_pen_up = np.argmax(is_pen_up, axis=1)
    if filler is None:
      return first_pen_up
    return np.where(first_pen_up == 0, filler, first_pen_up)

  def latent_walk(self, latent_start, latent_end, steps, output_len):
    """Decodes sequences along a linear interpolation between two latents.

    Args:
      latent_start: start of the walk; only its first entry is used.
      latent_end: end of the walk; only its first entry is used.
      steps: number of interpolation points, including both end points.
      output_len: forwarded to decode_sequence as the sequence lengths and
        stored unchanged as "seq_len" in the result.

    Returns:
      The dictionary returned by decode_sequence with "seq_len" overwritten
      by output_len and the interpolated latents added as "embeddings".
    """
    start_point = self.get_numpy_value(latent_start[0])
    end_point = self.get_numpy_value(latent_end[0])
    end_points = np.vstack([start_point, end_point])

    # Linear interpolation over the step indices 0 .. steps - 1.
    interpolate = interp1d([0, steps - 1], end_points, axis=0)
    latents = tf.cast(interpolate(range(steps)), tf.float32)
    embeddings = {C.MU: tf.expand_dims(latents, axis=1)}

    out_dict = self.decode_sequence(embeddings, output_len)
    out_dict["embeddings"] = embeddings
    out_dict["seq_len"] = output_len
    return out_dict
  
  def get_config(self):
    """Returns the configuration of the base class without modification."""
    return super(InkSeq2Seq, self).get_config()

  @classmethod
  def get_model_tags(cls, config, config_loss):
    """Generates strings summarizing the experiment parameters.

    Args:
      config: experiment configuration with attribute access to the
        `encoder`, `embedding` and `decoder` sections.
      config_loss: loss configuration. ["stroke"]["loss_type"] and, for VAE
        models, `embedding_kld.weight` are read. The weight is either a
        number or a schedule with a "values" entry.

    Returns:
      A dictionary with the entries "encoder", "latent", "decoder", "output"
      and "model_name".
    """
    loss_type = config_loss["stroke"]["loss_type"]
    if loss_type == C.NLL_NORMAL:
      output = "normal"
    elif loss_type == C.NLL_BINORMAL:
      output = "binormal"
    elif loss_type == C.NLL_GMM:
      output = "gmm"
    else:
      output = loss_type

    latent = "L{}".format(config.embedding.latent_units)
    if config.embedding.use_vae:
      latent += "_vae"
      kld_weight = config_loss.embedding_kld.weight
      # Integers are constant weights, too. Checking for float only sent an
      # integer weight to the schedule branch, where indexing it fails.
      if isinstance(kld_weight, (int, float)):
        latent += "_w" + str(kld_weight)
      else:
        # Scheduled ("annealed") weight: tag with the second schedule value.
        latent += "_aw" + str(kld_weight["values"][1])

    if config.encoder.name == "rnn":
      encoder = "{}_{}x{}".format(config.encoder.cell_type,
                                  config.encoder.cell_layers,
                                  config.encoder.cell_units)
      if config.encoder.bidirectional_encoder:
        encoder = "bi" + encoder

      if config.encoder.rec_dropout_rate > 0:
        encoder += "_rdrop{}".format(config.encoder.rec_dropout_rate)
    else:
      # Presumably raises; otherwise `encoder` is undefined below - confirm.
      err_unknown_type(config.encoder["name"])

    decoder = ""
    if config.decoder.repeat_vae_sample:
      decoder += "rep_"
    if config.decoder.dropout_rate > 0:
      decoder += "ddrop_" + str(config.decoder.dropout_rate)
    if config.decoder.dynamic_h0:
      decoder += "dh0_"

    model_name = "Seq2Seq"
    if config.decoder.autoregressive:
      model_name += "_ar"

    return dict(encoder=encoder, latent=latent, decoder=decoder, output=output,
                model_name=model_name)
Esempio n. 11
0
    def __init__(self,
                 config_encoder,
                 config_embedding,
                 config_decoder,
                 config_loss,
                 run_mode=C.RUN_ESTIMATOR,
                 **kwargs):
        """Builds the encoder, embedding and decoder networks of the model.

        Args:
          config_encoder: mapping whose "name" entry selects the encoder.
            "rnn" reads "cell_type", "cell_units", "cell_layers",
            "bidirectional_encoder" and "use_cudnn". "transformer" reads
            "layers", "d_model", "heads", "hidden_units", "dropout_rate",
            "scale", "pos_encoding" and "autoregressive". "mlp" and "cnn"
            are accepted, but no encoder is created for them.
          config_embedding: mapping with the keys "latent_units" and
            "use_vae".
          config_decoder: mapping with the keys "hidden_units", "layers" and
            "activation"; "n_layers" is read when "hidden_units" has a single
            entry. Optional keys (default): "regularizer_weight" (0),
            "dropout_rate" (0) and "t_frequency_channels" (0).
          config_loss: nested mapping. ["pen"]["eval_only"] and
            ["stroke"]["loss_type"] are read here, as well as
            ["stroke"]["num_components"] when the loss type is C.NLL_GMM.
          run_mode: eager, static or estimator. Forwarded to the base class
            and to the encoder.
          **kwargs: forwarded to the base class constructor.

        Raises:
          ValueError: if run_mode is eager and tf.executing_eagerly() is
            False (presumably raised by the base class; not visible here).
          An unknown encoder name is reported through err_unknown_type
            (presumably raises - confirm).
        """
        super(TEmbedding, self).__init__(config_loss=config_loss,
                                         run_mode=run_mode,
                                         **kwargs)

        # NOTE(review): presumably the probability threshold used to binarize
        # the pen output - the code using it is not visible here.
        self.pen_threshold = 0.3
        self.config_encoder = config_encoder
        self.config_embedding = config_embedding
        self.config_decoder = config_decoder
        # Prefix passed to the embedding output model.
        self.latent_prefix = ""

        # L2 regularization of the decoder is enabled by a positive weight.
        self.regularize_decoder = self.config_decoder.get(
            "regularizer_weight", 0) > 0
        self.kernel_regularizer = None
        if self.regularize_decoder:
            self.kernel_regularizer = tf.keras.regularizers.l2(
                self.config_decoder.get("regularizer_weight", 0))

        self.n_latent_units = self.config_embedding["latent_units"]
        self.use_vae = self.config_embedding["use_vae"]
        self.decoder_drop_rate = self.config_decoder.get("dropout_rate", 0)
        self.t_frequency_channels = self.config_decoder.get(
            "t_frequency_channels", 0)

        # Encoder network
        # net_encoder stays None for the "mlp" and "cnn" encoder types.
        self.net_encoder = None
        if self.config_encoder["name"] == "rnn":
            self.net_encoder = RNN(
                self.config_encoder["cell_type"],
                self.config_encoder["cell_units"],
                self.config_encoder["cell_layers"],
                self.config_encoder["bidirectional_encoder"],
                return_sequences=False,
                return_state=False,
                run_mode=run_mode,
                use_cudnn=self.config_encoder["use_cudnn"],
                name="encoder_rnn")
        elif self.config_encoder["name"] == "mlp":
            pass
        elif self.config_encoder["name"] == "cnn":
            pass
        elif self.config_encoder["name"] == "transformer":
            self.net_encoder = Transformer(
                num_layers=self.config_encoder["layers"],
                d_model=self.config_encoder["d_model"],
                num_heads=self.config_encoder["heads"],
                dff=self.config_encoder["hidden_units"],
                rate=self.config_encoder["dropout_rate"],
                scale=self.config_encoder["scale"],
                pos_encoding_len=self.config_encoder["pos_encoding"],
                autoregressive=self.config_encoder["autoregressive"],
                return_sequences=False,
                config_loss=None,
                run_mode=run_mode)
        else:
            err_unknown_type(self.config_encoder["name"])

        # Deterministic or stochastic stroke.
        if self.use_vae:
            self.net_embedding = OutputModelNormal(
                out_units=self.n_latent_units,
                prefix=self.latent_prefix,
                sigma_activation=None,
                logvar=True)
        else:
            self.net_embedding = OutputModelDeterministic(
                out_units=self.n_latent_units, prefix=self.latent_prefix)

        # Decoder network:
        self.net_decoder = tf.keras.Sequential(name="decoder")

        # A single hidden size is repeated for all layers (assumes that
        # "hidden_units" is a list - TODO confirm).
        # NOTE(review): the repetition count is read from "n_layers" while
        # the loop below iterates over "layers" - confirm that both keys
        # exist and agree.
        layer_units = self.config_decoder["hidden_units"]
        if len(layer_units) == 1:
            layer_units = layer_units * self.config_decoder["n_layers"]

        decoder_activation = Activations.get(self.config_decoder["activation"])

        # Dense layers, each followed by dropout if a dropout rate is set.
        # The same regularizer is applied to kernels and biases.
        for idx in range(self.config_decoder["layers"]):
            self.net_decoder.add(
                tf.keras.layers.Dense(
                    layer_units[idx],
                    activation=decoder_activation,
                    kernel_regularizer=self.kernel_regularizer,
                    bias_regularizer=self.kernel_regularizer))
            if self.decoder_drop_rate > 0:
                self.net_decoder.add(
                    tf.keras.layers.Dropout(self.decoder_drop_rate))

        # Pen and stroke outputs.
        # No pen output layer is created when the pen loss is evaluation-only.
        if config_loss["pen"]["eval_only"]:
            self.decoder_out_pen = None
        else:
            self.decoder_out_pen = tf.keras.layers.Dense(
                1,
                name="out_pen",
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.kernel_regularizer)

        # Build output model depending on the loss type.
        # Only the deterministic output model receives the regularizers.
        # NOTE(review): self.config_loss is presumably assigned by the base
        # class constructor from the config_loss argument - confirm.
        if self.config_loss["stroke"]["loss_type"] == C.NLL_NORMAL:
            self.decoder_out_stroke = OutputModelNormal(out_units=2,
                                                        hidden_units=0,
                                                        hidden_layers=0)
        elif self.config_loss["stroke"]["loss_type"] == C.NLL_BINORMAL:
            self.decoder_out_stroke = OutputModelNormal2DDense(
                sigma_activation=tf.keras.activations.exponential)
        elif self.config_loss["stroke"]["loss_type"] == C.NLL_GMM:
            self.decoder_out_stroke = OutputModelGMMDense(
                out_units=2,
                num_components=self.config_loss["stroke"]["num_components"],
                sigma_activation=tf.keras.activations.exponential)
        else:
            self.decoder_out_stroke = OutputModelDeterministic(
                out_units=2,
                hidden_units=0,
                hidden_layers=0,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.kernel_regularizer)

        # Variables for static mode. They are assigned in call method.
        # TODO We can get rid of them if autoregressive sampling is no
        #  longer required in static (graph) mode.
        self.op_encoder_inputs = None
        self.op_input_seq_len = None
        self.op_embedding = None
        self.op_decoder_inputs = None
        self.op_embedding_sample = None
Esempio n. 12
0
class TEmbedding(BaseModel):
    """A temporal stroke model explicitly factoring out the temporal dimension."""
    def __init__(self,
                 config_encoder,
                 config_embedding,
                 config_decoder,
                 config_loss,
                 run_mode=C.RUN_ESTIMATOR,
                 **kwargs):
        """Builds the encoder, embedding head, decoder MLP and output heads.

        Args:
          config_encoder: encoder configuration. Its "name" entry selects the
            encoder ("rnn", "mlp", "cnn" or "transformer"); the other keys
            read here depend on that choice.
          config_embedding: embedding configuration providing "latent_units"
            and "use_vae".
          config_decoder: decoder configuration providing "hidden_units" (a
            sequence of layer widths), "layers" and "activation". The keys
            "regularizer_weight", "dropout_rate" and "t_frequency_channels"
            are optional and default to 0.
          config_loss: loss configuration. "pen"/"eval_only" and
            "stroke"/"loss_type" (plus "stroke"/"num_components" for the GMM
            loss) are read here.
          run_mode: forwarded to the base class and to the encoder.
          **kwargs: forwarded to the base class constructor.
        """
        super(TEmbedding, self).__init__(config_loss=config_loss,
                                         run_mode=run_mode,
                                         **kwargs)

        self.pen_threshold = 0.3
        self.config_encoder = config_encoder
        self.config_embedding = config_embedding
        self.config_decoder = config_decoder
        self.latent_prefix = ""

        # A positive weight switches on L2 regularization of the decoder.
        regularizer_weight = self.config_decoder.get("regularizer_weight", 0)
        self.regularize_decoder = regularizer_weight > 0
        self.kernel_regularizer = None
        if self.regularize_decoder:
            self.kernel_regularizer = tf.keras.regularizers.l2(
                regularizer_weight)

        self.n_latent_units = self.config_embedding["latent_units"]
        self.use_vae = self.config_embedding["use_vae"]
        self.decoder_drop_rate = self.config_decoder.get("dropout_rate", 0)
        self.t_frequency_channels = self.config_decoder.get(
            "t_frequency_channels", 0)

        # Encoder network.
        self.net_encoder = None
        encoder_name = self.config_encoder["name"]
        if encoder_name == "rnn":
            self.net_encoder = RNN(
                self.config_encoder["cell_type"],
                self.config_encoder["cell_units"],
                self.config_encoder["cell_layers"],
                self.config_encoder["bidirectional_encoder"],
                return_sequences=False,
                return_state=False,
                run_mode=run_mode,
                use_cudnn=self.config_encoder["use_cudnn"],
                name="encoder_rnn")
        elif encoder_name in ("mlp", "cnn"):
            # Accepted names without an implementation: net_encoder stays
            # None, so call_encode cannot be used with these encoders.
            pass
        elif encoder_name == "transformer":
            self.net_encoder = Transformer(
                num_layers=self.config_encoder["layers"],
                d_model=self.config_encoder["d_model"],
                num_heads=self.config_encoder["heads"],
                dff=self.config_encoder["hidden_units"],
                rate=self.config_encoder["dropout_rate"],
                scale=self.config_encoder["scale"],
                pos_encoding_len=self.config_encoder["pos_encoding"],
                autoregressive=self.config_encoder["autoregressive"],
                return_sequences=False,
                config_loss=None,
                run_mode=run_mode)
        else:
            err_unknown_type(encoder_name)

        # Stochastic (VAE) or deterministic embedding head.
        if self.use_vae:
            self.net_embedding = OutputModelNormal(
                out_units=self.n_latent_units,
                prefix=self.latent_prefix,
                sigma_activation=None,
                logvar=True)
        else:
            self.net_embedding = OutputModelDeterministic(
                out_units=self.n_latent_units, prefix=self.latent_prefix)

        # Decoder network: a stack of dense layers, each optionally followed
        # by dropout.
        self.net_decoder = tf.keras.Sequential(name="decoder")

        layer_units = self.config_decoder["hidden_units"]
        # The number of decoder layers is stored under "layers"; the same key
        # must drive both the width list and the loop below.
        n_decoder_layers = self.config_decoder["layers"]
        if len(layer_units) == 1:
            # A single width is shared by every decoder layer.
            layer_units = layer_units * n_decoder_layers

        decoder_activation = Activations.get(self.config_decoder["activation"])

        for idx in range(n_decoder_layers):
            self.net_decoder.add(
                tf.keras.layers.Dense(
                    layer_units[idx],
                    activation=decoder_activation,
                    kernel_regularizer=self.kernel_regularizer,
                    bias_regularizer=self.kernel_regularizer))
            if self.decoder_drop_rate > 0:
                self.net_decoder.add(
                    tf.keras.layers.Dropout(self.decoder_drop_rate))

        # Pen output: no pen head is created when the pen loss is eval-only.
        if config_loss["pen"]["eval_only"]:
            self.decoder_out_pen = None
        else:
            self.decoder_out_pen = tf.keras.layers.Dense(
                1,
                name="out_pen",
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.kernel_regularizer)

        # Stroke output model, chosen by the stroke loss type. Any other loss
        # type falls back to a deterministic 2-unit output.
        stroke_loss_type = self.config_loss["stroke"]["loss_type"]
        if stroke_loss_type == C.NLL_NORMAL:
            self.decoder_out_stroke = OutputModelNormal(out_units=2,
                                                        hidden_units=0,
                                                        hidden_layers=0)
        elif stroke_loss_type == C.NLL_BINORMAL:
            self.decoder_out_stroke = OutputModelNormal2DDense(
                sigma_activation=tf.keras.activations.exponential)
        elif stroke_loss_type == C.NLL_GMM:
            self.decoder_out_stroke = OutputModelGMMDense(
                out_units=2,
                num_components=self.config_loss["stroke"]["num_components"],
                sigma_activation=tf.keras.activations.exponential)
        else:
            self.decoder_out_stroke = OutputModelDeterministic(
                out_units=2,
                hidden_units=0,
                hidden_layers=0,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.kernel_regularizer)

        # Variables for static mode. They are assigned in call method.
        # TODO We can get rid of them if autoregressive sampling is no
        #  longer required in static (graph) mode.
        self.op_encoder_inputs = None
        self.op_input_seq_len = None
        self.op_embedding = None
        self.op_decoder_inputs = None
        self.op_embedding_sample = None

    def call(self, inputs, training=None, **kwargs):
        """Runs one encode -> sample -> decode pass.

        The input sequence is encoded into an embedding, a sample is drawn
        from it, and the decoder is evaluated only at the requested t values.
        The whole sequence is not reconstructed here; see decode_sequence.

        Args:
          inputs (dict): must provide C.INP_ENC (encoder inputs), C.INP_T
            (decoder t values) and C.INP_SEQ_LEN (sequence lengths).
          training: forwarded to the encoder and the decoder.
          **kwargs: unused.

        Returns:
          The dictionary produced by call_decode, extended with the
          "embedding" and "embedding_sample" entries.
        """
        self.op_encoder_inputs = inputs[C.INP_ENC]
        self.op_decoder_inputs = inputs[C.INP_T]

        # Rank-2 sequence lengths are reduced to their first column.
        raw_seq_len = inputs[C.INP_SEQ_LEN]
        has_extra_axis = len(raw_seq_len.shape) == 2
        self.op_input_seq_len = (raw_seq_len[:, 0]
                                 if has_extra_axis else raw_seq_len)

        self.op_embedding = self.call_encode(self.op_encoder_inputs,
                                             self.op_input_seq_len, training)
        self.op_embedding_sample = self.net_embedding.draw_sample(
            self.op_embedding)

        outputs = self.call_decode(self.op_embedding_sample,
                                   self.op_decoder_inputs, training)
        outputs.update(embedding=self.op_embedding,
                       embedding_sample=self.op_embedding_sample)
        return outputs

    def call_encode(self, inputs, input_seq_len, training):
        """Encodes an input sequence into its embedding.

        Args:
          inputs: encoder input sequence, passed on as "input_seq".
          input_seq_len: sequence lengths, passed on as "seq_len".
          training: forwarded to the encoder and the embedding layer.

        Returns:
          The output of the embedding layer applied to the encoder output.
        """
        encoder_features = self.net_encoder(
            dict(input_seq=inputs, seq_len=input_seq_len), training=training)
        return self.net_embedding(encoder_features, training=training)

    def call_decode(self, embedding, decoder_inputs, training=None):
        """Decodes stroke points for the given embeddings and t values.

        Args:
          embedding: embedding sample, or the dict produced by the embedding
            layer (a sample is drawn from it first). It is indexed as a
            rank-2 tensor whose last dimension is self.n_latent_units.
          decoder_inputs: rank-2 tensor of t values, one row per embedding
            (original docs: t value between 0 and 1).
          training: forwarded to the decoder network.

        Returns:
          A dictionary with the keys "stroke", "stroke_logits", "pen_logits",
          "pen_prob" and "pen". Embeddings and t values are flattened into a
          single leading axis before decoding.
        """
        if isinstance(embedding, dict):
            embedding = self.net_embedding.draw_sample(embedding)

        # Several t values may be queried per embedding: repeat each
        # embedding once per t value, then flatten both tensors so that their
        # rows line up.
        n_t_targets = tf.shape(input=decoder_inputs)[1]
        decoder_inputs = tf.reshape(decoder_inputs, [-1, 1])
        tiled = tf.tile(embedding[:, tf.newaxis, :], [1, n_t_targets, 1])
        embedding = tf.reshape(tiled, [-1, self.n_latent_units])

        if self.t_frequency_channels > 0:
            decoder_inputs = self.frequency_encoding(decoder_inputs,
                                                     self.t_frequency_channels)

        decoder_inp = tf.concat([decoder_inputs, embedding], axis=-1)
        # Running decoder.
        decoder_hidden = self.net_decoder(decoder_inp, training=training)
        stroke_logits = self.decoder_out_stroke(decoder_hidden)
        if self.decoder_out_pen is not None:
            pen_logits = self.decoder_out_pen(decoder_hidden)
            # Calculate pen-up probability from the logits and binarize it
            # with the model's pen threshold.
            pen_prob = tf.nn.sigmoid(pen_logits)
            pen_binary = tf.cast(tf.greater(pen_prob, self.pen_threshold),
                                 dtype=tf.float32)
        else:
            # No pen head exists: emit constant logits and a uniformly random
            # pen probability, binarized at 0.5.
            pen_logits = tf.ones_like(stroke_logits["mu"][:, 0:1])
            pen_prob = tf.random.uniform(tf.shape(pen_logits))
            pen_binary = tf.cast(tf.greater(pen_prob, 0.5), dtype=tf.float32)

        stroke_sample = self.decoder_out_stroke.draw_sample(stroke_logits,
                                                            greedy=True)

        return dict(stroke=stroke_sample,
                    stroke_logits=stroke_logits,
                    pen_logits=pen_logits,
                    pen_prob=pen_prob,
                    pen=pen_binary)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None, 8], dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int32)
    ])
    def serving_decode_strokes(self, embedding_sample, target_seq_len):
        """Decodes embeddings into sequences of target_seq_len steps each.

        The t values are spread evenly over [0, 1]; every decoded stroke has
        the same length.

        NOTE(review): the input signature fixes the embedding width to 8,
        independently of self.n_latent_units -- confirm this matches the
        exported model.

        Args:
          embedding_sample: embedding samples of shape (batch_size, 8).
          target_seq_len: scalar int32, number of steps to decode.

        Returns (dict):
          "stroke" reshaped to (batch_size, -1, 2), "pen" reshaped to
          (batch_size, -1, 1) and "seq_len" of shape (batch_size,).
        """
        batch_size = tf.shape(input=embedding_sample)[0]

        # One row of evenly spaced t values, repeated for every embedding.
        t_row = tf.expand_dims(tf.linspace(0.0, 1.0, target_seq_len), axis=0)
        t_grid = tf.tile(t_row, (batch_size, 1))  # (batch_size, target_seq_len)

        outputs = self.call_decode(embedding_sample, t_grid, training=False)
        lengths = tf.ones(batch_size, dtype=tf.int32) * target_seq_len

        stroke = tf.reshape(outputs["stroke"], (batch_size, -1, 2))
        pen = tf.reshape(outputs["pen"], (batch_size, -1, 1))
        return dict(stroke=stroke, pen=pen, seq_len=lengths)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None, None, 3], dtype=tf.float32),
        tf.TensorSpec(shape=[None], dtype=tf.int32)
    ])
    def serving_encode_strokes(self, input_stroke, input_seq_len):
        """Encodes stroke sequences and draws one embedding sample for each.

        Args:
          input_stroke: stroke sequences of shape (batch_size, seq_len, 3).
          input_seq_len: int32 sequence lengths of shape (batch_size,).

        Returns:
          dict with the single key "embedding_sample".
        """
        encoded = self.call_encode(input_stroke, input_seq_len, training=False)
        return dict(embedding_sample=self.net_embedding.draw_sample(encoded))

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None, None, 3], dtype=tf.float32),
        tf.TensorSpec(shape=[None], dtype=tf.int32),
        tf.TensorSpec(shape=(), dtype=tf.int32)
    ])
    def serving_forward_pass(self, input_stroke, input_seq_len,
                             target_seq_len):
        """Encodes stroke sequences and decodes them to target_seq_len steps.

        Works on batches; all decoded strokes share the same length.

        Args:
          input_stroke: stroke sequences of shape (batch_size, seq_len, 3).
          input_seq_len: int32 sequence lengths of shape (batch_size,).
          target_seq_len: scalar int32, number of steps to decode.

        Returns:
          dict with "stroke" reshaped to (batch_size, -1, 2), "pen" reshaped
          to (batch_size, -1, 1), "seq_len" shaped like input_seq_len, and
          the "embedding_sample" that was decoded.
        """
        encoded = self.call_encode(input_stroke, input_seq_len, training=False)
        embedding_sample = self.net_embedding.draw_sample(encoded)
        batch_size = tf.shape(input=embedding_sample)[0]

        # One row of evenly spaced t values, repeated for every embedding.
        t_row = tf.expand_dims(tf.linspace(0.0, 1.0, target_seq_len), axis=0)
        t_grid = tf.tile(t_row, (batch_size, 1))  # (batch_size, target_seq_len)
        outputs = self.call_decode(embedding_sample, t_grid, training=False)

        lengths = tf.ones_like(input_seq_len) * target_seq_len

        stroke = tf.reshape(outputs["stroke"], (batch_size, -1, 2))
        pen = tf.reshape(outputs["pen"], (batch_size, -1, 1))
        return dict(stroke=stroke,
                    pen=pen,
                    seq_len=lengths,
                    embedding_sample=embedding_sample)

    def decode_sequence(self, embedding, seq_len):
        """Decodes each embedding into a sequence of its own length.

        For every embedding, t is spread evenly over [0, 1] with seq_len[i]
        steps and zero-padded up to the longest requested length.

        Args:
          embedding: embedding samples of shape (n_strokes, latent_units), or
            the dict produced by the embedding layer (sampled first).
          seq_len (np.array): number of steps to decode for each embedding.

        Returns:
          The decoder outputs reshaped to (n_strokes, max_len, feature_dim),
          plus "seq_len".
        """
        if isinstance(embedding, dict):
            embedding = self.net_embedding.draw_sample(embedding)

        emb_shape = tf.shape(input=embedding)
        n_strokes, n_latent = emb_shape[0], emb_shape[1]
        longest = tf.reduce_max(input_tensor=seq_len)

        # Repeat every embedding once per (padded) time step and flatten.
        repeated = tf.tile(tf.expand_dims(embedding, 1), [1, longest, 1])
        embedding_inp = tf.reshape(repeated, (-1, n_latent))

        def padded_t_column(n_steps):
            # Column of n_steps t values, zero-padded to `longest` rows.
            t_col = tf.expand_dims(tf.linspace(0.0, 1.0, n_steps), axis=1)
            return tf.pad(tensor=t_col,
                          paddings=[[0, longest - n_steps], [0, 0]])

        # TODO TF compatible? Only works in eager mode right now.
        t_inp = tf.concat(
            [padded_t_column(seq_len[sid]) for sid in range(len(seq_len))],
            axis=0)

        flat_outputs = self.call_decode(embedding_inp, t_inp, training=False)
        # Convert stroke batch into a diagram sample with padded strokes.
        diagram = self.batch_stroke_to_single_diagram(flat_outputs, n_strokes)
        diagram["seq_len"] = seq_len
        return diagram

    def latent_walk(self, latent_start, latent_end, steps, output_len):
        """Linearly interpolates between two embeddings and decodes each step.

        Args:
          latent_start: embedding(s) at the start of the walk; only the first
            entry (index 0) is used.
          latent_end: embedding(s) at the end of the walk; only the first
            entry (index 0) is used.
          steps: number of interpolation steps, both end points included.
            NOTE(review): steps == 1 makes the two interpolation knots
            coincide -- confirm callers always pass steps >= 2.
          output_len: either a single length used for every step, or a
            (start, end) pair of lengths (list, tuple or array) that is
            interpolated along the walk and rounded to int32.

        Returns:
          The dictionary from decode_sequence with "embeddings" and "seq_len"
          added.
        """
        step_ids = range(steps)
        knots = [0, steps - 1]

        end_points = np.vstack([
            self.get_numpy_value(latent_start[0]),
            self.get_numpy_value(latent_end[0])
        ])
        embedding_interp = interp1d(knots, end_points, axis=0)
        embeddings = tf.cast(embedding_interp(step_ids), tf.float32)

        # Any non-scalar output_len is a pair of lengths to interpolate, not
        # only a Python list.
        if np.ndim(output_len) > 0:
            len_interp = interp1d(knots, np.array(output_len), axis=0)
            seq_len = np.round(len_interp(step_ids)).astype(np.int32)
        else:
            seq_len = np.array([output_len] * steps)

        out_dict = self.decode_sequence(embeddings, seq_len)
        out_dict["embeddings"] = embeddings
        out_dict["seq_len"] = seq_len
        return out_dict

    def loss(self,
             predictions,
             targets,
             seq_len=None,
             prefix="",
             training=True):
        """Computes the model losses, plus the decoder regularization term.

        Args:
          predictions: model outputs, forwarded to self.loss_fn.
          targets: ground-truth values, forwarded to self.loss_fn.
          seq_len: sequence lengths, forwarded to self.loss_fn.
          prefix: loss name prefix; when empty, the "prefix" entry of the
            loss configuration is used (default "").
          training: forwarded to self.loss_fn.

        Returns:
          The dictionary returned by self.loss_fn. When decoder
          regularization is enabled, the summed regularization losses of the
          decoder, the stroke head and the pen head (if present) are added
          to "loss" and also stored under "decoder_l2".
        """
        if not prefix:
            prefix = self.config_loss.get("prefix", "")
        output_losses = self.loss_fn(self.config_loss,
                                     predictions=predictions,
                                     targets=targets,
                                     seq_len=seq_len,
                                     prefix=prefix,
                                     run_mode=self.run_mode,
                                     training=training)

        if self.regularize_decoder:
            # Copy first: the list handed out by the `losses` property must
            # not be extended in place.
            dec_all = list(self.net_decoder.losses)
            dec_all.extend(self.decoder_out_stroke.losses)
            if self.decoder_out_pen is not None:
                dec_all.extend(self.decoder_out_pen.losses)
            dec_reg = tf.math.add_n(dec_all)
            output_losses["loss"] += dec_reg
            output_losses["decoder_l2"] = dec_reg

        return output_losses

    @classmethod
    def frequency_encoding(cls, inputs, n_layers):
        """Expands inputs into sine/cosine features at doubling frequencies.

        For every level l in [0, n_layers) the features sin(2^l * pi * x) and
        cos(2^l * pi * x) are appended, in that order.

        Args:
          inputs: tensor to encode.
          n_layers: number of frequency levels.

        Returns:
          All features concatenated along the last axis.
        """
        features = []
        for level in range(n_layers):
            frequency = np.float32(np.power(2, level) * np.pi)
            phase = frequency * inputs
            features.extend((tf.sin(phase), tf.cos(phase)))
        return tf.concat(features, axis=-1)

    @classmethod
    def frequency_encoding_emb(cls, t, embeddings, n_layers):
        """Modulates embeddings with sine/cosine features of t.

        For every level l in [0, n_layers) the products
        sin(2^l * pi * t) * embeddings and cos(2^l * pi * t) * embeddings are
        appended, in that order.

        Args:
          t: tensor of t values.
          embeddings: tensor multiplied with each sine/cosine feature.
          n_layers: number of frequency levels.

        Returns:
          All products concatenated along the last axis.
        """
        features = []
        for level in range(n_layers):
            frequency = np.float32(np.power(2, level) * np.pi)
            phase = frequency * t
            features.extend(
                (tf.sin(phase) * embeddings, tf.cos(phase) * embeddings))
        return tf.concat(features, axis=-1)

    @classmethod
    def batch_stroke_to_single_diagram(cls, stroke_batch, n_strokes):
        """Reshapes flattened decoder outputs into per-stroke sequences.

        Every tensor is reshaped to (n_strokes, -1, feature_dim), where
        feature_dim is the size of its last axis. One level of nesting is
        supported, so dictionary-valued entries are reshaped entry by entry.

        Args:
          stroke_batch: dictionary of tensors, or of dictionaries of tensors.
          n_strokes: size of the leading axis after reshaping.

        Returns:
          A new dictionary with the same keys and the reshaped tensors.
        """

        def unflatten(tensor):
            n_features = tf.shape(input=tensor)[-1]
            return tf.reshape(tensor, (n_strokes, -1, n_features))

        diagram = {}
        for name, value in stroke_batch.items():
            if isinstance(value, dict):
                diagram[name] = {
                    sub_name: unflatten(sub_value)
                    for sub_name, sub_value in value.items()
                }
            else:
                diagram[name] = unflatten(value)
        return diagram

    @classmethod
    def get_model_tags(cls, config, config_loss):
        """Generates short strings summarizing the experiment parameters.

        Args:
          config: experiment configuration with `encoder`, `embedding` and
            `decoder` sections (read through attribute access).
          config_loss: loss configuration; the stroke loss type and, for VAE
            embeddings, the `embedding_kld.weight` entry are read.

        Returns:
          dict with the keys "encoder", "latent", "decoder", "output" and
          "model_name" (always "TEMB").
        """
        loss_type = config_loss["stroke"]["loss_type"]
        if loss_type == C.NLL_NORMAL:
            output = "normal"
        elif loss_type == C.NLL_BINORMAL:
            output = "binormal"
        elif loss_type == C.NLL_GMM:
            output = "gmm"
        else:
            output = loss_type

        decoder = "{}x{}".format(config.decoder.layers,
                                 config.decoder.hidden_units[0])

        latent = "L{}".format(config.embedding.latent_units)
        if config.embedding.use_vae:
            latent += "_vae"
            kld_weight = config_loss.embedding_kld.weight
            # A plain number (int or float) is a constant weight; anything
            # else is treated as an annealing spec whose second "values"
            # entry is reported.
            if isinstance(kld_weight, (int, float)):
                latent += "_w" + str(kld_weight)
            else:
                latent += "_aw" + str(kld_weight["values"][1])

        if config.encoder.name == "rnn":
            encoder = "{}_{}x{}".format(config.encoder.cell_type,
                                        config.encoder.cell_layers,
                                        config.encoder.cell_units)
            if config.encoder.bidirectional_encoder:
                encoder = "bi" + encoder
        elif config.encoder.name == "transformer":
            encoder = "TR_{}_{}x{}-head_{}-drop_{}".format(
                config.encoder.d_model, config.encoder.layers,
                config.encoder.hidden_units, config.encoder.heads,
                config.encoder.dropout_rate)
            if not config.encoder.autoregressive:
                encoder = "bi" + encoder
        else:
            # NOTE(review): presumably err_unknown_type raises; if it returns,
            # `encoder` is unbound in the return statement below.
            err_unknown_type(config.encoder["name"])

        return dict(encoder=encoder,
                    latent=latent,
                    decoder=decoder,
                    output=output,
                    model_name="TEMB")
Esempio n. 13
0
    def __init__(self,
                 cell_type,
                 cell_units,
                 cell_layers,
                 bidirectional=False,
                 return_state=True,
                 return_sequences=True,
                 output_size=0,
                 config_loss=None,
                 name="rnn",
                 run_mode=C.RUN_STATIC,
                 **kwargs):
        """Builds a stack of (optionally bidirectional) recurrent layers.

        Args:
          cell_type (str): cell type handed to RNNUtils.get_rnn_layer
            (original docs: 'lstm' or 'gru').
          cell_units: number of units of every recurrent layer.
          cell_layers: total number of stacked recurrent layers.
          bidirectional: if True, every layer is wrapped in
            tf.keras.layers.Bidirectional with merge_mode="concat".
          return_state: applied to the last recurrent layer only.
          return_sequences: applied to the last recurrent layer only; all
            earlier layers return sequences.
          output_size: if > 0, an output layer is created.
          config_loss: loss configuration; its "loss_type" entry selects the
            kind of output layer.
          name: name of the last recurrent layer; earlier layers are named
            "<name>_<index>".
          run_mode: forwarded to the base class (original docs: eager, static
            or estimator).
          **kwargs: forwarded to the base class constructor.
        """
        super(RNN, self).__init__(config_loss=config_loss,
                                  run_mode=run_mode,
                                  **kwargs)

        self.cell_units = cell_units
        self.cell_layers = cell_layers
        self.cell_type = cell_type
        self.bidirectional = bidirectional
        self.return_state = return_state
        self.return_sequences = return_sequences

        self.output_size = output_size

        self.output_layer = None
        self._rnn_layer = tf.keras.Sequential()

        # All layers but the last always emit full sequences and no state, so
        # that they can feed the next recurrent layer.
        for i in range(self.cell_layers - 1):
            rnn_layer = RNNUtils.get_rnn_layer(self.cell_type,
                                               self.cell_units,
                                               return_state=False,
                                               return_sequences=True,
                                               stateful=False,
                                               name=name + "_" + str(i))

            if self.bidirectional:
                rnn_layer = tf.keras.layers.Bidirectional(rnn_layer,
                                                          merge_mode="concat")
            self._rnn_layer.add(rnn_layer)

        # The last layer honors the caller's return_state/return_sequences.
        rnn_layer = RNNUtils.get_rnn_layer(self.cell_type,
                                           self.cell_units,
                                           return_state=return_state,
                                           return_sequences=return_sequences,
                                           stateful=False,
                                           name=name)
        if self.bidirectional:
            rnn_layer = tf.keras.layers.Bidirectional(rnn_layer,
                                                      merge_mode="concat")
        self._rnn_layer.add(rnn_layer)

        # Deterministic or probabilistic outputs.
        # Without a loss configuration, or with an unrecognized loss type, a
        # deterministic output layer is used.
        if output_size > 0:
            if config_loss is not None:
                if config_loss["loss_type"] == C.NLL_NORMAL:
                    self.output_layer = OutputModelNormal(self.output_size,
                                                          logvar=True)
                elif config_loss["loss_type"] == C.NLL_BINORMAL:
                    # NOTE(review): output_size is not passed to this output
                    # model -- confirm it is fixed to 2D outputs.
                    self.output_layer = OutputModelNormal2DDense(
                        sigma_activation=tf.keras.activations.exponential)
                elif config_loss["loss_type"] == C.NLL_GMM:
                    self.output_layer = OutputModelGMMDense(
                        out_units=self.output_size,
                        num_components=config_loss["num_components"],
                        sigma_activation=tf.keras.activations.exponential)
                else:
                    self.output_layer = OutputModelDeterministic(
                        self.output_size, 0, 0)
            else:
                self.output_layer = OutputModelDeterministic(
                    self.output_size, 0, 0)