Example #1
  def testSelectLastActivations(self):
    """Test `select_last_activations`."""
    batch_size = 4
    padded_length = 6
    num_classes = 4
    np.random.seed(4444)
    sequence_length = np.random.randint(0, padded_length + 1, batch_size)
    activations = np.random.rand(batch_size, padded_length, num_classes)
    last_activations_t = rnn_common.select_last_activations(
        constant_op.constant(activations, dtype=dtypes.float32),
        constant_op.constant(sequence_length, dtype=dtypes.int32))

    with session.Session() as sess:
      last_activations = sess.run(last_activations_t)

    expected_activations_shape = [batch_size, num_classes]
    np.testing.assert_equal(
        expected_activations_shape, last_activations.shape,
        'Wrong activations shape. Expected {}; got {}.'.format(
            expected_activations_shape, last_activations.shape))

    for i in range(batch_size):
      actual_activations = last_activations[i, :]
      expected_activations = activations[i, sequence_length[i] - 1, :]
      np.testing.assert_almost_equal(
          expected_activations,
          actual_activations,
          err_msg='Unexpected logit value at index [{}, :].'
          '  Expected {}; got {}.'.format(i, expected_activations,
                                          actual_activations))
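
For reference, the behaviour this test expects from select_last_activations can be reproduced directly with tf.gather_nd; a minimal, self-contained TF 1.x sketch (illustrative only, not the library implementation):

import numpy as np
import tensorflow as tf

# Gather the output at index sequence_length - 1 for every batch entry, which
# is what the test above asserts select_last_activations returns.
batch_size, padded_length, num_classes = 4, 6, 4
activations = tf.constant(
    np.random.rand(batch_size, padded_length, num_classes), dtype=tf.float32)
sequence_length = tf.constant([1, 3, 6, 2], dtype=tf.int32)

indices = tf.stack([tf.range(batch_size), sequence_length - 1], axis=1)
last_activations = tf.gather_nd(activations, indices)  # [batch_size, num_classes]

with tf.Session() as sess:
  print(sess.run(last_activations).shape)  # (4, 4)
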
Example #2
    def testSelectLastActivations(self):
        """Test `select_last_activations`."""
        batch_size = 4
        padded_length = 6
        num_classes = 4
        np.random.seed(4444)
        sequence_length = np.random.randint(0, padded_length + 1, batch_size)
        activations = np.random.rand(batch_size, padded_length, num_classes)
        last_activations_t = rnn_common.select_last_activations(
            constant_op.constant(activations, dtype=dtypes.float32),
            constant_op.constant(sequence_length, dtype=dtypes.int32))

        with session.Session() as sess:
            last_activations = sess.run(last_activations_t)

        expected_activations_shape = [batch_size, num_classes]
        np.testing.assert_equal(
            expected_activations_shape, last_activations.shape,
            'Wrong activations shape. Expected {}; got {}.'.format(
                expected_activations_shape, last_activations.shape))

        for i in range(batch_size):
            actual_activations = last_activations[i, :]
            expected_activations = activations[i, sequence_length[i] - 1, :]
            np.testing.assert_almost_equal(
                expected_activations,
                actual_activations,
                err_msg='Unexpected logit value at index [{}, :].'
                '  Expected {}; got {}.'.format(i, expected_activations,
                                                actual_activations))
Example #3
    def model_fn(features, labels, mode):

        color_name = features[COLOR_NAME_KEY]
        sequence_length = features[SEQUENCE_LENGTH_KEY]

        # Create a dense representation of the names
        # and then convert it to a one-hot representation
        dense_color_name = tf.sparse_tensor_to_dense(
            color_name, default_value=len(CHARACTERS))
        color_name_onehot = tf.one_hot(dense_color_name,
                                       depth=len(CHARACTERS) + 1)

        # Each RNN layer will consist of an LSTM cell
        rnn_layers = [tf.contrib.rnn.LSTMCell(size) for size in rnn_cell_sizes]

        # Construct the layers
        multi_rnn_cell = tf.contrib.rnn.MultiRNNCell(rnn_layers)

        # Runs the RNN model dynamically
        # more about it at:
        # https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn
        outputs, final_state = tf.nn.dynamic_rnn(
            cell=multi_rnn_cell,
            inputs=color_name_onehot,
            sequence_length=sequence_length,
            dtype=tf.float32)

        # Slice to keep only the last cell of the RNN
        last_activations = rnn_common.select_last_activations(
            outputs, sequence_length)

        # Construct dense layers on top of the last cell of the RNN
        for units in dnn_layer_sizes:
            last_activations = tf.layers.dense(last_activations,
                                               units,
                                               activation=tf.nn.relu)

        # Final dense layer for prediction
        predictions = tf.layers.dense(last_activations, label_dimension)

        loss = None
        train_op = None

        if mode != tf.contrib.learn.ModeKeys.INFER:
            loss = tf.losses.mean_squared_error(labels, predictions)

        if mode == tf.contrib.learn.ModeKeys.TRAIN:
            train_op = tf.contrib.layers.optimize_loss(
                loss,
                tf.contrib.framework.get_global_step(),
                optimizer=optimizer,
                learning_rate=learning_rate)

        return tf.contrib.learn.ModelFnOps(mode,
                                           predictions=predictions,
                                           loss=loss,
                                           train_op=train_op)
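
One possible way to wire the model_fn above into an estimator, sketched against the (now deprecated) tf.contrib.learn API; train_input_fn, eval_input_fn and the model_dir path are assumptions, not part of the example:

# Hypothetical wiring for the model_fn above (TF 1.x, tf.contrib.learn).
# train_input_fn / eval_input_fn are assumed to yield the COLOR_NAME_KEY and
# SEQUENCE_LENGTH_KEY features that model_fn expects.
estimator = tf.contrib.learn.Estimator(model_fn=model_fn,
                                       model_dir='/tmp/color_rnn_model')
estimator.fit(input_fn=train_input_fn, steps=1000)
print(estimator.evaluate(input_fn=eval_input_fn, steps=1))
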
Example #4
def compute_attention(seq_output, last_output, hidden_layer_dim, seq_mask,
                      sequence_length):
  """Constructs attention of the last_output as query and the sequence output.

  The attention is the dot-product of the last_output (the final RNN output),
  with the seq_output (the RNN's output at each step). Here the final RNN output
  is considered as the "query" or "context" vector. The final attention output
  is a weighted sum of the RNN's outputs at all steps. Details:

    alpha_i = seq_output_i * last_output
    beta is then obtained by normalizing alpha:
    beta_i = exp(alpha_i) / sum_j exp(alpha_j)
    The new attention vector is then the beta-weighted sum over the seq_output:
    attention_vector = sum_i beta_i * seq_output_i

  If hidden_layer_dim > 0, then before computing alpha the seq_output and the
  last_output are sent through two separate hidden layers:
  seq_output = hidden_layer(seq_output)
  last_output = hidden_layer(last_output)

  Args:
    seq_output: The raw rnn output of shape [batch_size, max_sequence_length,
      rnn_size].
    last_output: The last output of the rnn of shape [batch_size, rnn_size].
    hidden_layer_dim: If 0, no hidden layer is applied before multiplying
      last_output with seq_output.
    seq_mask: A Tensor of shape [batch_size, max_sequence_length, 1] indicating
      which timesteps are padded.
    sequence_length: Sequence length (before padding), Tensor of shape
      [batch_size].

  Returns:
    Attention output with shape [batch_size, rnn_size].
    The attention beta tensor.
  """
  # Compute the weights.
  if hidden_layer_dim > 0:
    last_output = tf.layers.dense(
        last_output, hidden_layer_dim, activation=tf.nn.relu6)
    seq_output = tf.layers.dense(
        seq_output, hidden_layer_dim, activation=tf.nn.relu6)
  last_output = tf.expand_dims(last_output, 1)  # [batch_size, 1, rnn_size]
  tmp = tf.multiply(seq_output, last_output)  # dim 1: broadcast
  alpha_tensor = tf.reduce_sum(tmp, 2)  # [b, max_seq_len]
  alpha_tensor *= tf.squeeze(seq_mask, axis=2)
  beta_tensor = tf.nn.softmax(alpha_tensor)  # using default dim -1
  beta_tensor = tf.expand_dims(beta_tensor, -1)  # [b, max_seq_len, 1]

  # Compute weighted sum of the original rnn_outputs over all steps
  tmp = seq_output * beta_tensor  # last dim: use "broadcast"
  rnn_outputs_weighted_sum = tf.reduce_sum(tmp, 1)  # [b, rnn_size]
  last_beta = rnn_common.select_last_activations(
      beta_tensor, tf.to_int32(sequence_length))
  tf.summary.histogram('last_beta_attention', last_beta)

  return rnn_outputs_weighted_sum, beta_tensor
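
For a single batch entry, the alpha/beta computation described in the docstring can be checked with a few lines of NumPy; this sketch ignores the hidden layers and the padding mask and is purely illustrative:

import numpy as np

# alpha_i = seq_output_i . last_output, beta = softmax(alpha),
# attention_vector = sum_i beta_i * seq_output_i
max_seq_len, rnn_size = 5, 3
seq_output = np.random.rand(max_seq_len, rnn_size)
last_output = seq_output[-1]                     # final RNN output as the query

alpha = seq_output.dot(last_output)              # [max_seq_len]
beta = np.exp(alpha) / np.sum(np.exp(alpha))     # [max_seq_len], sums to 1
attention_vector = (beta[:, None] * seq_output).sum(axis=0)  # [rnn_size]
print(beta.sum(), attention_vector.shape)        # 1.0 (3,)
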
Example #5
def _single_value_predictions(activations,
                              sequence_length,
                              target_column,
                              problem_type,
                              predict_probabilities):
  """Maps `activations` from the RNN to predictions for single value models.

  If `predict_probabilities` is `False`, this function returns a `dict`
  containing a single entry with key `PREDICTIONS_KEY`. If `predict_probabilities`
  is `True`, it will contain a second entry with key `PROBABILITIES_KEY`. The
  value of this entry is a `Tensor` of probabilities with shape
  `[batch_size, num_classes]`.

  Args:
    activations: Output from an RNN. Should have dtype `float32` and shape
      `[batch_size, padded_length, ?]`.
    sequence_length: A `Tensor` with shape `[batch_size]` and dtype `int32`
      containing the length of each sequence in the batch. If `None`, sequences
      are assumed to be unpadded.
    target_column: An initialized `TargetColumn` used to calculate predictions.
    problem_type: Either `ProblemType.CLASSIFICATION` or
      `ProblemType.LINEAR_REGRESSION`.
    predict_probabilities: A Python boolean, indicating whether probabilities
      should be returned. Should only be set to `True` for
      classification/logistic regression problems.
  Returns:
    A `dict` mapping strings to `Tensors`.
  """
  with ops.name_scope('SingleValuePrediction'):
    last_activations = rnn_common.select_last_activations(
        activations, sequence_length)
    predictions_name = (prediction_key.PredictionKey.CLASSES
                        if problem_type == constants.ProblemType.CLASSIFICATION
                        else prediction_key.PredictionKey.SCORES)
    if predict_probabilities:
      probabilities = target_column.logits_to_predictions(
          last_activations, proba=True)
      prediction_dict = {
          prediction_key.PredictionKey.PROBABILITIES: probabilities,
          predictions_name: math_ops.argmax(probabilities, 1)}
    else:
      predictions = target_column.logits_to_predictions(
          last_activations, proba=False)
      prediction_dict = {predictions_name: predictions}
    return prediction_dict
Example #6
def construct_logits(diff_delta_time, obs_values, indicator, sequence_length,
                     seq_mask, hparams, reuse):
  """Constructs logits through an RNN.

  Args:
    diff_delta_time: Difference between two consecutive time steps.
    obs_values: A dense representation of the observation_values in which
      obs_values[b, t, :] has at most one non-zero value, at the position of
      the corresponding lab test from obs_code_ids, holding the value of the
      lab result. A padded Tensor of shape [batch_size, max_sequence_length,
      vocab_size] of type float32 of possibly normalized observation values.
    indicator: A one-hot encoding of whether a value in obs_values comes from
      observation_values or is just filled in to be 0. A Tensor of
      shape [batch_size, max_sequence_length, vocab_size] and type float32.
    sequence_length: Sequence length (before padding), Tensor of shape
      [batch_size].
    seq_mask: A Tensor of shape [batch_size, max_sequence_length, 1] indicating
      which timesteps are padded.
    hparams: Hyper parameters.
    reuse: Boolean indicator of whether to re-use the variables.

  Returns:
    - Logits: A Tensor of shape [batch, {max_sequence_length,1}, 1].
    - Weights: Defaults to None. Only populated to a Tensor of shape
               [batch, max_sequence_length, 1] if
               hparams.use_rnn_attention is True.
  """

  logits, raw_output = construct_rnn_logits(
      diff_delta_time, obs_values, indicator, sequence_length, hparams.rnn_size,
      hparams.variational_recurrent_keep_prob,
      hparams.variational_input_keep_prob, hparams.variational_output_keep_prob,
      reuse)
  if hparams.use_rnn_attention:
    with tf.variable_scope('logits/rnn/attention', reuse=reuse) as sc:
      last_logits = rnn_common.select_last_activations(
          raw_output, tf.to_int32(sequence_length))
      weighted_final_output, weight = compute_attention(
          raw_output, last_logits, hparams.attention_hidden_layer_dim,
          seq_mask, sequence_length)
      return tf.layers.dense(
          weighted_final_output, 1, name=sc, reuse=reuse,
          activation=None), weight
  else:
    return logits, None
Example #8
def _single_value_loss(
    activations, labels, sequence_length, target_column, features):
  """Maps `activations` from the RNN to loss for multi value models.

  Args:
    activations: Output from an RNN. Should have dtype `float32` and shape
      `[batch_size, padded_length, ?]`.
    labels: A `Tensor` with shape `[batch_size]`.
    sequence_length: A `Tensor` with shape `[batch_size]` and dtype `int32`
      containing the length of each sequence in the batch. If `None`, sequences
      are assumed to be unpadded.
    target_column: An initialized `TargetColumn` used to calculate predictions.
    features: A `dict` containing the input and (optionally) sequence length
      information and initial state.
  Returns:
    A scalar `Tensor` containing the loss.
  """

  with ops.name_scope('SingleValueLoss'):
    last_activations = rnn_common.select_last_activations(
        activations, sequence_length)
    return target_column.loss(last_activations, labels, features)
Example #10
    def model_fn(features, labels, mode):
      """Creates the prediction, loss, and train ops.

      Args:
        features: A dictionary of tensors keyed by the feature name.
        labels: A dictionary of label tensors keyed by the label key.
        mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

      Returns:
        EstimatorSpec with the mode, prediction, loss, train_op and
        output_alternatives a dictionary specifying the output for a
        servo request during serving.
      """
      # 1. Construct input to RNN
      sequence_feature_map = {
          k: features[input_fn.SEQUENCE_KEY_PREFIX + k]
          for k in hparams.sequence_features
      }
      sequence_length = tf.squeeze(
          features[input_fn.CONTEXT_KEY_PREFIX + 'sequenceLength'],
          axis=1,
          name='sq_seq_len')
      tf.summary.scalar('sequence_length', tf.reduce_mean(sequence_length))
      diff_delta_time, obs_values, indicator = construct_input(
          sequence_feature_map, hparams.categorical_values,
          hparams.categorical_seq_feature, hparams.feature_value, mode,
          hparams.normalize, hparams.momentum, hparams.min_value,
          hparams.max_value, hparams.input_keep_prob)

      seq_mask = tf.expand_dims(
          tf.sequence_mask(sequence_length, dtype=tf.float32), axis=2)
      logits, weights = construct_logits(
          diff_delta_time,
          obs_values,
          indicator,
          sequence_length,
          seq_mask,
          hparams,
          reuse=False)

      all_attribution_dict = {}
      if mode == tf.estimator.ModeKeys.TRAIN:
        if hparams.sequence_prediction:
          assert not hparams.use_rnn_attention
          # If we train a sequence_prediction we repeat the labels over time.
          label_tensor = labels[hparams.label_key]
          labels[hparams.label_key] = tf.tile(
              tf.expand_dims(label_tensor, 2),
              multiples=[1, tf.shape(logits)[1], 1])
          if hparams.volatility_loss_factor > 0.0:
            volatility = tf.reduce_sum(
                tf.square(seq_mask *
                          compute_prediction_diff_attribution(logits)))
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                 volatility * hparams.volatility_loss_factor)
        elif not hparams.use_rnn_attention:
          logits = rnn_common.select_last_activations(
              logits, tf.to_int32(sequence_length))
      else:
        if hparams.sequence_prediction:
          last_logits = rnn_common.select_last_activations(
              logits, tf.to_int32(sequence_length))
        else:
          last_logits = logits
        if mode == tf.estimator.ModeKeys.PREDICT:
          delta_time = sequence_feature_map['deltaTime']
          all_attributions = {}
          if hparams.include_gradients_attribution:
            all_attributions['gradient_last'] = compute_gradient_attribution(
                last_logits, obs_values, indicator)
          if hparams.include_gradients_sum_time_attribution:
            assert not hparams.use_rnn_attention
            all_attributions['gradient_sum'] = compute_gradient_attribution(
                _predictions_for_gradients(
                    logits, seq_mask, delta_time,
                    hparams.attribution_max_delta_time, averaged=False),
                obs_values, indicator)
          if hparams.include_gradients_avg_time_attribution:
            assert not hparams.use_rnn_attention
            all_attributions['gradient_avg'] = compute_gradient_attribution(
                _predictions_for_gradients(
                    logits, seq_mask, delta_time,
                    hparams.attribution_max_delta_time, averaged=True),
                obs_values, indicator)
          if hparams.include_path_integrated_gradients_attribution:
            all_attributions['integrated_gradient'] = (
                compute_path_integrated_gradient_attribution(
                    obs_values, indicator, diff_delta_time, delta_time,
                    sequence_length, seq_mask, hparams))
          if hparams.use_rnn_attention:
            all_attributions['rnn_attention'] = weights
          if hparams.include_diff_sequence_prediction_attribution:
            all_attributions['diff_sequence'] = (
                compute_prediction_diff_attribution(logits))

          all_attribution_dict = {}
          for attribution_name, attribution in all_attributions.items():
            attribution_dict = convert_attribution(
                attribution,
                sequence_feature_map,
                seq_mask,
                delta_time,
                hparams.attribution_threshold,
                hparams.attribution_max_delta_time,
                prefix=attribution_name + '-')
            all_attribution_dict.update(attribution_dict)
          if hparams.include_sequence_prediction:
            # Add the predictions at each time step to the attention dictionary.
            attribution_indices = tf.where(seq_mask > 0.5)
            all_attribution_dict['predictions'] = tf.sparse.expand_dims(
                tf.SparseTensor(
                    indices=attribution_indices,
                    values=tf.gather_nd(
                        tf.sigmoid(logits), attribution_indices),
                    dense_shape=tf.to_int64(tf.shape(delta_time))),
                axis=1)
        # At test/inference time we only make a single prediction even if we did
        # sequence_prediction during training.
        logits = last_logits
        seq_mask = None

      probabilities = tf.sigmoid(logits)
      classes = probabilities > 0.5
      predictions = {
          PredictionKeys.LOGITS: logits,
          PredictionKeys.PROBABILITIES: probabilities,
          PredictionKeys.CLASSES: classes
      }
      # Calculate the loss for TRAIN and EVAL, but not PREDICT.
      if mode == tf.estimator.ModeKeys.PREDICT:
        loss = None
      else:
        loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels[hparams.label_key],
            logits=predictions[PredictionKeys.LOGITS])
        if hparams.sequence_prediction:
          loss *= seq_mask
        loss = tf.reduce_mean(loss)
        regularization_losses = tf.losses.get_regularization_losses()
        if regularization_losses:
          tf.summary.scalar('loss/prior_regularization', loss)
          regularization_loss = tf.add_n(regularization_losses)
          tf.summary.scalar('loss/regularization_loss', regularization_loss)
          loss += regularization_loss
        tf.summary.scalar('loss', loss)

      train_op = None
      if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(
            learning_rate=hparams.learning_rate, beta1=0.9, beta2=0.999,
            epsilon=1e-8)
        optimizer = tf.contrib.estimator.clip_gradients_by_norm(
            optimizer, 6.0)
        train_op = tf.contrib.training.create_train_op(
            total_loss=loss, optimizer=optimizer,
            summarize_gradients=False)
      if mode != tf.estimator.ModeKeys.TRAIN:
        for k, v in all_attribution_dict.items():
          if not isinstance(v, tf.SparseTensor):
            raise ValueError('Expected attributions to be SparseTensors, '
                             'got %s for feature %s' %
                             (v.__class__.__name__, k))
          predictions['attention_attribution,%s,indices' % k] = v.indices
          predictions['attention_attribution,%s,values' % k] = v.values
          predictions['attention_attribution,%s,shape' % k] = v.dense_shape

      eval_metric_ops = {}
      if mode == tf.estimator.ModeKeys.EVAL:
        # label_tensor is only assigned in the TRAIN branch above, so fetch the
        # labels here before computing the evaluation metrics.
        label_tensor = labels[hparams.label_key]
        auc = tf.metrics.auc
        prob_k = PredictionKeys.PROBABILITIES
        class_k = PredictionKeys.CLASSES
        m = 'careful_interpolation'
        metric_fn_dict = {
            'auc-roc':
                lambda l, p: auc(l, p[prob_k], curve='ROC', summation_method=m),
            'auc-pr':
                lambda l, p: auc(l, p[prob_k], curve='PR', summation_method=m),
            'accuracy':
                lambda l, p: tf.metrics.accuracy(l, p[class_k]),
        }
        for (k, f) in metric_fn_dict.items():
          eval_metric_ops[k] = f(label_tensor, predictions)
      # Define the output for serving.
      export_outputs = {}
      if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'mortality': tf.estimator.export.PredictOutput(predictions)
        }

      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)
Example #11
def compute_path_integrated_gradient_attribution(
    obs_values,
    indicator,
    diff_delta_time,
    delta_time,
    sequence_length,
    seq_mask,
    hparams,
    construct_logits_fn=None):
  """Constructs the attribution of what inputs result in a higher prediction.

  Attribution here refers to the integrated gradients as defined here
  https://arxiv.org/pdf/1703.01365.pdf and approximated for the j-th variable
  via

  (x-x') * 1/num_steps * sum_{i=1}^{num_steps} of the derivative of
  F(x'+(x-x')*i/num_steps) w.r.t. its j-th input.

  where we take x' the most recent value before attribution_max_delta_time and
  x to be the subsequent observation values from the same lab test.
  x'+(x-x')*i/num_steps is the linear interpolation between x' and x.

  Args:
    obs_values: A dense representation of the observation_values in which
      obs_values[b, t, :] has at most one non-zero value, at the position of
      the corresponding lab test from obs_code_ids, holding the value of the
      lab result. A padded Tensor of shape [batch_size, max_sequence_length,
      vocab_size] of type float32 of possibly normalized observation values.
    indicator: A one-hot encoding of whether a value in obs_values comes from
      observation_values or is just filled in to be 0. A Tensor of
      shape [batch_size, max_sequence_length, vocab_size] and type float32.
    diff_delta_time: Difference between two consecutive time steps.
    delta_time: A Tensor of shape [batch_size, max_sequence_length] describing
      the time to prediction.
    sequence_length: Sequence length (before padding), Tensor of shape
      [batch_size].
    seq_mask: A Tensor of shape [batch_size, max_sequence_length, 1]
      indicating which timesteps are padded.
    hparams: Hyper parameters.
    construct_logits_fn: A function that constructs the logits from the same
      inputs as construct_logits. If None, construct_logits is used.
  Returns:
    A Tensor of shape [batch, max_sequence_length, 1] of the gradient of the
    prediction as a function of the lab result at that batch-entry time.
  """
  last_obs_values_0 = _most_recent_obs_value(obs_values, indicator, delta_time,
                                             hparams.attribution_max_delta_time)
  gradients = []
  # We need to limit the diff over the base to timesteps after base.
  last_obs_values = last_obs_values_0 * (
      tf.to_float(indicator) *
      tf.to_float(delta_time < hparams.attribution_max_delta_time))
  obs_values_with_last_replaced = obs_values * tf.to_float(
      delta_time >= hparams.attribution_max_delta_time) + last_obs_values
  diff_over_base = obs_values - obs_values_with_last_replaced

  for i in range(hparams.path_integrated_gradients_num_steps):
    alpha = 1.0 * i / (hparams.path_integrated_gradients_num_steps - 1)
    step_obs_values = obs_values_with_last_replaced + diff_over_base * alpha
    if not construct_logits_fn:
      construct_logits_fn = construct_logits
    logits, _ = construct_logits_fn(
        diff_delta_time,
        step_obs_values,
        indicator,
        sequence_length,
        seq_mask,
        hparams,
        reuse=True)
    if hparams.use_rnn_attention:
      last_logits = logits
    else:
      last_logits = rnn_common.select_last_activations(
          logits, tf.to_int32(sequence_length))
    # Ideally, we'd like to get the gradients of the change in
    # value over the previous one to attribute it to both and not just a single
    # value.
    gradient = compute_gradient_attribution(last_logits, step_obs_values,
                                            indicator)
    gradients.append(
        tf.reduce_sum(diff_over_base, axis=2, keepdims=True) * gradient)
  return tf.add_n(gradients) / tf.to_float(
      hparams.path_integrated_gradients_num_steps)
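
The Riemann-sum approximation described in the docstring can be sanity-checked in NumPy on a toy function where the integral is known in closed form; num_steps here mirrors hparams.path_integrated_gradients_num_steps and everything else is illustrative:

import numpy as np

def integrated_gradients(x, x_base, grad_fn, num_steps=50):
  # (x - x') * mean over the straight-line interpolation points of dF/dx,
  # mirroring the loop over alpha in the function above.
  diff = x - x_base
  grads = [grad_fn(x_base + diff * i / (num_steps - 1)) for i in range(num_steps)]
  return diff * np.mean(grads, axis=0)

# Toy function F(x) = sum(x**2), so dF/dx = 2 * x and the attributions should
# sum to F(x) - F(x_base) (the completeness property of integrated gradients).
x = np.array([1.0, 2.0, -3.0])
x_base = np.zeros_like(x)
attribution = integrated_gradients(x, x_base, grad_fn=lambda z: 2.0 * z)
print(attribution, attribution.sum())  # [1. 4. 9.] 14.0 = F(x) - F(x_base)
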
Example #12
    def _model_fn(features, labels, mode):

        color_name = features[COLOR_NAME_KEY]
        # int64 -> int32
        sequence_length = tf.cast(features[SEQUENCE_LENGTH_KEY],
                                  dtype=tf.int32)

        # ----------- Preparing input --------------------
        # Creating a tf constant to hold the map char -> index
        mapping = tf.constant(CHARACTERS, name='mapping')
        table = tf.contrib.lookup.index_table_from_tensor(mapping,
                                                          dtype=tf.string)
        int_color_name = table.lookup(color_name)

        # Represent color names with a one-hot representation
        color_name_onehot = tf.one_hot(int_color_name,
                                       depth=len(CHARACTERS) + 1)

        # ---------- RNN -------------------
        # Each RNN layer will consist of an LSTM cell
        rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in rnn_cell_sizes]

        # Construct the layers
        multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)

        # Runs the RNN model dynamically
        # more about it at:
        # https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn
        outputs, final_state = tf.nn.dynamic_rnn(
            cell=multi_rnn_cell,
            inputs=color_name_onehot,
            sequence_length=sequence_length,
            dtype=tf.float32)

        # Slice to keep only the last cell of the RNN
        last_activations = rnn_common.select_last_activations(
            outputs, sequence_length)

        # ------------ Dense layers -------------------
        # Construct dense layers on top of the last cell of the RNN
        for units in dnn_layer_sizes:
            last_activations = tf.layers.dense(last_activations,
                                               units,
                                               activation=tf.nn.relu)

        # Final dense layer for prediction
        predictions = tf.layers.dense(last_activations, label_dimension)

        # ----------- Loss and Optimizer ----------------
        loss = None
        train_op = None

        if mode != tf.estimator.ModeKeys.PREDICT:
            loss = tf.losses.mean_squared_error(labels, predictions)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.contrib.layers.optimize_loss(
                loss,
                tf.contrib.framework.get_global_step(),
                optimizer=optimizer,
                learning_rate=learning_rate)

        return model_fn_lib.EstimatorSpec(mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op)
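
For reference, a standalone sketch of the char -> index lookup used above. Outside of the Estimator framework the table must be initialized explicitly; the three-character vocabulary is only for illustration:

import tensorflow as tf

# Minimal TF 1.x sketch of the char -> index lookup above. Tokens outside the
# vocabulary map to -1 by default, and tf.one_hot turns -1 into an all-zero row.
mapping = tf.constant(['a', 'b', 'c'], name='mapping')
table = tf.contrib.lookup.index_table_from_tensor(mapping, dtype=tf.string)
ids = table.lookup(tf.constant(['c', 'a', 'z']))  # 'z' is out of vocabulary

with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  print(sess.run(ids))  # [ 2  0 -1]
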
Example #13
  def model_fn(features, labels, mode):

    x = features['x']
    sequence_length = tf.cast(features[rnn_common.RNNKeys.SEQUENCE_LENGTH_KEY],
                              tf.int32)

    # creating embedding for the reviews
    embedding = tf.contrib.layers.embed_sequence(x,
                                                 vocab_size=num_words,
                                                 embed_dim=embed_dim)

    # Each RNN layer will consist of an LSTM cell
    if len(dropout_keep_probabilities) == len(rnn_cell_sizes):

      if mode != tf.estimator.ModeKeys.TRAIN:
        # Keep probabilities of 1 (i.e. no dropout) outside of training.
        rnn_layers = [
            rnn.DropoutWrapper(rnn.LSTMCell(size),
                               input_keep_prob=1,
                               output_keep_prob=1,
                               state_keep_prob=1)
            for size in rnn_cell_sizes]
      else:
        rnn_layers = [
            rnn.DropoutWrapper(rnn.LSTMCell(size),
                               input_keep_prob=keep_prob,
                               output_keep_prob=keep_prob,
                               state_keep_prob=keep_prob)
            for size, keep_prob in zip(rnn_cell_sizes,
                                       dropout_keep_probabilities)]
    else:
      rnn_layers = [rnn.LSTMCell(size) for size in rnn_cell_sizes]

    # Construct the layers
    multi_rnn_cell = rnn.MultiRNNCell(rnn_layers)

    # Runs the RNN model dynamically
    # more about it at:
    # https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn
    outputs, final_state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                             inputs=embedding,
                                             sequence_length=sequence_length,
                                             dtype=tf.float32)

    # Slice to keep only the last cell of the RNN
    last_activations = rnn_common.select_last_activations(outputs,
                                                          sequence_length)

    # Construct dense layers on top of the last cell of the RNN
    for units in dnn_layer_sizes:
      last_activations = tf.layers.dense(last_activations,
                                         units,
                                         activation=tf.nn.relu)

    # Final dense layer for prediction
    predictions = tf.layers.dense(last_activations, label_dimension)
    predictions_softmax = tf.nn.softmax(predictions)

    loss = None
    train_op = None
    eval_op = None

    if mode != tf.estimator.ModeKeys.PREDICT:
      labels_onehot = tf.one_hot(labels, 2)

      eval_op = {
          'accuracy': tf.metrics.accuracy(
              tf.argmax(input=predictions_softmax, axis=1),
              tf.argmax(input=labels_onehot, axis=1))
      }

      loss = tf.losses.softmax_cross_entropy(labels_onehot, predictions)

    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = tf.contrib.layers.optimize_loss(
          loss,
          tf.contrib.framework.get_global_step(),
          optimizer=optimizer,
          learning_rate=learning_rate)

    return tf.estimator.EstimatorSpec(mode,
                                      predictions=predictions_softmax,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_op)
Example #14
    def makeqnetwork(self,
                     input_size,
                     rnnshape,
                     ffnshape,
                     num_actions,
                     training_input=None,
                     training_sequence_lengths=None,
                     inference_input=None,
                     inference_hidden_state=None,
                     scope_name="RNN"):
        """
        Construct graph.
        :return: input placeholder, output layer, list of variables
        """
        # Build brain model
        with tf.name_scope(scope_name + '/') as ns:
            rnn_layers = [tf.nn.rnn_cell.GRUCell(units) for units in rnnshape]
            multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)

            if training_input is not None and training_sequence_lengths is not None:
                state_in = training_input
                sequence_lengths = training_sequence_lengths
            else:
                state_in = tf.placeholder(shape=[None, None, input_size],
                                          dtype=tf.float32)
                sequence_lengths = tf.placeholder(shape=[None], dtype=tf.int32)

            outputs, hidden = tf.nn.dynamic_rnn(
                cell=multi_rnn_cell,
                inputs=state_in,
                sequence_length=sequence_lengths,
                dtype=tf.float32)
            layer = rnn_common.select_last_activations(outputs,
                                                       sequence_lengths)

            if inference_input is not None and inference_hidden_state is not None:
                inference_in = inference_input
                inference_state = inference_hidden_state
            else:
                inference_in = tf.placeholder(shape=[None, input_size],
                                              dtype=tf.float32)
                inference_state = multi_rnn_cell.zero_state(
                    tf.shape(inference_in)[0], dtype=tf.float32)

            inference_output, inference_hidden = multi_rnn_cell(
                inference_in, inference_state)
            inference_layer = inference_output

            for i, units in enumerate(ffnshape):
                layer = tf.layers.dense(layer,
                                        units,
                                        activation=tf.nn.relu,
                                        reuse=None,
                                        name="ffn_{}".format(i))
            for i, units in enumerate(ffnshape):
                inference_layer = tf.layers.dense(inference_layer,
                                                  units,
                                                  activation=tf.nn.relu,
                                                  reuse=True,
                                                  name="ffn_{}".format(i))

            # Make output layer without relu
            layer = tf.layers.dense(layer,
                                    num_actions,
                                    activation=None,
                                    reuse=None,
                                    name="ffn_last")
            inference_layer = tf.layers.dense(inference_layer,
                                              num_actions,
                                              activation=None,
                                              reuse=True,
                                              name="ffn_last")

            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope=ns)

        return (state_in, sequence_lengths,
                layer), (inference_in, inference_state, inference_layer,
                         inference_hidden), variables