def connector_capsule_mat(input_tensor,
                          position_grid,
                          input_activation,
                          input_dim,
                          output_dim,
                          layer_name,
                          num_routing=3,
                          num_in_atoms=3,
                          num_out_atoms=3,
                          leaky=False,
                          final_beta=1.0,
                          min_var=0.0005):
    """Final Capsule Layer with Pose Matrices and Shared connections."""
    # One weight tensor for each capsule of the layer below: w: [8*128, 8*10]
    with tf.variable_scope(layer_name):
        # This Variable will hold the state of the weights for the layer
        with tf.name_scope('input_center_connector'):
            utils.activation_summary(input_tensor)
        weights = utils.weight_variable(
            [input_dim, num_out_atoms, output_dim * num_out_atoms],
            stddev=0.01)
        # weights = tf.clip_by_norm(weights, 1.0, axes=[1])
        activation_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1],
                                                init_value=1.0,
                                                name='activation_biases')
        sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1],
                                           init_value=2.0,
                                           name='sigma_biases')

        with tf.name_scope('Wx_plus_b'):
            # input_tensor: [x, 128, 8, h, w]
            input_shape = tf.shape(input_tensor)
            input_trans = tf.transpose(input_tensor, [1, 0, 3, 4, 2])
            input_share = tf.reshape(input_trans,
                                     [input_dim, -1, num_in_atoms])
            # wx_share: [input_dim, batch*h*w, output_dim * num_out_atoms]
            wx_share = tf.matmul(input_share, weights)
            # sqr_num_out_atoms = num_out_atoms
            # Poses are num_out_atoms x num_out_atoms matrices, so the
            # flattened pose vector has num_out_atoms**2 entries.
            num_out_atoms *= num_out_atoms
            wx_trans = tf.reshape(wx_share, [
                input_dim, input_shape[0], input_shape[3], input_shape[4],
                num_out_atoms, output_dim
            ])
            wx_trans.set_shape(
                (input_dim, None, input_tensor.get_shape()[3],
                 input_tensor.get_shape()[4], num_out_atoms, output_dim))
            h, w, _ = position_grid.get_shape()
            height = h
            width = w
            # t_pose = tf.transpose(position_grid, [2, 0, 1])
            # t_pose_exp = tf.scatter_nd([[sqr_num_out_atoms -1],
            #   [2 * sqr_num_out_atoms - 1]], t_pose, [num_out_atoms, height, width])
            # pose_g_exp = tf.transpose(t_pose_exp, [1, 2, 0])
            zero_grid = tf.zeros([height, width, num_out_atoms - 2])
            pose_g_exp = tf.concat([position_grid, zero_grid], axis=2)
            pose_g = tf.expand_dims(
                tf.expand_dims(tf.expand_dims(pose_g_exp, -1), 0), 0)
            wx_posed = wx_trans + pose_g
            wx_posed_t = tf.transpose(wx_posed, [1, 0, 2, 3, 5, 4])

            # wx: [batch, input_dim*height*width, output_dim, num_out_atoms, 1, 1]
            wx = tf.reshape(wx_posed_t, [
                -1, input_dim * height * width, output_dim, num_out_atoms, 1, 1
            ])
        with tf.name_scope('routing'):
            # Routing
            # logit_shape: [input_dim*height*width, output_dim, 1, 1, 1]
            logit_shape = [input_dim * height * width, output_dim, 1, 1, 1]
            for _ in range(4):
                input_activation = tf.expand_dims(input_activation, axis=-1)
            activation, center = update_em_routing(
                wx=wx,
                input_activation=input_activation,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms,
                num_routing=num_routing,
                output_dim=output_dim,
                leaky=leaky,
                final_beta=final_beta / 4,
                min_var=min_var,
            )
        out_activation = tf.squeeze(activation, axis=[1, 3, 4, 5])
        out_center = tf.squeeze(center, axis=[1, 4, 5])
        return tf.sigmoid(out_activation), out_center
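# Hedged usage sketch (not from the original source): assumes `utils` and
# `update_em_routing` are available, and that capsule inputs are laid out as
# [batch, input_dim, num_in_atoms, height, width]; all shapes here are
# illustrative.
#
#   x = tf.random_normal([4, 128, 3, 6, 6])    # capsule poses
#   act = tf.random_normal([4, 128, 6, 6])     # capsule activations
#   xs, ys = tf.meshgrid(tf.linspace(-1., 1., 6), tf.linspace(-1., 1., 6))
#   grid = tf.stack([xs, ys], axis=2)          # [h, w, 2] position grid
#   out_act, out_center = connector_capsule_mat(
#       x, grid, act, input_dim=128, output_dim=10, layer_name='final_caps')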
import numpy as np
import tensorflow.compat.v1 as tf  # TF1-style APIs are used below

tf.disable_v2_behavior()

xy = np.loadtxt('data/data-03-diabetes.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

# Placeholders for tensors that will always be fed.
X = tf.placeholder(tf.float32, shape=[None, 8])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([8, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis using the sigmoid function; yields a real number in (0, 1).
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# Cross-entropy cost built from log terms. (For numerical stability,
# tf.nn.sigmoid_cross_entropy_with_logits on the pre-sigmoid logits is the
# usual alternative.)
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# hypothesis > 0.5 gives True/False; casting to float32 yields 1.0 or 0.0.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)

# Compare the predictions with Y and average the matches to get the accuracy.
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
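  # Hedged completion (not in the original snippet): a standard training loop
  # for this example; the step count and print interval are assumptions.
  for step in range(10001):
    cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
    if step % 2000 == 0:
      print(step, cost_val)
  h, p, a = sess.run([hypothesis, predicted, accuracy],
                     feed_dict={X: x_data, Y: y_data})
  print('Accuracy:', a)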
def conv_capsule_mat_fast(
    input_tensor,
    input_activation,
    input_dim,
    output_dim,
    layer_name,
    num_routing=3,
    num_in_atoms=3,
    num_out_atoms=3,
    stride=2,
    kernel_size=5,
    min_var=0.0005,
    final_beta=1.0,
):
    """Convolutional Capsule layer with fast EM routing.

  Args:
    input_tensor: The input capsule features.
    input_activation: The input capsule activations.
    input_dim: Number of input capsule types.
    output_dim: Number of output capsule types.
    layer_name: Name of this layer, e.g. conv_capsule1
    num_routing: Number of routing iterations.
    num_in_atoms: Number of features in each of the input capsules.
    num_out_atoms: Number of features in each of the output capsules.
    stride: Stride of the convolution.
    kernel_size: kernel size of the convolution.
    min_var: Minimum variance for each capsule to avoid NaNs.
    final_beta: beta for making the routing factors sharp.

  Returns:
    The final capsule center and activations.
  """
    tf.logging.info('conv_capsule_mat_fast %s', layer_name)
    tf.logging.info('input_shape %s', input_tensor.shape.as_list())
    in_atom_sq = num_in_atoms * num_in_atoms
    with tf.variable_scope(layer_name):
        # This should be fully defined...
        # input_shape = tf.shape(input_tensor)
        input_shape = input_tensor.shape.as_list()
        batch, _, _, in_height, in_width = input_shape
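        # For example, with in_height = in_width = 14, kernel_size = 5 and
        # stride = 2, the VALID output size is (14 - 5) // 2 + 1 = 5.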
        o_height = (in_height - kernel_size) // stride + 1
        o_width = (in_width - kernel_size) // stride + 1

        # This Variable will hold the state of the weights for the layer.
        kernel = utils.weight_variable(shape=[
            input_dim, kernel_size, kernel_size, num_in_atoms,
            output_dim * num_out_atoms
        ],
                                       stddev=0.1)
        activation_biases = utils.bias_variable(
            [1, 1, output_dim, 1, 1, 1, 1, 1],
            init_value=0.2,
            name='activation_biases')
        sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1, 1, 1],
                                           init_value=.5,
                                           name='sigma_biases')

        with utils.maybe_jit_scope(), tf.name_scope('conv'):
            input_tensor_reshaped = tf.reshape(
                input_tensor,
                [batch * input_dim * in_atom_sq, in_height, in_width, 1])
            input_act_reshaped = tf.reshape(
                input_activation, [batch * input_dim, in_height, in_width, 1])

            conv_patches = utils.kernel_tile(input_tensor_reshaped,
                                             kernel_size, stride)
            act_patches = utils.kernel_tile(input_act_reshaped, kernel_size,
                                            stride)

            patches = tf.reshape(conv_patches,
                                 (batch, input_dim, in_atom_sq, o_height,
                                  o_width, kernel_size, kernel_size))
            patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2])
            patch_split = tf.reshape(
                patch_trans,
                (input_dim, kernel_size, kernel_size,
                 batch * o_height * o_width * num_in_atoms, num_in_atoms),
                name='patch_split')
            a_patches = tf.reshape(act_patches,
                                   (batch, input_dim, 1, 1, o_height, o_width,
                                    kernel_size, kernel_size),
                                   name='a_patches')

        # Recompute Wx on backprop to save memory (perhaps redo patches as well?)
        # @tf.contrib.layers.recompute_grad
        def compute_wx(patch_split, kernel, is_recomputing=False):
            tf.logging.info('compute_wx(is_recomputing=%s)', is_recomputing)
            with utils.maybe_jit_scope(), tf.name_scope('wx'):
                wx = tf.matmul(patch_split, kernel)
                wx = tf.reshape(
                    wx, (input_dim, kernel_size, kernel_size, batch, o_height,
                         o_width, num_in_atoms * num_out_atoms, output_dim))
                wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2])
            return wx

        wx = compute_wx(patch_split, kernel.value())

        with utils.maybe_jit_scope():
            # Routing
            logit_shape = [
                input_dim, output_dim, 1, o_height, o_width, kernel_size,
                kernel_size
            ]
            tf.logging.info('logit_shape: %s', logit_shape)
            activation, center = update_conv_routing_fast(
                wx=wx,
                input_activation=a_patches,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms * num_out_atoms,
                input_dim=input_dim,
                num_routing=num_routing,
                output_dim=output_dim,
                min_var=min_var,
                final_beta=4 * final_beta,
                stride=stride,
                layer_name=layer_name,
            )

        with utils.maybe_jit_scope():
            out_activation = tf.squeeze(activation,
                                        axis=[1, 3, 6, 7],
                                        name='out_activation')
            out_center = tf.squeeze(center, axis=[1, 6, 7], name='out_center')
            out_activation = tf.sigmoid(out_activation)

        with tf.name_scope('center'):
            utils.activation_summary(out_center)

        return out_activation, out_center
    def model_fn(features, labels, mode):
      """Creates the prediction, loss, and train ops.

      Args:
        features: A dictionary of tensors keyed by the feature name.
        labels: A dictionary of label tensors keyed by the label key.
        mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

      Returns:
        EstimatorSpec with the mode, prediction, loss, train_op and
        output_alternatives a dictionary specifying the output for a
        servo request during serving.
      """
      # 1. Construct input to RNN
      sequence_feature_map = {
          k: features[input_fn.SEQUENCE_KEY_PREFIX + k]
          for k in hparams.sequence_features
      }
      sequence_length = tf.squeeze(
          features[input_fn.CONTEXT_KEY_PREFIX + 'sequenceLength'],
          axis=1,
          name='sq_seq_len')
      tf.summary.scalar('sequence_length', tf.reduce_mean(sequence_length))
      diff_delta_time, obs_values, indicator = construct_input(
          sequence_feature_map, hparams.categorical_values,
          hparams.categorical_seq_feature, hparams.feature_value, mode,
          hparams.normalize, hparams.momentum, hparams.min_value,
          hparams.max_value, hparams.input_keep_prob)

      seq_mask = tf.expand_dims(
          tf.sequence_mask(sequence_length, dtype=tf.float32), axis=2)
      logits, weights = construct_logits(
          diff_delta_time,
          obs_values,
          indicator,
          sequence_length,
          seq_mask,
          hparams,
          reuse=False)

      all_attribution_dict = {}
      if mode == tf.estimator.ModeKeys.TRAIN:
        if hparams.sequence_prediction:
          assert not hparams.use_rnn_attention
          # If we train a sequence_prediction we repeat the labels over time.
          label_tensor = labels[hparams.label_key]
          labels[hparams.label_key] = tf.tile(
              tf.expand_dims(label_tensor, 2),
              multiples=[1, tf.shape(logits)[1], 1])
          if hparams.volatility_loss_factor > 0.0:
            volatility = tf.reduce_sum(
                tf.square(seq_mask *
                          compute_prediction_diff_attribution(logits)))
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                 volatility * hparams.volatility_loss_factor)
        elif not hparams.use_rnn_attention:
          logits = rnn_common.select_last_activations(
              logits, tf.to_int32(sequence_length))
      else:
        if hparams.sequence_prediction:
          last_logits = rnn_common.select_last_activations(
              logits, tf.to_int32(sequence_length))
        else:
          last_logits = logits
        if mode == tf.estimator.ModeKeys.PREDICT:
          delta_time = sequence_feature_map['deltaTime']
          all_attributions = {}
          if hparams.include_gradients_attribution:
            all_attributions['gradient_last'] = compute_gradient_attribution(
                last_logits, obs_values, indicator)
          if hparams.include_gradients_sum_time_attribution:
            assert not hparams.use_rnn_attention
            all_attributions['gradient_sum'] = compute_gradient_attribution(
                _predictions_for_gradients(
                    logits, seq_mask, delta_time,
                    hparams.attribution_max_delta_time, averaged=False),
                obs_values, indicator)
          if hparams.include_gradients_avg_time_attribution:
            assert not hparams.use_rnn_attention
            all_attributions['gradient_avg'] = compute_gradient_attribution(
                _predictions_for_gradients(
                    logits, seq_mask, delta_time,
                    hparams.attribution_max_delta_time, averaged=True),
                obs_values, indicator)
          if hparams.include_path_integrated_gradients_attribution:
            all_attributions['integrated_gradient'] = (
                compute_path_integrated_gradient_attribution(
                    obs_values, indicator, diff_delta_time, delta_time,
                    sequence_length, seq_mask, hparams))
          if hparams.use_rnn_attention:
            all_attributions['rnn_attention'] = weights
          if hparams.include_diff_sequence_prediction_attribution:
            all_attributions['diff_sequence'] = (
                compute_prediction_diff_attribution(logits))

          all_attribution_dict = {}
          for attribution_name, attribution in all_attributions.items():
            attribution_dict = convert_attribution(
                attribution,
                sequence_feature_map,
                seq_mask,
                delta_time,
                hparams.attribution_threshold,
                hparams.attribution_max_delta_time,
                prefix=attribution_name + '-')
            all_attribution_dict.update(attribution_dict)
          if hparams.include_sequence_prediction:
            # Add the predictions at each time step to the attribution dictionary.
            attribution_indices = tf.where(seq_mask > 0.5)
            all_attribution_dict['predictions'] = tf.sparse.expand_dims(
                tf.SparseTensor(
                    indices=attribution_indices,
                    values=tf.gather_nd(
                        tf.sigmoid(logits), attribution_indices),
                    dense_shape=tf.to_int64(tf.shape(delta_time))),
                axis=1)
        # At test/inference time we only make a single prediction even if we did
        # sequence_prediction during training.
        logits = last_logits
        seq_mask = None

      probabilities = tf.sigmoid(logits)
      classes = probabilities > 0.5
      predictions = {
          PredictionKeys.LOGITS: logits,
          PredictionKeys.PROBABILITIES: probabilities,
          PredictionKeys.CLASSES: classes
      }
      # Calculate the loss for TRAIN and EVAL, but not PREDICT.
      if mode == tf.estimator.ModeKeys.PREDICT:
        loss = None
      else:
        loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels[hparams.label_key],
            logits=predictions[PredictionKeys.LOGITS])
        if hparams.sequence_prediction:
          loss *= seq_mask
        loss = tf.reduce_mean(loss)
        regularization_losses = tf.losses.get_regularization_losses()
        if regularization_losses:
          tf.summary.scalar('loss/prior_regularization', loss)
          regularization_loss = tf.add_n(regularization_losses)
          tf.summary.scalar('loss/regularization_loss', regularization_loss)
          loss += regularization_loss
        tf.summary.scalar('loss', loss)

      train_op = None
      if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(
            learning_rate=hparams.learning_rate, beta1=0.9, beta2=0.999,
            epsilon=1e-8)
        optimizer = contrib_estimator.clip_gradients_by_norm(optimizer, 6.0)
        train_op = contrib_training.create_train_op(
            total_loss=loss, optimizer=optimizer, summarize_gradients=False)
      if mode != tf.estimator.ModeKeys.TRAIN:
        for k, v in all_attribution_dict.items():
          if not isinstance(v, tf.SparseTensor):
            raise ValueError('Expect attributions to be in SparseTensor, '
                             'getting %s for feature %s' %
                             (v.__class__.__name__, k))
          predictions['attention_attribution,%s,indices' % k] = v.indices
          predictions['attention_attribution,%s,values' % k] = v.values
          predictions['attention_attribution,%s,shape' % k] = v.dense_shape

      eval_metric_ops = {}
      if mode == tf.estimator.ModeKeys.EVAL:
        auc = tf.metrics.auc
        prob_k = PredictionKeys.PROBABILITIES
        class_k = PredictionKeys.CLASSES
        m = 'careful_interpolation'
        metric_fn_dict = {
            'auc-roc':
                lambda l, p: auc(l, p[prob_k], curve='ROC', summation_method=m),
            'auc-pr':
                lambda l, p: auc(l, p[prob_k], curve='PR', summation_method=m),
            'accuracy':
                lambda l, p: tf.metrics.accuracy(l, p[class_k]),
        }
        for (k, f) in metric_fn_dict.items():
          eval_metric_ops[k] = f(label_tensor, predictions)
      # Define the output for serving.
      export_outputs = {}
      if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'mortality': tf.estimator.export.PredictOutput(predictions)
        }

      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)
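    # Hedged sketch (not from the original source): a model_fn with this
    # (features, labels, mode) signature is typically handed to an Estimator;
    # `my_input_fn` and the model_dir are hypothetical.
    #
    #   estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='/tmp/m')
    #   estimator.train(input_fn=my_input_fn, steps=1000)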
Example #5
def get_estimator_spec(hparams,
                       mode,
                       features,
                       labels,
                       frame_logits,
                       onset_logits,
                       offset_logits,
                       velocity_values,
                       offset_network=True):
    """Create TPUEstimatorSpec."""
    loss_metrics = {}
    loss = None
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        onset_losses = tf.losses.sigmoid_cross_entropy(
            labels.onsets[:, :, :constants.MIDI_PITCHES],
            onset_logits[:, :, :constants.MIDI_PITCHES],
            weights=tf.expand_dims(tf.sequence_mask(features.length,
                                                    maxlen=tf.shape(
                                                        labels.onsets)[1]),
                                   axis=2))
        loss_metrics['onset'] = onset_losses

        if offset_network and not hparams.drums_only:
            offset_losses = tf.losses.sigmoid_cross_entropy(
                labels.offsets[:, :, :constants.MIDI_PITCHES],
                offset_logits[:, :, :constants.MIDI_PITCHES],
                weights=tf.expand_dims(tf.sequence_mask(
                    features.length, maxlen=tf.shape(labels.offsets)[1]),
                                       axis=2))
            loss_metrics['offset'] = offset_losses

        velocity_losses = tf.losses.mean_squared_error(
            labels.velocities,
            velocity_values,
            weights=labels.onsets * hparams.velocity_loss_weight)
        loss_metrics['velocity'] = velocity_losses

        if not hparams.drums_only:
            frame_losses = tf.losses.sigmoid_cross_entropy(
                labels.labels[:, :, :constants.MIDI_PITCHES],
                frame_logits[:, :, :constants.MIDI_PITCHES],
                weights=tf.expand_dims(tf.sequence_mask(features.length,
                                                        maxlen=tf.shape(
                                                            labels.labels)[1]),
                                       axis=2))
            loss_metrics['frame'] = frame_losses

        loss = tf.losses.get_total_loss()

    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
        frame_probs = tf.sigmoid(frame_logits)
        onset_probs = tf.sigmoid(onset_logits)
        if offset_network:
            offset_probs = tf.sigmoid(offset_logits)
        else:
            offset_probs = tf.zeros_like(onset_probs)
        frame_predictions = frame_probs > hparams.predict_frame_threshold
        onset_predictions = onset_probs > hparams.predict_onset_threshold
        offset_predictions = offset_probs > hparams.predict_offset_threshold

        if hparams.drum_prediction_map:
            map_predictions = functools.partial(
                drum_mappings.map_pianoroll,
                mapping_name=hparams.drum_prediction_map,
                reduce_mode='any',
                min_pitch=constants.MIN_MIDI_PITCH)
            frame_predictions = tf.map_fn(map_predictions, frame_predictions)
            onset_predictions = tf.map_fn(map_predictions, onset_predictions)
            offset_predictions = tf.map_fn(map_predictions, offset_predictions)
            map_values = functools.partial(
                drum_mappings.map_pianoroll,
                mapping_name=hparams.drum_prediction_map,
                reduce_mode='max',
                min_pitch=constants.MIN_MIDI_PITCH)
            velocity_values = tf.map_fn(map_values, velocity_values)

        metrics_values = get_metrics(features, labels, frame_probs,
                                     onset_probs, frame_predictions,
                                     onset_predictions, offset_predictions,
                                     velocity_values, hparams)

        for label, loss_collection in loss_metrics.items():
            loss_label = 'losses/' + label
            metrics_values[loss_label] = loss_collection

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf_slim.optimize_loss(
            name='training',
            loss=loss,
            global_step=tf.train.get_or_create_global_step(),
            learning_rate=hparams.learning_rate,
            learning_rate_decay_fn=functools.partial(
                tf.train.exponential_decay,
                decay_steps=hparams.decay_steps,
                decay_rate=hparams.decay_rate,
                staircase=True),
            clip_gradients=hparams.clip_norm,
            summaries=[],
            optimizer=lambda lr: tf.tpu.CrossShardOptimizer(
                tf.train.AdamOptimizer(lr)))

        return tf.tpu.estimator.TPUEstimatorSpec(mode=mode,
                                                 loss=loss,
                                                 train_op=train_op)
    elif mode == tf.estimator.ModeKeys.EVAL:
        metric_ops = {k: tf.metrics.mean(v) for k, v in metrics_values.items()}
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=metric_ops)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'frame_probs':
            frame_probs,
            'onset_probs':
            onset_probs,
            'frame_predictions':
            frame_predictions,
            'onset_predictions':
            onset_predictions,
            'offset_predictions':
            offset_predictions,
            'velocity_values':
            velocity_values,
            'sequence_predictions':
            _predict_sequences(frame_probs=frame_probs,
                               onset_probs=onset_probs,
                               frame_predictions=frame_predictions,
                               onset_predictions=onset_predictions,
                               offset_predictions=offset_predictions,
                               velocity_values=velocity_values,
                               hparams=hparams),
            # Include some features and labels in output because Estimator 'predict'
            # API does not give access to them.
            'sequence_ids':
            features.sequence_id,
            'sequence_labels':
            labels.note_sequence,
            'frame_labels':
            labels.labels,
            'onset_labels':
            labels.onsets,
        }
        for k, v in metrics_values.items():
            predictions[k] = tf.stack(v)

        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    else:
        raise ValueError('Unsupported mode: %s' % mode)
Example #6
    def __call__(self, x, state, timestep=0, scope=None):
        with tf.variable_scope(scope or type(self).__name__):
            total_h, total_c = tf.split(state, 2, 1)
            h = total_h[:, 0:self.num_units]
            c = total_c[:, 0:self.num_units]
            self.hyper_state = tf.concat(
                [total_h[:, self.num_units:], total_c[:, self.num_units:]], 1)

            batch_size = x.get_shape().as_list()[0]
            x_size = x.get_shape().as_list()[1]
            self._input_size = x_size

            w_init = None  # uniform

            h_init = lstm_ortho_initializer(1.0)

            w_xh = tf.get_variable(
                'W_xh', [x_size, 4 * self.num_units], initializer=w_init)
            w_hh = tf.get_variable(
                'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init)
            bias = tf.get_variable(
                'bias', [4 * self.num_units],
                initializer=tf.constant_initializer(0.0))

            # concatenate the input and hidden states for hyperlstm input
            hyper_input = tf.concat([x, h], 1)
            hyper_output, hyper_new_state = self.hyper_cell(hyper_input,
                                                            self.hyper_state)
            self.hyper_output = hyper_output
            self.hyper_state = hyper_new_state

            xh = tf.matmul(x, w_xh)
            hh = tf.matmul(h, w_hh)

            # split Wxh contributions
            ix, jx, fx, ox = tf.split(xh, 4, 1)
            ix = self.hyper_norm(ix, 'hyper_ix', use_bias=False)
            jx = self.hyper_norm(jx, 'hyper_jx', use_bias=False)
            fx = self.hyper_norm(fx, 'hyper_fx', use_bias=False)
            ox = self.hyper_norm(ox, 'hyper_ox', use_bias=False)

            # split Whh contributions
            ih, jh, fh, oh = tf.split(hh, 4, 1)
            ih = self.hyper_norm(ih, 'hyper_ih', use_bias=True)
            jh = self.hyper_norm(jh, 'hyper_jh', use_bias=True)
            fh = self.hyper_norm(fh, 'hyper_fh', use_bias=True)
            oh = self.hyper_norm(oh, 'hyper_oh', use_bias=True)

            # split bias
            ib, jb, fb, ob = tf.split(bias, 4, 0)  # bias is to be broadcasted.

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i = ix + ih + ib
            j = jx + jh + jb
            f = fx + fh + fb
            o = ox + oh + ob

            if self.use_layer_norm:
                concat = tf.concat([i, j, f, o], 1)
                concat = layer_norm_all(concat, batch_size, 4, self.num_units, 'ln_all')
                i, j, f, o = tf.split(concat, 4, 1)

            if self.use_recurrent_dropout:
                g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
            else:
                g = tf.tanh(j)

            new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
            new_h = tf.tanh(layer_norm(new_c, self.num_units, 'ln_c')) * tf.sigmoid(o)

            hyper_h, hyper_c = tf.split(hyper_new_state, 2, 1)
            new_total_h = tf.concat([new_h, hyper_h], 1)
            new_total_c = tf.concat([new_c, hyper_c], 1)
            new_total_state = tf.concat([new_total_h, new_total_c], 1)
        return new_h, new_total_state
    def build(self,
              inputs,
              is_training,
              rescale_inputs=True,
              include_decoder=True,
              use_reduce_mean_to_pool=False):
        """Build the graph for this configuration.

    Args:
      inputs: A dict of inputs. For training, should contain 'wav'.
      is_training: Whether we are training or not. Not used in this config.
      rescale_inputs: Whether to convert inputs to mu-law and back to unit
        scaling before passing through the model (loses gradients).
      include_decoder: bool, whether to include the decoder in the build().
      use_reduce_mean_to_pool: whether to use reduce_mean (instead of pool1d)
        for pooling.
    Returns:
      A dict of outputs that includes the 'predictions', 'loss', the 'encoding',
      the 'quantized_input', and whatever metrics we want to track for eval.
    """
        num_stages = 10
        num_layers = 30
        filter_length = 3
        width = 512
        skip_width = 256
        ae_num_stages = 10
        ae_num_layers = 30
        ae_filter_length = 3
        ae_width = 128

        # Encode the source with 8-bit Mu-Law.
        x = inputs['wav']
        x_quantized = utils.mu_law(x)
        x_scaled = tf.cast(x_quantized, tf.float32) / 128.0
        x_scaled = tf.expand_dims(x_scaled, 2)
        x = tf.expand_dims(x, 2)

        ###
        # The Non-Causal Temporal Encoder.
        ###
        en = masked.conv1d(x_scaled if rescale_inputs else x,
                           causal=False,
                           num_filters=ae_width,
                           filter_length=ae_filter_length,
                           name='ae_startconv',
                           is_training=is_training)

        for num_layer in range(ae_num_layers):
            dilation = 2**(num_layer % ae_num_stages)
            d = tf.nn.relu(en)
            d = masked.conv1d(d,
                              causal=False,
                              num_filters=ae_width,
                              filter_length=ae_filter_length,
                              dilation=dilation,
                              name='ae_dilatedconv_%d' % (num_layer + 1),
                              is_training=is_training)
            d = tf.nn.relu(d)
            en += masked.conv1d(d,
                                num_filters=ae_width,
                                filter_length=1,
                                name='ae_res_%d' % (num_layer + 1),
                                is_training=is_training)

        en = masked.conv1d(en,
                           num_filters=self.ae_bottleneck_width,
                           filter_length=1,
                           name='ae_bottleneck',
                           is_training=is_training)

        if use_reduce_mean_to_pool:
            # Depending on the accelerator used for training, masked.pool1d may
            # lead to out of memory error.
            # reduce_mean is equivalent to masked.pool1d when the stride is the same
            # as the window length (which is the case here).
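            # For example, with ae_hop_length=512 a [batch, 8192, depth] input
            # reshapes to [batch, 16, 512, depth]; the mean over axis 2 yields
            # [batch, 16, depth], one code per hop, matching pool1d with
            # stride equal to the window length.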
            batch_size, unused_length, depth = en.shape.as_list()
            en = tf.reshape(en, [batch_size, -1, self.ae_hop_length, depth])
            en = tf.reduce_mean(en, axis=2)
        else:
            en = masked.pool1d(en,
                               self.ae_hop_length,
                               name='ae_pool',
                               mode='avg')
        encoding = en

        if not include_decoder:
            return {'encoding': encoding}

        ###
        # The WaveNet Decoder.
        ###
        l = masked.shift_right(x_scaled if rescale_inputs else x)
        l = masked.conv1d(l,
                          num_filters=width,
                          filter_length=filter_length,
                          name='startconv',
                          is_training=is_training)

        # Set up skip connections.
        s = masked.conv1d(l,
                          num_filters=skip_width,
                          filter_length=1,
                          name='skip_start',
                          is_training=is_training)

        # Residual blocks with skip connections.
        for i in range(num_layers):
            dilation = 2**(i % num_stages)
            d = masked.conv1d(l,
                              num_filters=2 * width,
                              filter_length=filter_length,
                              dilation=dilation,
                              name='dilatedconv_%d' % (i + 1),
                              is_training=is_training)
            d = self._condition(
                d,
                masked.conv1d(en,
                              num_filters=2 * width,
                              filter_length=1,
                              name='cond_map_%d' % (i + 1),
                              is_training=is_training))

            assert d.get_shape().as_list()[2] % 2 == 0
            m = d.get_shape().as_list()[2] // 2
            d_sigmoid = tf.sigmoid(d[:, :, :m])
            d_tanh = tf.tanh(d[:, :, m:])
            d = d_sigmoid * d_tanh

            l += masked.conv1d(d,
                               num_filters=width,
                               filter_length=1,
                               name='res_%d' % (i + 1),
                               is_training=is_training)
            s += masked.conv1d(d,
                               num_filters=skip_width,
                               filter_length=1,
                               name='skip_%d' % (i + 1),
                               is_training=is_training)

        s = tf.nn.relu(s)
        s = masked.conv1d(s,
                          num_filters=skip_width,
                          filter_length=1,
                          name='out1',
                          is_training=is_training)
        s = self._condition(
            s,
            masked.conv1d(en,
                          num_filters=skip_width,
                          filter_length=1,
                          name='cond_map_out1',
                          is_training=is_training))
        s = tf.nn.relu(s)

        ###
        # Compute the logits and get the loss.
        ###
        logits = masked.conv1d(s,
                               num_filters=256,
                               filter_length=1,
                               name='logits',
                               is_training=is_training)
        logits = tf.reshape(logits, [-1, 256])
        probs = tf.nn.softmax(logits, name='softmax')
        x_indices = tf.cast(tf.reshape(x_quantized, [-1]), tf.int32) + 128
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=x_indices, name='nll'),
                              0,
                              name='loss')

        return {
            'predictions': probs,
            'loss': loss,
            'eval': {
                'nll': loss
            },
            'quantized_input': x_quantized,
            'encoding': encoding,
        }
Example #8
    def __call__(self, input_, state, scope=None):
        """Run one step of RHN.

    All tensor arguments are shaped [batch_size, *].

    Args:
      input_: A tensor.
      state: A TiledRHNStateTuple.
      scope: VariableScope for the created subgraph; defaults to
        `TiledRHNCell`.

    Returns:
      A tuple containing:
      - A `2-D, [batch, num_units]`, Tensor representing the output of
        the RHN after one time step (which consists of `depth` number
        of computational steps).
      - A TiledRHNStateTuple of Tensors representing the new state
        of the RHN after one time step.

    Raises:
      ValueError: If input size cannot be inferred from `input_`
      via static shape inference.
    """
        num_units = self._num_units

        def maybe_transform(transform, x):
            if transform is None:
                return x
            else:
                return transform(x)

        # Apply transformations to the input and the recurrent state.
        transformed_input = maybe_transform(self._input_transform, input_)

        # Let's figure out what the outputs are.
        output_name_and_sizes = [
            # This is the proposed update (usually 'j' in an LSTM).
            ('h', num_units),
            # Called 'carry' gate in the paper. This pretty much plays the
            # part of the forget gate of an LSTM.
            ('c', num_units)
        ]
        if not self._tie_gates:
            # Called 'transform' gate, this is like the input gate of an
            # LSTM.
            output_name_and_sizes.append(('t', num_units))

        with tf.variable_scope(scope or type(self).__name__,
                               initializer=self._initializer):
            s = state.s
            for level in six.moves.range(self._depth):
                with tf.variable_scope('layer{}'.format(level)):
                    transformed_s = maybe_transform(self._state_transform, s)
                    if level == 0:
                        inputs = [transformed_input, transformed_s]
                        input_name_and_sizes = [
                            ('x',
                             transformed_input.get_shape().with_rank(2)[1]),
                            # This is the raw cell state. Unlike in an LSTM this
                            # is not passed through any non-linearity.
                            ('s', num_units)
                        ]
                    else:
                        inputs = [transformed_s]
                        input_name_and_sizes = [('s', num_units)]
                    if self._tiled_linear_mods[level] is None:
                        self._tiled_linear_mods[
                            level] = self._tiled_linear_class(
                                input_name_and_sizes, output_name_and_sizes,
                                self._tiled_linear_var_init_params)
                    if self._tie_gates:
                        h_pre, c_pre = self._tiled_linear_mods[level](inputs)
                    else:
                        h_pre, c_pre, t_pre = self._tiled_linear_mods[level](
                            inputs)
                    # Compute the cell state s.
                    c = tf.sigmoid(c_pre)
                    h = self._activation(h_pre)
                    h = maybe_transform(self._update_transform, h)
                    if self._tie_gates:
                        t = 1 - c
                    else:
                        t = tf.sigmoid(t_pre)
                    s = c * s + t * h

                    if self._cell_clip is not None:
                        # pylint: disable=invalid-unary-operand-type
                        s = tf.clip_by_value(s, -self._cell_clip,
                                             self._cell_clip)
                        # pylint: enable=invalid-unary-operand-type

        return s, TiledRHNStateTuple(s)
Example #9
def AdjMatrixAccuracy(logits, labels):
    predictions = tf.cast(tf.greater(tf.sigmoid(logits), .5), tf.float64)
    accuracies = tf.cast(tf.equal(predictions, labels), tf.float64)

    return tf.reduce_mean(accuracies)  # Report accuracy per edge
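# Hedged usage sketch (not from the original source); values are illustrative:
#
#   logits = tf.constant([[2.0, -1.5], [0.3, -0.2]], dtype=tf.float64)
#   labels = tf.constant([[1.0, 0.0], [0.0, 0.0]], dtype=tf.float64)
#   acc = AdjMatrixAccuracy(logits, labels)  # 3 of 4 edges match -> 0.75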
Example #10
    def __call__(self, inputs, state, scope=None):
        """Run this RNN cell on inputs, starting from the given state.

    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: `2-D Tensor` with shape `[batch_size, self.state_size]`.
      scope: optional cell scope.

    Returns:
      A pair containing:

      - Output: A `2-D` tensor with shape `[batch_size, self.output_size]`.
      - New state: A single `2-D` tensor.
    """
        batch_size, hidden_size = inputs.shape
        fixed_arc = self._params.fixed_arc
        num_layers = len(fixed_arc) // 2
        prev_s = self.prev_s
        w_prev = self.w_prev
        w_skip = self.w_skip
        input_mask = self._input_mask
        layer_mask = self._layer_mask

        if layer_mask is not None:
            assert input_mask is not None
            ht = tf.matmul(
                tf.concat([inputs * input_mask, state * layer_mask], axis=1),
                w_prev)
        else:
            ht = tf.matmul(tf.concat([inputs, state], axis=1), w_prev)
        h, t = tf.split(ht, 2, axis=1)
        h = tf.tanh(h)
        t = tf.sigmoid(t)
        s = state + t * (h - state)
        layers = [s]

        def _select_function(h, function_id):
            if function_id == 0:
                return tf.tanh(h)
            elif function_id == 1:
                return tf.nn.relu(h)
            elif function_id == 2:
                return tf.sigmoid(h)
            elif function_id == 3:
                return h
            raise ValueError('Unknown func_idx {0}'.format(function_id))

        start_idx = 0
        used = np.zeros(num_layers + 1, dtype=np.float32)
        for layer_id in range(num_layers):
            prev_idx = fixed_arc[start_idx]
            func_idx = fixed_arc[start_idx + 1]
            prev_s = layers[prev_idx]
            used[prev_idx] = 1
            if layer_mask is not None:
                ht = tf.matmul(prev_s * layer_mask, w_skip[layer_id])
            else:
                ht = tf.matmul(prev_s, w_skip[layer_id])
            h, t = tf.split(ht, 2, axis=1)

            h = _select_function(h, func_idx)
            t = tf.sigmoid(t)
            s = prev_s + t * (h - prev_s)
            s.set_shape([batch_size, hidden_size])
            layers.append(s)
            start_idx += 2

        if self._params.average_loose_ends:
            layers = [l for l, u in zip(layers, used) if u == 0]
            next_s = tf.add_n(layers) / np.sum(1. - used)
        else:
            next_s = tf.add_n(layers[1:]) / tf.cast(num_layers,
                                                    dtype=tf.float32)
        return next_s, next_s
def build_network(image, layers, variables):
    def _weights(layer_name):
        return variables["MobilenetV1/" + layer_name + "/weights"]['x']

    def _biases(layer_name):
        return variables["MobilenetV1/" + layer_name + "/biases"]['x']

    def _depthwise_weights(layer_name):
        return variables["MobilenetV1/" + layer_name +
                         "/depthwise_weights"]['x']

    def _conv_to_output(mobile_net_output, output_layer_name):
        w = tf.nn.conv2d(mobile_net_output,
                         _weights(output_layer_name), [1, 1, 1, 1],
                         padding='SAME')
        w = tf.nn.bias_add(w,
                           _biases(output_layer_name),
                           name=output_layer_name)
        return w

    def _conv(inputs, stride, block_id):
        return tf.nn.relu6(
            tf.nn.conv2d(inputs,
                         _weights("Conv2d_" + str(block_id)),
                         stride,
                         padding='SAME') + _biases("Conv2d_" + str(block_id)))

    def _separable_conv(inputs, stride, block_id, dilations):
        if dilations is None:
            dilations = [1, 1]

        dw_layer = "Conv2d_" + str(block_id) + "_depthwise"
        pw_layer = "Conv2d_" + str(block_id) + "_pointwise"

        w = tf.nn.depthwise_conv2d(inputs,
                                   _depthwise_weights(dw_layer),
                                   stride,
                                   'SAME',
                                   rate=dilations,
                                   data_format='NHWC')
        w = tf.nn.bias_add(w, _biases(dw_layer))
        w = tf.nn.relu6(w)

        w = tf.nn.conv2d(w, _weights(pw_layer), [1, 1, 1, 1], padding='SAME')
        w = tf.nn.bias_add(w, _biases(pw_layer))
        w = tf.nn.relu6(w)

        return w

    x = image
    buff = []
    with tf.variable_scope(None, 'MobilenetV1'):

        for m in layers:
            stride = [1, m['stride'], m['stride'], 1]
            rate = [m['rate'], m['rate']]
            if m['convType'] == "conv2d":
                x = _conv(x, stride, m['blockId'])
                buff.append(x)
            elif m['convType'] == "separableConv":
                x = _separable_conv(x, stride, m['blockId'], rate)
                buff.append(x)

    heatmaps = _conv_to_output(x, 'heatmap_2')
    offsets = _conv_to_output(x, 'offset_2')
    displacement_fwd = _conv_to_output(x, 'displacement_fwd_2')
    displacement_bwd = _conv_to_output(x, 'displacement_bwd_2')
    heatmaps = tf.sigmoid(heatmaps, 'heatmap')

    return heatmaps, offsets, displacement_fwd, displacement_bwd
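# Hedged usage sketch (not from the original source): the `layers` entries and
# the checkpoint-variable dict layout are assumptions inferred from the
# helpers above.
#
#   image = tf.placeholder(tf.float32, [1, 513, 513, 3])
#   layers = [{'blockId': 0, 'convType': 'conv2d', 'stride': 2, 'rate': 1},
#             {'blockId': 1, 'convType': 'separableConv', 'stride': 1,
#              'rate': 1}]
#   outputs = build_network(image, layers, variables)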
Example #12
def build_model(spec, length, hparams, is_training):
    """Builds a raw, API-independent onsets & frames."""

    if hparams.stop_activation_gradient and not hparams.activation_loss:
        raise ValueError(
            'If stop_activation_gradient is true, activation_loss must be true.'
        )

    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
        with tf.variable_scope('onsets'):
            onset_outputs = acoustic_model(spec,
                                           hparams,
                                           lstm_units=hparams.onset_lstm_units,
                                           lengths=length,
                                           is_training=is_training)
            onset_logits = slim.fully_connected(onset_outputs,
                                                constants.MIDI_PITCHES,
                                                activation_fn=None,
                                                scope='onset_logits')

        offset_logits = []
        if hparams.offset_network:
            with tf.variable_scope('offsets'):
                offset_outputs = acoustic_model(
                    spec,
                    hparams,
                    lstm_units=hparams.offset_lstm_units,
                    lengths=length,
                    is_training=is_training)
                offset_logits = slim.fully_connected(offset_outputs,
                                                     constants.MIDI_PITCHES,
                                                     activation_fn=None,
                                                     scope='offset_logits')

        with tf.variable_scope('velocity'):
            velocity_outputs = acoustic_model(
                spec,
                hparams,
                lstm_units=hparams.velocity_lstm_units,
                lengths=length,
                is_training=is_training)
            velocity_values = slim.fully_connected(velocity_outputs,
                                                   constants.MIDI_PITCHES,
                                                   activation_fn=None,
                                                   scope='onset_velocities')

        with tf.variable_scope('frame'):
            if not hparams.share_conv_features:
                # TODO(eriche): this is broken when hparams.frame_lstm_units > 0
                activation_outputs = acoustic_model(
                    spec,
                    hparams,
                    lstm_units=hparams.frame_lstm_units,
                    lengths=length,
                    is_training=is_training)
                activation_logits = slim.fully_connected(
                    activation_outputs,
                    constants.MIDI_PITCHES,
                    activation_fn=None,
                    scope='activation_logits')
            else:
                activation_logits = slim.fully_connected(
                    onset_outputs,
                    constants.MIDI_PITCHES,
                    activation_fn=None,
                    scope='activation_logits')

            logits = []
            if hparams.stop_onset_gradient:
                logits.append(tf.stop_gradient(onset_logits))
            else:
                logits.append(onset_logits)

            if hparams.stop_activation_gradient:
                logits.append(tf.stop_gradient(activation_logits))
            else:
                logits.append(activation_logits)

            if hparams.offset_network:
                if hparams.stop_offset_gradient:
                    logits.append(tf.stop_gradient(offset_logits))
                else:
                    logits.append(offset_logits)

            combined_logits = tf.concat(logits, 2)

            if hparams.combined_lstm_units > 0:
                if hparams.use_tflite_compatible:
                    lstm_layer_builder = lstm_layer_static_for_tflite
                else:
                    lstm_layer_builder = lstm_layer

                outputs = lstm_layer_builder(
                    tf.sigmoid(combined_logits),
                    hparams.combined_lstm_units,
                    hparams.bidirectional,
                    is_training=is_training,
                    lengths=length if hparams.use_lengths else None,
                    stack_size=hparams.combined_rnn_stack_size,
                    dropout_keep_prob=hparams.combined_rnn_dropout_keep_prob)
            else:
                outputs = combined_logits

            frame_logits = slim.fully_connected(outputs,
                                                constants.MIDI_PITCHES,
                                                activation_fn=None,
                                                scope='frame_logits')

    return frame_logits, onset_logits, offset_logits, velocity_values
Example #13
def MLPdemo():
    # Prepare the data.
    # sample = "../Script/Mapping/Mfe/Sample.csv"
    sample = "../Script/Mapping/Mfe/SamTr.csv"
    potus = list(csv.reader(open(sample)))
    dx = []
    dy = []
    potus = potus[1:]
    # shuffle(potus)
    for i in range(0, len(potus)):
        dx.append([int(x) for x in potus[i][0:len(potus[i]) - 1]])
        dy.append([int(potus[i][len(potus[i]) - 1])])
    # train_dx = dx[0:864]; test_dx = dx[1152:]
    train_dx = dx[0:864]
    test_dx = dx[864:]
    train_dy = dy[0:864]
    test_dy = dy[864:]

    # 定义输入和输出
    x = tf.placeholder(tf.float32, shape=(None, 203), name="x-input")
    y_ = tf.placeholder(tf.float32, shape=(None, 1), name="y-input")
    # Define the network parameters.
    w1 = tf.Variable(tf.random_normal([203, 10], mean=0, stddev=1, seed=1))
    w2 = tf.Variable(tf.random_normal([10, 1], mean=0, stddev=1, seed=1))
    b1 = tf.Variable(tf.random_normal([10], mean=0, stddev=1, seed=1))
    b2 = tf.Variable(tf.random_normal([1], mean=0, stddev=1, seed=1))

    y1 = tf.matmul(x, w1) + b1
    y11 = tf.nn.relu(y1)
    y2 = tf.matmul(y11, w2) + b2
    y = tf.sigmoid(y2)
    # tf.clip_by_value(t, clip_value_min, clip_value_max, name=None)
    # cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
    # Note: the logits-based alternative must receive the pre-sigmoid y2, not y:
    # loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y2))
    loss = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)) +
                           (1 - y_) *
                           tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0)))
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)
    X = train_dx
    Y = train_dy
    # Create a session and run the TensorFlow program.
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        sess.run(init)
        steps = 1500
        for i in range(steps):
            # Train the network on the selected samples and update the parameters.
            sess.run(train_step, feed_dict={x: X, y_: Y})
            # Log once every 100 iterations.
            if i % 100 == 0:
                # Compute the cross-entropy on the test data.
                total_cross_entropy, prob = sess.run([loss, y],
                                                     feed_dict={
                                                         x: test_dx,
                                                         y_: test_dy
                                                     })
                # Print the cross-entropy.
                print(
                    "After %d training step(s),cross entropy on all data is %g"
                    % (i, total_cross_entropy))
        prob_train = sess.run(y, feed_dict={x: train_dx, y_: train_dy})
        # print(str(w1.eval(session=sess)))
        # print(str(w2.eval(session=sess)))
        # print(b1.eval(session=sess))
        # print(b2.eval(session=sess))
        from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, recall_score, precision_score
        roc_test = roc_auc_score(test_dy, prob)
        roc_train = roc_auc_score(train_dy, prob_train)
        prob_sig = []
        for i in prob:
            prob_sig.append(1 if float(i) > 0.5 else 0)
        print(accuracy_score(test_dy, prob_sig))

        # save_path = saver.save(sess, '../ML/model.ckpt')
        # print("Model saved in file: %s" % save_path)
        result = []
        result.append([
            roc_test,
            str(w1.eval(session=sess)),
            str(w2.eval(session=sess)),
            str(b1.eval(session=sess)),
            str(b2.eval(session=sess))
        ])

        import matplotlib.pyplot as plt
        from sklearn.metrics import roc_curve
        import pandas as pd
        print("auc  :", roc_test, "-", roc_train)
        y_scores = prob_sig
        fpr, tpr, thresholds = roc_curve(test_dy, prob, pos_label=1.0)
        plt.figure(figsize=(6.4, 6.4))
        plt.plot(fpr, tpr, color='blue', label='AUC = %0.4f' % roc_test)
        plt.plot([0, 1], [0, 1], color='red', linestyle='--')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic of MLP')
        plt.legend(loc="lower right")
        plt.show()
Example #14
def conv_capsule_mat(input_tensor,
                     input_activation,
                     input_dim,
                     output_dim,
                     layer_name,
                     num_routing=3,
                     num_in_atoms=3,
                     num_out_atoms=3,
                     stride=2,
                     kernel_size=5,
                     min_var=0.0005,
                     final_beta=1.0):
    """Convolutional Capsule layer with Pose Matrices."""
    print('caps conv stride: {}'.format(stride))
    in_atom_sq = num_in_atoms * num_in_atoms
    with tf.variable_scope(layer_name):
        input_shape = tf.shape(input_tensor)
        _, _, _, in_height, in_width = input_tensor.get_shape()
        # This Variable will hold the state of the weights for the layer
        kernel = utils.weight_variable(shape=[
            input_dim, kernel_size, kernel_size, num_in_atoms,
            output_dim * num_out_atoms
        ],
                                       stddev=0.3)
        # kernel = tf.clip_by_norm(kernel, 3.0, axes=[1, 2, 3])
        activation_biases = utils.bias_variable(
            [1, 1, output_dim, 1, 1, 1, 1, 1],
            init_value=0.5,
            name='activation_biases')
        sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1, 1, 1],
                                           init_value=.5,
                                           name='sigma_biases')
        with tf.name_scope('conv'):
            # input_tensor: [x, 128, 8, c1, c2] -> [x*128*8, c1, c2, 1]
            print(input_tensor.get_shape())
            input_tensor_reshaped = tf.reshape(input_tensor, [
                input_shape[0] * input_dim * in_atom_sq, input_shape[3],
                input_shape[4], 1
            ])
            input_tensor_reshaped.set_shape((None, input_tensor.get_shape()[3],
                                             input_tensor.get_shape()[4], 1))
            input_act_reshaped = tf.reshape(input_activation, [
                input_shape[0] * input_dim, input_shape[3], input_shape[4], 1
            ])
            input_act_reshaped.set_shape((None, input_tensor.get_shape()[3],
                                          input_tensor.get_shape()[4], 1))
            print(input_tensor_reshaped.get_shape())
            # conv: [x*128,out*out_at, c3,c4]
            conv_patches = tf.extract_image_patches(
                images=input_tensor_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            act_patches = tf.extract_image_patches(
                images=input_act_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            o_height = (in_height - kernel_size) // stride + 1
            o_width = (in_width - kernel_size) // stride + 1
            patches = tf.reshape(conv_patches,
                                 (input_shape[0], input_dim, in_atom_sq,
                                  o_height, o_width, kernel_size, kernel_size))
            patches.set_shape((None, input_dim, in_atom_sq, o_height, o_width,
                               kernel_size, kernel_size))
            patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2])
            patch_split = tf.reshape(
                patch_trans,
                (input_dim, kernel_size, kernel_size, input_shape[0] *
                 o_height * o_width * num_in_atoms, num_in_atoms))
            patch_split.set_shape(
                (input_dim, kernel_size, kernel_size, None, num_in_atoms))
            a_patches = tf.reshape(act_patches,
                                   (input_shape[0], input_dim, 1, 1, o_height,
                                    o_width, kernel_size, kernel_size))
            a_patches.set_shape((None, input_dim, 1, 1, o_height, o_width,
                                 kernel_size, kernel_size))
            with tf.name_scope('input_act'):
                utils.activation_summary(
                    tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(a_patches,
                                                              axis=1),
                                                axis=-1),
                                  axis=-1))
            with tf.name_scope('Wx'):
                wx = tf.matmul(patch_split, kernel)
                wx = tf.reshape(wx, (input_dim, kernel_size, kernel_size,
                                     input_shape[0], o_height, o_width,
                                     num_in_atoms * num_out_atoms, output_dim))
                wx.set_shape(
                    (input_dim, kernel_size, kernel_size, None, o_height,
                     o_width, num_in_atoms * num_out_atoms, output_dim))
                wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2])
                utils.activation_summary(wx)

        with tf.name_scope('routing'):
            # Routing
            # logits: [x, 128, 10, c3, c4]
            logit_shape = [
                input_dim, output_dim, 1, o_height, o_width, kernel_size,
                kernel_size
            ]
            activation, center = update_conv_routing(
                wx=wx,
                input_activation=a_patches,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms * num_out_atoms,
                input_dim=input_dim,
                num_routing=num_routing,
                output_dim=output_dim,
                min_var=min_var,
                final_beta=final_beta,
            )
            # activations: [x, 10, 8, c3, c4]

        out_activation = tf.squeeze(activation, axis=[1, 3, 6, 7])
        out_center = tf.squeeze(center, axis=[1, 6, 7])
        with tf.name_scope('center'):
            utils.activation_summary(out_center)
        return tf.sigmoid(out_activation), out_center
Example #15
    def __init__(self, inputs, config_reader=None):
        self.x = inputs[0]
        # apply the sigmoid to each value of the input matrix
        self.y = tf.sigmoid(self.x)
def pixcnn_gated_nonlinearity(a, b):
    return tf.sigmoid(a) * tf.tanh(b)
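# A minimal usage sketch for the gated nonlinearity above: a gated PixelCNN
# block typically emits 2*C feature maps and splits them into the sigmoid and
# tanh halves (the names and shapes below are illustrative, not from the source).
features = tf.placeholder(tf.float32, [None, 32, 32, 128])  # 2 * 64 channels
a, b = tf.split(features, num_or_size_splits=2, axis=-1)
gated = pixcnn_gated_nonlinearity(a, b)  # sigmoid(a) * tanh(b) -> [None, 32, 32, 64]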
#training data set
x_data = np.array([[10, 0], [8, 1], [3, 3], [2, 3], [5, 1], [2, 0], [1, 0]])
y_data = np.array([[1], [1], [1], [1], [0], [0], [0]])

#placeholder
X = tf.placeholder(shape=[None, 2], dtype=tf.float32)
Y = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Weight (2x1 matrix) & bias (one value)
W = tf.Variable(tf.random_normal([2, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

#Hypothesis
#logits = X@W+b
logits = tf.matmul(X, W) + b
H = tf.sigmoid(logits)

#cost function
#cost = tf.reduce_mean(tf.square(H-Y))
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
#train
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Initialize Session & Variables
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Training
for step in range(1, 3001):
    _, cost_val = sess.run([train, cost], feed_dict={X: x_data, Y: y_data})
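# After training, evaluate the learned hypothesis on the training data; a
# minimal sketch using the sess, H, X and y_data defined above:
probs = sess.run(H, feed_dict={X: x_data})
preds = (probs > 0.5).astype(np.float32)
print("probabilities:", probs.ravel())
print("accuracy:", (preds == y_data).mean())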
Example #18
    def __init__(self):

        super(CVAE, self).__init__()

        #TODO: add config parser
        #self.initizler = tf.keras.initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=None)

        #self.training_datadir='/media/jehill/DATA/ML_data/fastmri/singlecoil/train/singlecoil_train/'
        self.training_datadir = '/jmain01/home/JAD029/txl04/jxp48-txl04/data/fastmri_singlecoil/singlecoil_train/'

        self.BATCH_SIZE = 16
        self.num_epochs = 150
        self.learning_rate = 1e-3
        self.model_name = "CVAE"

        self.image_dim = 128
        self.channels = 1
        self.latent_dim = 64

        self.kernel_size = 3
        lrelu = lambda x: tf.keras.activations.relu(x, alpha=0.3)
        self.activation = lrelu

        self.input_image_1 = tf.placeholder(
            tf.float32, shape=[None, 256, 256,
                               self.channels])  # for the time being, resize the input images below
        self.input_image = tf.image.resize_images(
            self.input_image_1,
            [np.int(self.image_dim),
             np.int(self.image_dim)])
        self.image_shape = self.input_image.shape[1:]
        # note: this placeholder overrides the constant learning_rate set above
        self.learning_rate = tf.placeholder(tf.float32, [],
                                            name='learning_rate')

        self.encoder = self.inference_net()
        self.decoder = self.generative_net()  # note: these are Keras models

        mean, logvar = tf.split(self.encoder(self.input_image),
                                num_or_size_splits=2,
                                axis=1)
        self.z = self.reparameterize(mean, logvar)
        logits = self.decoder(self.z)
        self.reconstructed = tf.sigmoid(logits)

        # calculate the KL loss
        var = tf.exp(logvar)
        kl_loss = 0.5 * tf.reduce_sum(tf.square(mean) + var - 1. - logvar)

        # calculate the reconstruction loss (sum of squared errors against the logits)
        sse_loss = 0.5 * tf.reduce_sum(tf.square(self.input_image - logits))
        self.total_loss = tf.reduce_mean(kl_loss + sse_loss) / self.BATCH_SIZE
        self.list_gradients = self.encoder.trainable_variables + self.decoder.trainable_variables
        self.Optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate,
            beta1=0.5).minimize(self.total_loss, var_list=self.list_gradients)

        # summaries and writer for TensorBoard visualization

        tf.summary.image("Reconstructed image", self.reconstructed)
        tf.summary.image("Input image", self.input_image)

        tf.summary.scalar("KL", kl_loss)
        tf.summary.scalar("SSE", sse_loss)
        tf.summary.scalar("Total loss", self.total_loss)

        self.merged_summary = tf.summary.merge_all()
        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver()

        self.logdir = './trained_models/' + self.model_name  # created below if it does not exist
        self.image_dir = self.logdir + '/images/'
        self.model_dir = self.logdir + '/final_model'

        self.gpu_list = ['/gpu:0', '/gpu:1', '/gpu:2', '/gpu:3']
        #self.gpu_list = ['/gpu:0']

        print("Completed creating the model")
        logging.debug("Completed creating the model")

        if (os.path.exists(self.image_dir)):
            shutil.rmtree(self.image_dir, ignore_errors=True)
            os.makedirs(self.image_dir)
        else:
            os.makedirs(self.image_dir)
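    # `reparameterize` is called in __init__ above but not shown in this
    # snippet; a minimal sketch of the standard reparameterization trick it
    # presumably implements (an assumption, not the original code):
    def reparameterize(self, mean, logvar):
        # z = mean + sigma * eps, eps ~ N(0, I); keeps the sampling step differentiable
        eps = tf.random_normal(shape=tf.shape(mean))
        return mean + tf.exp(0.5 * logvar) * eps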
def affine_coupling(name,
                    x,
                    x_mask,
                    inverse,
                    split_dim,
                    identity_first,
                    init,
                    decoder_self_attention_bias=None,
                    **kwargs):
    """Affine coupling transform layer.

  Args:
    name: variable scope.
    x: 3-D Tensor, shape=[B, L, C].
    x_mask : 2-D Tensor, shape=[B, L].
    inverse: Forward or inverse pass.
    split_dim: which dimension to split
      (time, channel_continuous, channel_alternate).
    identity_first: True means the first half remains constant. False for 2nd.
    init: init.
    decoder_self_attention_bias: bias.
    **kwargs: additional arguments. Contains hparams, encoder_output and
      encoder_decoder_attention_bias.

  Returns:
    z: data transformed by the affine coupling layer. shape=[B, L, C]
    logabsdets: Log absolute determinant Jacobian. shape=[B]
  """
    hparams = kwargs["hparams"]
    batch_size, length, n_channels = common_layers.shape_list(x)
    assert hparams.scale_width > 0.0 and hparams.scale_width < 1.0
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        x_id, x_tr, _, n_transform, bias, mask = gops.split_coupling(
            x, x_mask, split_dim, identity_first, decoder_self_attention_bias)
        z_id = x_id

        transform_params = gops.transformer_decoder_block(
            "theta_tr",
            n_layers=hparams.n_layers_transform_params,
            x=x_id,
            x_mask=mask,
            output_size=n_transform * 2,
            init=init,
            decoder_self_attention_bias=bias,
            **kwargs)
        loc, unconstrained_scale = tf.split(transform_params, 2, axis=-1)
        scale = tf.sigmoid(unconstrained_scale + 2.0)
        if not inverse:
            z_tr = (x_tr + loc) * scale
        else:
            z_tr = x_tr / scale - loc

        logabsdet = gops.reduce_sum_over_lc(tf.log(scale), mask)  # [B]
        if inverse:
            logabsdet *= -1

        tf.summary.histogram("_loc", tf.boolean_mask(loc, mask))
        tf.summary.histogram("_scale", tf.boolean_mask(scale, mask))
        result = gops.join_coupling(z_id, z_tr, split_dim, identity_first)
        result = tf.reshape(result, [batch_size, length, n_channels])
        return result, logabsdet
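# The coupling transform above is invertible by construction: the forward pass
# computes z = (x + loc) * scale and the inverse recovers x = z / scale - loc.
# A minimal numpy sketch verifying the round trip (illustrative values only):
import numpy as np
x_tr = np.random.randn(2, 5)
loc, scale = 0.3, 0.8              # stand-ins for the predicted transform params
z_tr = (x_tr + loc) * scale        # forward
x_rec = z_tr / scale - loc         # inverse
assert np.allclose(x_tr, x_rec)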
Example #20
import tensorflow.compat.v1 as tf
import numpy as np

tf.disable_v2_behavior()

x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

# use layer
W1 = tf.Variable(tf.random_normal([2, 2]), name='weight1')
b1 = tf.Variable(tf.random_normal([2]), name='bias1')
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

W2 = tf.Variable(tf.random_normal([2, 1]), name='weight2')
b2 = tf.Variable(tf.random_normal([1]), name='bias2')
hypothesis = tf.sigmoid(tf.matmul(layer1, W2) + b2)

cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
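    # After training, report the learned XOR mapping; a minimal sketch using
    # the hypothesis, predicted and accuracy ops defined above:
    h, p, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("hypothesis:\n", h)
    print("predicted:\n", p)
    print("accuracy:", a)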
    def build(self, inputs):
        """Build the graph for this configuration.

        Args:
          inputs: A dict of inputs. For training, should contain 'wav'.

        Returns:
          A dict of outputs that includes the 'predictions', 'init_ops',
          'push_ops', 'encoding', and the 'quantized_input'.
        """
        num_stages = 10
        num_layers = 30
        filter_length = 3
        width = 512
        skip_width = 256
        num_z = 16

        # Encode the source with 8-bit Mu-Law.
        x = inputs['wav']
        batch_size = self.batch_size
        x_quantized = utils.mu_law(x)
        x_scaled = tf.cast(x_quantized, tf.float32) / 128.0
        x_scaled = tf.expand_dims(x_scaled, 2)

        encoding = tf.placeholder(name='encoding',
                                  shape=[batch_size, num_z],
                                  dtype=tf.float32)
        en = tf.expand_dims(encoding, 1)

        init_ops, push_ops = [], []

        ###
        # The WaveNet Decoder.
        ###
        l = x_scaled
        l, inits, pushs = utils.causal_linear(x=l,
                                              n_inputs=1,
                                              n_outputs=width,
                                              name='startconv',
                                              rate=1,
                                              batch_size=batch_size,
                                              filter_length=filter_length)

        for init in inits:
            init_ops.append(init)
        for push in pushs:
            push_ops.append(push)

        # Set up skip connections.
        s = utils.linear(l, width, skip_width, name='skip_start')

        # Residual blocks with skip connections.
        for i in range(num_layers):
            dilation = 2**(i % num_stages)

            # dilated masked cnn
            d, inits, pushs = utils.causal_linear(x=l,
                                                  n_inputs=width,
                                                  n_outputs=width * 2,
                                                  name='dilatedconv_%d' %
                                                  (i + 1),
                                                  rate=dilation,
                                                  batch_size=batch_size,
                                                  filter_length=filter_length)

            for init in inits:
                init_ops.append(init)
            for push in pushs:
                push_ops.append(push)

            # local conditioning
            d += utils.linear(en,
                              num_z,
                              width * 2,
                              name='cond_map_%d' % (i + 1))

            # gated cnn
            assert d.get_shape().as_list()[2] % 2 == 0
            m = d.get_shape().as_list()[2] // 2
            d = tf.sigmoid(d[:, :, :m]) * tf.tanh(d[:, :, m:])

            # residuals
            l += utils.linear(d, width, width, name='res_%d' % (i + 1))

            # skips
            s += utils.linear(d, width, skip_width, name='skip_%d' % (i + 1))

        s = tf.nn.relu(s)
        s = (utils.linear(s, skip_width, skip_width, name='out1') +
             utils.linear(en, num_z, skip_width, name='cond_map_out1'))
        s = tf.nn.relu(s)

        ###
        # Compute the logits and get the loss.
        ###
        logits = utils.linear(s, skip_width, 256, name='logits')
        logits = tf.reshape(logits, [-1, 256])
        probs = tf.nn.softmax(logits, name='softmax')

        return {
            'init_ops': init_ops,
            'push_ops': push_ops,
            'predictions': probs,
            'encoding': encoding,
            'quantized_input': x_quantized,
        }
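# The "8-bit Mu-Law" encoding step above compresses [-1, 1] audio into 256
# integer bins; a minimal numpy sketch of an equivalent companding function
# (an assumed equivalent of utils.mu_law, not the original helper):
import numpy as np

def mu_law(x, mu=255):
    # fine resolution near zero, coarse resolution near the extremes
    out = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
    return np.floor((out + 1) / 2 * mu + 0.5).astype(np.int32) - 128

print(mu_law(np.linspace(-1, 1, 5)))  # [-128 -112    0  111  127]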
    def get_word(self, sample_y, sample_h_pre, alpha_past_pre,
                 sample_annotation):

        emb = tf.cond(
            sample_y[0] < 0, lambda: tf.fill((1, self.word_dim), 0.0),
            lambda: tf.nn.embedding_lookup(self.embed_matrix, sample_y))

        # ret = self.parser.one_time_step((h_pre, None, None, alpha_past_pre, annotation, None), (emb, None))
        emb_y_z_r_vector = tf.tensordot(emb, self.parser.W_yz_yr, axes=1) + \
                           self.parser.b_yz_yr  # [batch, 2 * dim_decoder]
        hidden_z_r_vector = tf.tensordot(sample_h_pre,
                                         self.parser.U_hz_hr,
                                         axes=1)  # [batch, 2 * dim_decoder]
        pre_z_r_vector = tf.sigmoid(emb_y_z_r_vector + \
                                    hidden_z_r_vector)  # [batch, 2 * dim_decoder]

        r1 = pre_z_r_vector[:, :self.parser.hidden_dim]  # [batch, dim_decoder]
        z1 = pre_z_r_vector[:, self.parser.hidden_dim:]  # [batch, dim_decoder]

        emb_y_h_vector = tf.tensordot(emb, self.parser.W_yh, axes=1) + \
                         self.parser.b_yh  # [batch, dim_decoder]
        hidden_r_h_vector = tf.tensordot(sample_h_pre,
                                         self.parser.U_rh,
                                         axes=1)  # [batch, dim_decoder]
        hidden_r_h_vector *= r1
        pre_h_proposal = tf.tanh(hidden_r_h_vector + emb_y_h_vector)

        pre_h = z1 * sample_h_pre + (1. - z1) * pre_h_proposal

        context, _, alpha_past = self.parser.attender.get_context(
            sample_annotation, pre_h, alpha_past_pre, None)  # [batch, dim_ctx]
        emb_y_z_r_nl_vector = tf.tensordot(
            pre_h, self.parser.U_hz_hr_nl, axes=1) + self.parser.b_hz_hr_nl
        context_z_r_vector = tf.tensordot(context, self.parser.W_c_z_r, axes=1)
        z_r_vector = tf.sigmoid(emb_y_z_r_nl_vector + context_z_r_vector)

        r2 = z_r_vector[:, :self.parser.hidden_dim]
        z2 = z_r_vector[:, self.parser.hidden_dim:]

        emb_y_h_nl_vector = tf.tensordot(pre_h, self.parser.U_rh_nl,
                                         axes=1) + self.parser.b_rh_nl
        emb_y_h_nl_vector *= r2
        context_h_vector = tf.tensordot(context, self.parser.W_c_h_nl, axes=1)
        h_proposal = tf.tanh(emb_y_h_nl_vector + context_h_vector)
        h = z2 * pre_h + (1. - z2) * h_proposal

        h_t = h
        c_t = context
        alpha_past_t = alpha_past
        y_t_1 = emb
        logit_gru = tf.tensordot(h_t, self.Wh, axes=1) + self.bh
        logit_ctx = tf.tensordot(c_t, self.Wc, axes=1) + self.bc
        logit_pre = tf.tensordot(y_t_1, self.Wy, axes=1) + self.by
        logit = logit_pre + logit_ctx + logit_gru  # batch x word_dim

        shape = tf.shape(logit)
        logit = tf.reshape(logit, [-1, shape[1] // 2, 2])
        logit = tf.reduce_max(logit, axis=2)

        logit = tf.layers.dropout(inputs=logit,
                                  rate=0.2,
                                  training=self.training)

        logit = tf.tensordot(logit, self.Wo, axes=1) + self.bo

        next_probs = tf.nn.softmax(logits=logit)
        # tf.multinomial expects (unnormalized) log-probabilities, so sample
        # from the logits rather than from the softmax probabilities.
        next_word = tf.reduce_max(tf.multinomial(logit, num_samples=1),
                                  axis=1)
        return next_probs, next_word, h_t, alpha_past_t
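# The reshape + reduce_max step above implements maxout over adjacent pairs of
# units: a length-2k vector becomes length k by keeping the larger entry of
# each consecutive pair. A minimal numpy sketch:
import numpy as np
logit = np.array([[1.0, 3.0, -2.0, 0.5, 4.0, 4.5]])       # shape [batch, 6]
pooled = logit.reshape(-1, logit.shape[1] // 2, 2).max(axis=2)
print(pooled)  # [[3.  0.5 4.5]]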
Example #23
    def __init__(self, config):
        # load the configured parameters
        self.hiddens = hiddens = config.modelConfig.hidden_layers  # e.g. 200 hidden units per layer
        self.num_skills = num_skills = config.num_skills
        self.input_size = input_size = config.input_size
        self.batch_size = batch_size = config.batch_size
        self.keep_prob_value = config.modelConfig.dropout_keep_prob

        # define the placeholders that must be fed to the model
        self.max_steps = tf.placeholder(tf.int32, name="max_steps")  # longest sequence length in the current batch
        # input_data: (32, None, 248); None is the sequence length, i.e. max_len/num_steps/the longest exercise sequence
        self.input_data = tf.placeholder(tf.float32, [batch_size, None, input_size], name="input_x")

        self.sequence_len = tf.placeholder(tf.int32, [batch_size], name="sequence_len")
        self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # dropout keep prob

        self.target_id = tf.placeholder(tf.int32, [batch_size, None], name="target_id")
        self.target_correctness = tf.placeholder(tf.float32, [batch_size, None], name="target_correctness")
        self.flat_target_correctness = None

        # build the LSTM stack self.hidden_cell, with hiddens (e.g. 200) units per layer
        hidden_layers = []
        for idx, hidden_size in enumerate(hiddens):
            lstm_layer = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
            hidden_layer = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_layer, output_keep_prob=self.keep_prob)
            hidden_layers.append(hidden_layer)
        self.hidden_cell = tf.nn.rnn_cell.MultiRNNCell(cells=hidden_layers, state_is_tuple=True)

        # use a dynamic rnn so the sequence length can vary per batch
        outputs, self.current_state = tf.nn.dynamic_rnn(cell=self.hidden_cell,
                                                        inputs=self.input_data,
                                                        sequence_length=self.sequence_len,
                                                        dtype=tf.float32)

        # hidden-to-output weights: (units in the last hidden layer, number of skills (num_skills))
        output_w = tf.get_variable("W", [hiddens[-1], num_skills])
        output_b = tf.get_variable("b", [num_skills])

        # output: (batch_size * max_steps, units in the last hidden layer)
        self.output = tf.reshape(outputs, [batch_size * self.max_steps, hiddens[-1]])
        # output-layer logits: (batch_size * max_steps, num_skills); presumably each
        # student's mastery of every skill after finishing the step-th exercise
        self.logits = tf.matmul(self.output, output_w) + output_b
        # reshape to (batch_size, max_steps, num_skills)
        self.mat_logits = tf.reshape(self.logits, [batch_size, self.max_steps, num_skills])

        # apply a sigmoid to every output value at every time step of every sequence in
        # the batch; each value represents the mastery of one skill
        self.pred_all = tf.sigmoid(self.mat_logits, name="pred_all")

        # self.target_correctness is the target answer sequence, i.e. a 0/1 sequence fed by the user
        flat_target_correctness = tf.reshape(self.target_correctness, [-1])
        # flat_target_correctness is the 1-D view of target_correctness
        self.flat_target_correctness = flat_target_correctness
        flat_base_target_index = tf.range(batch_size * self.max_steps) * num_skills
        flat_base_target_id = tf.reshape(self.target_id, [-1])
        # the target index sequence flat_target_id has length batch_size * num_steps
        flat_target_id = flat_base_target_id + flat_base_target_index

        # flat_logits is the model's predicted output, of length batch_size * num_steps * num_skills
        flat_logits = tf.reshape(self.logits, [-1])
        # tf.gather extracts the entries of a tensor at the positions given by a 1-D index array
        flat_target_logits = tf.gather(flat_logits, flat_target_id)

        # apply the sigmoid to the gathered slice
        self.pred = tf.sigmoid(tf.reshape(flat_target_logits, [batch_size, self.max_steps]), name="pred")
        # binarize the sigmoid outputs to 0 or 1
        self.binary_pred = tf.cast(tf.greater_equal(self.pred, 0.5), tf.float32, name="binary_pred")

        # define the loss function
        with tf.name_scope("loss"):
            self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=flat_target_correctness, logits=flat_target_logits))
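# A minimal numeric sketch of the flat-index gather above: with
# batch_size * max_steps = 3 rows and num_skills = 4 logits per row, picking
# skill s at row r is a single 1-D gather at index r * num_skills + s.
import numpy as np
logits = np.arange(12.0).reshape(3, 4)         # rows: time steps, cols: skills
target_id = np.array([2, 0, 3])                # skill answered at each step
flat_target_id = np.arange(3) * 4 + target_id  # -> [2, 4, 11]
print(logits.reshape(-1)[flat_target_id])      # -> [ 2.  4. 11.]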
Example #24
    def _gate(self, x, W, b):
        return tf.sigmoid(self._net(x, W, b))
Example #25
    def build_graph(self):
        self.params = self.init_params()
        self.x_input = tf.placeholder(tf.int64, [None, None])
        self.mask_x = tf.placeholder(tf.float32, [None, None])
        self.y_target = tf.placeholder(tf.int64, [None])
        self.len_x = tf.placeholder(tf.int64, [None])
        self.keep_prob = tf.placeholder(tf.float32, [None])
        self.starting = tf.placeholder(tf.bool)
        """       
        attention gru & global gru
        Output:
        global_session_representation
        attentive_session_represention
        """
        self.n_timesteps = tf.shape(self.x_input)[1]
        self.n_samples = tf.shape(self.x_input)[0]

        emb = tf.nn.embedding_lookup(self.params['Wemb'], self.x_input)
        emb = tf.nn.dropout(emb, keep_prob=self.keep_prob[0])

        with tf.variable_scope('global_encoder'):
            cell_global = tf.compat.v1.nn.rnn_cell.GRUCell(self.hidden_units)
            init_state = cell_global.zero_state(self.n_samples, tf.float32)
            outputs_global, state_global = tf.nn.dynamic_rnn(
                cell_global,
                inputs=emb,
                sequence_length=self.len_x,
                initial_state=init_state,
                dtype=tf.float32)
            last_global = state_global  # batch_size*hidden_units

        with tf.variable_scope('local_encoder'):
            cell_local = tf.compat.v1.nn.rnn_cell.GRUCell(self.hidden_units)
            init_statel = cell_local.zero_state(self.n_samples, tf.float32)
            outputs_local, state_local = tf.nn.dynamic_rnn(
                cell_local,
                inputs=emb,
                sequence_length=self.len_x,
                initial_state=init_statel,
                dtype=tf.float32)
            last_h = state_local  # batch_size*hidden_units

            tmp_0 = tf.reshape(outputs_local, [-1, self.hidden_units])
            tmp_1 = tf.reshape(
                tf.matmul(tmp_0, self.params['W_encoder']),
                [self.n_samples, self.n_timesteps, self.hidden_units])
            tmp_2 = tf.expand_dims(tf.matmul(last_h, self.params['W_decoder']),
                                   1)  # batch_size*hidden_units
            tmp_3 = tf.reshape(
                tf.sigmoid(tmp_1 + tmp_2),
                [-1, self.hidden_units])  # batch_size,n_steps, hidden_units
            alpha = tf.matmul(tmp_3, tf.transpose(self.params['bl_vector']))
            res = tf.reduce_sum(alpha, axis=1)
            sim_matrix = tf.reshape(res, [self.n_samples, self.n_timesteps])

            att = tf.nn.softmax(
                sim_matrix * self.mask_x) * self.mask_x  # batch_size*n_step
            p = tf.expand_dims(tf.reduce_sum(att, axis=1), 1)
            weight = att / p
            atttention_proj = tf.reduce_sum(
                (outputs_local * tf.expand_dims(weight, 2)), 1)
        self.global_session_representation = last_global
        self.attentive_session_represention = atttention_proj

        self.ome_cell = OME(mem_size=(self.memory_size, self.memory_dim),
                            shift_range=self.shift_range,
                            hidden_units=self.hidden_units)

        self.state = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.hidden_units])
        self.memory_network_reads, self.memory_new_state = self.ome_cell(
            self.state, atttention_proj, self.starting)

        att_mean, att_var = tf.nn.moments(self.attentive_session_represention,
                                          axes=[1])
        self.attentive_session_represention = (
            self.attentive_session_represention - tf.expand_dims(
                att_mean, 1)) / tf.expand_dims(tf.sqrt(att_var + 1e-10), 1)
        glo_mean, glo_var = tf.nn.moments(self.global_session_representation,
                                          axes=[1])
        self.global_session_representation = (
            self.global_session_representation - tf.expand_dims(
                glo_mean, 1)) / tf.expand_dims(tf.sqrt(glo_var + 1e-10), 1)
        ntm_mean, ntm_var = tf.nn.moments(self.memory_network_reads, axes=[1])
        self.memory_network_reads = (
            self.memory_network_reads - tf.expand_dims(
                ntm_mean, 1)) / tf.expand_dims(tf.sqrt(ntm_var + 1e-10), 1)

        new_gate = tf.matmul(self.attentive_session_represention, self.params['inner_encoder']) + \
                   tf.matmul(self.memory_network_reads, self.params['outer_encoder']) + \
                   tf.matmul(self.global_session_representation, self.params['state_encoder'])
        new_gate = tf.nn.sigmoid(new_gate)
        self.narm_representation = tf.concat(
            (self.attentive_session_represention,
             self.global_session_representation),
            axis=1)
        self.memory_representation = tf.concat(
            (self.memory_network_reads, self.memory_network_reads), axis=1)
        final_representation = new_gate * self.narm_representation + (
            1 - new_gate) * self.memory_representation

        # prediction
        proj = tf.nn.dropout(final_representation, keep_prob=self.keep_prob[1])
        ytem = tf.matmul(self.params['Wemb'],
                         self.params['bili'])  # [n_items, 200]
        hypothesis = tf.matmul(
            proj, tf.transpose(ytem)) + 1e-10  # [batch_size, n_items]
        self.hypo = tf.nn.softmax(hypothesis)
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=hypothesis, labels=self.y_target))
        # optimize
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.lr).minimize(self.loss)

        self.saver = tf.train.Saver(max_to_keep=1)
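# A minimal numpy sketch of the masked attention normalization used above:
# the softmax is taken over sim * mask, re-masked, then renormalized so that
# padded steps receive exactly zero weight.
import numpy as np
sim = np.array([[2.0, 1.0, 3.0]])
mask = np.array([[1.0, 1.0, 0.0]])             # third step is padding
att = np.exp(sim * mask) / np.exp(sim * mask).sum(axis=1, keepdims=True)
att *= mask
weight = att / att.sum(axis=1, keepdims=True)
print(weight)  # approx [[0.73 0.27 0.  ]]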
Example #26
    def loss_layer(self, predict, labels):
        """
        Define loss layer

        Parameters
        ----------
        predict: TensorFlow Tensor
            The predicted values for the batch of data
        labels: TensorFlow Tensor
            Ground truth labels for the batch of data

        Returns
        -------
        loss: TensorFlow Tensor
            Loss (combination of regression and classification losses)
        """
        rescore = int(_utils.convert_shared_float_array_to_numpy(self.config.get('od_rescore')))
        lmb_coord_xy = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_coord_xy'))
        lmb_coord_wh = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_coord_wh'))
        lmb_obj = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_obj'))
        lmb_noobj = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_noobj'))
        lmb_class = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_class'))

        # Prediction values from model on the images
        ypred = _tf.reshape(predict, [-1] + list(self.grid_shape) + [self.num_anchors, 5 + self.num_classes])
        raw_xy = ypred[..., 0:2]
        raw_wh = ypred[..., 2:4]
        raw_conf = ypred[..., 4]
        class_scores = ypred[..., 5:]

        tf_anchors = _tf.constant(self.anchors)

        # Ground Truth info derived from ymap/labels
        gt_xy = labels[..., 0:2]
        gt_wh = labels[..., 2:4]
        gt_raw_wh = _tf.math.log(gt_wh / tf_anchors + 1e-5)
        gt_conf = labels[..., 4]
        gt_conf0 = labels[..., 0:1, 4]
        gt_class = labels[..., 5:]

        # Calculations on predicted confidences
        xy = _tf.sigmoid(raw_xy)
        wh = _tf.exp(raw_wh) * tf_anchors
        wh_anchors = _tf.exp(raw_wh * 0.0) * tf_anchors
        lo = xy - wh / 2
        hi = xy + wh / 2

        gt_area = gt_wh[..., 0] * gt_wh[..., 1]
        gt_lo = gt_xy - gt_wh / 2
        gt_hi = gt_xy + gt_wh / 2

        c_inter = _tf.maximum(2 * _tf.minimum(wh_anchors / 2, gt_wh / 2), 0)
        c_area = wh_anchors[..., 0] * wh_anchors[..., 1]
        c_inter_area = c_inter[..., 0] * c_inter[..., 1]
        c_iou = c_inter_area / (c_area + gt_area - c_inter_area)

        inter = _tf.maximum(_tf.minimum(hi, gt_hi) - _tf.maximum(lo, gt_lo), 0)
        area = wh[..., 0] * wh[..., 1]
        inter_area = inter[..., 0] * inter[..., 1]
        iou = inter_area / (area + gt_area - inter_area)
        active_iou = c_iou

        max_iou = _tf.reduce_max(active_iou, 3, keepdims=True)
        resp_box = _tf.cast(_tf.equal(active_iou, max_iou), dtype=_tf.float32)
        count = _tf.reduce_sum(gt_conf0)

        kr_obj_ij = _tf.stop_gradient(resp_box * gt_conf)

        kr_noobj_ij = 1 - kr_obj_ij
        s = 1 / (self.batch_size * self.grid_shape[0] * self.grid_shape[1])
        kr_obj_ij_plus1 = _tf.expand_dims(kr_obj_ij, -1)

        if rescore:
            obj_gt_conf = kr_obj_ij * _tf.stop_gradient(iou)
        else:
            obj_gt_conf = kr_obj_ij
        kr_box = kr_obj_ij_plus1
        obj_w = (kr_obj_ij * lmb_obj + kr_noobj_ij * lmb_noobj)

        loss_xy = lmb_coord_xy * _tf.reduce_sum(kr_box * _tf.square(gt_xy - xy)) / (count + 0.01)

        loss_wh = _tf.losses.huber_loss(labels=gt_raw_wh, predictions=raw_wh,
                                        weights=lmb_coord_wh * kr_box, delta=1.0)
        # Confidence loss
        loss_conf = s * _tf.reduce_sum(
            obj_w * _tf.nn.sigmoid_cross_entropy_with_logits(labels=obj_gt_conf, logits=raw_conf))

        # TODO: tf.nn.softmax_cross_entropy_with_logits_v2 instead of tf.nn.softmax_cross_entropy_with_logits
        loss_cls = lmb_class * _tf.reduce_sum(
            kr_obj_ij * _tf.nn.softmax_cross_entropy_with_logits_v2(labels=gt_class, logits=class_scores)) / (
                           count + 0.01)
        losses = [loss_xy, loss_wh, loss_conf, loss_cls]
        loss = _tf.add_n(losses)
        return loss
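# The confidence target above is built from the IoU between predicted and
# ground-truth boxes in center/size form; a minimal numpy sketch of the same
# intersection-over-union computation:
import numpy as np

def iou_center_size(xy, wh, gt_xy, gt_wh):
    lo, hi = xy - wh / 2, xy + wh / 2
    gt_lo, gt_hi = gt_xy - gt_wh / 2, gt_xy + gt_wh / 2
    inter = np.maximum(np.minimum(hi, gt_hi) - np.maximum(lo, gt_lo), 0)
    inter_area = inter[..., 0] * inter[..., 1]
    area = wh[..., 0] * wh[..., 1]
    gt_area = gt_wh[..., 0] * gt_wh[..., 1]
    return inter_area / (area + gt_area - inter_area)

print(iou_center_size(np.array([0.5, 0.5]), np.array([1.0, 1.0]),
                      np.array([0.5, 0.5]), np.array([0.5, 0.5])))  # 0.25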
Example #27
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Current mode: %s ***" % mode)
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids_1 = features["input_ids_1"]
    input_mask_1 = features["input_mask_1"]
    if train_mode == constants.TRAIN_MODE_FINETUNE:
      masked_lm_positions_1 = tf.zeros([1])
      masked_lm_ids_1 = tf.zeros([1])
      masked_lm_weights_1 = tf.zeros([1])
    else:
      masked_lm_positions_1 = features["masked_lm_positions_1"]
      masked_lm_ids_1 = features["masked_lm_ids_1"]
      masked_lm_weights_1 = features["masked_lm_weights_1"]

    input_ids_2 = features["input_ids_2"]
    input_mask_2 = features["input_mask_2"]
    if train_mode == constants.TRAIN_MODE_FINETUNE:
      masked_lm_positions_2 = tf.zeros([1])
      masked_lm_ids_2 = tf.zeros([1])
      masked_lm_weights_2 = tf.zeros([1])
    else:
      masked_lm_positions_2 = features["masked_lm_positions_2"]
      masked_lm_ids_2 = features["masked_lm_ids_2"]
      masked_lm_weights_2 = features["masked_lm_weights_2"]
    documents_match_labels = features["documents_match_labels"]
    # Since documents_match_labels might contain labels like 0/1/2, we need
    # to convert them to binary 0/1 labels.
    documents_match_labels = tf.cast(documents_match_labels > 0, tf.float32)
    is_real_example = None
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(
          tf.shape(documents_match_labels), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    if (dual_encoder_config.encoder_config.model_name ==
        constants.MODEL_NAME_SMITH_DUAL_ENCODER):
      # For the smith model, since the actual looped number of sentences per
      # document maybe smaller than max_doc_length_by_sentence, we need to
      # overwrite the lm weights with the actual lm weights returned by the
      # function.
      (masked_lm_loss_1, masked_lm_loss_2, masked_lm_example_loss_1,
       masked_lm_example_loss_2, masked_lm_weights_1, masked_lm_weights_2,
       masked_sent_lm_loss_1, masked_sent_lm_loss_2,
       masked_sent_per_example_loss_1, masked_sent_per_example_loss_2,
       masked_sent_weight_1, masked_sent_weight_2, seq_embed_1, seq_embed_2,
       input_sent_embed_1, input_sent_embed_2, output_sent_embed_1,
       output_sent_embed_2, siamese_loss,
       siamese_example_loss, siamese_logits) = build_smith_dual_encoder(
           dual_encoder_config, train_mode, is_training, input_ids_1,
           input_mask_1, masked_lm_positions_1, masked_lm_ids_1,
           masked_lm_weights_1, input_ids_2, input_mask_2,
           masked_lm_positions_2, masked_lm_ids_2, masked_lm_weights_2,
           use_one_hot_embeddings, documents_match_labels, debugging)
    else:
      raise ValueError(
          "Only smith_dual_encoder is supported: %s" %
          dual_encoder_config.encoder_config.model_name)

    # There are three different modes for training in the smith model.
    # 1. joint_train: a multi-task learning setting which combines the masked
    # word LM losses for doc1/doc2 and the siamese matching loss. If we add the
    # masked sentence LM task, we also add the masked sentence LM losses for
    # the two documents.
    # 2. pretrain: only contains the masked word LM losses for doc1/doc2. We
    # currently do not include the NSP loss, since it was not found very
    # useful according to the XLNet/RoBERTa/ALBERT papers. If we add the
    # masked sentence LM task, we also add the masked sentence LM losses for
    # the two documents.
    # 3. finetune: fine-tune the model from a loaded pretrained checkpoint with
    # only the siamese matching loss. If we add the masked sentence LM task,
    # we also add the masked sentence LM losses for the two documents.
    if train_mode == constants.TRAIN_MODE_JOINT_TRAIN:
      total_loss = masked_lm_loss_1 + masked_lm_loss_2 + siamese_loss
    elif train_mode == constants.TRAIN_MODE_PRETRAIN:
      total_loss = masked_lm_loss_1 + masked_lm_loss_2
    elif train_mode == constants.TRAIN_MODE_FINETUNE:
      total_loss = siamese_loss
    else:
      raise ValueError("Only joint_train, pretrain, finetune are supported.")
    # If we add the masked sentence LM task, we also add the masked sentence
    # LM losses for the two documents.
    if dual_encoder_config.encoder_config.use_masked_sentence_lm_loss:
      total_loss += (masked_sent_lm_loss_1 + masked_sent_lm_loss_2)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    init_checkpoint = dual_encoder_config.encoder_config.init_checkpoint
    # Load pretrained BERT checkpoints if there is a specified path.
    if init_checkpoint:
      tf.logging.info("**** Passed pretrained BERT checkpoint = %s ****",
                      init_checkpoint)
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ", *INIT_RANDOMLY*"
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)
    output_spec = None
    predicted_score = tf.sigmoid(siamese_logits)
    predicted_class = tf.round(predicted_score)

    if dual_encoder_config.encoder_config.model_name == constants.MODEL_NAME_SMITH_DUAL_ENCODER:
      _, prediction_dict = utils.get_export_outputs_prediction_dict_smith_de(
          seq_embed_1, seq_embed_2, predicted_score, predicted_class,
          documents_match_labels, input_sent_embed_1, input_sent_embed_2,
          output_sent_embed_1, output_sent_embed_2)
    else:
      raise ValueError("Unsupported model: %s" %
                       dual_encoder_config.encoder_config.model_name)

    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(total_loss, learning_rate,
                                               num_train_steps,
                                               num_warmup_steps, use_tpu)
      output_spec = tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:
      if (train_mode == constants.TRAIN_MODE_JOINT_TRAIN or
          train_mode == constants.TRAIN_MODE_PRETRAIN):
        eval_metrics = (metric_fns.metric_fn_pretrain, [
            masked_lm_example_loss_1, masked_lm_weights_1,
            masked_sent_per_example_loss_1, masked_sent_weight_1,
            masked_lm_example_loss_2, masked_lm_weights_2,
            masked_sent_per_example_loss_2, masked_sent_weight_2,
            predicted_class, documents_match_labels, is_real_example
        ])
      elif train_mode == constants.TRAIN_MODE_FINETUNE:
        eval_metrics = (metric_fns.metric_fn_finetune, [
            predicted_class, documents_match_labels, siamese_example_loss,
            is_real_example
        ])
      else:
        raise ValueError("Only joint_train, pretrain, finetune are supported.")
      output_spec = tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.PREDICT:
      output_spec = tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode, predictions=prediction_dict, scaffold_fn=scaffold_fn)
    else:
      raise ValueError("Only TRAIN, EVAL, PREDICT modes are supported: %s" %
                       mode)

    return output_spec
    def _build_outputs(self, images, labels, mode):
        is_training = (mode == mode_keys.TRAIN)

        if 'anchor_boxes' in labels:
            anchor_boxes = labels['anchor_boxes']
        else:
            anchor_boxes = anchor.Anchor(
                self._params.architecture.min_level,
                self._params.architecture.max_level,
                self._params.anchor.num_scales,
                self._params.anchor.aspect_ratios,
                self._params.anchor.anchor_size,
                images.get_shape().as_list()[1:3]).multilevel_boxes

            batch_size = tf.shape(images)[0]
            for level in anchor_boxes:
                anchor_boxes[level] = tf.tile(
                    tf.expand_dims(anchor_boxes[level], 0), [batch_size, 1, 1])

        backbone_features = self._backbone_fn(images, is_training=is_training)
        fpn_features = self._fpn_fn(backbone_features, is_training=is_training)
        cls_outputs, box_outputs = self._retinanet_head_fn(
            fpn_features, is_training=is_training)
        # Shapemask mask prediction.
        if is_training:
            boxes = labels['mask_boxes']
            outer_boxes = labels['mask_outer_boxes']
            classes = labels['mask_classes']
        else:
            detection_results = self._generate_detections_fn(
                box_outputs, cls_outputs, anchor_boxes,
                labels['image_info'][:, 1:2, :])
            boxes = detection_results['detection_boxes']
            scores = detection_results['detection_scores']
            classes = detection_results['detection_classes']
            valid_detections = detection_results['num_detections']

            # Use a list as input to avoid a segmentation fault on TPU.
            image_size = images.get_shape().as_list()[1:3]
            outer_boxes = box_utils.compute_outer_boxes(
                tf.reshape(boxes, [-1, 4]),
                image_size,
                scale=self._outer_box_scale)
            outer_boxes = tf.reshape(outer_boxes, tf.shape(boxes))
            classes = tf.cast(classes, tf.int32)

        instance_features, prior_masks = self._shape_prior_head_fn(
            fpn_features, boxes, outer_boxes, classes, is_training)
        coarse_mask_logits = self._coarse_mask_fn(instance_features,
                                                  prior_masks, classes,
                                                  is_training)
        fine_mask_logits = self._fine_mask_fn(instance_features,
                                              coarse_mask_logits, classes,
                                              is_training)
        model_outputs = {
            'cls_outputs': cls_outputs,
            'box_outputs': box_outputs,
            'fine_mask_logits': fine_mask_logits,
            'coarse_mask_logits': coarse_mask_logits,
            'prior_masks': prior_masks,
            'fpn_features': fpn_features,
        }

        if not is_training:
            model_outputs.update({
                'num_detections': valid_detections,
                'detection_boxes': boxes,
                'detection_outer_boxes': outer_boxes,
                'detection_masks': tf.sigmoid(fine_mask_logits),
                'detection_classes': tf.cast(classes, dtype=tf.int32),
                'detection_scores': scores,
            })
        return model_outputs
def update_conv_routing_fast(wx, input_activation, activation_biases,
                             sigma_biases, logit_shape, num_out_atoms,
                             input_dim, num_routing, output_dim, final_beta,
                             min_var, stride, layer_name):
    """Fast Convolutional Routing with EM for Mixture of Gaussians.

  The main difference with conv_routing is replacing extract_image_patches with
  utils.kernel_tile which uses a special conv-deconv operation.
  Args:
   wx: [batch, indim, outdim, outatom, height, width, kernel, kernel]
   input_activation: [batch, indim, 1, 1, height, width, kernel, kernel]
   activation_biases: [1, 1, outdim, 1, height, width]
   sigma_biases: [1, 1, outdim, 1, height, width]
   logit_shape: [indim, outdim, 1, height, width, kernel, kernel]
   num_out_atoms: number of atoms in each capsule, e.g. 9 or 16.
   input_dim: number of input capsule types, e.g. 32.
   num_routing: number of routing iterations, e.g. 3.
   output_dim: number of output capsule types, e.g. 32.
   final_beta: the temperature for making routing factors sharper.
   min_var: minimum variance for each capsule to avoid NaNs.
   stride: the stride with which wx was calculated, e.g. 2 or 1.
   layer_name: the name of this layer, e.g. conv_capsule1.

  Returns:
    out_activation and out_center: final activation and capsule values.
  """

    # prior = utils.bias_variable([1] + logit_shape, name='prior')
    tf.logging.info(
        'update_conv_routing_fast: Wx=%s act=%s act_bias=%s sigma_bias=%s logit_shape=%s',
        wx, input_activation, activation_biases, sigma_biases, logit_shape)
    with tf.name_scope('update_conv_routing_fast'):

        # With known shapes, these could all be replaced with tf.zeros
        with tf.name_scope('start_posterior'):
            start_posterior = tf.nn.softmax(tf.fill(
                tf.stack([
                    tf.shape(input_activation)[0], logit_shape[0],
                    logit_shape[1], logit_shape[2], logit_shape[3],
                    logit_shape[4], logit_shape[5], logit_shape[6]
                ]), 0.0),
                                            dim=2)
        with tf.name_scope('start_center'):
            start_center = tf.fill(
                tf.stack([
                    tf.shape(input_activation)[0], 1, output_dim,
                    num_out_atoms, logit_shape[3], logit_shape[4], 1, 1
                ]), 0.0)

        b = tf.shape(input_activation)[0]
        c = output_dim
        h = logit_shape[3]
        k = logit_shape[5]
        s = stride
        ih = h + (h - 1) * (s - 1) + (k - 1)
        tile_filter = np.zeros(shape=[k, k, 1, k * k], dtype=np.float32)
        for i in range(k):
            for j in range(k):
                tile_filter[i, j, :, i * k + j] = 1.0

        # Body of routing loop.
        def _body(i, posterior, center, wx, activation_biases, sigma_biases,
                  input_activation, tile_filter):
            """Body of EM while loop."""
            tf.logging.info('  Wx: %s', wx)

            beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))

            posterior = tf.Print(posterior, [
                layer_name, i, h, ih,
                tf.reduce_min(posterior),
                tf.reduce_max(posterior)
            ],
                                 message='posterior')
            # route: [outdim, height?, width?, batch, indim]
            with tf.name_scope('vote_conf'):
                vote_conf = posterior * input_activation
                vote_conf = tf.maximum(vote_conf, 0.0)

            # masses: [batch, 1, outdim, 1, height, width, 1, 1]
            with tf.name_scope('masses'):
                masses = tf.reduce_sum(vote_conf,
                                       axis=[1, -1, -2],
                                       keepdims=True,
                                       name='masses_calculation') + 0.0000001
            with tf.name_scope('preactivate_unrolled'):
                preactivate_unrolled = vote_conf * wx

            # center: [batch, 1, outdim, outatom, height, width]
            with tf.name_scope('center'):
                center = .9 * tf.reduce_sum(
                    preactivate_unrolled, axis=[1, -1, -2],
                    keepdims=True) / masses + .1 * center

            # Rematerialization to save GPU memory. (+22ms/-1.6GB)
            # @tf.contrib.layers.recompute_grad
            def compute_noise_and_variance(wx, center, vote_conf, masses):
                noise = tf.squared_difference(wx, center)
                variance = min_var + tf.reduce_sum(
                    vote_conf * noise,
                    axis=[1, -1, -2],
                    keepdims=True,
                    name='variance_calculation') / masses
                return noise, variance

            with tf.name_scope('compute_noise_and_variance'):
                noise, variance = compute_noise_and_variance(
                    wx, center, vote_conf, masses)

            with tf.name_scope('win'):
                log_variance = tf.log(variance)
                p_i = -1 * tf.reduce_sum(log_variance, axis=3, keepdims=True)
                log_2pi = tf.log(2 * math.pi)
                sigma_b = tf.log(sigma_biases * sigma_biases + min_var)
                win = masses * (p_i - num_out_atoms *
                                (sigma_b + log_2pi + 1.0))
            with tf.name_scope('logit'):
                logit = beta * (win - activation_biases * 50 * num_out_atoms)
            with tf.name_scope('activation_update'):
                activation_update = tf.minimum(
                    0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
            with tf.name_scope('sigma_update'):
                log_det_sigma = -1 * p_i
                sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
            with tf.name_scope('exp_update'):
                exp_update = tf.reduce_sum(noise / (2 * variance),
                                           axis=3,
                                           keepdims=True)
            prior_update = tf.subtract(activation_update - sigma_update,
                                       exp_update,
                                       name='prior_update_sub')
            max_prior_update = tf.reduce_max(prior_update,
                                             axis=[2, 3, 4, 5, 6, 7],
                                             keepdims=True,
                                             name='max_prior_update')
            prior_normal = tf.add(prior_update, -1 * max_prior_update)
            prior_exp = tf.exp(prior_normal)
            prior_exp_out = tf.reduce_sum(prior_exp,
                                          axis=2,
                                          keepdims=True,
                                          name='prior_exp_out')
            prior_exp_reshape = tf.reshape(prior_exp_out, [-1, h, h, k * k],
                                           name='prior_exp_reshape')

            sum_prior = tf.nn.conv2d_transpose(prior_exp_reshape,
                                               tile_filter,
                                               output_shape=[b * c, ih, ih, 1],
                                               strides=[1, s, s, 1],
                                               padding='VALID')
            sum_prior = tf.maximum(1e-6, sum_prior)

            sum_prior_patch = utils.kernel_tile(sum_prior,
                                                k,
                                                s,
                                                1,
                                                name='sum_prior_patch')

            with utils.maybe_jit_scope(), tf.name_scope('posterior'):
                sum_prior_reshape = tf.reshape(
                    sum_prior_patch, [-1, input_dim, 1, 1, h, h, k, k])
                posterior = prior_exp / sum_prior_reshape

            return (i + 1, posterior, logit, center, masses)

        posterior, center = start_posterior, start_center
        for j in range(num_routing):
            with tf.name_scope('iter{}'.format(j)):
                tf.logging.info('iteration %d %s', j, '=' * 80)
                jj = tf.constant(j, dtype=tf.int32)
                _, posterior, activation, center, mass = _body(
                    jj, posterior, center, wx, activation_biases, sigma_biases,
                    input_activation, tile_filter)
        post, out_activation, out_center, out_mass = posterior, activation, center, mass

        with tf.name_scope('out_activation'):
            utils.activation_summary(tf.sigmoid(out_activation))
        with tf.name_scope('masses'):
            utils.activation_summary(tf.sigmoid(out_mass))
        with tf.name_scope('posterior'):
            utils.activation_summary(post)

    return out_activation, out_center
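# The tile filter used above is a one-hot "scatter" kernel: output channel
# i*k + j is 1 only at spatial tap (i, j), so conv2d_transpose with it spreads
# each position's k*k channel values back onto a k x k spatial neighborhood,
# summing wherever patches overlap. A minimal sketch of its structure:
import numpy as np
k = 3
tile_filter = np.zeros([k, k, 1, k * k], dtype=np.float32)
for i in range(k):
    for j in range(k):
        tile_filter[i, j, :, i * k + j] = 1.0
print(tile_filter[:, :, 0, 4])  # channel 4 = tap (1, 1): a single 1 at the center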
Example #30
def update_em_routing(wx, input_activation, activation_biases, sigma_biases,
                      logit_shape, num_out_atoms, num_routing, output_dim,
                      leaky, final_beta, min_var):
    """Fully connected routing with EM for Mixture of Gaussians."""
    # Wx: [batch, indim, outdim, outatom, height, width]
    # logit_shape: [indim, outdim, 1, height, width]
    # input_activations: [batch, indim, 1, 1, 1, 1]
    # activation_biases: [1, 1, outdim, 1, height, width]
    # prior = utils.bias_variable([1] + logit_shape, name='prior')
    update = tf.fill(
        tf.stack([
            tf.shape(input_activation)[0], logit_shape[0], logit_shape[1],
            logit_shape[2], logit_shape[3], logit_shape[4]
        ]), 0.0)
    out_activation = tf.fill(
        tf.stack([
            tf.shape(input_activation)[0], 1, output_dim, 1, logit_shape[3],
            logit_shape[4]
        ]), 0.0)
    out_center = tf.fill(
        tf.stack([
            tf.shape(input_activation)[0], 1, output_dim, num_out_atoms,
            logit_shape[3], logit_shape[4]
        ]), 0.0)

    def _body(i, update, activation, center):
        """Body of the EM while loop."""
        del activation
        beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))
        # beta = final_beta
        # route: [outdim, height?, width?, batch, indim]
        if leaky:
            posterior = layers.leaky_routing(update, output_dim)
        else:
            posterior = tf.nn.softmax(update, dim=2)
        vote_conf = posterior * input_activation
        # masses: [batch, 1, outdim, 1, height, width]
        masses = tf.reduce_sum(vote_conf, axis=1, keep_dims=True) + 0.00001
        preactivate_unrolled = vote_conf * wx
        # center: [batch, 1, outdim, outatom, height, width]
        center = .9 * tf.reduce_sum(preactivate_unrolled,
                                    axis=1,
                                    keep_dims=True) / masses + .1 * center

        noise = (wx - center) * (wx - center)
        variance = min_var + tf.reduce_sum(
            vote_conf * noise, axis=1, keep_dims=True) / masses
        log_variance = tf.log(variance)
        p_i = -1 * tf.reduce_sum(log_variance, axis=3, keep_dims=True)
        log_2pi = tf.log(2 * math.pi)
        win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
        logit = beta * (win - activation_biases * 5000)
        activation_update = tf.minimum(
            0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
        # return activation, center
        log_det_sigma = tf.reduce_sum(log_variance, axis=3, keep_dims=True)
        sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
        exp_update = tf.reduce_sum(noise / (2 * variance),
                                   axis=3,
                                   keep_dims=True)
        prior_update = activation_update - sigma_update - exp_update
        return (prior_update, logit, center)

    # activations = tf.TensorArray(
    #     dtype=tf.float32, size=num_routing, clear_after_read=False)
    # centers = tf.TensorArray(
    #     dtype=tf.float32, size=num_routing, clear_after_read=False)
    # updates = tf.TensorArray(
    #     dtype=tf.float32, size=num_routing, clear_after_read=False)
    # updates.write(0, prior_update)
    for i in range(num_routing):
        update, out_activation, out_center = _body(i, update, out_activation,
                                                   out_center)
    # for j in range(num_routing):
    #   _, prior_update, out_activation, out_center = _body(
    #       i, prior_update, start_activation, start_center)
    with tf.name_scope('out_activation'):
        utils.activation_summary(tf.sigmoid(out_activation))
    with tf.name_scope('noise'):
        utils.variable_summaries((wx - out_center) * (wx - out_center))
    with tf.name_scope('Wx'):
        utils.variable_summaries(wx)

    # for i in range(num_routing):
    #   utils.activation_summary(activations.read(i))
    # return activations.read(num_routing - 1), centers.read(num_routing - 1)
    return out_activation, out_center
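# Up to the beta temperature and the bias terms, each loop iteration above
# computes the log of a Gaussian E-step responsibility; in the code's notation
# (center mu, variance sigma^2, d = num_out_atoms):
#
#   log p_j(x) = log a_j                                   (activation_update)
#              - [ d*log(2*pi) + sum_k log sigma_jk^2 ]/2  (sigma_update)
#              - sum_k (x_k - mu_jk)^2 / (2 sigma_jk^2)    (exp_update)
#
# and prior_update = activation_update - sigma_update - exp_update is the
# unnormalized log posterior that the next iteration's softmax normalizes.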