Code example #1
    def Lock(self):
        """Used to lock in the personalization."""
        lock_ops = [tf.no_op()]  # Does nothing; just a placeholder to anchor the control dependencies.

        if self.lowrank_adaptation:
            # Compute the new W: the base weights plus the low-rank update.
            left_adapt = tf.squeeze(self.left_adapt)
            right_adapt = tf.squeeze(self.right_adapt)
            final_w = tf.add(tf.matmul(left_adapt, right_adapt), self.W)
            self.lockedW = final_w
            # tf.identity dereferences the _ref type so the locked value can be fetched.
            lock2 = tf.identity(self.lockedW)
            lock_ops.append(lock2)
        else:
            lock3 = tf.assign(self.lockedW, self.W)
            lock4 = tf.identity(self.lockedW)
            lock_ops.append(lock4)

        if self.mikolov_adapt:
            final_bias = tf.squeeze(self.bias + self.delta)
            lock_ops.append(self.lockedBias.assign(final_bias))
        else:
            lock_ops.append(self.lockedBias.assign(self.bias))

        # tf.group(*lock_ops) would return a single op with no outputs;
        # tf.tuple instead returns a list of tensors.
        self.lock_op = tf.tuple(lock_ops, name="lock_op")
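
The closing comments point at the practical difference being exploited here: tf.group returns a single operation with no outputs, while tf.tuple returns the input tensors themselves, wrapped so that none is returned before all of them have been computed. A minimal sketch of that difference, assuming TensorFlow 1.x-style graph execution via tf.compat.v1:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

a = tf.constant(1.0) * 2.0
b = tf.constant(3.0) + 4.0

grouped = tf.group(a, b)   # a single Operation; fetching it yields None
tupled = tf.tuple([a, b])  # a list of tensors with a joint control dependency

with tf.Session() as sess:
    print(sess.run(grouped))  # None
    print(sess.run(tupled))   # [2.0, 7.0]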
Code example #2
File: t2t_attack.py  Project: admariner/tensor2tensor
  def compute_accuracy(x, l, mask):
    """Compute model accuracy."""
    preds = ch_model.get_probs(x)
    preds = tf.squeeze(preds)
    preds = tf.argmax(preds, -1, output_type=l.dtype)

    _, acc_update_op = tf.metrics.accuracy(l, preds, weights=mask)

    if FLAGS.surrogate_attack:
      preds = sur_ch_model.get_probs(x)
      preds = tf.squeeze(preds)
      preds = tf.argmax(preds, -1, output_type=l.dtype)
      acc_update_op = tf.tuple((acc_update_op,
                                tf.metrics.accuracy(l, preds, weights=mask)[1]))

    sess.run(tf.initialize_local_variables())
    for i in range(FLAGS.eval_steps):
      tf.logging.info(
          "\tEvaluating batch [%d / %d]" % (i + 1, FLAGS.eval_steps))
      acc = sess.run(acc_update_op)
    if FLAGS.surrogate_attack:
      tf.logging.info("\tFinal acc: (%.4f, %.4f)" % (acc[0], acc[1]))
    else:
      tf.logging.info("\tFinal acc: %.4f" % acc)
    return acc
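
This evaluation loop leans on the tf.metrics API: tf.metrics.accuracy returns a (value, update_op) pair backed by local variables, so the local variables must be initialized and the update op run once per batch before the accumulated value means anything. A small self-contained sketch of that pattern, again assuming TF1-style graph mode:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

labels = tf.placeholder(tf.int64, [None])
preds = tf.placeholder(tf.int64, [None])
acc, acc_update_op = tf.metrics.accuracy(labels, preds)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric state lives in local variables
    for y, p in [([0, 1, 1], [0, 1, 0]), ([1, 0], [1, 0])]:
        sess.run(acc_update_op, feed_dict={labels: y, preds: p})
    print(sess.run(acc))  # running accuracy over both batches: 4 / 5 = 0.8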
Code example #3
    def test_make_is_span_maskable_features(self, seq_len,
                                            max_annotation_length,
                                            annotation_labels):
        np.random.seed(31415)
        annotation_labels = np.array(annotation_labels).astype(np.int32)
        batch_size, num_annotations = annotation_labels.shape
        annotation_begins = np.random.randint(
            seq_len, size=[batch_size, num_annotations], dtype=np.int32)
        annotation_length = np.random.randint(
            max_annotation_length,
            size=[batch_size, num_annotations],
            dtype=np.int32)
        annotation_ends = np.minimum(annotation_begins + annotation_length,
                                     seq_len - 1)

        is_annotation_mask_np = np.zeros((batch_size, seq_len), dtype=np.int32)
        is_annotation_cont_mask_np = np.zeros((batch_size, seq_len),
                                              dtype=np.int32)

        for i in range(batch_size):
            for j in range(seq_len):
                for k in range(num_annotations):
                    if (annotation_labels[i, k] != 0
                            and annotation_begins[i, k] <= j
                            and j <= annotation_ends[i, k]):
                        is_annotation_mask_np[i, j] = 1

        for i in range(batch_size):
            for j in range(seq_len):
                for k in range(num_annotations):
                    if (annotation_labels[i, k] != 0
                            and annotation_begins[i, k] + 1 <= j
                            and j <= annotation_ends[i, k]):
                        is_annotation_cont_mask_np[i, j] = 1

        is_annotation_mask_np = is_annotation_mask_np.reshape(-1)
        is_annotation_cont_mask_np = is_annotation_cont_mask_np.reshape(-1)

        def to_tensor(np_array):
            return tf.convert_to_tensor(np_array.reshape(-1), dtype=tf.int32)

        is_annotation_mask_tf_obj, is_annotation_cont_mask_tf_obj = (
            input_utils.make_is_span_maskable_features(
                batch_size,
                seq_len,
                num_annotations,
                to_tensor(annotation_begins),
                to_tensor(annotation_ends),
                to_tensor(annotation_labels),
            ))
        is_annotation_mask_tf, is_annotation_cont_mask_tf = self.evaluate(
            tf.tuple(
                (is_annotation_mask_tf_obj, is_annotation_cont_mask_tf_obj)))
        self.assertAllEqual(is_annotation_mask_np, is_annotation_mask_tf)
        self.assertAllEqual(is_annotation_cont_mask_np,
                            is_annotation_cont_mask_tf)
Code example #4
File: rnn.py  Project: RevitaAI/Parser-v2
def birnn(cell,
          inputs,
          sequence_length,
          initial_state_fw=None,
          initial_state_bw=None,
          ff_keep_prob=1.,
          recur_keep_prob=1.,
          enforce_dropout=False,
          dtype=tf.float32,
          scope=None):
    """ """

    # Forward direction
    with tf.variable_scope(scope or 'BiRNN_FW') as fw_scope:
        output_fw, output_state_fw = rnn(cell,
                                         inputs,
                                         sequence_length,
                                         initial_state_fw,
                                         ff_keep_prob,
                                         recur_keep_prob,
                                         enforce_dropout,
                                         dtype,
                                         scope=fw_scope)

    # Backward direction
    rev_inputs = tf.reverse_sequence(inputs, sequence_length, 1, 0)
    with tf.variable_scope(scope or 'BiRNN_BW') as bw_scope:
        output_bw, output_state_bw = rnn(cell,
                                         rev_inputs,
                                         sequence_length,
                                         initial_state_bw,
                                         ff_keep_prob,
                                         recur_keep_prob,
                                         enforce_dropout,
                                         dtype,
                                         scope=bw_scope)
    output_bw = tf.reverse_sequence(output_bw, sequence_length, 1, 0)
    # Concat each of the forward/backward outputs
    outputs = tf.concat([output_fw, output_bw], 2)

    return outputs, tf.tuple([output_state_fw, output_state_bw])
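
The backward direction works because tf.reverse_sequence reverses only the first sequence_length[i] timesteps of each example and leaves the padding untouched; reversing the backward outputs again afterwards realigns them with the forward direction. A minimal sketch of the op, assuming TF1-style graph mode:

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# a batch of 2 padded sequences, max length 4
x = tf.constant(np.array([[1, 2, 3, 0],
                          [4, 5, 0, 0]], dtype=np.int32))
lengths = tf.constant([3, 2])

# reverse along the time axis (axis 1) per example; padding stays in place
rev = tf.reverse_sequence(x, lengths, seq_axis=1, batch_axis=0)

with tf.Session() as sess:
    print(sess.run(rev))
    # [[3 2 1 0]
    #  [5 4 0 0]]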
Code example #5
    def build(self, eta, loss, metrics):
        """Constructs the model's graph from the provided layers with the
            specified loss and metrics.
            
        Args:
            eta (float): A scalar representing the learning rate for
                stochastic gradient descent.
            loss (Layer): A layer used to construct the objective for
                stochastic gradient descent.
            metrics (list of Layers): A list of layers to use when
                evaluating model performance.
                
        """
        # This ensures that the variables we add go into the graph that is
        # unique to this model.
        with self.graph.as_default():
        
            self.X = tf.placeholder(name='X', shape=(self.m, None), dtype=tf.float32)
            self.Y = tf.placeholder(name='Y', shape=(self.n, None), dtype=tf.float32)

            for layer in self.layers + [loss] + metrics:
                layer.build()

            self.forward = self.build_forward(self.X)
            self.loss_forward = loss.build_forward(self.forward, self.Y)
            self.metrics_forward = tf.tuple([metric.build_forward(self.forward, self.Y) for metric in metrics])

            loss_backward = loss.build_backward()
            self.build_backward(loss_backward)

            self.build_sgd_step(eta)
            
            initializer = tf.global_variables_initializer()
        
        # This initializes the variables in our graph using the current 
        # instance session
        self.sess.run(initializer)
Code example #6
    def __call__(self, dataset, moving_params=None):
        """"""

        vocabs = dataset.vocabs
        inputs = dataset.inputs
        targets = dataset.targets

        reuse = (moving_params is not None)
        self.tokens_to_keep3D = tf.expand_dims(
            tf.to_float(tf.greater(inputs[:, :, 0], vocabs[0].ROOT)), 2)
        self.sequence_lengths = tf.reshape(
            tf.reduce_sum(self.tokens_to_keep3D, [1, 2]), [-1, 1])
        self.n_tokens = tf.reduce_sum(self.sequence_lengths)
        self.moving_params = moving_params

        word_inputs, pret_inputs = vocabs[0].embedding_lookup(
            inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)
        tag_inputs = vocabs[1].embedding_lookup(
            inputs[:, :, 2], moving_params=self.moving_params)

        top_recur = self.embed_concat(word_inputs + pret_inputs, tag_inputs)
        for i in xrange(self.n_recur):
            with tf.variable_scope('RNN%d' % i, reuse=reuse):
                top_recur, _ = self.RNN(top_recur)

        top_mlp = top_recur
        with tf.variable_scope('MLP0', reuse=reuse):
            parse_mlp, rel_mlp = self.double_MLP(top_mlp, n_splits=2)

        with tf.variable_scope('Parses', reuse=reuse):
            parse_logits = tf.squeeze(self.linear_classifier(parse_mlp, 1))
            parse_output = self.output(parse_logits, targets[:, :, 1])
            if moving_params is None:
                predictions = targets[:, :, 1]
            else:
                predictions = parse_output['predictions']
        with tf.variable_scope('Rels', reuse=reuse):
            rel_logits, rel_logits_cond = self.conditional_linear_classifier(
                rel_mlp, len(vocabs[2]), predictions)
            rel_output = self.output(rel_logits, targets[:, :, 2])
            rel_output['probabilities'] = self.conditional_probabilities(
                rel_logits_cond, transpose=False)

        output = {}
        output['probabilities'] = tf.tuple(
            [parse_output['probabilities'], rel_output['probabilities']])
        output['predictions'] = tf.stack(
            [parse_output['predictions'], rel_output['predictions']])
        output['correct'] = parse_output['correct'] * rel_output['correct']
        output['tokens'] = parse_output['tokens']
        output['n_correct'] = tf.reduce_sum(output['correct'])
        output['n_tokens'] = self.n_tokens
        output['accuracy'] = output['n_correct'] / output['n_tokens']
        output['loss'] = parse_output['loss'] + rel_output['loss']

        output['embed'] = tf.stack([word_inputs, tag_inputs])
        output['recur'] = top_recur
        output['parse_mlp'] = parse_mlp
        output['rel_mlp'] = rel_mlp
        output['parse_logits'] = parse_logits
        output['rel_logits'] = rel_logits
        return output
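
Note the two different combinators in the output dictionary above: the prediction tensors, which share a shape, are packed by tf.stack into one tensor with a new leading axis, while the probability tensors are kept as a plain list via tf.tuple, presumably because their shapes differ. A minimal sketch of why that distinction matters, assuming TF1-style graph mode:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

p = tf.zeros([2, 3])
q = tf.zeros([2, 3, 5])      # a different rank/shape

as_list = tf.tuple([p, q])   # fine: still two separate tensors
# tf.stack([p, q])           # would raise: tf.stack needs identical shapes

with tf.Session() as sess:
    a, b = sess.run(as_list)
    print(a.shape, b.shape)  # (2, 3) (2, 3, 5)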
Code example #7
    def test_masked_lm_metrics(self, block_ids):
        np.random.seed(31415)
        if isinstance(block_ids, list):
            batch_size = len(block_ids)
            block_ids_np = np.array(block_ids).astype(np.int32)
        else:
            batch_size = block_ids
            block_ids_np = np.random.randint(10,
                                             size=[batch_size],
                                             dtype=np.int32)

        multi_block_mask_np = np.zeros(batch_size, dtype=np.float32)
        for i in range(batch_size):
            if block_ids_np[i] == 0:
                continue
            for j in range(batch_size):
                if i != j and block_ids_np[i] == block_ids_np[j]:
                    multi_block_mask_np[i] = 1
        single_block_mask_np = 1 - multi_block_mask_np
        mlm_loss_per_sample_np = np.random.random(batch_size).astype(
            np.float32)
        mlm_accuracy_per_sample_np = np.random.random(batch_size).astype(
            np.float32)
        mlm_weight_per_sample_np = np.random.random(batch_size).astype(
            np.float32)

        block_ids_tf = tf.compat.v1.placeholder_with_default(block_ids_np,
                                                             shape=[None])
        mlm_loss_per_sample_tf = tf.compat.v1.placeholder_with_default(
            mlm_loss_per_sample_np, shape=[None])
        mlm_accuracy_per_sample_tf = tf.compat.v1.placeholder_with_default(
            mlm_accuracy_per_sample_np, shape=[None])
        mlm_weight_per_sample_tf = tf.compat.v1.placeholder_with_default(
            mlm_weight_per_sample_np, shape=[None])

        metric_dict = metric_utils.masked_lm_metrics(
            mlm_loss_per_sample_tf,
            mlm_accuracy_per_sample_tf,
            mlm_weight_per_sample_tf,
            block_ids_tf,
            mlm_loss_per_entity_sample=None,
            mlm_accuracy_per_entity_sample=None,
            mlm_weight_per_entity_sample=None,
            mlm_loss_per_non_entity_sample=None,
            mlm_accuracy_per_non_entity_sample=None,
            mlm_weight_per_non_entity_sample=None,
            is_train=True,
            metrics_name="abracadabra")

        (masked_lm_loss, masked_lm_accuracy, masked_lm_loss_multi_blocks,
         masked_lm_loss_single_blocks, masked_lm_accuracy_multi_blocks,
         masked_lm_accuracy_single_blocks, pct_multi_blocks,
         pct_single_blocks) = self.evaluate(
             tf.tuple((metric_dict["abracadabra/mlm_loss"],
                       metric_dict["abracadabra/mlm_accuracy"],
                       metric_dict["abracadabra/mlm_loss_multi_blocks"],
                       metric_dict["abracadabra/mlm_loss_single_blocks"],
                       metric_dict["abracadabra/mlm_accuracy_multi_blocks"],
                       metric_dict["abracadabra/mlm_accuracy_single_blocks"],
                       metric_dict["abracadabra/pct_multi_blocks"],
                       metric_dict["abracadabra/pct_single_blocks"])))

        def weighted_avg(values, weights):
            return values.dot(weights) / (weights.sum() + 1e-5)

        self.assertNear(
            masked_lm_loss,
            weighted_avg(mlm_loss_per_sample_np, mlm_weight_per_sample_np),
            1e-5)
        self.assertNear(
            masked_lm_accuracy,
            weighted_avg(mlm_accuracy_per_sample_np, mlm_weight_per_sample_np),
            1e-5)

        mlm_weight_per_multi_block = mlm_weight_per_sample_np * multi_block_mask_np
        mlm_weight_per_single_block = mlm_weight_per_sample_np * single_block_mask_np
        self.assertNear(
            masked_lm_loss_multi_blocks,
            weighted_avg(mlm_loss_per_sample_np, mlm_weight_per_multi_block),
            1e-5)
        self.assertNear(
            masked_lm_loss_single_blocks,
            weighted_avg(mlm_loss_per_sample_np, mlm_weight_per_single_block),
            1e-5)
        self.assertNear(
            masked_lm_accuracy_multi_blocks,
            weighted_avg(mlm_accuracy_per_sample_np,
                         mlm_weight_per_multi_block), 1e-5)
        self.assertNear(
            masked_lm_accuracy_single_blocks,
            weighted_avg(mlm_accuracy_per_sample_np,
                         mlm_weight_per_single_block), 1e-5)
        self.assertNear(pct_multi_blocks, multi_block_mask_np.mean(), 1e-5)
        self.assertNear(pct_single_blocks, single_block_mask_np.mean(), 1e-5)
Code example #8
    def __call__(self, dataset, moving_params=None):
        """"""

        vocabs = dataset.vocabs
        inputs = dataset.inputs
        targets = dataset.targets

        reuse = (moving_params is not None)
        self.tokens_to_keep3D = tf.expand_dims(
            tf.to_float(tf.greater(inputs[:, :, 0], vocabs[0].ROOT)), 2)
        self.sequence_lengths = tf.reshape(
            tf.reduce_sum(self.tokens_to_keep3D, [1, 2]), [-1, 1])
        self.n_tokens = tf.reduce_sum(self.sequence_lengths)
        self.moving_params = moving_params

        word_inputs, pret_inputs = vocabs[0].embedding_lookup(
            inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)

        if self.word_l2_reg > 0:
            # word_loss is added to the total loss below; define it here as an
            # L2 penalty on the gap between trained and pretrained embeddings.
            unk_mask = tf.expand_dims(
                tf.to_float(tf.greater(inputs[:, :, 1], vocabs[0].UNK)), 2)
            word_loss = self.word_l2_reg * tf.nn.l2_loss(
                (word_inputs - pret_inputs) * unk_mask)

        top_recur = embed_inputs = self.embed_concat(word_inputs + pret_inputs)
        for i in xrange(self.n_recur):
            with tf.variable_scope('RNN%d' % i, reuse=reuse):
                top_recur, _ = self.RNN(top_recur)

        with tf.variable_scope('MLP', reuse=reuse):
            dep_mlp, head_mlp = self.MLP(top_recur,
                                         self.class_mlp_size +
                                         self.attn_mlp_size,
                                         n_splits=2)
            dep_arc_mlp = dep_mlp[:, :, :self.attn_mlp_size]
            dep_rel_mlp = dep_mlp[:, :, self.attn_mlp_size:]
            head_arc_mlp = head_mlp[:, :, :self.attn_mlp_size]
            head_rel_mlp = head_mlp[:, :, self.attn_mlp_size:]

        with tf.variable_scope('Arcs', reuse=reuse):
            arc_logits = self.bilinear_classifier(dep_arc_mlp, head_arc_mlp)
            arc_output = self.output(arc_logits, targets[:, :, 1])
            if moving_params is None:
                predictions = targets[:, :, 1]
            else:
                predictions = arc_output['predictions']
        with tf.variable_scope('Rels', reuse=reuse):
            rel_logits, rel_logits_cond = self.conditional_bilinear_classifier(
                dep_rel_mlp, head_rel_mlp, len(vocabs[2]), predictions)
            rel_output = self.output(rel_logits, targets[:, :, 2])
            rel_output['probabilities'] = self.conditional_probabilities(
                rel_logits_cond)

        output = {}
        output['probabilities'] = tf.tuple(
            [arc_output['probabilities'], rel_output['probabilities']])
        output['predictions'] = tf.stack(
            [arc_output['predictions'], rel_output['predictions']])
        output['correct'] = arc_output['correct'] * rel_output['correct']
        output['tokens'] = arc_output['tokens']
        output['n_correct'] = tf.reduce_sum(output['correct'])
        output['n_tokens'] = self.n_tokens
        output['accuracy'] = output['n_correct'] / output['n_tokens']
        output['loss'] = arc_output['loss'] + rel_output['loss']
        if self.word_l2_reg > 0:
            output['loss'] += word_loss

        output['embed'] = embed_inputs
        output['recur'] = top_recur
        output['dep_arc'] = dep_arc_mlp
        output['head_dep'] = head_arc_mlp
        output['dep_rel'] = dep_rel_mlp
        output['head_rel'] = head_rel_mlp
        output['arc_logits'] = arc_logits
        output['rel_logits'] = rel_logits
        return output
Code example #9
    def test_language_model_test(self, num_positions, padding_token_id,
                                 use_label_weights, use_entity_mask, seed):
        np.random.seed(seed)

        seq_length = 13
        batch_size = 7
        vocab_size = 11
        hidden_size = 3
        embedding_size = 5

        embedding_table_np = np.random.random(
            (vocab_size, embedding_size)).astype(np.float32)
        embedding_table = tf.compat.v1.placeholder_with_default(
            embedding_table_np, shape=[vocab_size, embedding_size])

        input_tensor_np = np.random.random(
            (batch_size, seq_length, hidden_size)).astype(np.float32)
        input_tensor = tf.compat.v1.placeholder_with_default(
            input_tensor_np, shape=[None, None, hidden_size])

        num_labels_ids = num_positions or seq_length
        label_ids_np = np.random.randint(vocab_size,
                                         size=[batch_size, num_labels_ids],
                                         dtype=np.int32)
        label_ids = tf.compat.v1.placeholder_with_default(
            label_ids_np, shape=[None, num_labels_ids])

        if num_positions:
            positions_np = np.random.randint(seq_length,
                                             size=[batch_size, num_positions],
                                             dtype=np.int32)
            positions = tf.compat.v1.placeholder_with_default(
                positions_np, shape=[None, num_positions])
        else:
            positions = None

        if padding_token_id is not None:
            pad_mask = (label_ids_np != padding_token_id).astype(np.float32)
        else:
            pad_mask = np.ones((batch_size, num_labels_ids))

        if use_label_weights:
            label_weights_np = np.random.random(
                (batch_size, num_labels_ids)).astype(np.float32)
            label_weights = tf.compat.v1.placeholder_with_default(
                label_weights_np, shape=[None, num_labels_ids])
        else:
            label_weights_np = np.ones((batch_size, num_labels_ids))
            label_weights = None
        label_weights_np *= pad_mask

        if use_entity_mask:
            entity_mask_np = np.random.binomial(1,
                                                0.5,
                                                size=(batch_size,
                                                      num_labels_ids))
            entity_mask = tf.compat.v1.placeholder_with_default(
                entity_mask_np.astype(np.float32),
                shape=[None, num_labels_ids])
            non_entity_mask = 1 - entity_mask
        else:
            entity_mask = None
            non_entity_mask = None

        loss_fn = losses.LanguageModelLoss(embedding_table,
                                           activation="relu",
                                           hidden_size=hidden_size)

        loss_obj = loss_fn(input_tensor, label_ids, positions, label_weights,
                           padding_token_id, entity_mask, non_entity_mask)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        self.evaluate(init_op)

        self.assertEqual(
            loss_fn.linear_fn.bias.name,
            "language_model_loss/cls/predictions/transform/dense/bias:0")
        self.assertEqual(
            loss_fn.linear_fn.kernel.name,
            "language_model_loss/cls/predictions/transform/dense/kernel:0")

        weight_np = self.evaluate(loss_fn.linear_fn.kernel)

        if num_positions:
            input_tensor_np_new = np.zeros(
                (batch_size, num_positions, hidden_size))
            for i in range(batch_size):
                for j in range(num_positions):
                    input_tensor_np_new[i,
                                        j] = input_tensor_np[i,
                                                             positions_np[i,
                                                                          j]]
            input_tensor_np = input_tensor_np_new
        x = np.dot(
            input_tensor_np.reshape(batch_size * num_labels_ids, hidden_size),
            weight_np)
        x = np.maximum(x, 0)
        x -= x.mean(axis=1, keepdims=True)
        var_x = (x**2).mean(axis=1, keepdims=True)
        x /= np.sqrt(var_x + 0.001)
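        # The mean/variance normalization above is layer normalization with
        # eps=1e-3 and no learned scale/offset, applied after the dense + ReLU
        # transform and before projecting back onto the embedding table.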
        logits = np.dot(x, np.transpose(embedding_table_np))
        log_probs = np.log(scipy.special.softmax(logits, axis=1)).reshape(
            batch_size, num_labels_ids, vocab_size)

        loss_np = 0
        mlm_loss_per_sample_np = np.zeros(batch_size)
        mlm_accuracy_per_sample_np = np.zeros(batch_size)
        mlm_loss_per_entity_sample_np = np.zeros(batch_size)
        mlm_accuracy_per_entity_sample_np = np.zeros(batch_size)
        mlm_loss_per_non_entity_sample_np = np.zeros(batch_size)
        mlm_accuracy_per_non_entity_sample_np = np.zeros(batch_size)
        for i in range(batch_size):
            for j in range(num_labels_ids):
                current_loss = -log_probs[i, j, label_ids_np[i, j]]
                current_loss *= label_weights_np[i, j]
                current_accuracy = int(
                    np.argmax(log_probs[i, j]) == label_ids_np[i, j])
                current_accuracy *= label_weights_np[i, j]
                loss_np += current_loss
                mlm_loss_per_sample_np[i] += current_loss
                mlm_accuracy_per_sample_np[i] += current_accuracy
                if use_entity_mask:
                    if entity_mask_np[i, j] == 1:
                        mlm_loss_per_entity_sample_np[i] += current_loss
                        mlm_accuracy_per_entity_sample_np[
                            i] += current_accuracy
                    else:
                        mlm_loss_per_non_entity_sample_np[i] += current_loss
                        mlm_accuracy_per_non_entity_sample_np[
                            i] += current_accuracy
        loss_np /= (label_weights_np.sum() + 1e-5)
        mlm_weight_per_sample_np = label_weights_np.sum(axis=1)
        mlm_loss_per_sample_np /= (mlm_weight_per_sample_np + 1e-5)
        mlm_accuracy_per_sample_np /= (mlm_weight_per_sample_np + 1e-5)
        if use_entity_mask:
            mlm_loss_per_entity_sample_np /= (
                (label_weights_np * entity_mask_np).sum(axis=1) + 1e-5)
            mlm_accuracy_per_entity_sample_np /= (
                (label_weights_np * entity_mask_np).sum(axis=1) + 1e-5)
            mlm_loss_per_non_entity_sample_np /= (
                (label_weights_np * (1 - entity_mask_np)).sum(axis=1) + 1e-5)
            mlm_accuracy_per_non_entity_sample_np /= (
                (label_weights_np * (1 - entity_mask_np)).sum(axis=1) + 1e-5)

        if use_entity_mask:
            (loss, mlm_loss_per_sample, mlm_accuracy_per_sample,
             mlm_weight_per_sample, mlm_loss_per_entity_sample,
             mlm_accuracy_per_entity_sample, mlm_weight_per_entity_sample,
             mlm_loss_per_non_entity_sample,
             mlm_accuracy_per_non_entity_sample,
             mlm_weight_per_non_entity_sample) = self.evaluate(
                 tf.tuple((loss_obj.loss, loss_obj.mlm_loss_per_sample,
                           loss_obj.mlm_accuracy_per_sample,
                           loss_obj.mlm_weight_per_sample,
                           loss_obj.mlm_loss_per_entity_sample,
                           loss_obj.mlm_accuracy_per_entity_sample,
                           loss_obj.mlm_weight_per_entity_sample,
                           loss_obj.mlm_loss_per_non_entity_sample,
                           loss_obj.mlm_accuracy_per_non_entity_sample,
                           loss_obj.mlm_weight_per_non_entity_sample)))
        else:
            (loss, mlm_loss_per_sample, mlm_accuracy_per_sample,
             mlm_weight_per_sample) = self.evaluate(
                 tf.tuple((loss_obj.loss, loss_obj.mlm_loss_per_sample,
                           loss_obj.mlm_accuracy_per_sample,
                           loss_obj.mlm_weight_per_sample)))

        self.assertAllEqual(loss.shape, [])
        self.assertNear(loss, loss_np, err=1e-4)

        self.assertAllEqual(mlm_loss_per_sample.shape, [batch_size])
        self.assertArrayNear(mlm_loss_per_sample,
                             mlm_loss_per_sample_np,
                             err=1e-4)

        self.assertAllEqual(mlm_accuracy_per_sample.shape, [batch_size])
        self.assertArrayNear(mlm_accuracy_per_sample,
                             mlm_accuracy_per_sample_np,
                             err=1e-4)
        self.assertAllEqual(mlm_weight_per_sample.shape, [batch_size])
        self.assertArrayNear(mlm_weight_per_sample,
                             mlm_weight_per_sample_np,
                             err=1e-4)

        if use_entity_mask:
            self.assertArrayNear(mlm_weight_per_entity_sample,
                                 (label_weights_np *
                                  entity_mask_np).sum(axis=1),
                                 err=1e-4)
            self.assertArrayNear(mlm_loss_per_entity_sample,
                                 mlm_loss_per_entity_sample_np,
                                 err=1e-4)
            self.assertArrayNear(mlm_accuracy_per_entity_sample,
                                 mlm_accuracy_per_entity_sample_np,
                                 err=1e-4)
            self.assertArrayNear(mlm_weight_per_non_entity_sample,
                                 (label_weights_np *
                                  (1 - entity_mask_np)).sum(axis=1),
                                 err=1e-4)
            self.assertArrayNear(mlm_loss_per_non_entity_sample,
                                 mlm_loss_per_non_entity_sample_np,
                                 err=1e-4)
            self.assertArrayNear(mlm_accuracy_per_non_entity_sample,
                                 mlm_accuracy_per_non_entity_sample_np,
                                 err=1e-4)
Code example #10
    def train(imPath, logPath, modelPath, pmPath, nTrain, nValid, nTest,
              restoreVariables, nSteps, gpuIndex, testPMIndex):
        os.environ['CUDA_VISIBLE_DEVICES'] = '%d' % gpuIndex

        outLogPath = logPath
        trainWriterPath = pathjoin(logPath, 'Train')
        validWriterPath = pathjoin(logPath, 'Valid')
        outModelPath = pathjoin(modelPath, 'model.ckpt')
        outPMPath = pmPath

        batchSize = UNet2D.hp['batchSize']
        imSize = UNet2D.hp['imSize']
        nChannels = UNet2D.hp['nChannels']
        nClasses = UNet2D.hp['nClasses']

        # --------------------------------------------------
        # data
        # --------------------------------------------------

        Train = np.zeros((nTrain, imSize, imSize, nChannels))
        Valid = np.zeros((nValid, imSize, imSize, nChannels))
        Test = np.zeros((nTest, imSize, imSize, nChannels))
        LTrain = np.zeros((nTrain, imSize, imSize, nClasses))
        LValid = np.zeros((nValid, imSize, imSize, nClasses))
        LTest = np.zeros((nTest, imSize, imSize, nClasses))

        print('loading data, computing mean / st dev')
        if not os.path.exists(modelPath):
            os.makedirs(modelPath)
        if restoreVariables:
            datasetMean = loadData(pathjoin(modelPath, 'datasetMean.data'))
            datasetStDev = loadData(pathjoin(modelPath, 'datasetStDev.data'))
        else:
            datasetMean = 0
            datasetStDev = 0
            for iSample in range(nTrain + nValid + nTest):
                I = im2double(tifread('%s/I%05d_Img.tif' % (imPath, iSample)))
                datasetMean += np.mean(I)
                datasetStDev += np.std(I)
            datasetMean /= (nTrain + nValid + nTest)
            datasetStDev /= (nTrain + nValid + nTest)
            saveData(datasetMean, pathjoin(modelPath, 'datasetMean.data'))
            saveData(datasetStDev, pathjoin(modelPath, 'datasetStDev.data'))

        perm = np.arange(nTrain + nValid + nTest)
        np.random.shuffle(perm)

        for iSample in range(0, nTrain):
            path = '%s/I%05d_Img.tif' % (imPath, perm[iSample])
            im = im2double(tifread(path))
            Train[iSample, :, :, 0] = (im - datasetMean) / datasetStDev
            path = '%s/I%05d_Ant.tif' % (imPath, perm[iSample])
            im = tifread(path)
            for i in range(nClasses):
                LTrain[iSample, :, :, i] = (im == i + 1)

        for iSample in range(0, nValid):
            path = '%s/I%05d_Img.tif' % (imPath, perm[nTrain + iSample])
            im = im2double(tifread(path))
            Valid[iSample, :, :, 0] = (im - datasetMean) / datasetStDev
            path = '%s/I%05d_Ant.tif' % (imPath, perm[nTrain + iSample])
            im = tifread(path)
            for i in range(nClasses):
                LValid[iSample, :, :, i] = (im == i + 1)

        for iSample in range(0, nTest):
            path = '%s/I%05d_Img.tif' % (imPath,
                                         perm[nTrain + nValid + iSample])
            im = im2double(tifread(path))
            Test[iSample, :, :, 0] = (im - datasetMean) / datasetStDev
            path = '%s/I%05d_Ant.tif' % (imPath,
                                         perm[nTrain + nValid + iSample])
            im = tifread(path)
            for i in range(nClasses):
                LTest[iSample, :, :, i] = (im == i + 1)

        # --------------------------------------------------
        # optimization
        # --------------------------------------------------

        tfLabels = tf.placeholder("float",
                                  shape=[None, imSize, imSize, nClasses],
                                  name='labels')

        globalStep = tf.Variable(0, trainable=False)
        learningRate0 = 0.01
        decaySteps = 1000
        decayRate = 0.95
        learningRate = tf.train.exponential_decay(learningRate0,
                                                  globalStep,
                                                  decaySteps,
                                                  decayRate,
                                                  staircase=True)
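        # With staircase=True this evaluates to
        #   learningRate = learningRate0 * decayRate ** floor(globalStep / decaySteps),
        # e.g. after 2500 steps: 0.01 * 0.95 ** 2 = 0.009025.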

        with tf.name_scope('optim'):
            loss = tf.reduce_mean(
                -tf.reduce_sum(tf.multiply(tfLabels, tf.log(UNet2D.nn)), 3))
            updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            # optimizer = tf.train.MomentumOptimizer(1e-3,0.9)
            optimizer = tf.train.MomentumOptimizer(learningRate, 0.9)
            # optimizer = tf.train.GradientDescentOptimizer(learningRate)
            with tf.control_dependencies(updateOps):
                optOp = optimizer.minimize(loss, global_step=globalStep)

        with tf.name_scope('eval'):
            error = []
            for iClass in range(nClasses):
                labels0 = tf.reshape(
                    tf.to_int32(
                        tf.slice(tfLabels, [0, 0, 0, iClass],
                                 [-1, -1, -1, 1])),
                    [batchSize, imSize, imSize])
                predict0 = tf.reshape(
                    tf.to_int32(tf.equal(tf.argmax(UNet2D.nn, 3), iClass)),
                    [batchSize, imSize, imSize])
                correct = tf.multiply(labels0, predict0)
                nCorrect0 = tf.reduce_sum(correct)
                nLabels0 = tf.reduce_sum(labels0)
                error.append(1 -
                             tf.to_float(nCorrect0) / tf.to_float(nLabels0))
            errors = tf.tuple(error)

        # --------------------------------------------------
        # inspection
        # --------------------------------------------------

        with tf.name_scope('scalars'):
            tf.summary.scalar('avg_cross_entropy', loss)
            for iClass in range(nClasses):
                tf.summary.scalar('avg_pixel_error_%d' % iClass, error[iClass])
            tf.summary.scalar('learning_rate', learningRate)
        with tf.name_scope('images'):
            split0 = tf.slice(UNet2D.nn, [0, 0, 0, 0], [-1, -1, -1, 1])
            split1 = tf.slice(UNet2D.nn, [0, 0, 0, 1], [-1, -1, -1, 1])
            if nClasses > 2:
                split2 = tf.slice(UNet2D.nn, [0, 0, 0, 2], [-1, -1, -1, 1])
            tf.summary.image('pm0', split0)
            tf.summary.image('pm1', split1)
            if nClasses > 2:
                tf.summary.image('pm2', split2)
        merged = tf.summary.merge_all()

        # --------------------------------------------------
        # session
        # --------------------------------------------------

        saver = tf.train.Saver()
        sess = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True)
        )  # config parameter needed to save variables when using GPU

        if os.path.exists(outLogPath):
            shutil.rmtree(outLogPath)
        trainWriter = tf.summary.FileWriter(trainWriterPath, sess.graph)
        validWriter = tf.summary.FileWriter(validWriterPath, sess.graph)

        if restoreVariables:
            saver.restore(sess, outModelPath)
            print("Model restored.")
        else:
            sess.run(tf.global_variables_initializer())

        # --------------------------------------------------
        # train
        # --------------------------------------------------

        batchData = np.zeros((batchSize, imSize, imSize, nChannels))
        batchLabels = np.zeros((batchSize, imSize, imSize, nClasses))
        for i in range(nSteps):
            # train

            perm = np.arange(nTrain)
            np.random.shuffle(perm)

            for j in range(batchSize):
                batchData[j, :, :, :] = Train[perm[j], :, :, :]
                batchLabels[j, :, :, :] = LTrain[perm[j], :, :, :]

            summary, _ = sess.run(
                [merged, optOp],
                feed_dict={
                    UNet2D.tfData: batchData,
                    tfLabels: batchLabels,
                    UNet2D.tfTraining: 1
                })
            trainWriter.add_summary(summary, i)

            # validation

            perm = np.arange(nValid)
            np.random.shuffle(perm)

            for j in range(batchSize):
                batchData[j, :, :, :] = Valid[perm[j], :, :, :]
                batchLabels[j, :, :, :] = LValid[perm[j], :, :, :]

            summary, es = sess.run(
                [merged, errors],
                feed_dict={
                    UNet2D.tfData: batchData,
                    tfLabels: batchLabels,
                    UNet2D.tfTraining: 0
                })
            validWriter.add_summary(summary, i)

            e = np.mean(es)
            print('step %05d, e: %f' % (i, e))

            if i == 0:
                if restoreVariables:
                    lowestError = e
                else:
                    lowestError = np.inf

            if np.mod(i, 100) == 0 and e < lowestError:
                lowestError = e
                print("Model saved in file: %s" %
                      saver.save(sess, outModelPath))

        # --------------------------------------------------
        # test
        # --------------------------------------------------

        if not os.path.exists(outPMPath):
            os.makedirs(outPMPath)

        for i in range(nTest):
            j = np.mod(i, batchSize)

            batchData[j, :, :, :] = Test[i, :, :, :]
            batchLabels[j, :, :, :] = LTest[i, :, :, :]

            if j == batchSize - 1 or i == nTest - 1:

                output = sess.run(UNet2D.nn,
                                  feed_dict={
                                      UNet2D.tfData: batchData,
                                      tfLabels: batchLabels,
                                      UNet2D.tfTraining: 0
                                  })

                for k in range(j + 1):
                    pm = output[k, :, :, testPMIndex]
                    gt = batchLabels[k, :, :, testPMIndex]
                    im = np.sqrt(normalize(batchData[k, :, :, 0]))
                    imwrite(
                        np.uint8(255 * np.concatenate(
                            (im, np.concatenate((pm, gt), axis=1)), axis=1)),
                        '%s/I%05d.png' % (outPMPath, i - j + k + 1))

        # --------------------------------------------------
        # save hyper-parameters, clean-up
        # --------------------------------------------------

        saveData(UNet2D.hp, pathjoin(modelPath, 'hp.data'))

        trainWriter.close()
        validWriter.close()
        sess.close()
Code example #11
 def __call__(self, dataset, moving_params=None):
   """"""
   
   vocabs = dataset.vocabs
   inputs = dataset.inputs
   targets = dataset.targets
   
   reuse = (moving_params is not None)
   self.tokens_to_keep3D = tf.expand_dims(tf.to_float(tf.greater(inputs[:,:,0], vocabs[0].ROOT)), 2)
   self.sequence_lengths = tf.reshape(tf.reduce_sum(self.tokens_to_keep3D, [1, 2]), [-1,1])
   self.n_tokens = tf.reduce_sum(self.sequence_lengths)
   self.moving_params = moving_params
   
   word_inputs, pret_inputs = vocabs[0].embedding_lookup(inputs[:,:,0], inputs[:,:,1], moving_params=self.moving_params)
   tag_inputs = vocabs[1].embedding_lookup(inputs[:,:,2], moving_params=self.moving_params)
   if self.add_to_pretrained and not self.char_based:
     word_inputs += pret_inputs
   if self.word_l2_reg > 0:
     unk_mask = tf.expand_dims(tf.to_float(tf.greater(inputs[:,:,1], vocabs[0].UNK)),2)
     word_loss = self.word_l2_reg*tf.nn.l2_loss((word_inputs - pret_inputs) * unk_mask)
   embed_inputs = self.embed_concat(word_inputs, tag_inputs)
   
   top_recur = embed_inputs
   recur_diag_bilin = False  # self.recur_diag_bilin and tag_inputs.get_shape().as_list()[-1] == word_inputs.get_shape().as_list()[-1]
   for i in xrange(self.n_recur):
     with tf.variable_scope('RNN%d' % i, reuse=reuse):
       top_recur, _ = self.RNN(top_recur, recur_diag_bilin=recur_diag_bilin)
       recur_diag_bilin = self.recur_diag_bilin
   if self.attn_based:
     top_recur = self.soft_attn(top_recur, recur_diag_bilin=recur_diag_bilin)
     recur_diag_bilin = False
   
   with tf.variable_scope('Arcs', reuse=reuse):
     arc_logits = self.bilinear_classifier(top_recur,top_recur)
     arc_output = self.output(arc_logits, targets[:,:,1])
     if moving_params is None:
       predictions = targets[:,:,1]
     else:
       predictions = arc_output['predictions']
   with tf.variable_scope('Rels', reuse=reuse):
     rel_logits, rel_logits_cond = self.conditional_bilinear_classifier(top_recur, top_recur, len(vocabs[2]), predictions)
     rel_output = self.output(rel_logits, targets[:,:,2])
     rel_output['probabilities'] = self.conditional_probabilities(rel_logits_cond)
   
   output = {}
   output['probabilities'] = tf.tuple([arc_output['probabilities'],
                                       rel_output['probabilities']])
   output['predictions'] = tf.stack([arc_output['predictions'],
                                    rel_output['predictions']])
   output['correct'] = arc_output['correct'] * rel_output['correct']
   output['tokens'] = arc_output['tokens']
   output['n_correct'] = tf.reduce_sum(output['correct'])
   output['n_tokens'] = self.n_tokens
   output['accuracy'] = output['n_correct'] / output['n_tokens']
   output['loss'] = arc_output['loss'] + rel_output['loss'] 
   if self.word_l2_reg > 0:
     output['loss'] += word_loss
   
   output['embed'] = embed_inputs
   output['recur'] = top_recur
   output['arc_logits'] = arc_logits
   output['rel_logits'] = rel_logits
   return output