Code example #1
 def get_input_tensor(self,
                      embed_keep_prob=None,
                      variable_scope=None,
                      reuse=True):
     """"""
     #pdb.set_trace()
     embed_keep_prob = embed_keep_prob or self.embed_keep_prob
     #pdb.set_trace()
     with tf.variable_scope('elmo_vocab'):
         if self.strategy == 'three_layers':
             weight = tf.get_variable('softmax_weight',
                                      shape=[3],
                                      initializer=tf.ones_initializer)
             softmax_weight = tf.nn.softmax(weight)
             # mix the three ELMo layers with the softmax-normalized weights
             layer = tf.einsum('nabc,b->nac', self.placeholder, softmax_weight)
         elif self.strategy == 'two_layers':
             weight = tf.get_variable('weight', initializer=0.5)
             layer = self.placeholder[:, :, -1] * weight + (
                 1 - weight) * self.placeholder[:, :, -2]
         elif self.strategy == 'third_layer':
             layer = self.placeholder[:, :, -1, :]
         elif self.strategy == 'second_layer':
             layer = self.placeholder[:, :, -2, :]
         else:
             assert 0, 'please specify the strategy'
         scalar = tf.get_variable('scalar',
                                  shape=[1],
                                  initializer=tf.ones_initializer)
         layer = scalar * layer
         layer = classifiers.hidden(layer,
                                    self.linear_size,
                                    hidden_func=self.hidden_func)
     return layer
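The 'three_layers' branch mixes the three ELMo layers with a learned softmax weighting. Below is a minimal NumPy sketch of that mixing step, assuming a (batch, time, 3, dim) ELMo tensor; the shapes and random inputs are illustrative only.

import numpy as np

# Fake ELMo output: (batch, time, 3 layers, dim)
elmo = np.random.randn(2, 5, 3, 8)
raw_weight = np.ones(3)                                   # plays the role of the 'softmax_weight' variable
softmax_weight = np.exp(raw_weight) / np.exp(raw_weight).sum()
# 'nabc,b->nac': weighted sum over the layer axis
mixed = np.einsum('nabc,b->nac', elmo, softmax_weight)
print(mixed.shape)                                        # (2, 5, 8)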
Code example #2
 def get_input_tensor(self, embed_keep_prob=None, variable_scope=None, reuse=True):
   """"""
   #pdb.set_trace()
   embed_keep_prob = embed_keep_prob or self.embed_keep_prob
   #pdb.set_trace()
   outputs=self.placeholder
   with tf.variable_scope('dephead_bert_vocab'):
     layer=classifiers.hidden(outputs,self.linear_size,hidden_func=self.hidden_func)
   return layer
Code example #3
File: token_vocabs.py Project: JZXXX/Semi-SDP
 def get_linear_classifier(self, layer, token_weights, last_output=None, variable_scope=None, reuse=False):
   """"""
   
   if last_output is not None:
     n_layers = 0
     layer = last_output['hidden_layer']
     recur_layer = last_output['recur_layer']
   else:
     n_layers = self.n_layers
     recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   with tf.variable_scope(variable_scope or self.classname):
     for i in six.moves.range(0, n_layers):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, self.hidden_size,
                                   hidden_func=self.hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('Classifier'):
       logits = classifiers.linear_classifier(layer, len(self), hidden_keep_prob=hidden_keep_prob)
   targets = self.placeholder
  
   #-----------------------------------------------------------
   # Compute probabilities/cross entropy
   # (n x m x c) -> (n x m x c)
   probabilities = tf.nn.softmax(logits)
   # (n x m), (n x m x c), (n x m) -> ()
   loss = tf.losses.sparse_softmax_cross_entropy(targets, logits, weights=token_weights)
   
   #-----------------------------------------------------------
   # Compute predictions/accuracy
   # (n x m x c) -> (n x m)
   predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
   # (n x m) (*) (n x m) -> (n x m)
   correct_tokens = nn.equal(targets, predictions) * token_weights
   # (n x m) -> (n)
   tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
   # (n x m) -> (n)
   correct_tokens_per_sequence = tf.reduce_sum(correct_tokens, axis=-1)
   # (n), (n) -> (n)
   correct_sequences = nn.equal(tokens_per_sequence, correct_tokens_per_sequence)
   
   #-----------------------------------------------------------
   # Populate the output dictionary
   outputs = {}
   outputs['recur_layer'] = recur_layer 
   outputs['hidden_layer'] = layer
   outputs['targets'] = targets
   outputs['probabilities'] = probabilities
   outputs['loss'] = loss
   
   outputs['predictions'] = predictions
   outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
   outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
   return outputs
Code example #4
	def forward(self, layers, decoder_embeddings, sentence_feat, token_weights, sequence_length, input_feed=None, target_copy_hidden_states=None, coverage=None,\
				 variable_scope=None, reuse=False, debug=False):
		"""
		decoder embeddings [batch_size, decoder_seq_length, embedding_size]
		layers: outputs of BiLSTM [batch_size, seq_length, hidden_size]
		sentence_feat: the final output state of RNN [num_encoder_layers, batch_size, hidden_size]
		token_weights: mask
		input_feed: None or [batch_size, 1, hidden_size]
		target_copy_hidden_states: None or [batch_size, seq_length, hidden_size]
		coverage: None or [batch_size, 1, encode_seq_length]
		"""

		with tf.variable_scope('Seq2SeqDecoder'):
			with tf.variable_scope('linear'):
				sentence_feat = classifiers.hidden(sentence_feat, self.recur_size,hidden_func=self.hidden_func,hidden_keep_prob=self.hidden_keep_prob)
			with tf.variable_scope('memory_linear'):
				layers = classifiers.hidden(layers, self.recur_size,hidden_func=self.hidden_func,hidden_keep_prob=self.hidden_keep_prob)
			with tf.variable_scope('embedding_linear'):
				decoder_embeddings = classifiers.hidden(decoder_embeddings, self.recur_size,hidden_func=self.hidden_func,hidden_keep_prob=self.hidden_keep_prob)
			result = seq2seq_decoder(self.cell,decoder_embeddings,layers,sequence_length,sentence_feat)
		return result
Code example #5
 def get_input_tensor(self, embed_keep_prob=None, variable_scope=None, reuse=True):
   """"""
   #pdb.set_trace()
   embed_keep_prob = embed_keep_prob or self.embed_keep_prob
   #pdb.set_trace()
   if self.pretrained:
     outputs=self.placeholder
   else:
     if self.previous_layer:
       outputs=self.bert_model.get_all_encoder_layers()[-2]
     else:
       outputs=self.bert_model.get_sequence_output()
     mapping=self.placeholder['mapping']
     if self.strategy=="first_value":
       outputs=tf.batch_gather(outputs,mapping)*tf.cast((mapping>0),dtype=tf.float32)[:,:,None]
     elif self.strategy=="average":
       assert 0, "not implemented"
     else:
       assert 0, "please specify bert strategy"
   with tf.variable_scope('bert_vocab'):
     layer=classifiers.hidden(outputs,self.linear_size,hidden_func=self.hidden_func)
   return layer
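The 'first_value' strategy picks, for each original token, the BERT output of its first wordpiece and zeroes padded positions. A rough NumPy equivalent of the batch_gather-plus-mask step follows; the mapping convention here (0 marks padding) is an assumption for illustration.

import numpy as np

bert_out = np.random.randn(2, 7, 4)            # (batch, wordpiece_len, hidden)
mapping = np.array([[1, 3, 5, 0],              # first-wordpiece index per token, 0 = padding
                    [1, 2, 0, 0]])
# counterpart of tf.batch_gather(outputs, mapping)
gathered = np.take_along_axis(bert_out, mapping[:, :, None], axis=1)
# zero out padded token slots, like the (mapping > 0) mask above
gathered *= (mapping > 0)[:, :, None]
print(gathered.shape)                          # (2, 4, 4)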
Code example #6
    def get_input_tensor(self,
                         embed_keep_prob=None,
                         nonzero_init=False,
                         variable_scope=None,
                         reuse=True):
        """"""

        embed_keep_prob = embed_keep_prob or self.embed_keep_prob
        conv_keep_prob = 1. if reuse else self.conv_keep_prob
        recur_keep_prob = 1. if reuse else self.recur_keep_prob
        output_keep_prob = 1. if reuse else self.output_keep_prob

        layers = []
        with tf.variable_scope(variable_scope or self.classname) as scope:
            for i, placeholder in enumerate(
                    self._multibucket.get_placeholders()):
                if i:
                    scope.reuse_variables()
                #with tf.device('/gpu:0'):
                #with tf.device('/gpu:{}'.format(i)):
                with tf.variable_scope('Embeddings'):
                    layer = embeddings.token_embedding_lookup(
                        len(self),
                        self.embed_size,
                        placeholder,
                        nonzero_init=True,
                        reuse=reuse)

                seq_lengths = tf.count_nonzero(placeholder,
                                               axis=1,
                                               dtype=tf.int32)
                for j in six.moves.range(self.n_layers):
                    conv_width = self.first_layer_conv_width if not j else self.conv_width
                    with tf.variable_scope('RNN-{}'.format(j)):
                        layer, final_states = recurrent.directed_RNN(
                            layer,
                            self.recur_size,
                            seq_lengths,
                            bidirectional=self.bidirectional,
                            recur_cell=self.recur_cell,
                            conv_width=conv_width,
                            recur_func=self.recur_func,
                            conv_keep_prob=conv_keep_prob,
                            recur_keep_prob=recur_keep_prob,
                            cifg=self.cifg,
                            highway=self.highway,
                            highway_func=self.highway_func,
                            bilin=self.bilin)

                if not self.squeeze_type.startswith('gated'):
                    if self.squeeze_type == 'linear_attention':
                        with tf.variable_scope('Attention'):
                            _, layer = classifiers.linear_attention(
                                layer, hidden_keep_prob=output_keep_prob)
                    elif self.squeeze_type == 'final_hidden':
                        layer, _ = tf.split(final_states, 2, axis=-1)
                    elif self.squeeze_type == 'final_cell':
                        _, layer = tf.split(final_states, 2, axis=-1)
                    elif self.squeeze_type == 'final_state':
                        layer = final_states
                    elif self.squeeze_type == 'reduce_max':
                        layer = tf.reduce_max(layer, axis=-2)
                    with tf.variable_scope('Linear'):
                        layer = classifiers.hidden(
                            layer,
                            self.output_size,
                            hidden_func=self.output_func,
                            hidden_keep_prob=output_keep_prob)
                else:
                    with tf.variable_scope('Attention'):
                        attn, layer = classifiers.deep_linear_attention(
                            layer,
                            self.output_size,
                            hidden_func=nonlin.identity,
                            hidden_keep_prob=output_keep_prob)
                    if self.squeeze_type == 'gated_reduce_max':
                        layer = tf.nn.relu(tf.reduce_max(
                            layer, axis=-2)) + .1 * tf.reduce_sum(
                                layer, axis=-2) / (tf.count_nonzero(
                                    layer, axis=-2, dtype=tf.float32) + 1e-12)
                    elif self.squeeze_type == 'gated_reduce_sum':
                        layer = self.output_func(tf.reduce_sum(layer, axis=-2))
                #layer = tf.tf.Print(layer, [tf.shape(layer)])
                layers.append(layer)
            # Concatenate all the buckets' embeddings
            layer = tf.concat(layers, 0)
            # Put them in the right order, creating the embedding matrix
            layer = tf.nn.embedding_lookup(layer,
                                           self._multibucket.placeholder)
            #layer = tf.nn.embedding_lookup(layers, self._multibucket.placeholder, partition_strategy='div')
            #layer = tf.Print(layer, [tf.shape(layer)])
            # Get the embeddings from the embedding matrix
            layer = tf.nn.embedding_lookup(layer, self.placeholder)

            if embed_keep_prob < 1:
                layer = self.drop_func(layer, embed_keep_prob)
        return layer
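Example #6 concatenates the per-bucket outputs, reorders them into one embedding matrix with a first embedding lookup, then looks up each token's vector with a second lookup. A NumPy sketch of that double-lookup idea, with toy vectors standing in for the RNN outputs:

import numpy as np

bucket_outputs = [np.arange(4).reshape(2, 2),             # bucket 0: 2 word types
                  10 + np.arange(6).reshape(3, 2)]        # bucket 1: 3 word types
table = np.concatenate(bucket_outputs, axis=0)            # stack all buckets
order = np.array([0, 2, 1, 4, 3])                         # multibucket placeholder: restores vocab order
embed_matrix = table[order]                                # first lookup: build the embedding matrix
tokens = np.array([[1, 3], [0, 4]])                        # token ids per sentence
embedded = embed_matrix[tokens]                            # second lookup: per-token embeddings
print(embedded.shape)                                      # (2, 2, 2)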
Code example #7
File: index_vocabs.py Project: JZXXX/Semi-SDP
 def get_bilinear_classifier(self, layer, token_weights, variable_scope=None, reuse=False):
   """"""
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   hidden_func = self.hidden_func
   hidden_size = self.hidden_size
   add_linear = self.add_linear
   linearize = self.linearize
   distance = self.distance
   n_splits = 2*(1+linearize+distance)
   with tf.variable_scope(variable_scope or self.field):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, n_splits*hidden_size,
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, n_splits*[hidden_size],
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     if linearize:
       lin_layer1, lin_layer2 = layers.pop(0), layers.pop(0)
     if distance:
       dist_layer1, dist_layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Attention'):
       if self.diagonal:
         logits, _ = classifiers.diagonal_bilinear_attention(
           layer1, layer2, 
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
         if linearize:
           with tf.variable_scope('Linearization'):
             lin_logits = classifiers.diagonal_bilinear_discriminator(
               lin_layer1, lin_layer2,
               hidden_keep_prob=hidden_keep_prob,
               add_linear=add_linear)
         if distance:
           with tf.variable_scope('Distance'):
             dist_lamda = 1+tf.nn.softplus(classifiers.diagonal_bilinear_discriminator(
               dist_layer1, dist_layer2,
               hidden_keep_prob=hidden_keep_prob,
               add_linear=add_linear))
       else:
         logits, _ = classifiers.bilinear_attention(
           layer1, layer2,
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
         if linearize:
           with tf.variable_scope('Linearization'):
             lin_logits = classifiers.bilinear_discriminator(
               lin_layer1, lin_layer2,
               hidden_keep_prob=hidden_keep_prob,
               add_linear=add_linear)
         if distance:
           with tf.variable_scope('Distance'):
             dist_lamda = 1+tf.nn.softplus(classifiers.bilinear_discriminator(
               dist_layer1, dist_layer2,
               hidden_keep_prob=hidden_keep_prob,
               add_linear=add_linear))
       
       #-----------------------------------------------------------
       # Process the targets
       targets = self.placeholder
       shape = tf.shape(layer1)
       batch_size, bucket_size = shape[0], shape[1]
       # (1 x m)
       ids = tf.expand_dims(tf.range(bucket_size), 0)
       # (1 x m) -> (1 x 1 x m)
       head_ids = tf.expand_dims(ids, -2)
       # (1 x m) -> (1 x m x 1)
       dep_ids = tf.expand_dims(ids, -1)
       if linearize:
         # Wherever the head is to the left
         # (n x m), (1 x m) -> (n x m)
         lin_targets = tf.to_float(tf.less(targets, ids))
         # cross-entropy of the linearization of each i,j pair
         # (1 x 1 x m), (1 x m x 1) -> (n x m x m)
         lin_ids = tf.tile(tf.less(head_ids, dep_ids), [batch_size, 1, 1])
         # (n x 1 x m), (n x m x 1) -> (n x m x m)
         lin_xent = -tf.nn.softplus(tf.where(lin_ids, -lin_logits, lin_logits))
         # add the cross-entropy to the logits
         # (n x m x m), (n x m x m) -> (n x m x m)
         logits += tf.stop_gradient(lin_xent)
       if distance:
         # (n x m) - (1 x m) -> (n x m)
         dist_targets = tf.abs(targets - ids)
         # KL-divergence of the distance of each i,j pair
         # (1 x 1 x m) - (1 x m x 1) -> (n x m x m)
         dist_ids = tf.to_float(tf.tile(tf.abs(head_ids - dep_ids), [batch_size, 1, 1]))+1e-12
         # (n x m x m), (n x m x m) -> (n x m x m)
         #dist_kld = (dist_ids * tf.log(dist_lamda / dist_ids) + dist_ids - dist_lamda)
         dist_kld = -tf.log((dist_ids - dist_lamda)**2/2 + 1)
         # add the KL-divergence to the logits
         # (n x m x m), (n x m x m) -> (n x m x m)
         logits += tf.stop_gradient(dist_kld)
       
       #-----------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m) + (m) -> (n x m)
       non_pads = tf.to_float(token_weights) + tf.to_float(tf.logical_not(tf.cast(tf.range(bucket_size), dtype=tf.bool)))
       # (n x m x m) o (n x 1 x m) -> (n x m x m)
       probabilities = tf.nn.softmax(logits) * tf.expand_dims(non_pads, -2)
       # (n x m), (n x m x m), (n x m) -> ()
       loss = tf.losses.sparse_softmax_cross_entropy(
         targets,
         logits,
         weights=token_weights)
       # (n x m) -> (n x m x m x 1)
       one_hot_targets = tf.expand_dims(tf.one_hot(targets, bucket_size), -1)
       # (n x m) -> ()
       n_tokens = tf.to_float(tf.reduce_sum(token_weights))
       if linearize:
         # (n x m x m) -> (n x m x 1 x m)
         lin_xent_reshaped = tf.expand_dims(lin_xent, -2)
         # (n x m x 1 x m) * (n x m x m x 1) -> (n x m x 1 x 1)
         lin_target_xent = tf.matmul(lin_xent_reshaped, one_hot_targets)
         # (n x m x 1 x 1) -> (n x m)
         lin_target_xent = tf.squeeze(lin_target_xent, [-1, -2])
         # (n x m), (n x m), (n x m) -> ()
         loss -= tf.reduce_sum(lin_target_xent*tf.to_float(token_weights)) / (n_tokens + 1e-12)
       if distance:
         # (n x m x m) -> (n x m x 1 x m)
         dist_kld_reshaped = tf.expand_dims(dist_kld, -2)
         # (n x m x 1 x m) * (n x m x m x 1) -> (n x m x 1 x 1)
         dist_target_kld = tf.matmul(dist_kld_reshaped, one_hot_targets)
         # (n x m x 1 x 1) -> (n x m)
         dist_target_kld = tf.squeeze(dist_target_kld, [-1, -2])
         # (n x m), (n x m), (n x m) -> ()
         loss -= tf.reduce_sum(dist_target_kld*tf.to_float(token_weights)) / (n_tokens + 1e-12)
       
       #-----------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x m) -> (n x m)
       predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
       # (n x m) (*) (n x m) -> (n x m)
       correct_tokens = nn.equal(targets, predictions) * token_weights
       # (n x m) -> (n)
       tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
       # (n x m) -> (n)
       correct_tokens_per_sequence = tf.reduce_sum(correct_tokens, axis=-1)
       # (n), (n) -> (n)
       correct_sequences = nn.equal(tokens_per_sequence, correct_tokens_per_sequence)
   
   #-----------------------------------------------------------
   # Populate the output dictionary
   outputs = {}
   outputs['recur_layer'] = recur_layer
   outputs['unlabeled_targets'] = self.placeholder
   outputs['probabilities'] = probabilities
   outputs['unlabeled_loss'] = loss
   outputs['loss'] = loss
   
   outputs['unlabeled_predictions'] = predictions
   outputs['predictions'] = predictions
   outputs['correct_unlabeled_tokens'] = correct_tokens
   outputs['n_correct_unlabeled_tokens'] = tf.reduce_sum(correct_tokens)
   outputs['n_correct_unlabeled_sequences'] = tf.reduce_sum(correct_sequences)
   outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
   outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
   return outputs
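The linearization term in example #7 scores whether each candidate head lies to the left or right of the dependent and adds its (stop-gradiented) log-probability to the arc logits. A NumPy sketch of the softplus-based cross-entropy used there, with toy logits:

import numpy as np

def softplus(x):
    return np.log1p(np.exp(x))

lin_logits = np.array([[0.8, -1.2],
                       [0.3,  0.5]])               # (m x m) linearization scores
head_left_of_dep = np.array([[True, False],
                             [True, True]])        # lin_ids: head index < dependent index
# log P(observed direction): -softplus(-logit) when the head is to the left, -softplus(logit) otherwise
lin_xent = -softplus(np.where(head_left_of_dep, -lin_logits, lin_logits))
print(lin_xent)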
Code example #8
File: index_vocabs.py Project: JZXXX/Semi-SDP
  def get_bilinear_discriminator(self, layer, token_weights, variable_scope=None, reuse=False):
    """"""
    
    recur_layer = layer
    hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
    add_linear = self.add_linear
    n_splits = 2*(1+self.linearize+self.distance)
    with tf.variable_scope(variable_scope or self.field):
      for i in six.moves.range(0, self.n_layers-1):
        with tf.variable_scope('FC-%d' % i):
          layer = classifiers.hidden(layer, n_splits*self.hidden_size,
                                     hidden_func=self.hidden_func,
                                     hidden_keep_prob=hidden_keep_prob)
      with tf.variable_scope('FC-top'):
        layers = classifiers.hiddens(layer, n_splits*[self.hidden_size],
                                     hidden_func=self.hidden_func,
                                     hidden_keep_prob=hidden_keep_prob)
      layer1, layer2 = layers.pop(0), layers.pop(0)
      if self.linearize:
        lin_layer1, lin_layer2 = layers.pop(0), layers.pop(0)
      if self.distance:
        dist_layer1, dist_layer2 = layers.pop(0), layers.pop(0)
      
      with tf.variable_scope('Discriminator'):
        if self.diagonal:
          logits = classifiers.diagonal_bilinear_discriminator(
            layer1, layer2,
            hidden_keep_prob=hidden_keep_prob,
            add_linear=add_linear)
          if self.linearize:
            with tf.variable_scope('Linearization'):
              lin_logits = classifiers.diagonal_bilinear_discriminator(
                lin_layer1, lin_layer2,
                hidden_keep_prob=hidden_keep_prob,
                add_linear=add_linear)
          if self.distance:
            with tf.variable_scope('Distance'):
              dist_lamda = 1+tf.nn.softplus(classifiers.diagonal_bilinear_discriminator(
                dist_layer1, dist_layer2,
                hidden_keep_prob=hidden_keep_prob,
                add_linear=add_linear))
        else:
          logits = classifiers.bilinear_discriminator(
            layer1, layer2,
            hidden_keep_prob=hidden_keep_prob,
            add_linear=add_linear)
          if self.linearize:
            with tf.variable_scope('Linearization'):
              lin_logits = classifiers.bilinear_discriminator(
                lin_layer1, lin_layer2,
                hidden_keep_prob=hidden_keep_prob,
                add_linear=add_linear)
          if self.distance:
            with tf.variable_scope('Distance'):
              dist_lamda = 1+tf.nn.softplus(classifiers.bilinear_discriminator(
                dist_layer1, dist_layer2,
                hidden_keep_prob=hidden_keep_prob,
                add_linear=add_linear))
        
        #-----------------------------------------------------------
        # Process the targets
        # (n x m x m) -> (n x m x m)
        unlabeled_targets = self.placeholder
        shape = tf.shape(layer1)
        batch_size, bucket_size = shape[0], shape[1]
        # (1 x m)
        ids = tf.expand_dims(tf.range(bucket_size), 0)
        # (1 x m) -> (1 x 1 x m)
        head_ids = tf.expand_dims(ids, -2)
        # (1 x m) -> (1 x m x 1)
        dep_ids = tf.expand_dims(ids, -1)
        if self.linearize:
          # Wherever the head is to the left
          # (n x m x m), (1 x m x 1) -> (n x m x m)
          lin_targets = tf.to_float(tf.less(unlabeled_targets, dep_ids))
          # cross-entropy of the linearization of each i,j pair
          # (1 x 1 x m), (1 x m x 1) -> (n x m x m)
          lin_ids = tf.tile(tf.less(head_ids, dep_ids), [batch_size, 1, 1])
          # (n x 1 x m), (n x m x 1) -> (n x m x m)
          lin_xent = -tf.nn.softplus(tf.where(lin_ids, -lin_logits, lin_logits))
          # add the cross-entropy to the logits
          # (n x m x m), (n x m x m) -> (n x m x m)
          logits += tf.stop_gradient(lin_xent)
        if self.distance:
          # (n x m x m) - (1 x m x 1) -> (n x m x m)
          dist_targets = tf.abs(unlabeled_targets - dep_ids)
          # KL-divergence of the distance of each i,j pair
          # (1 x 1 x m) - (1 x m x 1) -> (n x m x m)
          dist_ids = tf.to_float(tf.tile(tf.abs(head_ids - dep_ids), [batch_size, 1, 1]))+1e-12
          # (n x m x m), (n x m x m) -> (n x m x m)
          #dist_kld = (dist_ids * tf.log(dist_lamda / dist_ids) + dist_ids - dist_lamda)
          dist_kld = -tf.log((dist_ids - dist_lamda)**2/2 + 1)
          # add the KL-divergence to the logits
          # (n x m x m), (n x m x m) -> (n x m x m)
          logits += tf.stop_gradient(dist_kld)
        
        #-----------------------------------------------------------
        # Compute probabilities/cross entropy
        # (n x m x m) -> (n x m x m)
        probabilities = tf.nn.sigmoid(logits) * tf.to_float(token_weights)
        # (n x m x m), (n x m x m), (n x m x m) -> ()
        loss = tf.losses.sigmoid_cross_entropy(unlabeled_targets, logits, weights=token_weights)
        n_tokens = tf.to_float(tf.reduce_sum(token_weights))
        if self.linearize:
          lin_target_xent = lin_xent * unlabeled_targets
          loss -= tf.reduce_sum(lin_target_xent * tf.to_float(token_weights)) / (n_tokens + 1e-12)
        if self.distance:
          dist_target_kld = dist_kld * unlabeled_targets
          loss -= tf.reduce_sum(dist_target_kld * tf.to_float(token_weights)) / (n_tokens + 1e-12)

        #-----------------------------------------------------------
        # Compute predictions/accuracy
        # (n x m x m) -> (n x m x m)
        predictions = nn.greater(logits, 0, dtype=tf.int64) * token_weights
        # (n x m x m) (*) (n x m x m) -> (n x m x m)
        true_positives = predictions * unlabeled_targets
        # (n x m x m) -> ()
        n_predictions = tf.reduce_sum(predictions)
        n_targets = tf.reduce_sum(unlabeled_targets)
        n_true_positives = tf.reduce_sum(true_positives)
        # () - () -> ()
        n_false_positives = n_predictions - n_true_positives
        n_false_negatives = n_targets - n_true_positives
        # (n x m x m) -> (n)
        n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
        n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
        # (n) x 2 -> ()
        n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))

    #-----------------------------------------------------------
    # Populate the output dictionary
    outputs = {}
    outputs['unlabeled_targets'] = unlabeled_targets
    outputs['probabilities'] = probabilities
    outputs['unlabeled_loss'] = loss
    outputs['loss'] = loss
    outputs['logits'] = logits

    outputs['unlabeled_predictions'] = predictions
    outputs['n_unlabeled_true_positives'] = n_true_positives
    outputs['n_unlabeled_false_positives'] = n_false_positives
    outputs['n_unlabeled_false_negatives'] = n_false_negatives
    outputs['n_correct_unlabeled_sequences'] = n_correct_sequences
    outputs['predictions'] = predictions
    outputs['n_true_positives'] = n_true_positives
    outputs['n_false_positives'] = n_false_positives
    outputs['n_false_negatives'] = n_false_negatives
    outputs['n_correct_sequences'] = n_correct_sequences
    return outputs
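The accuracy block in example #8 reduces the predicted and gold arc matrices to precision/recall counts. A small NumPy sketch of that bookkeeping, with toy 2x2 adjacency matrices:

import numpy as np

predictions = np.array([[[0, 1], [1, 0]]])        # (n x m x m) predicted arcs
targets     = np.array([[[0, 1], [0, 0]]])        # (n x m x m) gold arcs
true_positives = predictions * targets
n_true_positives  = true_positives.sum()
n_false_positives = predictions.sum() - n_true_positives
n_false_negatives = targets.sum() - n_true_positives
print(n_true_positives, n_false_positives, n_false_negatives)   # 1 1 0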
Code example #9
    def get_linear_classifier(self,
                              layer,
                              token_weights,
                              last_output=None,
                              variable_scope=None,
                              reuse=False):
        """"""

        if last_output:
            n_layers = 0
            layer = last_output['hidden_layer']
            recur_layer = last_output['recur_layer']
        else:
            n_layers = self.n_layers
            recur_layer = layer
        hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
        with tf.variable_scope(variable_scope or self.classname):
            for i in six.moves.range(0, n_layers):
                with tf.variable_scope('FC-%d' % i):
                    layer = classifiers.hidden(
                        layer,
                        self.hidden_size,
                        hidden_func=self.hidden_func,
                        hidden_keep_prob=hidden_keep_prob)
            with tf.variable_scope('Classifier'):
                probabilities = []
                loss = []
                predictions = []
                correct_tokens = []
                for i, feat in enumerate(self._feats):
                    vs_feat = str(feat).replace('[',
                                                '-RSB-').replace(']', '-LSB-')
                    with tf.variable_scope(vs_feat):
                        logits = classifiers.linear_classifier(
                            layer,
                            self.getlen(feat),
                            hidden_keep_prob=hidden_keep_prob)
                        targets = self.placeholder[:, :, i]

                        #---------------------------------------------------
                        # Compute probabilities/cross entropy
                        # (n x m x c) -> (n x m x c)
                        probabilities.append(tf.nn.softmax(logits))
                        # (n x m), (n x m x c), (n x m) -> ()
                        loss.append(
                            tf.losses.sparse_softmax_cross_entropy(
                                targets, logits, weights=token_weights))

                        #---------------------------------------------------
                        # Compute predictions/accuracy
                        # (n x m x c) -> (n x m)
                        predictions.append(
                            tf.argmax(logits, axis=-1, output_type=tf.int32))
                        # (n x m) (*) (n x m) -> (n x m)
                        correct_tokens.append(
                            nn.equal(targets, predictions[-1]))
                # (n x m) x f -> (n x m x f)
                predictions = tf.stack(predictions, axis=-1)
                # (n x m) x f -> (n x m x f)
                correct_tokens = tf.stack(correct_tokens, axis=-1)
                # (n x m x f) -> (n x m)
                correct_tokens = tf.reduce_prod(correct_tokens,
                                                axis=-1) * token_weights
                # (n x m) -> (n)
                tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
                # (n x m) -> (n)
                correct_tokens_per_sequence = tf.reduce_sum(correct_tokens,
                                                            axis=-1)
                # (n), (n) -> (n)
                correct_sequences = nn.equal(tokens_per_sequence,
                                             correct_tokens_per_sequence)

        #-----------------------------------------------------------
        # Populate the output dictionary
        outputs = {}
        outputs['recur_layer'] = recur_layer
        outputs['targets'] = self.placeholder
        outputs['probabilities'] = probabilities
        outputs['loss'] = tf.add_n(loss)

        outputs['predictions'] = predictions
        outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
        outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
        return outputs
Code example #10
File: token_vocabs.py Project: JZXXX/Semi-SDP
 def get_unfactored_bilinear_classifier(self, layer, token_weights, variable_scope=None, reuse=False):
   """"""
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.field):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*self.hidden_size,
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Classifier'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       
       #-----------------------------------------------------------
       # Process the targets
       targets = self.placeholder
       # (n x m x m) -> (n x m x m)
       unlabeled_targets = nn.greater(targets, 0)
       
       #-----------------------------------------------------------
       # Process the logits
       # (n x m x c x m) -> (n x m x m x c)
       transposed_logits = tf.transpose(logits, [0,1,3,2])
       
       #-----------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m x m x c) -> (n x m x m x c)
       probabilities = tf.nn.softmax(transposed_logits) * tf.to_float(tf.expand_dims(token_weights, axis=-1))
       # (n x m x m), (n x m x m x c), (n x m x m) -> ()
       loss = tf.losses.sparse_softmax_cross_entropy(targets, transposed_logits, weights=token_weights)
       
       #-----------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x m x c) -> (n x m x m)
       predictions = tf.argmax(transposed_logits, axis=-1, output_type=tf.int32) * token_weights
       # (n x m x m) -> (n x m x m)
       unlabeled_predictions = nn.greater(predictions, 0)
       # (n x m x m) (*) (n x m x m) -> (n x m x m)
       unlabeled_true_positives = unlabeled_predictions * unlabeled_targets
       true_positives = nn.equal(targets, predictions) * unlabeled_true_positives
       # (n x m x m) -> ()
       n_predictions = tf.reduce_sum(unlabeled_predictions)
       n_targets = tf.reduce_sum(unlabeled_targets)
       n_unlabeled_true_positives = tf.reduce_sum(unlabeled_true_positives)
       n_true_positives = tf.reduce_sum(true_positives)
       # () - () -> ()
       n_unlabeled_false_positives = n_predictions - n_unlabeled_true_positives
       n_unlabeled_false_negatives = n_targets - n_unlabeled_true_positives
       n_false_positives = n_predictions - n_true_positives
       n_false_negatives = n_targets - n_true_positives
       # (n x m x m) -> (n)
       n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
       n_unlabeled_true_positives_per_sequence = tf.reduce_sum(unlabeled_true_positives, axis=[1,2])
       n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
       # (n) x 2 -> ()
       n_correct_unlabeled_sequences = tf.reduce_sum(nn.equal(n_unlabeled_true_positives_per_sequence, n_targets_per_sequence))
       n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
       
   #-----------------------------------------------------------
   # Populate the output dictionary
   outputs = {}
   outputs['recur_layer'] = recur_layer
   outputs['unlabeled_targets'] = unlabeled_targets
   outputs['label_targets'] = self.placeholder
   outputs['probabilities'] = probabilities
   outputs['unlabeled_loss'] = tf.constant(0.)
   outputs['loss'] = loss
   
   outputs['unlabeled_predictions'] = unlabeled_predictions
   outputs['label_predictions'] = predictions
   outputs['n_unlabeled_true_positives'] = n_unlabeled_true_positives
   outputs['n_unlabeled_false_positives'] = n_unlabeled_false_positives
   outputs['n_unlabeled_false_negatives'] = n_unlabeled_false_negatives
   outputs['n_correct_unlabeled_sequences'] = n_correct_unlabeled_sequences
   outputs['n_true_positives'] = n_true_positives
   outputs['n_false_positives'] = n_false_positives
   outputs['n_false_negatives'] = n_false_negatives
   outputs['n_correct_sequences'] = n_correct_sequences
   return outputs
Code example #11
File: token_vocabs.py Project: JZXXX/Semi-SDP
  def get_bilinear_classifier(self, layer, outputs, token_weights, variable_scope=None, reuse=False):
    """"""
    
    recur_layer = layer
    hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
    add_linear = self.add_linear
    with tf.variable_scope(variable_scope or self.field):
      for i in six.moves.range(0, self.n_layers-1):
        with tf.variable_scope('FC-%d' % i):
          layer = classifiers.hidden(layer, 2*self.hidden_size,
                                      hidden_func=self.hidden_func,
                                      hidden_keep_prob=hidden_keep_prob)
      with tf.variable_scope('FC-top'):
        layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
      layer1, layer2 = layers.pop(0), layers.pop(0)
      
      with tf.variable_scope('Classifier'):
        if self.diagonal:
          logits = classifiers.diagonal_bilinear_classifier(
            layer1, layer2, len(self),
            hidden_keep_prob=hidden_keep_prob,
            add_linear=add_linear)
        else:
          logits = classifiers.bilinear_classifier(
            layer1, layer2, len(self),
            hidden_keep_prob=hidden_keep_prob,
            add_linear=add_linear)
    
    #-----------------------------------------------------------
    # Process the targets
    # (n x m x m)
    label_targets = self.placeholder
    unlabeled_predictions = outputs['unlabeled_predictions']
    unlabeled_targets = outputs['unlabeled_targets']
    
    #-----------------------------------------------------------
    # Process the logits
    # (n x m x c x m) -> (n x m x m x c)
    transposed_logits = tf.transpose(logits, [0,1,3,2])
    
    #-----------------------------------------------------------
    # Compute the probabilities/cross entropy
    # (n x m x m) -> (n x m x m x 1)
    head_probabilities = tf.expand_dims(tf.stop_gradient(outputs['probabilities']), axis=-1)
    # (n x m x m x c) -> (n x m x m x c)
    label_probabilities = tf.nn.softmax(transposed_logits) * tf.to_float(tf.expand_dims(token_weights, axis=-1))
    # (n x m x m), (n x m x m x c), (n x m x m) -> ()
    label_loss = tf.losses.sparse_softmax_cross_entropy(label_targets, transposed_logits, weights=token_weights*unlabeled_targets)
    
    #-----------------------------------------------------------
    # Compute the predictions/accuracy
    # (n x m x m x c) -> (n x m x m)
    predictions = tf.argmax(transposed_logits, axis=-1, output_type=tf.int64)
    # (n x m x m) (*) (n x m x m) -> (n x m x m)
    true_positives = nn.equal(label_targets, predictions) * unlabeled_predictions
    correct_label_tokens = nn.equal(label_targets, predictions) * unlabeled_targets
    # (n x m x m) -> ()
    n_unlabeled_predictions = tf.reduce_sum(unlabeled_predictions)
    n_unlabeled_targets = tf.reduce_sum(unlabeled_targets)
    n_true_positives = tf.reduce_sum(true_positives)
    n_correct_label_tokens = tf.reduce_sum(correct_label_tokens)
    # () - () -> ()
    n_false_positives = n_unlabeled_predictions - n_true_positives
    n_false_negatives = n_unlabeled_targets - n_true_positives
    # (n x m x m) -> (n)
    n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
    n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
    n_correct_label_tokens_per_sequence = tf.reduce_sum(correct_label_tokens, axis=[1,2])
    # (n) x 2 -> ()
    n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
    n_correct_label_sequences = tf.reduce_sum(nn.equal(n_correct_label_tokens_per_sequence, n_targets_per_sequence))
    
    #-----------------------------------------------------------
    # Populate the output dictionary
    rho = self.loss_interpolation
    outputs['label_targets'] = label_targets
    outputs['probabilities'] = label_probabilities * head_probabilities
    outputs['label_loss'] = label_loss
    outputs['loss'] = 2*((1-rho) * outputs['loss'] + rho * label_loss)
    
    outputs['n_true_positives'] = n_true_positives
    outputs['n_false_positives'] = n_false_positives
    outputs['n_false_negatives'] = n_false_negatives
    outputs['n_correct_sequences'] = n_correct_sequences
    outputs['n_correct_label_tokens'] = n_correct_label_tokens
    outputs['n_correct_label_sequences'] = n_correct_label_sequences
    # ==============================================================
    outputs['label_predictions'] = predictions
    outputs['label_targets'] = label_targets
    outputs['label_logits'] = transposed_logits
    # pdb.set_trace()
    #===============================================================

    return outputs
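The last step of example #11 interpolates the arc loss and the label loss with weight rho, doubled so that rho = 0.5 recovers a plain sum. A one-line illustration with toy numbers:

rho = 0.5
unlabeled_loss, label_loss = 0.7, 0.3
combined = 2 * ((1 - rho) * unlabeled_loss + rho * label_loss)
print(combined)   # 1.0, i.e. the plain sum when rho = 0.5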
Code example #12
File: token_vocabs.py Project: JZXXX/Semi-SDP
 def get_bilinear_discriminator(self, layer, token_weights, variable_scope=None, reuse=False):
   """"""
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.classname):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*self.hidden_size,
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
      with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                  hidden_func=self.hidden_func,
                                  hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Discriminator'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_discriminator(
           layer1, layer2,
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_discriminator(
           layer1, layer2,
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       
       #-----------------------------------------------------------
       # Process the targets
       # (n x m x m) -> (n x m x m)
       unlabeled_targets = nn.greater(self.placeholder, 0)
       
       #-----------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m x m) -> (n x m x m)
       probabilities = tf.nn.sigmoid(logits)
       # (n x m x m), (n x m x m x c), (n x m x m) -> ()
       loss = tf.losses.sigmoid_cross_entropy(unlabeled_targets, logits, weights=token_weights)
       
       #-----------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x m x c) -> (n x m x m)
       predictions = nn.greater(logits, 0, dtype=tf.int32) * token_weights
       # (n x m x m) (*) (n x m x m) -> (n x m x m)
       true_positives = predictions * unlabeled_targets
       # (n x m x m) -> ()
       n_predictions = tf.reduce_sum(predictions)
       n_targets = tf.reduce_sum(unlabeled_targets)
       n_true_positives = tf.reduce_sum(true_positives)
       # () - () -> ()
       n_false_positives = n_predictions - n_true_positives
       n_false_negatives = n_targets - n_true_positives
       # (n x m x m) -> (n)
       n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
       n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
       # (n) x 2 -> ()
       n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
   
   #-----------------------------------------------------------
   # Populate the output dictionary
   outputs = {}
   outputs['recur_layer'] = recur_layer
   outputs['unlabeled_targets'] = unlabeled_targets
   outputs['probabilities'] = probabilities
   outputs['unlabeled_loss'] = loss
   outputs['loss'] = loss
   
   outputs['unlabeled_predictions'] = predictions
   outputs['n_unlabeled_true_positives'] = n_true_positives
   outputs['n_unlabeled_false_positives'] = n_false_positives
   outputs['n_unlabeled_false_negatives'] = n_false_negatives
   outputs['n_correct_unlabeled_sequences'] = n_correct_sequences
   outputs['predictions'] = predictions
   outputs['n_true_positives'] = n_true_positives
   outputs['n_false_positives'] = n_false_positives
   outputs['n_false_negatives'] = n_false_negatives
   outputs['n_correct_sequences'] = n_correct_sequences
   return outputs
Code example #13
File: token_vocabs.py Project: JZXXX/Semi-SDP
 def get_unfactored_bilinear_classifier(self, layer, unlabeled_targets, token_weights, variable_scope=None, reuse=False):
   """"""
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   hidden_func = self.hidden_func
   hidden_size = self.hidden_size
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.classname):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*hidden_size,
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[hidden_size],
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Classifier'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       bucket_size = tf.shape(layer)[-2]
       
       #-------------------------------------------------------
       # Process the targets
       # c (*) (n x m) + (n x m)
       #targets = len(self) * unlabeled_targets + self.placeholder
       targets = bucket_size * self.placeholder + unlabeled_targets
       
       #-------------------------------------------------------
       # Process the logits
       # (n x m x c x m) -> (n x m x cm)
       reshaped_logits = tf.reshape(logits, tf.stack([-1, bucket_size, bucket_size * len(self)]))
       
       #-------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m x cm) -> (n x m x cm)
       probabilities = tf.nn.softmax(reshaped_logits)
       # (n x m x cm) -> (n x m x c x m)
       probabilities = tf.reshape(probabilities, tf.stack([-1, bucket_size, len(self), bucket_size]))
       # (n x m x c x m) -> (n x m x m x c)
       probabilities = tf.transpose(probabilities, [0,1,3,2])
       # (n x m), (n x m x cm), (n x m) -> ()
       loss = tf.losses.sparse_softmax_cross_entropy(targets, reshaped_logits, weights=token_weights)
       
       #-------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x cm) -> (n x m)
       predictions = tf.argmax(reshaped_logits, axis=-1, output_type=tf.int32)
       # (n x m), () -> (n x m)
       unlabeled_predictions = tf.mod(predictions, bucket_size)
       # (n x m) (*) (n x m) -> (n x m)
       correct_tokens = nn.equal(predictions, targets) * token_weights
       correct_unlabeled_tokens = nn.equal(unlabeled_predictions, unlabeled_targets) * token_weights
       
       # (n x m) -> (n)
       tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
       # (n x m) -> (n)
       correct_tokens_per_sequence = tf.reduce_sum(correct_tokens, axis=-1)
       correct_unlabeled_tokens_per_sequence = tf.reduce_sum(correct_unlabeled_tokens, axis=-1)
       # (n), (n) -> (n)
       correct_sequences = nn.equal(tokens_per_sequence, correct_tokens_per_sequence)
       correct_unlabeled_sequences = nn.equal(tokens_per_sequence, correct_unlabeled_tokens_per_sequence)
       
   #-----------------------------------------------------------
   # Populate the output dictionary
   outputs = {}
   outputs['recur_layer'] = recur_layer
   outputs['unlabeled_targets'] = unlabeled_targets
   outputs['probabilities'] = probabilities
   outputs['unlabeled_loss'] = tf.constant(0.)
   outputs['loss'] = loss
   
   outputs['unlabeled_predictions'] = unlabeled_predictions
   outputs['label_predictions'] = predictions
   outputs['n_correct_unlabeled_tokens'] = tf.reduce_sum(correct_unlabeled_tokens)
   outputs['n_correct_unlabeled_sequences'] = tf.reduce_sum(correct_unlabeled_sequences)
   outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
   outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
   
   return outputs
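Example #13 folds the label id and the head index into one class id (bucket_size * label + head) and later recovers the head with a modulo. A tiny NumPy illustration of that encoding and decoding, with toy values:

import numpy as np

bucket_size = 10                          # padded sentence length of the bucket
labels = np.array([2, 5])                 # gold label ids
heads  = np.array([3, 7])                 # gold head indices
joint = bucket_size * labels + heads      # combined target fed to the softmax
print(joint)                              # [23 57]
print(joint % bucket_size)                # recovered heads:  [3 7]
print(joint // bucket_size)               # recovered labels: [2 5]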
Code example #14
File: token_vocabs.py Project: JZXXX/Semi-SDP
 def get_bilinear_classifier(self, layer, outputs, token_weights, variable_scope=None, reuse=False):
   """"""
   
   layer1 = layer2 = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   hidden_func = self.hidden_func
   hidden_size = self.hidden_size
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.classname):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*hidden_size,
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[hidden_size],
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Classifier'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       bucket_size = tf.shape(layer)[-2]
       
       #-------------------------------------------------------
       # Process the targets
       # (n x m)
       label_targets = self.placeholder
       unlabeled_predictions = outputs['unlabeled_predictions']
       unlabeled_targets = outputs['unlabeled_targets']
       # (n x m) -> (n x m x m)
       unlabeled_predictions = tf.one_hot(unlabeled_predictions, bucket_size)
       unlabeled_targets = tf.one_hot(unlabeled_targets, bucket_size)
       # (n x m x m) -> (n x m x m x 1)
       unlabeled_predictions = tf.expand_dims(unlabeled_predictions, axis=-1)
       unlabeled_targets = tf.expand_dims(unlabeled_targets, axis=-1)
       
       #-------------------------------------------------------
       # Process the logits
       # We use the gold heads for computing the label score and the predicted
       # heads for computing the unlabeled attachment score
       # (n x m x c x m) -> (n x m x m x c)
       transposed_logits = tf.transpose(logits, [0,1,3,2])
       # (n x m x c x m) * (n x m x m x 1) -> (n x m x c x 1)
       predicted_logits = tf.matmul(logits, unlabeled_predictions)
       oracle_logits = tf.matmul(logits, unlabeled_targets)
       # (n x m x c x 1) -> (n x m x c)
       predicted_logits = tf.squeeze(predicted_logits, axis=-1)
       oracle_logits = tf.squeeze(oracle_logits, axis=-1)
       
       #-------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m x m) -> (n x m x m x 1)
       head_probabilities = tf.expand_dims(tf.stop_gradient(outputs['probabilities']), axis=-1)
       # (n x m x m x c) -> (n x m x m x c)
       label_probabilities = tf.nn.softmax(transposed_logits)
       # (n x m), (n x m x c), (n x m) -> ()
       label_loss = tf.losses.sparse_softmax_cross_entropy(label_targets, oracle_logits, weights=token_weights)
       
       #-------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x c) -> (n x m)
       label_predictions = tf.argmax(predicted_logits, axis=-1, output_type=tf.int32)
       label_oracle_predictions = tf.argmax(oracle_logits, axis=-1, output_type=tf.int32)
       # (n x m) (*) (n x m) -> (n x m)
       correct_label_tokens = nn.equal(label_targets, label_oracle_predictions) * token_weights
       correct_tokens = nn.equal(label_targets, label_predictions) * outputs['correct_unlabeled_tokens']
       
       # (n x m) -> (n)
       tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
       # (n x m) -> (n)
       correct_label_tokens_per_sequence = tf.reduce_sum(correct_label_tokens, axis=-1)
       correct_tokens_per_sequence = tf.reduce_sum(correct_tokens, axis=-1)
       # (n), (n) -> (n)
       correct_label_sequences = nn.equal(tokens_per_sequence, correct_label_tokens_per_sequence)
       correct_sequences = nn.equal(tokens_per_sequence, correct_tokens_per_sequence)
   
   #-----------------------------------------------------------
   # Populate the output dictionary
   rho = self.loss_interpolation
   outputs['label_targets'] = label_targets
   # This way we can reconstruct the head_probabilities by exponentiating and summing along the last axis
   outputs['probabilities'] = label_probabilities * head_probabilities
   outputs['label_loss'] = label_loss
   outputs['loss'] = 2*((1-rho) * outputs['loss'] + rho * label_loss)
   
   outputs['label_predictions'] = label_predictions
   outputs['n_correct_label_tokens'] = tf.reduce_sum(correct_label_tokens)
   outputs['n_correct_label_sequences'] = tf.reduce_sum(correct_label_sequences)
   outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
   outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
   
   return outputs
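Example #14 selects each token's label logits at its predicted (or gold) head by multiplying the (c x m) logit slice with a one-hot head vector. A compact NumPy version of that selection, with random logits for illustration:

import numpy as np

n, m, c = 1, 4, 3
logits = np.random.randn(n, m, c, m)              # (n x m x c x m)
heads = np.array([[0, 2, 1, 3]])                  # (n x m) chosen head per token
one_hot = np.eye(m)[heads][..., None]             # (n x m x m x 1)
picked = np.matmul(logits, one_hot).squeeze(-1)   # (n x m x c): label scores at the chosen heads
print(picked.shape)                               # (1, 4, 3)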
Code example #15
File: token_vocabs.py Project: JZXXX/Semi-SDP
  def get_sampled_linear_classifier(self, layer, n_samples, token_weights=None, variable_scope=None, reuse=False):
    """"""
    
    recur_layer = layer
    hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
    with tf.variable_scope(variable_scope or self.classname):
      for i in six.moves.range(0, self.n_layers):
        with tf.variable_scope('FC-%d' % i):
          layer = classifiers.hidden(layer, self.hidden_size,
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
      batch_size, bucket_size, input_size = nn.get_sizes(layer)
      layer = nn.dropout(layer, hidden_keep_prob, noise_shape=[batch_size, 1, input_size])
      layer = nn.reshape(layer, [-1, input_size])


      with tf.variable_scope('Classifier'):
        # (s)
        samples, _, _ = tf.nn.log_uniform_candidate_sampler(
          nn.zeros([bucket_size,1], dtype=tf.int64),
          1, n_samples, unique=True, range_max=len(self))
        with tf.device('/gpu:1'):
          weights = tf.get_variable('Weights', shape=[len(self), input_size], initializer=tf.zeros_initializer)
          biases = tf.get_variable('Biases', shape=len(self), initializer=tf.zeros_initializer)
          tf.add_to_collection('non_save_variables', weights)
          tf.add_to_collection('non_save_variables', biases)

          # (nm x 1)
          targets = nn.reshape(self.placeholder, [-1, 1])
          # (1 x s)
          samples = tf.expand_dims(samples, 0)
          # (nm x s)
          samples = tf.to_int32(nn.tile(samples, [batch_size*bucket_size, 1]))
          # (nm x s)
          sample_weights = tf.to_float(nn.not_equal(samples, targets))
          # (nm x 1+s)
          cands = tf.stop_gradient(tf.concat([targets, samples], axis=-1))
          # (nm x 1), (nm x s) -> (nm x 1+s)
          cand_weights = tf.stop_gradient(tf.concat([nn.ones([batch_size*bucket_size, 1]), sample_weights], axis=-1))
          # (c x d), (nm x 1+s) -> (nm x 1+s x d)
          weights = tf.nn.embedding_lookup(weights, cands)
          # (c), (nm x 1+s) -> (nm x 1+s)
          biases = tf.nn.embedding_lookup(biases, cands)
          # (n x m x d) -> (nm x d x 1)
          layer_reshaped = nn.reshape(layer, [-1, input_size, 1])
          # (nm x 1+s x d) * (nm x d x 1) -> (nm x 1+s x 1)
          logits = tf.matmul(weights, layer_reshaped)
          # (nm x 1+s x 1) -> (nm x 1+s)
          logits = tf.squeeze(logits, -1)
   
          #-----------------------------------------------------------
          # Compute probabilities/cross entropy
          # (nm x 1+s)
          logits = logits - tf.reduce_max(logits, axis=-1, keep_dims=True)
          # (nm x 1+s)
          exp_logits = tf.exp(logits) * cand_weights
          # (nm x 1)
          exp_logit_sum = tf.reduce_sum(exp_logits, axis=-1, keep_dims=True)
          # (nm x 1+s)
          probabilities = exp_logits / exp_logit_sum
          # (nm x 1+s) -> (n x m x 1+s)
          probabilities = nn.reshape(probabilities, [batch_size, bucket_size, 1+n_samples])
          # (nm x 1+s) -> (n x m x 1+s)
          samples = nn.reshape(samples, [batch_size, bucket_size, 1+n_samples])
          # (nm x 1+s) -> (nm x 1), (nm x s)
          target_logits, _ = tf.split(logits, [1, n_samples], axis=1)
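          # Negative log-likelihood of the gold class:
          # -log softmax(target) = log(sum_j exp(logit_j)) - target_logit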
          # (nm x 1) - (nm x 1) -> (nm x 1)
          loss = tf.log(exp_logit_sum) - target_logits
          # (n x m) -> (nm x 1)
          token_weights1D = tf.to_float(nn.reshape(token_weights, [-1,1]))
          # (nm x 1) -> ()
          loss = tf.reduce_sum(loss*token_weights1D) / tf.reduce_sum(token_weights1D)
          
          #-----------------------------------------------------------
          # Compute predictions/accuracy
          # (nm x 1+s) -> (n x m x 1+s)
          logits = nn.reshape(logits, [batch_size, bucket_size, -1])
          # (n x m x 1+s) -> (n x m)
          predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
          # (n x m) (*) (n x m) -> (n x m)
          correct_tokens = nn.equal(predictions, 0) * token_weights
          # (n x m) -> (n)
          tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
          # (n x m) -> (n)
          correct_tokens_per_sequence = tf.reduce_sum(correct_tokens, axis=-1)
          # (n), (n) -> (n)
          correct_sequences = nn.equal(tokens_per_sequence, correct_tokens_per_sequence)
          
          #-----------------------------------------------------------
          # Populate the output dictionary
          outputs = {}
          outputs['recur_layer'] = recur_layer
          outputs['targets'] = targets
          outputs['probabilities'] = tf.tuple([samples, probabilities])
          outputs['loss'] = loss
          
          outputs['predictions'] = predictions
          outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
          outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
    return outputs
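A standalone sketch of the same candidate-sampling softmax on toy shapes (not taken from the repo; the names and sizes are illustrative, and here the looked-up bias term is also added to the logits):

import tensorflow as tf

n, m, d, c, s = 2, 5, 8, 1000, 20            # batch, length, hidden, classes, samples
layer = tf.random_normal([n * m, d])         # flattened hidden states
targets = tf.random_uniform([n * m, 1], maxval=c, dtype=tf.int64)

# draw s negative class ids; the gold id is prepended as candidate 0
samples, _, _ = tf.nn.log_uniform_candidate_sampler(
    tf.zeros([m, 1], dtype=tf.int64), 1, s, unique=True, range_max=c)
samples = tf.tile(tf.expand_dims(tf.to_int32(samples), 0), [n * m, 1])     # (nm x s)
cands = tf.concat([tf.to_int32(targets), samples], axis=-1)                # (nm x 1+s)

weights = tf.get_variable('W', [c, d])
biases = tf.get_variable('b', [c])
w = tf.nn.embedding_lookup(weights, cands)                                 # (nm x 1+s x d)
b = tf.nn.embedding_lookup(biases, cands)                                  # (nm x 1+s)
logits = tf.squeeze(tf.matmul(w, tf.expand_dims(layer, -1)), -1) + b       # (nm x 1+s)

# zero out sampled candidates that collide with the gold target
cand_weights = tf.concat(
    [tf.ones([n * m, 1]),
     tf.to_float(tf.not_equal(samples, tf.to_int32(targets)))], axis=-1)
logits -= tf.reduce_max(logits, axis=-1, keep_dims=True)
exp_logit_sum = tf.reduce_sum(tf.exp(logits) * cand_weights, axis=-1, keep_dims=True)
loss = tf.reduce_mean(tf.log(exp_logit_sum) - logits[:, :1])               # NLL of candidate 0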
Code example #16
0
    def get_bilinear_discriminator(self,
                                   layer,
                                   token_weights,
                                   variable_scope=None,
                                   reuse=False,
                                   debug=False):
        """"""
        #pdb.set_trace()
        outputs = {}
        recur_layer = layer
        hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
        add_linear = self.add_linear
        n_splits = 2 * (1 + self.linearize + self.distance)
        with tf.variable_scope(variable_scope or self.field):
            for i in six.moves.range(0, self.n_layers - 1):
                with tf.variable_scope('FC-%d' % i):  # hidden FC layers; this loop is skipped when n_layers == 1
                    layer = classifiers.hidden(
                        layer,
                        n_splits * self.hidden_size,
                        hidden_func=self.hidden_func,
                        hidden_keep_prob=hidden_keep_prob)
            with tf.variable_scope('FC-top'):  # top FC layer, split into n_splits separate hidden representations
                layers = classifiers.hiddens(layer,
                                             n_splits * [self.hidden_size],
                                             hidden_func=self.hidden_func,
                                             hidden_keep_prob=hidden_keep_prob)
            # layer1 holds the head representations, layer2 the dependent representations
            layer1, layer2 = layers.pop(0), layers.pop(0)
            if self.linearize:  # off by default for graph parsing
                lin_layer1, lin_layer2 = layers.pop(0), layers.pop(0)
            if self.distance:  # off by default for graph parsing
                dist_layer1, dist_layer2 = layers.pop(0), layers.pop(0)

            with tf.variable_scope('Discriminator'):
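                # Score every (head, dependent) token pair with a bilinear form,
                # producing an (n x m x m) matrix of edge logits.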
                if self.diagonal:
                    logits = classifiers.diagonal_bilinear_discriminator(
                        layer1,
                        layer2,
                        hidden_keep_prob=hidden_keep_prob,
                        add_linear=add_linear)
                    if self.linearize:
                        with tf.variable_scope('Linearization'):
                            lin_logits = classifiers.diagonal_bilinear_discriminator(
                                lin_layer1,
                                lin_layer2,
                                hidden_keep_prob=hidden_keep_prob,
                                add_linear=add_linear)
                    if self.distance:
                        with tf.variable_scope('Distance'):
                            dist_lamda = 1 + tf.nn.softplus(
                                classifiers.diagonal_bilinear_discriminator(
                                    dist_layer1,
                                    dist_layer2,
                                    hidden_keep_prob=hidden_keep_prob,
                                    add_linear=add_linear))
                else:
                    # default path: full (non-diagonal) bilinear discriminator
                    logits = classifiers.bilinear_discriminator(
                        layer1,
                        layer2,
                        hidden_keep_prob=hidden_keep_prob,
                        add_linear=add_linear)
                    if self.linearize:
                        with tf.variable_scope('Linearization'):
                            lin_logits = classifiers.bilinear_discriminator(
                                lin_layer1,
                                lin_layer2,
                                hidden_keep_prob=hidden_keep_prob,
                                add_linear=add_linear)
                    if self.distance:
                        with tf.variable_scope('Distance'):
                            dist_lamda = 1 + tf.nn.softplus(
                                classifiers.bilinear_discriminator(
                                    dist_layer1,
                                    dist_layer2,
                                    hidden_keep_prob=hidden_keep_prob,
                                    add_linear=add_linear))

                #-----------------------------------------------------------
                # Process the targets
                # (n x m x m) -> (n x m x m)
                # The targets form an (m x m) adjacency matrix per sentence,
                # with a nonzero entry wherever a head-dependent edge exists.
                unlabeled_targets = self.placeholder  # gold edge matrix fed in through the vocab's placeholder
                # position indices; only needed by the linearization/distance terms below
                shape = tf.shape(layer1)
                batch_size, bucket_size = shape[0], shape[1]
                # (1 x m)
                ids = tf.expand_dims(tf.range(bucket_size), 0)
                # (1 x m) -> (1 x 1 x m)
                head_ids = tf.expand_dims(ids, -2)
                # (1 x m) -> (1 x m x 1)
                dep_ids = tf.expand_dims(ids, -1)

                # skipped unless linearization is enabled
                if self.linearize:  # bias the edge scores toward the observed head direction
                    # Wherever the head is to the left
                    # (n x m x m), (1 x m x 1) -> (n x m x m)
                    lin_targets = tf.to_float(
                        tf.less(unlabeled_targets, dep_ids))
                    # cross-entropy of the linearization of each i,j pair
                    # (1 x 1 x m), (1 x m x 1) -> (n x m x m)
                    lin_ids = tf.tile(tf.less(head_ids, dep_ids),
                                      [batch_size, 1, 1])
                    # (n x 1 x m), (n x m x 1) -> (n x m x m)
                    lin_xent = -tf.nn.softplus(
                        tf.where(lin_ids, -lin_logits, lin_logits))
                    # add the cross-entropy to the logits
                    # (n x m x m), (n x m x m) -> (n x m x m)
                    logits += tf.stop_gradient(lin_xent)
                if self.distance:
                    # (n x m x m) - (1 x m x 1) -> (n x m x m)
                    dist_targets = tf.abs(unlabeled_targets - dep_ids)
                    # KL-divergence of the distance of each i,j pair
                    # (1 x 1 x m) - (1 x m x 1) -> (n x m x m)
                    dist_ids = tf.to_float(
                        tf.tile(tf.abs(head_ids - dep_ids),
                                [batch_size, 1, 1])) + 1e-12
                    # (n x m x m), (n x m x m) -> (n x m x m)
                    #dist_kld = (dist_ids * tf.log(dist_lamda / dist_ids) + dist_ids - dist_lamda)
                    dist_kld = -tf.log((dist_ids - dist_lamda)**2 / 2 + 1)
                    # add the KL-divergence to the logits
                    # (n x m x m), (n x m x m) -> (n x m x m)
                    logits += tf.stop_gradient(dist_kld)

                if debug:
                    outputs['printdata'] = {}
                    outputs['printdata']['logits'] = logits
                #-----------------------------------------------------------
                # Compute probabilities/cross entropy
                # (n x m x m) -> (n x m x m)
                probabilities = tf.nn.sigmoid(logits) * tf.to_float(
                    token_weights)  # token_weights masks out padding positions
                # (n x m x m), (n x m x m), (n x m x m) -> ()
                loss = tf.losses.sigmoid_cross_entropy(
                    unlabeled_targets, logits,
                    weights=token_weights)  # per-edge sigmoid cross-entropy (no label smoothing)
                n_tokens = tf.to_float(tf.reduce_sum(token_weights))
                if self.linearize:
                    lin_target_xent = lin_xent * unlabeled_targets
                    loss -= tf.reduce_sum(
                        lin_target_xent *
                        tf.to_float(token_weights)) / (n_tokens + 1e-12)
                if self.distance:
                    dist_target_kld = dist_kld * unlabeled_targets
                    loss -= tf.reduce_sum(
                        dist_target_kld *
                        tf.to_float(token_weights)) / (n_tokens + 1e-12)

                #-----------------------------------------------------------
                # Compute predictions/accuracy
                # precision/recall
                # (n x m x m) -> (n x m x m)
                predictions = nn.greater(
                    logits, 0,
                    dtype=tf.int32) * token_weights  # predict an edge wherever its logit is positive
                # if self.compare_precision:
                # 		#pdb.set_trace()
                # 		# (n x m x m) -> (n x m)
                # 		temp_predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # 		# (n x m) -> (n x m x m)
                # 		cond = tf.equal(logits, tf.expand_dims(tf.reduce_max(logits,-1),-1))
                # 		predictions = tf.where(cond, tf.cast(cond,tf.float32), tf.zeros_like(logits))
                # 		predictions = tf.cast(predictions,tf.int32) * token_weights
                # 		# # (n x m) (*) (n x m) -> (n x m)
                # 		# n_true_positives = tf.reduce_sum(nn.equal(tf.argmax(unlabeled_targets,axis=-1, output_type=tf.int32), temp_predictions) * self.token_weights)
                # 		# n_predictions_temp = tf.reduce_sum(temp_predictions)
                # 		# n_false_positives = n_predictions_temp - n_true_positives

                # (n x m x m) (*) (n x m x m) -> (n x m x m)
                true_positives = predictions * unlabeled_targets
                # (n x m x m) -> ()
                n_predictions = tf.reduce_sum(predictions)
                n_targets = tf.reduce_sum(unlabeled_targets)
                n_true_positives = tf.reduce_sum(true_positives)
                # () - () -> ()
                n_false_positives = n_predictions - n_true_positives
                n_false_negatives = n_targets - n_true_positives
                # (n x m x m) -> (n)
                n_targets_per_sequence = tf.reduce_sum(unlabeled_targets,
                                                       axis=[1, 2])
                n_true_positives_per_sequence = tf.reduce_sum(true_positives,
                                                              axis=[1, 2])
                # (n) x 2 -> ()
                n_correct_sequences = tf.reduce_sum(
                    nn.equal(n_true_positives_per_sequence,
                             n_targets_per_sequence))

        #-----------------------------------------------------------
        # Populate the output dictionary
        outputs['unlabeled_targets'] = unlabeled_targets
        outputs['probabilities'] = probabilities
        outputs['unlabeled_loss'] = loss
        outputs['loss'] = loss
        if debug:
            outputs['temp_targets'] = tf.argmax(unlabeled_targets,
                                                axis=-1,
                                                output_type=tf.int32)
            # outputs['temp_predictions'] = temp_predictions
        outputs['unlabeled_predictions'] = predictions
        outputs['n_unlabeled_true_positives'] = n_true_positives
        outputs['n_unlabeled_false_positives'] = n_false_positives
        outputs['n_unlabeled_false_negatives'] = n_false_negatives
        outputs['n_correct_unlabeled_sequences'] = n_correct_sequences
        outputs['predictions'] = predictions
        outputs['n_true_positives'] = n_true_positives
        outputs['n_false_positives'] = n_false_positives
        outputs['n_false_negatives'] = n_false_negatives
        outputs['n_correct_sequences'] = n_correct_sequences
        return outputs
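The counts returned above are enough to recover unlabeled precision, recall, and F1 once they have been summed over batches; a small standalone helper (not part of the repo) for that bookkeeping:

def unlabeled_prf(n_true_positives, n_false_positives, n_false_negatives):
    """Turn raw edge counts into (precision, recall, F1)."""
    precision = n_true_positives / max(n_true_positives + n_false_positives, 1)
    recall = n_true_positives / max(n_true_positives + n_false_negatives, 1)
    f1 = 2 * precision * recall / max(precision + recall, 1e-12)
    return precision, recall, f1

# e.g. after fetching and accumulating the counts from the session:
# p, r, f = unlabeled_prf(tp_total, fp_total, fn_total)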