def build_graph(self, input_network_outputs={}, reuse=True):
  """"""
  
  with tf.variable_scope('Embeddings'):
    input_tensors = [input_vocab.get_input_tensor(reuse=reuse) for input_vocab in self.input_vocabs]
    for input_network, output in input_network_outputs:
      with tf.variable_scope(input_network.classname):
        input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
    layer = tf.concat(input_tensors, 2)
  n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keepdims=True))
  batch_size, bucket_size, input_size = nn.get_sizes(layer)
  # Rescale so the expected magnitude of each token vector is unchanged when
  # some of its concatenated input channels have been dropped to zero.
  layer *= input_size / (n_nonzero + tf.constant(1e-12))
  
  token_weights = nn.greater(self.id_vocab.placeholder, 0, dtype=tf.int32)
  tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
  n_tokens = tf.reduce_sum(tokens_per_sequence)
  n_sequences = tf.count_nonzero(tokens_per_sequence)
  seq_lengths = tokens_per_sequence + 1
  tokens = {'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            'n_sequences': n_sequences}
  
  conv_keep_prob = 1. if reuse else self.conv_keep_prob
  recur_keep_prob = 1. if reuse else self.recur_keep_prob
  recur_include_prob = 1. if reuse else self.recur_include_prob
  
  # The time axis of the (n x m x d) layer is 1, not 2.
  rev_layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=1)
  for i in six.moves.range(self.n_layers):
    conv_width = self.first_layer_conv_width if not i else self.conv_width
    with tf.variable_scope('RNN_FW-{}'.format(i)):
      layer, _ = recurrent.directed_RNN(layer, self.recur_size, seq_lengths,
                                        bidirectional=False,
                                        recur_cell=self.recur_cell,
                                        conv_width=conv_width,
                                        recur_func=self.recur_func,
                                        conv_keep_prob=conv_keep_prob,
                                        recur_include_prob=recur_include_prob,
                                        recur_keep_prob=recur_keep_prob,
                                        cifg=self.cifg,
                                        highway=self.highway,
                                        highway_func=self.highway_func)
    if self.bidirectional:
      with tf.variable_scope('RNN_BW-{}'.format(i)):
        rev_layer, _ = recurrent.directed_RNN(rev_layer, self.recur_size, seq_lengths,
                                              bidirectional=False,
                                              recur_cell=self.recur_cell,
                                              conv_width=conv_width,
                                              recur_func=self.recur_func,
                                              conv_keep_prob=conv_keep_prob,
                                              recur_keep_prob=recur_keep_prob,
                                              recur_include_prob=recur_include_prob,
                                              cifg=self.cifg,
                                              highway=self.highway,
                                              highway_func=self.highway_func)
    
    ones = tf.ones([batch_size, 1, 1])
    with tf.variable_scope('RNN_FW-{}/RNN/Loop'.format(i), reuse=True):
      fw_initial_state = tf.get_variable('Initial_state')
      n_splits = fw_initial_state.get_shape().as_list()[-1] // self.recur_size
      fw_initial_state = tf.split(fw_initial_state, n_splits, -1)[0]
    start_token = ones * fw_initial_state
    # Shift the forward outputs one step to the right, so position t sees the
    # state summarizing tokens 0..t-1, with the learned initial state as <S>.
    layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=1)
    layer = layer[:, 1:]
    layer = tf.reverse_sequence(layer, seq_lengths - 1, seq_axis=1)
    layer = tf.concat([start_token, layer], axis=1)
    
    if self.bidirectional:
      with tf.variable_scope('RNN_BW-{}/RNN/Loop'.format(i), reuse=True):
        bw_initial_state = tf.get_variable('Initial_state')
        n_splits = bw_initial_state.get_shape().as_list()[-1] // self.recur_size
        bw_initial_state = tf.split(bw_initial_state, n_splits, -1)[0]
      stop_token = ones * bw_initial_state
      # Shift the backward outputs one step to the left; the concat must use
      # rev_layer (the backward outputs), not the already-shifted forward layer.
      rev_layer = tf.concat([stop_token, rev_layer], axis=1)
      rev_layer = tf.reverse_sequence(rev_layer, seq_lengths + 1, seq_axis=1)[:, 1:]
      
      if self.bilin:
        layer = tf.concat([layer * rev_layer, layer, rev_layer], axis=2)
      else:
        layer = tf.concat([layer, rev_layer], axis=2)
  
  output_vocabs = {vocab.field: vocab for vocab in self.output_vocabs}
  outputs = {}
  with tf.variable_scope('Classifiers'):
    if 'form' in output_vocabs:
      vocab = output_vocabs['form']
      outputs[vocab.field] = vocab.get_sampled_linear_classifier(
        layer, self.n_samples,
        token_weights=token_weights,
        reuse=reuse)
      self._evals.add('form')
    if 'upos' in output_vocabs:
      vocab = output_vocabs['upos']
      outputs[vocab.field] = vocab.get_linear_classifier(
        layer,
        token_weights=token_weights,
        reuse=reuse)
      self._evals.add('upos')
    if 'xpos' in output_vocabs:
      vocab = output_vocabs['xpos']
      outputs[vocab.field] = vocab.get_linear_classifier(
        layer,
        token_weights=token_weights,
        reuse=reuse)
      self._evals.add('xpos')
  return outputs, tokens

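# Minimal numpy illustration (not part of the graph) of the nonzero-feature
# rescaling used in build_graph above: when some concatenated embedding
# channels are dropped to zero, multiplying by input_size / n_nonzero keeps
# the expected magnitude of each token vector roughly constant. Values here
# are made up for demonstration.
import numpy as np

layer = np.array([[1., 2., 0., 4.]])  # one token, 4 channels, one zeroed out
n_nonzero = np.count_nonzero(layer, axis=-1, keepdims=True).astype(float)
input_size = layer.shape[-1]
rescaled = layer * input_size / (n_nonzero + 1e-12)
print(rescaled)  # [[1.333 2.667 0. 5.333]]: the surviving channels are scaled up by 4/3
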
def get_bilinear_discriminator(self, layer, token_weights, variable_scope=None, reuse=False):
  """"""
  
  recur_layer = layer
  hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
  add_linear = self.add_linear
  n_splits = 2 * (1 + self.linearize + self.distance)
  with tf.variable_scope(variable_scope or self.field):
    for i in six.moves.range(0, self.n_layers - 1):
      with tf.variable_scope('FC-%d' % i):
        layer = classifiers.hidden(layer, n_splits * self.hidden_size,
                                   hidden_func=self.hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
    with tf.variable_scope('FC-top'):
      layers = classifiers.hiddens(layer, n_splits * [self.hidden_size],
                                   hidden_func=self.hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
    layer1, layer2 = layers.pop(0), layers.pop(0)
    if self.linearize:
      lin_layer1, lin_layer2 = layers.pop(0), layers.pop(0)
    if self.distance:
      dist_layer1, dist_layer2 = layers.pop(0), layers.pop(0)
    
    with tf.variable_scope('Discriminator'):
      if self.diagonal:
        logits = classifiers.diagonal_bilinear_discriminator(
          layer1, layer2,
          hidden_keep_prob=hidden_keep_prob,
          add_linear=add_linear)
        if self.linearize:
          with tf.variable_scope('Linearization'):
            lin_logits = classifiers.diagonal_bilinear_discriminator(
              lin_layer1, lin_layer2,
              hidden_keep_prob=hidden_keep_prob,
              add_linear=add_linear)
        if self.distance:
          with tf.variable_scope('Distance'):
            dist_lamda = 1 + tf.nn.softplus(classifiers.diagonal_bilinear_discriminator(
              dist_layer1, dist_layer2,
              hidden_keep_prob=hidden_keep_prob,
              add_linear=add_linear))
      else:
        logits = classifiers.bilinear_discriminator(
          layer1, layer2,
          hidden_keep_prob=hidden_keep_prob,
          add_linear=add_linear)
        if self.linearize:
          with tf.variable_scope('Linearization'):
            lin_logits = classifiers.bilinear_discriminator(
              lin_layer1, lin_layer2,
              hidden_keep_prob=hidden_keep_prob,
              add_linear=add_linear)
        if self.distance:
          with tf.variable_scope('Distance'):
            dist_lamda = 1 + tf.nn.softplus(classifiers.bilinear_discriminator(
              dist_layer1, dist_layer2,
              hidden_keep_prob=hidden_keep_prob,
              add_linear=add_linear))
  
  #-----------------------------------------------------------
  # Process the targets
  # (n x m x m) -> (n x m x m)
  unlabeled_targets = self.placeholder
  shape = tf.shape(layer1)
  batch_size, bucket_size = shape[0], shape[1]
  # (1 x m)
  ids = tf.expand_dims(tf.range(bucket_size), 0)
  # (1 x m) -> (1 x 1 x m)
  head_ids = tf.expand_dims(ids, -2)
  # (1 x m) -> (1 x m x 1)
  dep_ids = tf.expand_dims(ids, -1)
  if self.linearize:
    # Wherever the head is to the left
    # (n x m x m), (1 x m x 1) -> (n x m x m)
    lin_targets = tf.to_float(tf.less(unlabeled_targets, dep_ids))
    # cross-entropy of the linearization of each i,j pair
    # (1 x 1 x m), (1 x m x 1) -> (n x m x m)
    lin_ids = tf.tile(tf.less(head_ids, dep_ids), [batch_size, 1, 1])
    # (n x 1 x m), (n x m x 1) -> (n x m x m)
    lin_xent = -tf.nn.softplus(tf.where(lin_ids, -lin_logits, lin_logits))
    # add the cross-entropy to the logits
    # (n x m x m), (n x m x m) -> (n x m x m)
    logits += tf.stop_gradient(lin_xent)
  if self.distance:
    # (n x m x m) - (1 x m x 1) -> (n x m x m)
    dist_targets = tf.abs(unlabeled_targets - dep_ids)
    # KL-divergence of the distance of each i,j pair
    # (1 x 1 x m) - (1 x m x 1) -> (n x m x m)
    dist_ids = tf.to_float(tf.tile(tf.abs(head_ids - dep_ids), [batch_size, 1, 1])) + 1e-12
    # (n x m x m), (n x m x m) -> (n x m x m)
    #dist_kld = (dist_ids * tf.log(dist_lamda / dist_ids) + dist_ids - dist_lamda)
    dist_kld = -tf.log((dist_ids - dist_lamda)**2 / 2 + 1)
    # add the KL-divergence to the logits
    # (n x m x m), (n x m x m) -> (n x m x m)
    logits += tf.stop_gradient(dist_kld)
  
  #-----------------------------------------------------------
  # Compute probabilities/cross entropy
  # (n x m x m) -> (n x m x m)
  probabilities = tf.nn.sigmoid(logits) * tf.to_float(token_weights)
  # (n x m x m), (n x m x m), (n x m x m) -> ()
  loss = tf.losses.sigmoid_cross_entropy(unlabeled_targets, logits, weights=token_weights)
  n_tokens = tf.to_float(tf.reduce_sum(token_weights))
  if self.linearize:
    lin_target_xent = lin_xent * unlabeled_targets
    loss -= tf.reduce_sum(lin_target_xent * tf.to_float(token_weights)) / (n_tokens + 1e-12)
  if self.distance:
    dist_target_kld = dist_kld * unlabeled_targets
    loss -= tf.reduce_sum(dist_target_kld * tf.to_float(token_weights)) / (n_tokens + 1e-12)
  
  #-----------------------------------------------------------
  # Compute predictions/accuracy
  # (n x m x m) -> (n x m x m)
  predictions = nn.greater(logits, 0, dtype=tf.int32) * token_weights
  # (n x m x m) (*) (n x m x m) -> (n x m x m)
  true_positives = predictions * unlabeled_targets
  # (n x m x m) -> ()
  n_predictions = tf.reduce_sum(predictions)
  n_targets = tf.reduce_sum(unlabeled_targets)
  n_true_positives = tf.reduce_sum(true_positives)
  # () - () -> ()
  n_false_positives = n_predictions - n_true_positives
  n_false_negatives = n_targets - n_true_positives
  # (n x m x m) -> (n)
  n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1, 2])
  n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1, 2])
  # (n) x 2 -> ()
  n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
  
  #-----------------------------------------------------------
  # Populate the output dictionary
  outputs = {}
  outputs['unlabeled_targets'] = unlabeled_targets
  outputs['probabilities'] = probabilities
  outputs['unlabeled_loss'] = loss
  outputs['loss'] = loss
  outputs['unlabeled_predictions'] = predictions
  outputs['n_unlabeled_true_positives'] = n_true_positives
  outputs['n_unlabeled_false_positives'] = n_false_positives
  outputs['n_unlabeled_false_negatives'] = n_false_negatives
  outputs['n_correct_unlabeled_sequences'] = n_correct_sequences
  outputs['predictions'] = predictions
  outputs['n_true_positives'] = n_true_positives
  outputs['n_false_positives'] = n_false_positives
  outputs['n_false_negatives'] = n_false_negatives
  outputs['n_correct_sequences'] = n_correct_sequences
  return outputs

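# Numpy sanity check (illustrative, not part of the graph) for the identity
# behind lin_xent above: log(sigmoid(x)) == -softplus(-x). The tf.where call
# therefore selects the sign of the logit that scores each (dep, head) cell's
# observed linear order, and lin_xent is that direction's log-probability.
import numpy as np

def softplus(x):
    # Numerically stable log(1 + exp(x)).
    return np.log1p(np.exp(-np.abs(x))) + np.maximum(x, 0)

x = np.linspace(-5, 5, 11)
assert np.allclose(np.log(1 / (1 + np.exp(-x))), -softplus(-x))
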
def get_unfactored_bilinear_classifier(self, layer, token_weights, variable_scope=None, reuse=False):
  """"""
  
  recur_layer = layer
  hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
  add_linear = self.add_linear
  with tf.variable_scope(variable_scope or self.field):
    for i in six.moves.range(0, self.n_layers-1):
      with tf.variable_scope('FC-%d' % i):
        layer = classifiers.hidden(layer, 2*self.hidden_size,
                                   hidden_func=self.hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
    # 'FC-top' takes no format argument; classifiers.hiddens (not hidden)
    # returns the list of split hidden layers.
    with tf.variable_scope('FC-top'):
      layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                   hidden_func=self.hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
    layer1, layer2 = layers.pop(0), layers.pop(0)
    
    with tf.variable_scope('Classifier'):
      if self.diagonal:
        logits = classifiers.diagonal_bilinear_classifier(
          layer1, layer2, len(self),
          hidden_keep_prob=hidden_keep_prob,
          add_linear=add_linear)
      else:
        logits = classifiers.bilinear_classifier(
          layer1, layer2, len(self),
          hidden_keep_prob=hidden_keep_prob,
          add_linear=add_linear)
  
  #-----------------------------------------------------------
  # Process the targets
  targets = self.placeholder
  # (n x m x m) -> (n x m x m)
  unlabeled_targets = nn.greater(targets, 0)
  
  #-----------------------------------------------------------
  # Process the logits
  # (n x m x c x m) -> (n x m x m x c)
  transposed_logits = tf.transpose(logits, [0, 1, 3, 2])
  
  #-----------------------------------------------------------
  # Compute probabilities/cross entropy
  # (n x m x m x c) -> (n x m x m x c)
  probabilities = tf.nn.softmax(transposed_logits) * tf.to_float(tf.expand_dims(token_weights, axis=-1))
  # (n x m x m), (n x m x m x c), (n x m x m) -> ()
  loss = tf.losses.sparse_softmax_cross_entropy(targets, transposed_logits, weights=token_weights)
  
  #-----------------------------------------------------------
  # Compute predictions/accuracy
  # (n x m x m x c) -> (n x m x m)
  predictions = tf.argmax(transposed_logits, axis=-1, output_type=tf.int32) * token_weights
  # (n x m x m) -> (n x m x m)
  unlabeled_predictions = nn.greater(predictions, 0)
  # (n x m x m) (*) (n x m x m) -> (n x m x m)
  unlabeled_true_positives = unlabeled_predictions * unlabeled_targets
  true_positives = nn.equal(targets, predictions) * unlabeled_true_positives
  # (n x m x m) -> ()
  n_predictions = tf.reduce_sum(unlabeled_predictions)
  n_targets = tf.reduce_sum(unlabeled_targets)
  n_unlabeled_true_positives = tf.reduce_sum(unlabeled_true_positives)
  n_true_positives = tf.reduce_sum(true_positives)
  # () - () -> ()
  n_unlabeled_false_positives = n_predictions - n_unlabeled_true_positives
  n_unlabeled_false_negatives = n_targets - n_unlabeled_true_positives
  n_false_positives = n_predictions - n_true_positives
  n_false_negatives = n_targets - n_true_positives
  # (n x m x m) -> (n)
  n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1, 2])
  n_unlabeled_true_positives_per_sequence = tf.reduce_sum(unlabeled_true_positives, axis=[1, 2])
  n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1, 2])
  # (n) x 2 -> ()
  n_correct_unlabeled_sequences = tf.reduce_sum(nn.equal(n_unlabeled_true_positives_per_sequence, n_targets_per_sequence))
  n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
  
  #-----------------------------------------------------------
  # Populate the output dictionary
  outputs = {}
  outputs['recur_layer'] = recur_layer
  outputs['unlabeled_targets'] = unlabeled_targets
  outputs['label_targets'] = self.placeholder
  outputs['probabilities'] = probabilities
  outputs['unlabeled_loss'] = tf.constant(0.)
  outputs['loss'] = loss
  outputs['unlabeled_predictions'] = unlabeled_predictions
  outputs['label_predictions'] = predictions
  outputs['n_unlabeled_true_positives'] = n_unlabeled_true_positives
  outputs['n_unlabeled_false_positives'] = n_unlabeled_false_positives
  outputs['n_unlabeled_false_negatives'] = n_unlabeled_false_negatives
  outputs['n_correct_unlabeled_sequences'] = n_correct_unlabeled_sequences
  outputs['n_true_positives'] = n_true_positives
  outputs['n_false_positives'] = n_false_positives
  outputs['n_false_negatives'] = n_false_negatives
  outputs['n_correct_sequences'] = n_correct_sequences
  return outputs

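# Numpy sketch (toy values, not part of the graph) of the labeled/unlabeled
# counting scheme above: a labeled true positive requires both that an edge is
# predicted where a gold edge exists (unlabeled match) and that its label
# agrees, mirroring nn.equal(targets, predictions) * unlabeled_true_positives.
import numpy as np

targets     = np.array([[0, 3, 0],
                        [2, 0, 0],
                        [0, 0, 1]])  # 0 = no edge, >0 = edge label id
predictions = np.array([[0, 3, 1],
                        [2, 0, 0],
                        [0, 0, 2]])

unlabeled_targets = (targets > 0).astype(int)
unlabeled_predictions = (predictions > 0).astype(int)
unlabeled_tp = unlabeled_predictions * unlabeled_targets
labeled_tp = (targets == predictions).astype(int) * unlabeled_tp
print(unlabeled_tp.sum(), labeled_tp.sum())  # 3 unlabeled matches, 2 labeled
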
def get_bilinear_discriminator(self, layer, token_weights, variable_scope=None, reuse=False):
  """"""
  
  recur_layer = layer
  hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
  add_linear = self.add_linear
  with tf.variable_scope(variable_scope or self.classname):
    for i in six.moves.range(0, self.n_layers-1):
      with tf.variable_scope('FC-%d' % i):
        layer = classifiers.hidden(layer, 2*self.hidden_size,
                                   hidden_func=self.hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
    # 'FC-top' takes no format argument.
    with tf.variable_scope('FC-top'):
      layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                   hidden_func=self.hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
    layer1, layer2 = layers.pop(0), layers.pop(0)
    
    with tf.variable_scope('Discriminator'):
      if self.diagonal:
        logits = classifiers.diagonal_bilinear_discriminator(
          layer1, layer2,
          hidden_keep_prob=hidden_keep_prob,
          add_linear=add_linear)
      else:
        logits = classifiers.bilinear_discriminator(
          layer1, layer2,
          hidden_keep_prob=hidden_keep_prob,
          add_linear=add_linear)
  
  #-----------------------------------------------------------
  # Process the targets
  # (n x m x m) -> (n x m x m)
  unlabeled_targets = nn.greater(self.placeholder, 0)
  
  #-----------------------------------------------------------
  # Compute probabilities/cross entropy
  # (n x m x m) -> (n x m x m)
  probabilities = tf.nn.sigmoid(logits)
  # (n x m x m), (n x m x m), (n x m x m) -> ()
  loss = tf.losses.sigmoid_cross_entropy(unlabeled_targets, logits, weights=token_weights)
  
  #-----------------------------------------------------------
  # Compute predictions/accuracy
  # (n x m x m) -> (n x m x m)
  predictions = nn.greater(logits, 0, dtype=tf.int32) * token_weights
  # (n x m x m) (*) (n x m x m) -> (n x m x m)
  true_positives = predictions * unlabeled_targets
  # (n x m x m) -> ()
  n_predictions = tf.reduce_sum(predictions)
  n_targets = tf.reduce_sum(unlabeled_targets)
  n_true_positives = tf.reduce_sum(true_positives)
  # () - () -> ()
  n_false_positives = n_predictions - n_true_positives
  n_false_negatives = n_targets - n_true_positives
  # (n x m x m) -> (n)
  n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1, 2])
  n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1, 2])
  # (n) x 2 -> ()
  n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
  
  #-----------------------------------------------------------
  # Populate the output dictionary
  outputs = {}
  outputs['recur_layer'] = recur_layer
  outputs['unlabeled_targets'] = unlabeled_targets
  outputs['probabilities'] = probabilities
  outputs['unlabeled_loss'] = loss
  outputs['loss'] = loss
  outputs['unlabeled_predictions'] = predictions
  outputs['n_unlabeled_true_positives'] = n_true_positives
  outputs['n_unlabeled_false_positives'] = n_false_positives
  outputs['n_unlabeled_false_negatives'] = n_false_negatives
  outputs['n_correct_unlabeled_sequences'] = n_correct_sequences
  outputs['predictions'] = predictions
  outputs['n_true_positives'] = n_true_positives
  outputs['n_false_positives'] = n_false_positives
  outputs['n_false_negatives'] = n_false_negatives
  outputs['n_correct_sequences'] = n_correct_sequences
  return outputs

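# Quick numpy check (illustrative only) that thresholding logits at 0, as the
# `predictions` line above does, is equivalent to thresholding the sigmoid
# probabilities at 0.5.
import numpy as np

logits = np.array([-2.0, -0.1, 0.3, 4.0])
assert ((logits > 0) == (1 / (1 + np.exp(-logits)) > 0.5)).all()
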
def build_graph(self, input_network_outputs={}, reuse=True):
  """"""
  
  with tf.variable_scope('Embeddings'):
    if self.sum_pos: # TODO this should be done with a `POSMultivocab`
      pos_vocabs = list(filter(lambda x: 'POS' in x.classname, self.input_vocabs))
      pos_tensors = [input_vocab.get_input_tensor(embed_keep_prob=1, reuse=reuse) for input_vocab in pos_vocabs]
      non_pos_tensors = [input_vocab.get_input_tensor(reuse=reuse) for input_vocab in self.input_vocabs if 'POS' not in input_vocab.classname]
      #pos_tensors = [tf.Print(pos_tensor, [pos_tensor]) for pos_tensor in pos_tensors]
      #non_pos_tensors = [tf.Print(non_pos_tensor, [non_pos_tensor]) for non_pos_tensor in non_pos_tensors]
      if pos_tensors:
        pos_tensors = tf.add_n(pos_tensors)
        if not reuse:
          pos_tensors = [pos_vocabs[0].drop_func(pos_tensors, pos_vocabs[0].embed_keep_prob)]
        else:
          pos_tensors = [pos_tensors]
      input_tensors = non_pos_tensors + pos_tensors
    else:
      input_tensors = [input_vocab.get_input_tensor(reuse=reuse) for input_vocab in self.input_vocabs]
    for input_network, output in input_network_outputs:
      with tf.variable_scope(input_network.classname):
        input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
    layer = tf.concat(input_tensors, 2)
  
  n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keepdims=True))
  batch_size, bucket_size, input_size = nn.get_sizes(layer)
  layer *= input_size / (n_nonzero + tf.constant(1e-12))
  
  token_weights = nn.greater(self.id_vocab.placeholder, 0)
  tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
  n_tokens = tf.reduce_sum(tokens_per_sequence)
  n_sequences = tf.count_nonzero(tokens_per_sequence)
  seq_lengths = tokens_per_sequence + 1
  
  # root_weights re-admits position 0 (ROOT), which token_weights masks out,
  # so ROOT can serve as a candidate head in the (n x m x m) mask.
  root_weights = token_weights + (1 - nn.greater(tf.range(bucket_size), 0))
  token_weights3D = tf.expand_dims(token_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
  tokens = {'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            'token_weights3D': token_weights3D,
            'n_sequences': n_sequences}
  
  conv_keep_prob = 1. if reuse else self.conv_keep_prob
  recur_keep_prob = 1. if reuse else self.recur_keep_prob
  recur_include_prob = 1. if reuse else self.recur_include_prob
  
  for i in six.moves.range(self.n_layers):
    conv_width = self.first_layer_conv_width if not i else self.conv_width
    with tf.variable_scope('RNN-{}'.format(i)):
      layer, _ = recurrent.directed_RNN(layer, self.recur_size, seq_lengths,
                                        bidirectional=self.bidirectional,
                                        recur_cell=self.recur_cell,
                                        conv_width=conv_width,
                                        recur_func=self.recur_func,
                                        conv_keep_prob=conv_keep_prob,
                                        recur_include_prob=recur_include_prob,
                                        recur_keep_prob=recur_keep_prob,
                                        cifg=self.cifg,
                                        highway=self.highway,
                                        highway_func=self.highway_func,
                                        bilin=self.bilin)
  
  output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
  outputs = {}
  with tf.variable_scope('Classifiers'):
    if 'semrel' in output_fields:
      vocab = output_fields['semrel']
      head_vocab = output_fields['semhead']
      if vocab.factorized:
        with tf.variable_scope('Unlabeled'):
          unlabeled_outputs = head_vocab.get_bilinear_discriminator(
            layer,
            token_weights=token_weights3D,
            reuse=reuse)
        with tf.variable_scope('Labeled'):
          labeled_outputs = vocab.get_bilinear_classifier(
            layer, unlabeled_outputs,
            token_weights=token_weights3D,
            reuse=reuse)
      else:
        labeled_outputs = vocab.get_unfactored_bilinear_classifier(
          layer, head_vocab.placeholder,
          token_weights=token_weights3D,
          reuse=reuse)
      outputs['semgraph'] = labeled_outputs
      self._evals.add('semgraph')
    elif 'semhead' in output_fields:
      vocab = output_fields['semhead']
      outputs[vocab.classname] = vocab.get_bilinear_classifier(
        layer,
        token_weights=token_weights3D,
        reuse=reuse)
      self._evals.add('semhead')
  return outputs, tokens

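# Numpy sketch (toy sizes, not part of the graph) of the mask construction in
# build_graph above: one sequence [ROOT, tok, tok, pad] shows how
# token_weights3D admits (dependent, head) pairs where the dependent is a real
# token and the head is either ROOT or a real token.
import numpy as np

bucket_size = 4
ids = np.array([[0, 1, 2, 0]])                      # ROOT, two tokens, one pad
token_weights = (ids > 0).astype(int)               # [[0 1 1 0]]
root_weights = token_weights + (np.arange(bucket_size) == 0).astype(int)
token_weights3D = token_weights[:, :, None] * root_weights[:, None, :]
print(token_weights3D[0])  # rows = dependents, columns = candidate heads
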
def build_graph(self, input_network_outputs={}, reuse=True):
  """"""
  
  with tf.variable_scope('Embeddings'):
    input_tensors = [input_vocab.get_input_tensor(reuse=reuse) for input_vocab in self.input_vocabs]
    for input_network, output in input_network_outputs:
      with tf.variable_scope(input_network.classname):
        input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
    layer = tf.concat(input_tensors, 2)
  
  batch_size, bucket_size, input_size = nn.get_sizes(layer)
  n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keepdims=True))
  layer *= input_size / (n_nonzero + tf.constant(1e-12))
  
  token_weights = nn.greater(self.id_vocab.placeholder, 0)
  tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
  n_tokens = tf.reduce_sum(tokens_per_sequence)
  n_sequences = tf.count_nonzero(tokens_per_sequence)
  seq_lengths = tokens_per_sequence + 1
  tokens = {'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            'n_sequences': n_sequences}
  
  conv_keep_prob = 1. if reuse else self.conv_keep_prob
  recur_keep_prob = 1. if reuse else self.recur_keep_prob
  recur_include_prob = 1. if reuse else self.recur_include_prob
  
  for i in six.moves.range(self.n_layers):
    conv_width = self.first_layer_conv_width if not i else self.conv_width
    with tf.variable_scope('RNN-{}'.format(i)):
      layer, _ = recurrent.directed_RNN(layer, self.recur_size, seq_lengths,
                                        bidirectional=self.bidirectional,
                                        recur_cell=self.recur_cell,
                                        conv_width=conv_width,
                                        recur_func=self.recur_func,
                                        conv_keep_prob=conv_keep_prob,
                                        recur_keep_prob=recur_keep_prob,
                                        recur_include_prob=recur_include_prob,
                                        cifg=self.cifg,
                                        highway=self.highway,
                                        highway_func=self.highway_func,
                                        bilin=self.bilin)
  
  output_vocabs = {vocab.field: vocab for vocab in self.output_vocabs}
  outputs = {}
  with tf.variable_scope('Classifiers'):
    last_output = None
    if 'lemma' in output_vocabs:
      vocab = output_vocabs['lemma']
      outputs[vocab.field] = vocab.get_linear_classifier(
        layer, token_weights,
        last_output if self.share_layer else None,
        reuse=reuse)
      self._evals.add('lemma')
      if last_output is None:
        last_output = outputs[vocab.field]
    if 'upos' in output_vocabs:
      vocab = output_vocabs['upos']
      outputs[vocab.field] = vocab.get_linear_classifier(
        layer, token_weights,
        last_output if self.share_layer else None,
        reuse=reuse)
      self._evals.add('upos')
      if last_output is None:
        last_output = outputs[vocab.field]
      # Embed the gold tags at train time, the model's predictions at eval time.
      if reuse:
        upos_idxs = outputs[vocab.field]['predictions']
      else:
        upos_idxs = outputs[vocab.field]['targets']
      upos_embed = vocab.get_input_tensor(inputs=upos_idxs, embed_keep_prob=1, reuse=reuse)
      if 'xpos' in output_vocabs and not self.share_layer:
        vocab = output_vocabs['xpos']
        outputs[vocab.field] = vocab.get_bilinear_classifier_with_embeddings(
          layer, upos_embed, token_weights,
          reuse=reuse)
        self._evals.add('xpos')
      if 'ufeats' in output_vocabs and not self.share_layer:
        vocab = output_vocabs['ufeats']
        outputs[vocab.field] = vocab.get_bilinear_classifier_with_embeddings(
          layer, upos_embed, token_weights,
          reuse=reuse)
        self._evals.add('ufeats')
    if 'xpos' in output_vocabs and ('upos' not in output_vocabs or self.share_layer):
      vocab = output_vocabs['xpos']
      outputs[vocab.field] = vocab.get_linear_classifier(
        layer, token_weights,
        last_output if self.share_layer else None,
        reuse=reuse)
      self._evals.add('xpos')
      if last_output is None:
        last_output = outputs[vocab.field]
    if 'ufeats' in output_vocabs and ('upos' not in output_vocabs or self.share_layer):
      vocab = output_vocabs['ufeats']
      outputs[vocab.field] = vocab.get_linear_classifier(
        layer, token_weights,
        last_output if self.share_layer else None,
        reuse=reuse)
      self._evals.add('ufeats')
      if last_output is None:
        last_output = outputs[vocab.field]
    if 'deprel' in output_vocabs:
      vocab = output_vocabs['deprel']
      outputs[vocab.field] = vocab.get_linear_classifier(
        layer, token_weights,
        last_output if self.share_layer else None,
        reuse=reuse)
      self._evals.add('deprel')
      if last_output is None:
        last_output = outputs[vocab.field]
  return outputs, tokens

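# Note on the `reuse` flag threaded through the build_graph methods above: it
# doubles as a train/eval switch. With reuse=False the dropout keep
# probabilities take their configured values and the 'upos' branch embeds the
# gold tags; with reuse=True dropout is disabled (keep probs forced to 1) and
# the model's own upos predictions are embedded instead. A hypothetical driver
# (illustrative names, not this module's API) would therefore build the graph
# twice over shared variables:
#
#   train_outputs, train_tokens = network.build_graph(reuse=False)
#   dev_outputs, dev_tokens     = network.build_graph(reuse=True)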