def __init__(self, component):
  """Declares the output layers and validates the linked inputs.

  Args:
    component: Parent ComponentBuilderBase object.
  """
  # All four output layers are declared with dim -1.
  layer_names = ('lengths', 'scores', 'logits', 'arcs')
  layers = [network_units.Layer(self, name, -1) for name in layer_names]
  super(MstSolverNetwork, self).__init__(component, init_layers=layers)

  # Network-unit parameters from the spec, with fallbacks for unset values.
  default_attrs = {
      'forest': False,
      'loss': 'softmax',
      'crf_max_dynamic_range': 20,
  }
  self._attrs = network_units.get_attrs_with_defaults(
      component.spec.network_unit.parameters, defaults=default_attrs)

  # This network consumes exactly two linked features and nothing else.
  num_fixed = len(self._fixed_feature_dims.items())
  num_linked = len(self._linked_feature_dims.items())
  check.Eq(num_fixed, 0, 'Expected no fixed features')
  check.Eq(num_linked, 2, 'Expected two linked features')
  for required_feature in ('lengths', 'scores'):
    check.In(required_feature, self._linked_feature_dims,
             'Missing required linked feature')
def CombineArcAndRootPotentials(arcs, roots):
  """Writes the root potentials onto the diagonal of the arc potentials.

  Args:
    arcs: [B,N,N] tensor of batched arc potentials.
    roots: [B,N] matrix of batched root potentials.

  Returns:
    [B,N,N] tensor P of combined potentials where
      P_{b,s,t} = s == t ? roots[b,t] : arcs[b,s,t]
  """
  # Ranks must be known statically, at graph-construction time.
  check.Eq(arcs.get_shape().ndims, 3, 'arcs must be rank 3')
  check.Eq(roots.get_shape().ndims, 2, 'roots must be a matrix')

  # Both inputs must have the same base dtype.
  dtype = arcs.dtype.base_dtype
  check.Same([dtype, roots.dtype.base_dtype], 'dtype mismatch')

  roots_shape = tf.shape(roots)
  arcs_shape = tf.shape(arcs)
  batch_size = roots_shape[0]
  num_tokens = roots_shape[1]

  # Dynamic dimensions can only be compared when the graph runs.
  shape_assertions = [
      tf.assert_equal(batch_size, arcs_shape[0]),
      tf.assert_equal(num_tokens, arcs_shape[1]),
      tf.assert_equal(num_tokens, arcs_shape[2]),
  ]
  with tf.control_dependencies(shape_assertions):
    return tf.matrix_set_diag(arcs, roots)
def calculate_parse_metrics(gold_corpus, annotated_corpus):
  """Calculates POS/UAS/LAS accuracy based on gold and annotated sentences.

  Args:
    gold_corpus: Sequence of serialized Sentence protos with gold annotations.
    annotated_corpus: Sequence of serialized Sentence protos to evaluate,
      aligned one-to-one with |gold_corpus|.

  Returns:
    Tuple (pos, uas, las) of accuracies, each expressed as a percentage of the
    total number of tokens.

  Raises:
    ZeroDivisionError: If the corpora contain no tokens at all.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')
  num_tokens = 0
  num_correct_pos = 0
  num_correct_uas = 0
  num_correct_las = 0
  for gold_str, annotated_str in zip(gold_corpus, annotated_corpus):
    gold = sentence_pb2.Sentence()
    annotated = sentence_pb2.Sentence()
    gold.ParseFromString(gold_str)
    annotated.ParseFromString(annotated_str)
    check.Eq(gold.text, annotated.text, 'Text is not aligned')
    check.Eq(len(gold.token), len(annotated.token), 'Tokens are not aligned')

    # Both token lists have the same length (checked above), so either one
    # gives the number of aligned pairs.
    num_tokens += len(gold.token)

    # Walk the aligned pairs exactly once.  This avoids calling len() on, or
    # re-iterating, the result of zip(), which is a one-shot iterator on
    # Python 3.
    for gold_token, annotated_token in zip(gold.token, annotated.token):
      if gold_token.tag == annotated_token.tag:
        num_correct_pos += 1
      if gold_token.head == annotated_token.head:
        num_correct_uas += 1
        # A labeled attachment is correct only if the head is also correct.
        if gold_token.label == annotated_token.label:
          num_correct_las += 1

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total num tokens: %d', num_tokens)

  pos = num_correct_pos * 100.0 / num_tokens
  uas = num_correct_uas * 100.0 / num_tokens
  las = num_correct_las * 100.0 / num_tokens
  tf.logging.info('POS: %.2f%%', pos)
  tf.logging.info('UAS: %.2f%%', uas)
  tf.logging.info('LAS: %.2f%%', las)
  return pos, uas, las
def RootPotentialsFromTokens(root, tokens, weights):
  r"""Computes root-selection potentials from tokens and weights.

  For each batch of token activations, the potential of selecting a token as
  root is the 3-way product between the activations of the artificial root
  token, the token activations, and the |weights|.  Specifically,

    roots[b,r] = \sum_{i,j} root[i] * weights[i,j] * tokens[b,r,j]

  Args:
    root: [S] vector of activations for the artificial root token.
    tokens: [B,N,T] tensor of batched activations for root tokens.
    weights: [S,T] matrix of weights.

    B,N may be statically-unknown, but S,T must be statically-known.  The dtype
    of all arguments must be compatible.

  Returns:
    [B,N] matrix R of root-selection potentials as defined above.  The dtype of
    R is the same as that of the arguments.
  """
  # Ranks must be known statically.
  check.Eq(root.get_shape().ndims, 1, 'root must be a vector')
  check.Eq(tokens.get_shape().ndims, 3, 'tokens must be rank 3')
  check.Eq(weights.get_shape().ndims, 2, 'weights must be a matrix')

  # Activation dimensions must be known statically and agree across inputs.
  # |weights| was verified to be rank 2 above, so the unpacking is safe.
  num_source_activations, num_target_activations = (
      weights.get_shape().as_list())
  check.NotNone(num_source_activations, 'unknown source activation dimension')
  check.NotNone(num_target_activations, 'unknown target activation dimension')
  check.Eq(root.get_shape().as_list()[0], num_source_activations,
           'dimension mismatch between weights and root')
  check.Eq(tokens.get_shape().as_list()[2], num_target_activations,
           'dimension mismatch between weights and tokens')

  # All inputs must have the same base dtype.
  base_dtypes = [
      weights.dtype.base_dtype, root.dtype.base_dtype, tokens.dtype.base_dtype
  ]
  check.Same(base_dtypes, 'dtype mismatch')

  root_1xs = tf.expand_dims(root, 0)

  dynamic_shape = tf.shape(tokens)
  batch_size = dynamic_shape[0]
  num_tokens = dynamic_shape[1]

  # Merge the batch and token dimensions so two large matmuls do all the work.
  flat_tokens_bnxt = tf.reshape(tokens, [-1, num_target_activations])
  projected_bnxs = tf.matmul(flat_tokens_bnxt, weights, transpose_b=True)
  flat_roots_1xbn = tf.matmul(root_1xs, projected_bnxs, transpose_b=True)

  # Split the merged dimension back into batch and token dimensions.
  return tf.reshape(flat_roots_1xbn, [batch_size, num_tokens])
def __init__(self, component):
  """Creates the biaffine label weights, biases, and the output layer.

  Args:
    component: Parent ComponentBuilderBase object.
  """
  super(BiaffineLabelNetwork, self).__init__(component)

  self._num_labels = int(
      component.spec.network_unit.parameters['num_labels'])
  check.Gt(self._num_labels, 0, 'Expected some labels')

  # This network consumes exactly the 'sources' and 'targets' linked features.
  check.Eq(len(self._fixed_feature_dims.items()), 0,
           'Expected no fixed features')
  check.Eq(len(self._linked_feature_dims.items()), 2,
           'Expected two linked features')
  for required_feature in ('sources', 'targets'):
    check.In(required_feature, self._linked_feature_dims,
             'Missing required linked feature')

  self._source_dim = self._linked_feature_dims['sources']
  self._target_dim = self._linked_feature_dims['targets']

  def _small_normal_variable(name, shape):
    """Returns a float32 variable drawn from a narrow, seeded normal."""
    # TODO(googleuser): Make parameter initialization configurable.
    return tf.get_variable(
        name, shape, tf.float32,
        tf.random_normal_initializer(stddev=1e-4, seed=self._seed))

  self._weights = [
      _small_normal_variable(
          'weights_pair',
          [self._num_labels, self._source_dim, self._target_dim]),
      _small_normal_variable('weights_source',
                             [self._num_labels, self._source_dim]),
      _small_normal_variable('weights_target',
                             [self._num_labels, self._target_dim]),
  ]
  self._biases = [_small_normal_variable('biases', [self._num_labels])]

  # Every variable is a parameter, but only the weights are regularized.
  self._params.extend(self._weights + self._biases)
  self._regularized_weights.extend(self._weights)

  self._layers.append(network_units.Layer(self, 'labels', self._num_labels))
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Computes segmentation precision/recall/f1 from gold and annotated data.

  A test token counts as correct when a gold token of the same sentence has
  the same (start, end) span.

  Args:
    gold_corpus: Sequence of serialized Sentence protos with gold tokens.
    annotated_corpus: Sequence of serialized Sentence protos to evaluate,
      aligned one-to-one with |gold_corpus|.

  Returns:
    Tuple (precision, recall, f1), each rounded to two decimal places.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def token_span(token):
    """Returns the (start, end) pair of |token| after validating it."""
    check.Ge(token.end, token.start)
    return (token.start, token.end)

  def collect_spans(tokens):
    """Returns the set of spans of |tokens|, rejecting duplicate spans."""
    spans = set()
    for token in tokens:
      span = token_span(token)
      check.NotIn(span, spans, 'Duplicate token')
      spans.add(span)
    return spans

  def ratio(numerator, denominator):
    """Divides two non-negative numbers, with defined results for x/0."""
    check.Ge(numerator, 0)
    check.Ge(denominator, 0)
    if denominator > 0:
      return numerator / denominator
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if numerator == 0 else float('inf')

  num_gold_tokens = 0
  num_test_tokens = 0
  num_correct_tokens = 0
  for gold_str, annotated_str in zip(gold_corpus, annotated_corpus):
    gold = sentence_pb2.Sentence()
    annotated = sentence_pb2.Sentence()
    gold.ParseFromString(gold_str)
    annotated.ParseFromString(annotated_str)
    check.Eq(gold.text, annotated.text, 'Text is not aligned')

    gold_spans = collect_spans(gold.token)
    test_spans = collect_spans(annotated.token)
    num_gold_tokens += len(gold_spans)
    num_test_tokens += len(test_spans)
    num_correct_tokens += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', num_gold_tokens)
  tf.logging.info('Total test tokens: %d', num_test_tokens)

  precision = 100 * ratio(num_correct_tokens, num_test_tokens)
  recall = 100 * ratio(num_correct_tokens, num_gold_tokens)
  f1 = ratio(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return round(precision, 2), round(recall, 2), round(f1, 2)
def ArcSourcePotentialsFromTokens(tokens, weights):
  r"""Computes arc source potentials from tokens and weights.

  For each batch of token activations, the potential of every arc leaving a
  source token is the product between the activations of that source token and
  the |weights|.  Specifically,

    arc[b,s,:] = \sum_{i} weights[i] * tokens[b,s,i]

  Args:
    tokens: [B,N,S] tensor of batched activations for source tokens.
    weights: [S] vector of weights.

    B,N may be statically-unknown, but S must be statically-known.  The dtype of
    all arguments must be compatible.

  Returns:
    [B,N,N] tensor A of arc potentials as defined above.  The dtype of A is the
    same as that of the arguments.  Note that the diagonal entries (i.e., where
    s==t) represent self-loops and may not be meaningful.
  """
  # Ranks must be known statically.
  check.Eq(tokens.get_shape().ndims, 3, 'tokens must be rank 3')
  check.Eq(weights.get_shape().ndims, 1, 'weights must be a vector')

  # The activation dimension must be known statically and agree across inputs.
  num_source_activations = weights.get_shape().as_list()[0]
  check.NotNone(num_source_activations, 'unknown source activation dimension')
  check.Eq(tokens.get_shape().as_list()[2], num_source_activations,
           'dimension mismatch between weights and tokens')

  # Both inputs must have the same base dtype.
  check.Same([weights.dtype.base_dtype, tokens.dtype.base_dtype],
             'dtype mismatch')

  dynamic_shape = tf.shape(tokens)
  batch_size = dynamic_shape[0]
  num_tokens = dynamic_shape[1]

  # Merge the batch and token dimensions so one matmul scores every token.
  flat_tokens_bnxs = tf.reshape(tokens, [-1, num_source_activations])
  weights_sx1 = tf.expand_dims(weights, 1)
  potentials_bnx1 = tf.matmul(flat_tokens_bnxs, weights_sx1)

  # Each source potential is repeated across all N positions of the last axis.
  potentials_bnxn = tf.tile(potentials_bnx1, [1, num_tokens])

  # Split the merged dimension back into batch and token dimensions.
  return tf.reshape(potentials_bnxn, [batch_size, num_tokens, num_tokens])
def generate_target_per_step_schedule(pretrain_steps, train_steps):
  """Generates a sampled training schedule.

  The schedule starts with all pre-training steps, grouped by target in target
  order.  The training steps follow in random order: at each step a target is
  drawn with probability proportional to its remaining number of steps.

  Note that this seeds the global `random` module with a fixed value, so the
  returned schedule is deterministic.

  Arguments:
    pretrain_steps: List, number of pre-training steps per each target.
    train_steps: List, number of sampled training steps per each target.  The
      caller's list is not modified.

  Returns:
    Python list of length sum(pretrain_steps + train_steps), containing
    target numbers per step.
  """
  check.Eq(len(pretrain_steps), len(train_steps))

  # Arbitrary seed to make sure the return is deterministic.
  random.seed(0x31337)
  tf.logging.info('Determining the training schedule...')

  target_per_step = []
  for target_idx, num_steps in enumerate(pretrain_steps):
    target_per_step += [target_idx] * num_steps

  # Work on a copy, since the per-target counts are decremented below.
  train_steps = list(train_steps)
  # Exactly one step is consumed per iteration, so a running total replaces
  # re-summing the list on every pass.
  remaining_total = sum(train_steps)
  while remaining_total > 0:
    step = random.randint(0, remaining_total - 1)

    # Find the target whose cumulative step range contains |step|.
    cumulative_steps = 0
    for target_idx, remaining in enumerate(train_steps):
      cumulative_steps += remaining
      if step < cumulative_steps:
        break
    assert train_steps[target_idx] > 0
    train_steps[target_idx] -= 1
    remaining_total -= 1
    target_per_step.append(target_idx)

  tf.logging.info('Training schedule defined!')
  return target_per_step
def LabelPotentialsFromTokens(tokens, weights):
  r"""Computes per-token label potentials from tokens and weights.

  For each batch of token activations, the potential of each label is the
  product between the activations of the token and the |weights|.
  Specifically,

    labels[b,t,l] = \sum_{i} weights[l,i] * tokens[b,t,i]

  Args:
    tokens: [B,N,T] tensor of batched token activations.
    weights: [L,T] matrix of weights.

    B,N may be dynamic, but L,T must be static.  The dtype of all arguments must
    be compatible.

  Returns:
    [B,N,L] tensor of label potentials as defined above, with the same dtype as
    the arguments.
  """
  check.Eq(tokens.get_shape().ndims, 3, 'tokens must be rank 3')
  check.Eq(weights.get_shape().ndims, 2, 'weights must be a matrix')

  # |weights| was verified to be rank 2 above, so the unpacking is safe.
  num_labels, num_activations = weights.get_shape().as_list()
  check.NotNone(num_labels, 'unknown number of labels')
  check.NotNone(num_activations, 'unknown activation dimension')
  check.Eq(tokens.get_shape().as_list()[2], num_activations,
           'activation mismatch between weights and tokens')

  dynamic_shape = tf.shape(tokens)
  batch_size = dynamic_shape[0]
  num_tokens = dynamic_shape[1]

  check.Same([tokens.dtype.base_dtype, weights.dtype.base_dtype],
             'dtype mismatch')

  # Merge the batch and token dimensions so a single matmul() does the work.
  flat_tokens_bnxt = tf.reshape(tokens, [-1, num_activations])
  flat_labels_bnxl = tf.matmul(flat_tokens_bnxt, weights, transpose_b=True)

  # Split the merged dimension back into batch and token dimensions.
  return tf.reshape(flat_labels_bnxl, [batch_size, num_tokens, num_labels])
def __init__(self, component):
  """Creates the biaffine arc weights and the adjacency output layer.

  Args:
    component: Parent ComponentBuilderBase object.
  """
  super(BiaffineDigraphNetwork, self).__init__(component)

  # This network consumes exactly the 'sources' and 'targets' linked features.
  check.Eq(len(self._fixed_feature_dims.items()), 0,
           'Expected no fixed features')
  check.Eq(len(self._linked_feature_dims.items()), 2,
           'Expected two linked features')
  for required_feature in ('sources', 'targets'):
    check.In(required_feature, self._linked_feature_dims,
             'Missing required linked feature')

  self._source_dim = self._linked_feature_dims['sources']
  self._target_dim = self._linked_feature_dims['targets']

  def _small_normal_variable(name, shape):
    """Returns a float32 variable drawn from a narrow, seeded normal."""
    # TODO(googleuser): Make parameter initialization configurable.
    return tf.get_variable(
        name, shape, tf.float32,
        tf.random_normal_initializer(stddev=1e-4, seed=self._seed))

  self._weights = [
      _small_normal_variable('weights_arc',
                             [self._source_dim, self._target_dim]),
      _small_normal_variable('weights_source', [self._source_dim]),
      _small_normal_variable('root', [self._source_dim]),
  ]

  # All variables are both trainable parameters and regularized weights.
  self._params.extend(self._weights)
  self._regularized_weights.extend(self._weights)

  # Negative Layer.dim indicates that the dimension is dynamic.
  self._layers.append(network_units.Layer(self, 'adjacency', -1))
def __init__(self, component):
  """Creates the biaffine arc weights, runtime hooks, and the output layer.

  Args:
    component: Parent ComponentBuilderBase object.
  """
  super(BiaffineDigraphNetwork, self).__init__(component)

  # This network consumes exactly the 'sources' and 'targets' linked features.
  check.Eq(len(self._fixed_feature_dims.items()), 0,
           'Expected no fixed features')
  check.Eq(len(self._linked_feature_dims.items()), 2,
           'Expected two linked features')
  for required_feature in ('sources', 'targets'):
    check.In(required_feature, self._linked_feature_dims,
             'Missing required linked feature')

  self._source_dim = self._linked_feature_dims['sources']
  self._target_dim = self._linked_feature_dims['targets']

  # The arc matrix starts orthogonal; the two vectors start at zero.
  arc_weights = tf.get_variable(
      'weights_arc', [self._source_dim, self._target_dim], tf.float32,
      tf.orthogonal_initializer())
  source_weights = tf.get_variable(
      'weights_source', [self._source_dim], tf.float32,
      tf.zeros_initializer())
  root_weights = tf.get_variable(
      'root', [self._source_dim], tf.float32, tf.zeros_initializer())
  self._weights = [arc_weights, source_weights, root_weights]

  # All variables are both trainable parameters and regularized weights.
  self._params.extend(self._weights)
  self._regularized_weights.extend(self._weights)

  # Add runtime hooks for pre-computed weights.
  self._derived_params.extend(
      [self._get_root_weights, self._get_root_bias])

  # Negative Layer.dim indicates that the dimension is dynamic.
  self._layers.append(network_units.Layer(component, 'adjacency', -1))
def __init__(self, component):
  """Creates the bi-LSTM cells, their weights, and the initial-state params.

  Args:
    component: Component that owns this network; its name is used in logging.
  """
  super(BulkBiLSTMNetwork, self).__init__(component)

  check.In('lengths', self._linked_feature_dims,
           'Missing required linked feature')
  check.Eq(self._linked_feature_dims['lengths'], 1,
           'Wrong dimension for "lengths" feature')

  # The single 'lengths' dimension is not part of the LSTM input.
  self._input_dim = self._concatenated_input_dim - 1
  self._output_dim = self.get_layer_size('outputs')
  tf.logging.info('[%s] Bulk bi-LSTM with input_dim=%d output_dim=%d',
                  component.name, self._input_dim, self._output_dim)

  # Training and inference each get their own cells, per layer and direction.
  self._train_cells_forward = self._create_train_cells()
  self._train_cells_backward = self._create_train_cells()
  self._inference_cells_forward = self._create_inference_cells()
  self._inference_cells_backward = self._create_inference_cells()

  def _bilstm_closure(scope):
    """Applies the bi-LSTM to placeholder inputs and lengths."""
    # A singleton stride and a single step suffice, because neither value
    # affects the weight variables.
    dummy_inputs = tf.placeholder(
        dtype=tf.float32, shape=[1, 1, self._input_dim])
    dummy_lengths = tf.placeholder(dtype=tf.int64, shape=[1])

    # Initial states are left out for simplicity; they don't affect the weight
    # variables either.
    tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
        self._train_cells_forward,
        self._train_cells_backward,
        dummy_inputs,
        dtype=tf.float32,
        sequence_length=dummy_lengths,
        scope=scope)

  self._capture_variables_as_params(_bilstm_closure)

  # Allocate parameters for the initial states.  An LSTM state is a tuple of
  # two substates (c, h), so each layer gets 4 variables (2 per direction).
  for index, num_units in enumerate(self._hidden_layer_sizes):
    for direction in ('forward', 'backward'):
      for substate in ('c', 'h'):
        variable_name = 'initial_state_%s_%s_%d' % (direction, substate,
                                                    index)
        initial_state = tf.get_variable(
            variable_name,
            [1, num_units],  # leading 1 for later batch-wise tiling
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))
        self._params.append(initial_state)
def __init__(self, master, component_spec):
  """Builds the feature ID extractor component and validates its spec.

  Args:
    master: dragnn.MasterBuilder object.
    component_spec: dragnn.ComponentSpec proto to be built.
  """
  super(BulkFeatureIdExtractorComponentBuilder, self).__init__(
      master, component_spec)

  num_linked_features = len(self.spec.linked_feature)
  check.Eq(num_linked_features, 0, 'Linked features are forbidden')

  # Every fixed feature must carry a negative embedding_dim.
  for feature_spec in self.spec.fixed_feature:
    error_message = 'Features must be non-embedded: %s' % feature_spec
    check.Lt(feature_spec.embedding_dim, 0, error_message)
def get_segmenter_corpus(input_data_path, use_text_format):
  """Reads in a character corpus for segmenting.

  Args:
    input_data_path: Path of the data to read.
    use_text_format: If true, read the input with the 'untokenized-text'
      format; otherwise read it as CoNLL and convert it to characters.

  Returns:
    The character-based corpus.
  """
  tf.logging.info('Reading documents...')

  if use_text_format:
    text_reader = sentence_io.FormatSentenceReader(input_data_path,
                                                   'untokenized-text')
    return text_reader.corpus()

  # CoNLL input is converted by running the char token generator op in a
  # throwaway graph and session.
  input_corpus = sentence_io.ConllSentenceReader(input_data_path).corpus()
  with tf.Session(graph=tf.Graph()) as tmp_session:
    char_input = gen_parser_ops.char_token_generator(input_corpus)
    char_corpus = tmp_session.run(char_input)

  # The conversion must yield exactly one output per input sentence.
  check.Eq(len(input_corpus), len(char_corpus))
  return char_corpus
def create(self,
           fixed_embeddings,
           linked_embeddings,
           context_tensor_arrays,
           attention_tensor,
           during_training,
           stride=None):
  """See base class."""
  # NB: The LSTM's c and h vectors are read from |context_tensor_arrays|
  # rather than arriving through linked features.
  check.Eq(
      len(context_tensor_arrays), 2 * len(self._hidden_layer_sizes),
      'require two context tensors per hidden layer')

  # Read the most recent (c, h) pair of every layer.  The arrays are laid out
  # as [c_0, h_0, c_1, h_1, ...].
  length = context_tensor_arrays[0].size()
  layer_states = []
  for index, num_units in enumerate(self._hidden_layer_sizes):
    state_c = context_tensor_arrays[2 * index].read(length - 1)
    state_h = context_tensor_arrays[2 * index + 1].read(length - 1)

    # The static shapes are not set on the tensors read back, so set them
    # here.  TODO(googleuser): Why are the shapes not set?
    state_c.set_shape([tf.Dimension(None), num_units])
    state_h.set_shape([tf.Dimension(None), num_units])
    layer_states.append(tf.contrib.rnn.LSTMStateTuple(state_c, state_h))
  state = tuple(layer_states)

  input_tensor = dragnn.get_input_tensor(fixed_embeddings, linked_embeddings)

  if during_training:
    cell = self._train_cell
  else:
    cell = self._inference_cell

  def _cell_closure(scope):
    """Applies the LSTM cell to the current inputs and state."""
    return cell(input_tensor, state, scope)

  unused_h, state = self._apply_with_captured_variables(_cell_closure)

  # Flatten the new state back into [c_0, h_0, c_1, h_1, ...]; these tensors
  # go into the tensor arrays and are used to compute the objective.
  output_tensors = []
  for new_c, new_h in state:
    output_tensors.extend([new_c, new_h])
  return self._append_base_layers(output_tensors)
def extract_fixed_feature_ids(comp, state, stride):
  """Extracts fixed feature IDs.

  Args:
    comp: Component whose fixed feature IDs we wish to extract.
    state: Live MasterState object for the component.  Its handle is updated
      in place.
    stride: Tensor containing current batch * beam size.

  Returns:
    state handle: Updated state handle to be used after this call.
    ids: List of [stride * num_steps, 1] feature IDs per channel.  Missing IDs
         (e.g., due to batch padding) are set to -1.
  """
  fixed_feature_specs = comp.spec.fixed_feature
  num_channels = len(fixed_feature_specs)
  if not num_channels:
    # Nothing to extract.
    return state.handle, []

  for feature_spec in fixed_feature_specs:
    check.Eq(feature_spec.size, 1, 'All features must have size=1')
    check.Lt(feature_spec.embedding_dim, 0, 'All features must be non-embedded')

  state.handle, indices, ids, _, num_steps = dragnn_ops.bulk_fixed_features(
      state.handle, component=comp.name, num_channels=num_channels)
  size = stride * num_steps

  fixed_ids = []
  for channel, feature_spec in enumerate(fixed_feature_specs):
    tf.logging.info('[%s] Adding fixed feature IDs "%s"', comp.name,
                    feature_spec.name)

    # Shifting the IDs by +1 before the segment sum and by -1 after it makes
    # positions that received no ID come out as -1.
    #
    # TODO(googleuser): This formula breaks if multiple IDs are extracted at
    # some step.  Try using tf.unique() to enforce the unique-IDS precondition.
    shifted_ids = ids[channel] + 1
    channel_ids = tf.unsorted_segment_sum(shifted_ids, indices[channel],
                                          size) - 1
    channel_ids = tf.expand_dims(channel_ids, axis=1)
    fixed_ids.append(
        network_units.NamedTensor(channel_ids, feature_spec.name, dim=1))
  return state.handle, fixed_ids
def testCheckEq(self):
  """Tests check.Eq on equal values and on unequal values."""
  # Equal values pass without raising.
  check.Eq(1, 1, 'foo')

  # Unequal values raise an error carrying the message: ValueError by default,
  # or the error type passed as the fourth argument.
  failing_cases = [
      (ValueError, 'bar', ()),
      (RuntimeError, 'baz', (RuntimeError,)),
  ]
  for expected_error, message, extra_args in failing_cases:
    with self.assertRaisesRegexp(expected_error, message):
      check.Eq(1, 2, message, *extra_args)
def main(unused_argv):
  """Trains a model from the configuration files found under --model_dir.

  Reads the config, master spec, hyperparameters, and optional training
  targets from the model directory, builds the graph, loads the training and
  tuning corpora, and runs training.  Any existing checkpoint directory is
  deleted and recreated first.

  Args:
    unused_argv: Ignored.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  # Validate flags.  For both phases, the duration must be given either in
  # steps or in epochs, but not both.
  check.NotNone(FLAGS.model_dir, '--model_dir is required')
  check.Ne(FLAGS.pretrain_steps is None, FLAGS.pretrain_epochs is None,
           'Exactly one of --pretrain_steps or --pretrain_epochs is required')
  check.Ne(FLAGS.train_steps is None, FLAGS.train_epochs is None,
           'Exactly one of --train_steps or --train_epochs is required')

  # Everything is read from, or written to, well-known paths in the model dir.
  model_dir = FLAGS.model_dir
  config_path = os.path.join(model_dir, 'config.txt')
  master_path = os.path.join(model_dir, 'master.pbtxt')
  hyperparameters_path = os.path.join(model_dir, 'hyperparameters.pbtxt')
  targets_path = os.path.join(model_dir, 'targets.pbtxt')
  checkpoint_path = os.path.join(model_dir, 'checkpoints/best')
  tensorboard_dir = os.path.join(model_dir, 'tensorboard')

  # The config file holds a Python literal.  Wrapping it in a defaultdict
  # makes missing keys read as False.
  with tf.gfile.FastGFile(config_path) as config_file:
    parsed_config = ast.literal_eval(config_file.read())
  config = collections.defaultdict(bool, parsed_config)
  train_corpus_path = config['train_corpus_path']
  tune_corpus_path = config['tune_corpus_path']
  projectivize_train_corpus = config['projectivize_train_corpus']

  master = _read_text_proto(master_path, spec_pb2.MasterSpec)
  hyperparameters = _read_text_proto(hyperparameters_path, spec_pb2.GridPoint)

  # Explicit training targets, when present, override the defaults.
  targets = spec_builder.default_targets_from_spec(master)
  if tf.gfile.Exists(targets_path):
    targets = _read_text_proto(targets_path, spec_pb2.TrainingGridSpec).target

  # Build the TensorFlow graph.
  graph = tf.Graph()
  with graph.as_default():
    tf.set_random_seed(hyperparameters.seed)
    builder = graph_builder.MasterBuilder(master, hyperparameters)
    trainers = [
        builder.add_training_from_config(target) for target in targets
    ]
    annotator = builder.add_annotation()
    builder.add_saver()

  # Read in serialized protos from training data.
  train_reader = sentence_io.ConllSentenceReader(
      train_corpus_path, projectivize=projectivize_train_corpus)
  tune_reader = sentence_io.ConllSentenceReader(
      tune_corpus_path, projectivize=False)
  train_corpus = train_reader.corpus()
  tune_corpus = tune_reader.corpus()
  gold_tune_corpus = tune_corpus

  # Convert to char-based corpora, if requested.
  if config['convert_to_char_corpora']:
    # NB: Do not convert the |gold_tune_corpus|, which should remain word-based
    # for segmentation evaluation purposes.
    train_corpus = _convert_to_char_corpus(train_corpus)
    tune_corpus = _convert_to_char_corpus(tune_corpus)

  num_train_sentences = len(train_corpus)
  pretrain_steps = _get_steps(FLAGS.pretrain_steps, FLAGS.pretrain_epochs,
                              num_train_sentences)
  train_steps = _get_steps(FLAGS.train_steps, FLAGS.train_epochs,
                           num_train_sentences)
  check.Eq(len(targets), len(pretrain_steps),
           'Length mismatch between training targets and --pretrain_steps')
  check.Eq(len(targets), len(train_steps),
           'Length mismatch between training targets and --train_steps')

  # Ready to train!
  tf.logging.info('Training on %d sentences.', len(train_corpus))
  tf.logging.info('Tuning on %d sentences.', len(tune_corpus))

  tf.logging.info('Creating TensorFlow checkpoint dir...')
  summary_writer = trainer_lib.get_summary_writer(tensorboard_dir)

  # Start from an empty checkpoint directory: remove whatever directory or
  # file currently sits at that path.
  checkpoint_dir = os.path.dirname(checkpoint_path)
  if tf.gfile.IsDirectory(checkpoint_dir):
    tf.gfile.DeleteRecursively(checkpoint_dir)
  elif tf.gfile.Exists(checkpoint_dir):
    tf.gfile.Remove(checkpoint_dir)
  tf.gfile.MakeDirs(checkpoint_dir)

  with tf.Session(FLAGS.tf_master, graph=graph) as sess:
    # Make sure to re-initialize all underlying state.
    sess.run(tf.global_variables_initializer())
    trainer_lib.run_training(
        sess, trainers, annotator, evaluation.parser_summaries,
        pretrain_steps, train_steps, train_corpus, tune_corpus,
        gold_tune_corpus, FLAGS.batch_size, summary_writer,
        FLAGS.report_every, builder.saver, checkpoint_path)

  tf.logging.info('Best checkpoint written to:\n%s', checkpoint_path)
def main(unused_argv):
  """Runs a segmenter over a CoNLL corpus and reports segmentation metrics.

  Builds the annotation graph from --master_spec, restores the weights from
  --checkpoint_file, annotates the character-converted input in batches, logs
  segmentation metrics, and optionally writes the annotated sentences and a
  timeline trace of the last batch.

  Args:
    unused_argv: Ignored.
  """
  # Parse the flags containing lists, using regular expressions.
  # Beam sizes are given as comma-separated key=value pairs.
  component_beam_sizes = re.findall(r'([^=,]+)=(\d+)',
                                    FLAGS.inference_beam_size)
  # Component names are separated by commas; empty strings are never returned.
  components_to_locally_normalize = re.findall(r'[^,]+',
                                               FLAGS.locally_normalize)

  # Reads master spec.
  master_spec = spec_pb2.MasterSpec()
  with gfile.FastGFile(FLAGS.master_spec) as fin:
    text_format.Parse(fin.read(), master_spec)

  # Rewrite resource locations.
  if FLAGS.resource_dir:
    for component in master_spec.component:
      for resource in component.resource:
        for part in resource.part:
          part.file_pattern = os.path.join(FLAGS.resource_dir,
                                           part.file_pattern)

  if FLAGS.complete_master_spec:
    spec_builder.complete_master_spec(master_spec, None, FLAGS.resource_dir)

  # Graph building.
  tf.logging.info('Building the graph')
  g = tf.Graph()
  with g.as_default(), tf.device('/device:CPU:0'):
    hyperparam_config = spec_pb2.GridPoint()
    hyperparam_config.use_moving_average = True
    builder = graph_builder.MasterBuilder(master_spec, hyperparam_config)
    annotator = builder.add_annotation()
    builder.add_saver()

  # Load the input and convert it to characters in a throwaway graph.
  tf.logging.info('Reading documents...')
  input_corpus = sentence_io.ConllSentenceReader(FLAGS.input_file).corpus()
  with tf.Session(graph=tf.Graph()) as tmp_session:
    char_input = gen_parser_ops.char_token_generator(input_corpus)
    char_corpus = tmp_session.run(char_input)
  check.Eq(len(input_corpus), len(char_corpus))
  num_documents = len(char_corpus)

  # These feeds are the same for every batch, so assemble them once.
  constant_feeds = {}
  for comp, beam_size in component_beam_sizes:
    constant_feeds['%s/InferenceBeamSize:0' % comp] = beam_size
  for comp in components_to_locally_normalize:
    constant_feeds['%s/LocallyNormalize:0' % comp] = True

  session_config = tf.ConfigProto(
      log_device_placement=False,
      intra_op_parallelism_threads=FLAGS.threads,
      inter_op_parallelism_threads=FLAGS.threads)

  with tf.Session(graph=g, config=session_config) as sess:
    tf.logging.info('Initializing variables...')
    sess.run(tf.global_variables_initializer())

    tf.logging.info('Loading from checkpoint...')
    sess.run('save/restore_all', {'save/Const:0': FLAGS.checkpoint_file})

    tf.logging.info('Processing sentences...')

    processed = []
    start_time = time.time()
    run_metadata = tf.RunMetadata()
    for start in range(0, num_documents, FLAGS.max_batch_size):
      end = min(start + FLAGS.max_batch_size, num_documents)
      feed_dict = {annotator['input_batch']: char_corpus[start:end]}
      feed_dict.update(constant_feeds)

      # Only the final batch is traced, and only if a trace file was requested.
      is_last_batch = end == num_documents
      if FLAGS.timeline_output_file and is_last_batch:
        trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        serialized_annotations = sess.run(
            annotator['annotations'],
            feed_dict=feed_dict,
            options=trace_options,
            run_metadata=run_metadata)
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        with open(FLAGS.timeline_output_file, 'w') as trace_file:
          trace_file.write(trace.generate_chrome_trace_format())
      else:
        serialized_annotations = sess.run(
            annotator['annotations'], feed_dict=feed_dict)
      processed.extend(serialized_annotations)

    elapsed_seconds = time.time() - start_time
    tf.logging.info('Processed %d documents in %.2f seconds.', num_documents,
                    elapsed_seconds)
    evaluation.calculate_segmentation_metrics(input_corpus, processed)

    # Optionally dump the annotated sentences as text-format protos.
    if FLAGS.output_file:
      with gfile.GFile(FLAGS.output_file, 'w') as f:
        for serialized_sentence in processed:
          sentence = sentence_pb2.Sentence()
          sentence.ParseFromString(serialized_sentence)
          f.write(text_format.MessageToString(sentence) + '\n\n')
def LabelPotentialsFromTokenPairs(sources, targets, weights):
  r"""Computes label potentials from aligned source and target tokens.

  For each aligned pair of source and target token activations, computes a
  scalar potential for each label on the arc from the source to the target.
  Specifically,

    labels[b,t,l] = \sum_{i,j} sources[b,t,i] * weights[l,i,j] * targets[b,t,j]

  Args:
    sources: [B,N,S] tensor of batched source token activations.
    targets: [B,N,T] tensor of batched target token activations.
    weights: [L,S,T] tensor of weights.

    B,N may be dynamic, but L,S,T must be static.  The dtype of all arguments
    must be compatible.

  Returns:
    [B,N,L] tensor of label potentials as defined above, with the same dtype as
    the arguments.
  """
  check.Eq(sources.get_shape().ndims, 3, 'sources must be rank 3')
  check.Eq(targets.get_shape().ndims, 3, 'targets must be rank 3')
  check.Eq(weights.get_shape().ndims, 3, 'weights must be rank 3')

  # |weights| was verified to be rank 3 above, so the unpacking is safe.
  num_labels, num_source_activations, num_target_activations = (
      weights.get_shape().as_list())
  check.NotNone(num_labels, 'unknown number of labels')
  check.NotNone(num_source_activations, 'unknown source activation dimension')
  check.NotNone(num_target_activations, 'unknown target activation dimension')
  check.Eq(sources.get_shape().as_list()[2], num_source_activations,
           'activation mismatch between weights and source tokens')
  check.Eq(targets.get_shape().as_list()[2], num_target_activations,
           'activation mismatch between weights and target tokens')

  base_dtypes = [
      sources.dtype.base_dtype, targets.dtype.base_dtype,
      weights.dtype.base_dtype
  ]
  check.Same(base_dtypes, 'dtype mismatch')

  sources_shape = tf.shape(sources)
  targets_shape = tf.shape(targets)
  batch_size = sources_shape[0]
  num_tokens = sources_shape[1]

  # The dynamic B and N dimensions of |sources| and |targets| must agree.
  shape_assertions = [
      tf.assert_equal(batch_size, targets_shape[0]),
      tf.assert_equal(num_tokens, targets_shape[1]),
  ]
  with tf.control_dependencies(shape_assertions):
    # Each token needs a vector-3tensor-vector product.  No single op does
    # that, so it is assembled from reshape() and matmul().

    # Flatten |weights| to [L*S,T] and |targets| to [B*N,T] so that a single
    # matmul() contracts the target activations.
    weights_lsxt = tf.reshape(
        weights, [num_labels * num_source_activations, num_target_activations])
    flat_targets_bnxt = tf.reshape(targets, [-1, num_target_activations])
    contracted_bnxls = tf.matmul(
        flat_targets_bnxt, weights_lsxt, transpose_b=True)

    # Restore all dimensions.
    contracted_bxnxlxs = tf.reshape(
        contracted_bnxls,
        [batch_size, num_tokens, num_labels, num_source_activations])

    # Contract the source activations with a batched matmul() between the
    # trailing [L,S] matrices of the intermediate result and the trailing [S]
    # vectors of |sources|.
    sources_bxnx1xs = tf.expand_dims(sources, 2)
    labels_bxnxlx1 = tf.matmul(
        contracted_bxnxlxs, sources_bxnx1xs, transpose_b=True)
    return tf.squeeze(labels_bxnxlx1, [3])
def LaplacianMatrix(lengths, arcs, forest=False):
  r"""Builds the (root-augmented) Laplacian matrix of each digraph in a batch.

  Args:
    lengths: [B] vector of input sequence lengths.
    arcs: [B,M,M] tensor of arc potentials where entry b,t,s is the potential
      of the arc from s to t in the b'th digraph, while b,t,t is the potential
      of t as a root.  Entries b,t,s where t or s >= lengths[b] are ignored.
    forest: Whether to produce a Laplacian for trees or forests.

  Returns:
    [B,M,M] tensor L holding the Laplacian of each digraph, padded with an
    identity matrix.  That is, the padding entries (t or s >= lengths[b]) are:

      L_{b,t,t} = 1.0
      L_{b,t,s} = 0.0

    The identity padding is intended to make the determinant of each padded
    matrix equal to the determinant of the unpadded matrix.

    The non-padding entries (t,s < lengths[b]) depend on the |forest| flag.

    For trees:

      L_{b,t,0} = arcs[b,t,t]
      L_{b,t,t} = \sum_{s < lengths[b], t != s} arcs[b,t,s]
      L_{b,t,s} = -arcs[b,t,s]

    For forests:

      L_{b,t,t} = \sum_{s < lengths[b]} arcs[b,t,s]
      L_{b,t,s} = -arcs[b,t,s]

    See http://www.aclweb.org/anthology/D/D07/D07-1015.pdf for details, but
    note that the matrices here are transposed relative to that paper.
  """
  check.Eq(arcs.get_shape().ndims, 3, 'arcs must be rank 3')
  dtype = arcs.dtype.base_dtype

  dims = tf.shape(arcs)
  batch_size = dims[0]
  max_length = dims[1]

  # The trailing two dimensions of |arcs| must form square matrices.
  with tf.control_dependencies([tf.assert_equal(max_length, dims[2])]):
    arc_mask, token_mask = ValidArcAndTokenMasks(
        lengths, max_length, dtype=dtype)

    # Indicator of padding tokens; contributes the identity-matrix padding.
    padding_mask = tf.constant(1, dtype=dtype) - token_mask

    # Mask out invalid arcs so they cannot contribute to the sums below.
    masked_arcs = arcs * arc_mask

    zero_diagonal = tf.zeros([batch_size, max_length], dtype)
    if forest:
      # Forests: the root potentials (on the diagonal) participate in the
      # inbound sums, and are cleared only afterwards so that subtracting the
      # arc potentials below does not cancel them out of the diagonal.
      inbound_sums = tf.reduce_sum(masked_arcs, 2)
      off_diagonal = tf.matrix_set_diag(masked_arcs, zero_diagonal)
    else:
      # Trees: pull the root potentials off of the diagonal first, so they
      # are excluded from the inbound sums.
      roots = tf.matrix_diag_part(masked_arcs)
      off_diagonal = tf.matrix_set_diag(masked_arcs, zero_diagonal)
      inbound_sums = tf.reduce_sum(off_diagonal, 2)

    # Diagonal = inbound arc sums plus the padding indicators.  The sums are
    # computed from masked potentials, and the indicators are the complement
    # of the token mask.
    diagonal = inbound_sums + padding_mask

    # Off-diagonal entries are the negated arc potentials; masked entries
    # contribute zero.
    laplacian = tf.matrix_diag(diagonal) - off_diagonal
    if forest:
      return laplacian

    # Trees: overwrite the first column with the root potentials.
    root_column = tf.expand_dims(roots, 2)
    return tf.concat([root_column, laplacian[:, :, 1:]], 2)
def ArcPotentialsFromTokens(source_tokens, target_tokens, weights):
  r"""Computes a bilinear potential for every (source, target) token pair.

  Within each batch element, the potential of the arc from token s to token t
  is the product of the source activation vector, the |weights| matrix, and
  the target activation vector:

    arc[b,s,t] =
        \sum_{i,j} source_tokens[b,s,i] * weights[i,j] * target_tokens[b,t,j]

  Note that the token activations can be extended with bias terms to implement
  a "biaffine" model (Dozat and Manning, 2017).

  Args:
    source_tokens: [B,N,S] tensor of batched activations for the source token
      in each arc.
    target_tokens: [B,N,T] tensor of batched activations for the target token
      in each arc.
    weights: [S,T] matrix of weights.

  B and N may be statically unknown, whereas S and T must be statically known.
  All arguments must share the same base dtype.

  Returns:
    [B,N,N] tensor A of arc potentials where A_{b,s,t} is the potential of the
    arc from s to t in batch element b.  The dtype of A is the same as that of
    the arguments.  Note that the diagonal entries (i.e., where s==t)
    represent self-loops and may not be meaningful.
  """
  # Static validation: ranks must be known.
  check.Eq(source_tokens.get_shape().ndims, 3, 'source_tokens must be rank 3')
  check.Eq(target_tokens.get_shape().ndims, 3, 'target_tokens must be rank 3')
  check.Eq(weights.get_shape().ndims, 2, 'weights must be a matrix')

  # Static validation: activation sizes are read off |weights| and must be
  # known, since they are used as reshape() sizes below.
  source_dim, target_dim = weights.get_shape().as_list()
  check.NotNone(source_dim, 'unknown source activation dimension')
  check.NotNone(target_dim, 'unknown target activation dimension')
  check.Eq(source_tokens.get_shape().as_list()[2], source_dim,
           'dimension mismatch between weights and source_tokens')
  check.Eq(target_tokens.get_shape().as_list()[2], target_dim,
           'dimension mismatch between weights and target_tokens')

  # Static validation: dtypes must agree.
  check.Same(
      [
          tensor.dtype.base_dtype
          for tensor in (weights, source_tokens, target_tokens)
      ],
      'dtype mismatch')

  # Dynamic validation: both token tensors must agree on B and N.
  source_dims = tf.shape(source_tokens)
  target_dims = tf.shape(target_tokens)
  batch_size = source_dims[0]
  num_tokens = source_dims[1]
  shape_assertions = [
      tf.assert_equal(batch_size, target_dims[0]),
      tf.assert_equal(num_tokens, target_dims[1]),
  ]

  with tf.control_dependencies(shape_assertions):
    # Step 1: contract the target activations with |weights|.  Collapsing the
    # batch and token dimensions into [B*N,T] allows a single 2-D matmul()
    # against the transposed [S,T] weights, producing [B*N,S].
    flat_targets = tf.reshape(target_tokens, [-1, target_dim])
    flat_weighted_targets = tf.matmul(flat_targets, weights, transpose_b=True)

    # Step 2 pairs up tokens within each batch element, so the batch
    # dimension is restored first: [B*N,S] -> [B,N,S].
    weighted_targets = tf.reshape(flat_weighted_targets,
                                  [batch_size, num_tokens, source_dim])

    # Step 2: batched matmul() of [B,N,S] with the transpose of [B,N,S],
    # which produces one [N,N] matrix of potentials per batch element.
    return tf.matmul(source_tokens, weighted_targets, transpose_b=True)