def testParsingReaderOp(self):
  """Checks the gold parse reader iterates identically standalone and when
  embedded in a full training network (same number of steps over two epochs).
  """
  # Runs the reader over the test input for two epochs.
  num_steps_a = 0
  num_actions = 0
  num_word_ids = 0
  num_tag_ids = 0
  num_label_ids = 0
  batch_size = 10
  with self.test_session() as sess:
    (words, tags, labels), epochs, gold_actions = (
        gen_parser_ops.gold_parse_reader(self._task_context,
                                         3,
                                         batch_size,
                                         corpus_name='training-corpus'))
    while True:
      tf_gold_actions, tf_epochs, tf_words, tf_tags, tf_labels = (
          sess.run([gold_actions, epochs, words, tags, labels]))
      num_steps_a += 1
      # Track the largest ids seen so far; +1 converts a max id into a count,
      # which sizes the network built in the second phase below.
      num_actions = max(num_actions, max(tf_gold_actions) + 1)
      num_word_ids = max(num_word_ids, self.GetMaxId(tf_words) + 1)
      num_tag_ids = max(num_tag_ids, self.GetMaxId(tf_tags) + 1)
      num_label_ids = max(num_label_ids, self.GetMaxId(tf_labels) + 1)
      self.assertIn(tf_epochs, [0, 1, 2])
      if tf_epochs > 1:
        break
  # Runs the reader again, this time with a lot of added graph nodes.
  num_steps_b = 0
  with self.test_session() as sess:
    num_features = [6, 6, 4]
    num_feature_ids = [num_word_ids, num_tag_ids, num_label_ids]
    embedding_sizes = [8, 8, 8]
    hidden_layer_sizes = [32, 32]
    # Here we aim to test the iteration of the reader op in a complex network,
    # not the GraphBuilder.
    parser = graph_builder.GreedyParser(num_actions, num_features,
                                        num_feature_ids, embedding_sizes,
                                        hidden_layer_sizes)
    parser.AddTraining(self._task_context,
                       batch_size,
                       corpus_name='training-corpus')
    sess.run(list(parser.inits.values()))
    while True:
      tf_epochs, tf_cost, _ = sess.run([
          parser.training['epochs'], parser.training['cost'],
          parser.training['train_op']
      ])
      num_steps_b += 1
      self.assertGreaterEqual(tf_cost, 0)
      self.assertIn(tf_epochs, [0, 1, 2])
      if tf_epochs > 1:
        break
  # Assert that the two runs made the exact same number of steps.
  logging.info('Number of steps in the two runs: %d, %d', num_steps_a,
               num_steps_b)
  self.assertEqual(num_steps_a, num_steps_b)
def MakeBuilder(self, use_averaging=True, **kw_args):
  """Constructs a GreedyParser with the test's standard configuration.

  A fixed seed together with gate_gradients makes runs reproducible.
  """
  builder_kwargs = dict(
      embedding_sizes=[8, 8, 8],
      hidden_layer_sizes=[32, 32],
      seed=42,
      gate_gradients=True,
      use_averaging=use_averaging,
      **kw_args)
  return graph_builder.GreedyParser(self._num_actions, self._num_features,
                                    self._num_feature_ids, **builder_kwargs)
def Eval(sess):
  """Builds and evaluates a network.

  Args:
    sess: tensorflow session to use.
  """
  # Fix: the docstring used to appear after executable statements, where it is
  # just a no-op string literal; it must be the first statement.
  logging.info('***************%s', FLAGS.arg_prefix)
  task_context = FLAGS.task_context
  task_context = RewriteContext(task_context)
  logging.info(task_context)
  # feature_size reports the trained model's feature geometry and action
  # inventory; task_context is a list here and its first entry is queried.
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context[0],
                                  arg_prefix=FLAGS.arg_prefix))
  # list() so this works on Python 3 too, where map() is a lazy iterator.
  hidden_layer_sizes = list(map(int, FLAGS.hidden_layer_sizes.split(',')))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  for c in task_context:
    parser.AddEvaluation(c,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(list(parser.inits.values()))
  parser.saver.restore(sess, FLAGS.model_path)
  sink_documents = tf.placeholder(tf.string)
  # NOTE(review): `c` is the last context left over from the loop above —
  # presumably the sink is meant to use the final context; confirm when more
  # than one context is configured.
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=c,
                                      corpus_name=FLAGS.output)
  run_parser(sess, parser, sink, sink_documents)
def __init__(self, processconfig):
  """Builds an evaluation network from a process config and restores weights.

  Args:
    processconfig: object carrying all parser settings (task context,
        resource dir, builder type, hidden layer sizes, batch size, model
        path, etc.).
  """
  self._sess = tf.Session()
  self._pg = processconfig
  # File where syntaxnet output will be written.
  self.stdout_file_path = os.path.join(
      os.path.dirname(self._pg.custom_file), 'stdout.tmp')
  self.task_context = self._pg.task_context
  if self._pg.resource_dir:
    self.task_context = RewriteContext(self.task_context,
                                       self._pg.resource_dir)
  # Initiate custom tmp file (truncate/create it empty).
  with open(self._pg.custom_file, 'w') as f:
    pass
  # NOTE(review): the descriptor is opened and immediately closed — looks like
  # it only verifies the file is readable; confirm before simplifying.
  self.fdescr_ = open(self._pg.custom_file, 'r')
  self.fdescr_.close()
  with tf.variable_scope(self._pg.variable_scope):
    # Query the trained model's feature geometry and action count from the
    # task context via the feature_size op.
    feature_sizes, domain_sizes, embedding_dims, num_actions = self._sess.run(
        gen_parser_ops.feature_size(task_context=self.task_context,
                                    arg_prefix=self._pg.arg_prefix))
    if self._pg.graph_builder_ == 'greedy':
      self._parser = graph_builder.GreedyParser(
          num_actions,
          feature_sizes,
          domain_sizes,
          embedding_dims,
          self._pg.hidden_layer_sizes,
          gate_gradients=True,
          arg_prefix=self._pg.arg_prefix)
    else:
      self._parser = structured_graph_builder.StructuredGraphBuilder(
          num_actions,
          feature_sizes,
          domain_sizes,
          embedding_dims,
          self._pg.hidden_layer_sizes,
          gate_gradients=True,
          arg_prefix=self._pg.arg_prefix,
          beam_size=self._pg.beam_size,
          max_steps=self._pg.max_steps)
    self._parser.AddEvaluation(self.task_context,
                               self._pg.batch_size,
                               corpus_name=self._pg.input_,
                               evaluation_max_steps=self._pg.max_steps)
    self._parser.AddSaver(self._pg.slim_model)
    self._sess.run(self._parser.inits.values())
    self._parser.saver.restore(self._sess, self._pg.model_path)
def __init__(self,
             task_context,
             arg_prefix,
             hidden_layer_sizes,
             model_dir,
             model_path,
             in_corpus_name,
             out_corpus_name,
             batch_size,
             max_steps,
             use_slim_model=True):
  """Loads a trained greedy parser model into its own graph for evaluation.

  Args:
    task_context: path to the task context file.
    arg_prefix: prefix selecting the parameter set within the context.
    hidden_layer_sizes: comma-separated string of hidden layer sizes.
    model_dir: directory containing the trained model.
    model_path: model filename relative to model_dir.
    in_corpus_name: corpus name to read input documents from.
    out_corpus_name: corpus name to write output documents to.
    batch_size: number of sentences processed per step.
    max_steps: cap on evaluation steps.
    use_slim_model: whether to save/restore the slim variant of the model.
  """
  self.model_dir = model_dir
  self.task_context, self.in_name = self.RewriteContext(
      task_context, in_corpus_name)
  self.arg_prefix = arg_prefix
  self.graph = tf.Graph()
  self.in_corpus_name = in_corpus_name
  self.out_corpus_name = out_corpus_name
  with self.graph.as_default():
    self.sess = tf.Session()
    # Query the model's feature geometry and action count from the context.
    feature_sizes, domain_sizes, embedding_dims, num_actions = self.sess.run(
        gen_parser_ops.feature_size(task_context=self.task_context,
                                    arg_prefix=self.arg_prefix))
    self.feature_sizes = feature_sizes
    self.domain_sizes = domain_sizes
    self.embedding_dims = embedding_dims
    self.num_actions = num_actions
    # list() so this also works on Python 3, where map() is a lazy iterator
    # that GreedyParser could not index or re-iterate.
    self.hidden_layer_sizes = list(map(int, hidden_layer_sizes.split(',')))
    self.batch_size = batch_size
    self.max_steps = max_steps
    self.use_slim_model = use_slim_model
  with self.graph.as_default():
    self.parser = graph_builder.GreedyParser(
        self.num_actions,
        self.feature_sizes,
        self.domain_sizes,
        self.embedding_dims,
        self.hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=self.arg_prefix)
    self.parser.AddEvaluation(self.task_context,
                              self.batch_size,
                              corpus_name=self.in_corpus_name,
                              evaluation_max_steps=self.max_steps)
    self.parser.AddSaver(self.use_slim_model)
    # list() keeps sess.run happy on Python 3, where dict.values() is a view.
    self.sess.run(list(self.parser.inits.values()))
    self.parser.saver.restore(self.sess,
                              os.path.join(self.model_dir, model_path))
    # NOTE(review): AddEvaluation is invoked a second time after restore —
    # presumably to rebuild/reset the evaluation corpus reader; confirm
    # before removing this apparent duplicate.
    self.parser.AddEvaluation(self.task_context,
                              self.batch_size,
                              corpus_name=self.in_corpus_name,
                              evaluation_max_steps=self.max_steps)
def EvalForever(sess, num_actions, feature_sizes, domain_sizes, embedding_dims):
  """Builds a network and evaluates it in a loop until Eval() signals stop.

  Args:
    sess: tensorflow session to use
    num_actions: number of possible golden actions
    feature_sizes: size of each feature vector
    domain_sizes: number of possible feature ids in each feature vector
    embedding_dims: embedding dimension for each feature group
  """
  # Fix: dropped the unused `t = time.time()`; nothing in this function read
  # it. list() so this works on Python 3, where map() is a lazy iterator.
  hidden_layer_sizes = list(map(int, FLAGS.hidden_layer_sizes.split(',')))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  task_context = FLAGS.task_context
  # Eval() returns falsy when evaluation should stop (e.g. no more input).
  while True:
    if not Eval(sess, parser, task_context):
      break
def EvalForever(sess):
  """Builds and evaluates a network repeatedly until Eval() signals stop.

  Args:
    sess: tensorflow session to use.
  """
  task_context = FLAGS.task_context
  if FLAGS.resource_dir:
    task_context = RewriteContext(task_context)
  # Query the trained model's feature geometry and action count.
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context,
                                  arg_prefix=FLAGS.arg_prefix))
  # Fix: dropped the unused `t = time.time()`; nothing in this function read
  # it. list() so this works on Python 3, where map() is a lazy iterator.
  hidden_layer_sizes = list(map(int, FLAGS.hidden_layer_sizes.split(',')))
  logging.info(
      'Building training network with parameters: feature_sizes: %s '
      'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  # Eval() returns falsy when evaluation should stop (e.g. no more input).
  while True:
    if not Eval(sess, parser, task_context):
      break
def Eval(sess):
  """Builds and evaluates a network.

  Args:
    sess: tensorflow session to use.
  """
  task_context = FLAGS.task_context
  if FLAGS.resource_dir:
    task_context = RewriteContext(task_context)
  # Query the trained model's feature geometry and action count.
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context,
                                  arg_prefix=FLAGS.arg_prefix))
  # Fixes: `LOGGING.info` -> `logging.info` for consistency with the rest of
  # the file (LOGGING is not defined anywhere in view); removed the first of
  # two `t = time.time()` assignments, which was dead; list() around map()
  # for Python 3 compatibility.
  hidden_layer_sizes = list(map(int, FLAGS.hidden_layer_sizes.split(',')))
  logging.info(
      'Building training network with parameters: feature_sizes: %s '
      'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  parser.AddEvaluation(task_context,
                       FLAGS.batch_size,
                       corpus_name=FLAGS.input,
                       evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(list(parser.inits.values()))
  parser.saver.restore(sess, FLAGS.model_path)

  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=task_context,
                                      corpus_name=FLAGS.output)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])
    if len(tf_documents):
      logging.info('Processed %d documents', len(tf_documents))
      num_documents += len(tf_documents)
      sess.run(sink, feed_dict={sink_documents: tf_documents})
    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    # Stop once the reader wraps past the epoch it started in.
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
  logging.info('Total processed documents: %d', num_documents)
  if num_tokens > 0:
    eval_metric = 100.0 * num_correct / num_tokens
    logging.info('num correct tokens: %d', num_correct)
    logging.info('total tokens: %d', num_tokens)
    logging.info(
        'Seconds elapsed in evaluation: %.2f, '
        'eval metric: %.2f%%', time.time() - t, eval_metric)
def Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims):
  """Builds and trains the network.

  Args:
    sess: tensorflow session to use.
    num_actions: number of possible golden actions.
    feature_sizes: size of each feature vector.
    domain_sizes: number of possible feature ids in each feature vector.
    embedding_dims: embedding dimension to use for each feature group.
  """
  t = time.time()
  # NOTE(review): on Python 3 map() returns a lazy iterator; if this ever
  # runs under Python 3, wrap in list() — confirm target interpreter.
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info(
      'Building training network with parameters: feature_sizes: %s '
      'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        seed=int(FLAGS.seed),
        gate_gradients=True,
        averaging_decay=FLAGS.averaging_decay,
        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        seed=int(FLAGS.seed),
        gate_gradients=True,
        averaging_decay=FLAGS.averaging_decay,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  task_context = OutputPath('context')
  # Optionally seed feature group 0 (presumably words — confirm) with
  # pretrained embedding vectors.
  if FLAGS.word_embeddings is not None:
    parser.AddPretrainedEmbeddings(0, FLAGS.word_embeddings, task_context)
  corpus_name = ('projectivized-training-corpus'
                 if FLAGS.projectivize_training_set else FLAGS.training_corpus)
  parser.AddTraining(task_context,
                     FLAGS.batch_size,
                     learning_rate=FLAGS.learning_rate,
                     momentum=FLAGS.momentum,
                     decay_steps=FLAGS.decay_steps,
                     corpus_name=corpus_name)
  parser.AddEvaluation(task_context,
                       FLAGS.batch_size,
                       corpus_name=FLAGS.tuning_corpus)
  parser.AddSaver(FLAGS.slim_model)

  # Save graph.
  if FLAGS.output_path:
    with gfile.FastGFile(OutputPath('graph'), 'w') as f:
      f.write(sess.graph_def.SerializeToString())

  logging.info('Initializing...')
  num_epochs = 0
  cost_sum = 0.0
  num_steps = 0
  best_eval_metric = 0.0
  sess.run(parser.inits.values())

  if FLAGS.pretrained_params is not None:
    logging.info('Loading pretrained params from %s', FLAGS.pretrained_params)
    feed_dict = {'save/Const:0': FLAGS.pretrained_params}
    targets = []
    for node in sess.graph_def.node:
      # Load only the variables listed in --pretrained_params_names by
      # running their saver 'save/Assign*' ops directly against the
      # checkpoint path fed through 'save/Const:0'.
      if (node.name.startswith('save/Assign') and
          node.input[0] in FLAGS.pretrained_params_names.split(',')):
        logging.info('Loading %s with op %s', node.input[0], node.name)
        targets.append(node.name)
    sess.run(targets, feed_dict=feed_dict)

  logging.info('Training...')
  while num_epochs < FLAGS.num_epochs:
    tf_epochs, tf_cost, _ = sess.run([
        parser.training['epochs'], parser.training['cost'],
        parser.training['train_op']
    ])
    num_epochs = tf_epochs
    num_steps += 1
    cost_sum += tf_cost
    # Periodic progress report; cost is averaged over the reporting window.
    if num_steps % FLAGS.report_every == 0:
      logging.info(
          'Epochs: %d, num steps: %d, '
          'seconds elapsed: %.2f, avg cost: %.2f, ', num_epochs, num_steps,
          time.time() - t, cost_sum / FLAGS.report_every)
      cost_sum = 0.0
    # Periodic checkpoint: Eval keeps the best metric seen so far.
    if num_steps % FLAGS.checkpoint_every == 0:
      best_eval_metric = Eval(sess, parser, num_steps, best_eval_metric)