def fill_from_resources(self, resource_path, tf_master=''):
  """Fills in feature sizes and vocabularies using SyntaxNet lexicon.

  Must be called before the spec is ready to be used to build TensorFlow
  graphs. Requires a SyntaxNet lexicon built at the resource_path. Using the
  lexicon, this will call the SyntaxNet custom ops to return the number of
  features and vocabulary sizes based on the FML specifications and the
  lexicons. It will also compute the number of actions of the transition
  system.

  This will often CHECK-fail if the spec doesn't correspond to a valid
  transition system or feature setup.

  Args:
    resource_path: Path to the lexicon.
    tf_master: TensorFlow master executor (string, defaults to '' to use the
      local instance).
  """
  check.IsTrue(
      self.spec.transition_system.registered_name,
      'Set a transition system before calling fill_from_resources().')

  context = lexicon.create_lexicon_context(resource_path)
  for key, value in self.spec.transition_system.parameters.iteritems():
    context.parameter.add(name=key, value=value)

  context.parameter.add(
      name='brain_parser_embedding_dims',
      value=';'.join([str(x.embedding_dim) for x in self.spec.fixed_feature]))
  context.parameter.add(
      name='brain_parser_features',
      value=';'.join([x.fml for x in self.spec.fixed_feature]))
  context.parameter.add(
      name='brain_parser_predicate_maps',
      value=';'.join(['' for x in self.spec.fixed_feature]))
  context.parameter.add(
      name='brain_parser_embedding_names',
      value=';'.join([x.name for x in self.spec.fixed_feature]))
  context.parameter.add(
      name='brain_parser_transition_system',
      value=self.spec.transition_system.registered_name)

  # Propagate information from SyntaxNet C++ backends into the DRAGNN
  # self.spec.
  with tf.Session(tf_master) as sess:
    feature_sizes, domain_sizes, _, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context_str=str(context)))

  self.spec.num_actions = int(num_actions)
  for i in xrange(len(feature_sizes)):
    self.spec.fixed_feature[i].size = int(feature_sizes[i])
    self.spec.fixed_feature[i].vocabulary_size = int(domain_sizes[i])

  for i in xrange(len(self.spec.linked_feature)):
    self.spec.linked_feature[i].size = len(
        self.spec.linked_feature[i].fml.split(' '))

  for resource in context.input:
    self.spec.resource.add(name=resource.name).part.add(
        file_pattern=resource.part[0].file_pattern)
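# Hedged usage sketch for fill_from_resources (assumptions: `spec_builder` is
# a DRAGNN ComponentSpecBuilder-style object exposing this method, and a
# SyntaxNet lexicon has already been built under `lexicon_dir`; both names
# are placeholders, not part of the snippet above).
lexicon_dir = '/tmp/syntaxnet-lexicon'
spec_builder.spec.transition_system.registered_name = 'shift-only'
spec_builder.fill_from_resources(lexicon_dir)
print(spec_builder.spec.num_actions)  # populated from the C++ backend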
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=FLAGS.task_context,
                                    arg_prefix=FLAGS.arg_prefix))
  with tf.Session() as sess:
    EvalForever(sess, num_actions, feature_sizes, domain_sizes,
                embedding_dims)
def __init__(self, action=None):
  self._sess = tf.Session()
  self._variable_scope = action.join(
      random.choice(string.ascii_uppercase + string.digits)
      for _ in range(6))
  arg_prefix = action
  task_context = task_context_path
  print("_init: 0")
  if action == "brain_tagger":
    hidden_layer_sizes = [64]
    model_path = tagger_params_path
    output = 'output-to-file'
    input = 'input-from-file'
  elif action == "brain_parser":
    hidden_layer_sizes = [512, 512]
    model_path = parser_params_path
    output = 'output-to-file-conll'
    input = 'input-from-file-conll'
  else:
    raise Exception("Do not recognize action %s" % action)
  print("_init: 1")
  with tf.variable_scope(self._variable_scope):
    feature_sizes, domain_sizes, embedding_dims, num_actions = self._sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=arg_prefix))
    print("_init: 2")
    beam_size = 8
    max_steps = 1000
    batch_size = 1024
    slim_model = True
    self._parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=arg_prefix,
        beam_size=beam_size,
        max_steps=max_steps)
    print("_init: 3")
    self._parser.AddEvaluation(task_context,
                               batch_size,
                               corpus_name=input,
                               evaluation_max_steps=max_steps)
    print("_init: 4")
    # with tf.Session() as sess:
    self._sess.run(self._parser.inits.values())
    self._parser.AddSaver(slim_model)
    self._parser.saver.restore(self._sess, model_path)
  self._task_context = task_context
  self._output = 'stdout-conll'  # was: output
  print("_init: Done")
def Parse(sess, text):
  """Parses the text."""
  task_context = TASK_CONTEXT
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context,
                                  arg_prefix="brain_parser"))
  t = time.time()
  hidden_layer_sizes = [512, 512]
  parser = structured_graph_builder.StructuredGraphBuilder(
      num_actions,
      feature_sizes,
      domain_sizes,
      embedding_dims,
      hidden_layer_sizes,
      gate_gradients=True,
      arg_prefix="brain_parser",
      beam_size=8,
      max_steps=1000)
  parser.AddEvaluation(task_context,
                       1024,
                       corpus_name="direct-conll",
                       value=text,
                       evaluation_max_steps=1000)
  parser.AddSaver(True)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, MODEL_BASE + "parser-params")

  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.variable_sink(sink_documents,
                                      corpus_name="stdout-conll",
                                      task_context=task_context)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])
    logging.info("TF DOCUMENTS: %s" % tf_documents)
    if len(tf_documents):
      num_documents += len(tf_documents)
      result = sess.run(sink, feed_dict={sink_documents: tf_documents})
      return result
    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
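# Hedged usage sketch for Parse (assumptions: the TASK_CONTEXT and MODEL_BASE
# module globals point at a trained parser model; the sentence is a
# placeholder). Note that the graph is rebuilt on every call, so this is
# expensive if invoked repeatedly.
with tf.Session() as sess:
  conll_output = Parse(sess, 'John loves Mary .')
  print(conll_output)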
def Eval(sess):
  """Builds and evaluates a network."""
  logging.info('***************%s', FLAGS.arg_prefix)
  task_context = FLAGS.task_context
  task_context = RewriteContext(task_context)
  logging.info(task_context)
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context[0],
                                  arg_prefix=FLAGS.arg_prefix))
  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  for c in task_context:
    parser.AddEvaluation(c,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)

  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=c,
                                      corpus_name=FLAGS.output)
  run_parser(sess, parser, sink, sink_documents)
def __init__(self, processconfig):
  """Builds and evaluates a network."""
  self._sess = tf.Session()
  self._pg = processconfig
  # File where syntaxnet output will be written.
  self.stdout_file_path = os.path.join(
      os.path.dirname(self._pg.custom_file), 'stdout.tmp')
  self.task_context = self._pg.task_context
  if self._pg.resource_dir:
    self.task_context = RewriteContext(self.task_context,
                                       self._pg.resource_dir)
  # Initiate custom tmp file.
  with open(self._pg.custom_file, 'w') as f:
    pass
  self.fdescr_ = open(self._pg.custom_file, 'r')
  self.fdescr_.close()
  with tf.variable_scope(self._pg.variable_scope):
    feature_sizes, domain_sizes, embedding_dims, num_actions = self._sess.run(
        gen_parser_ops.feature_size(task_context=self.task_context,
                                    arg_prefix=self._pg.arg_prefix))
    if self._pg.graph_builder_ == 'greedy':
      self._parser = graph_builder.GreedyParser(
          num_actions,
          feature_sizes,
          domain_sizes,
          embedding_dims,
          self._pg.hidden_layer_sizes,
          gate_gradients=True,
          arg_prefix=self._pg.arg_prefix)
    else:
      self._parser = structured_graph_builder.StructuredGraphBuilder(
          num_actions,
          feature_sizes,
          domain_sizes,
          embedding_dims,
          self._pg.hidden_layer_sizes,
          gate_gradients=True,
          arg_prefix=self._pg.arg_prefix,
          beam_size=self._pg.beam_size,
          max_steps=self._pg.max_steps)
    self._parser.AddEvaluation(self.task_context,
                               self._pg.batch_size,
                               corpus_name=self._pg.input_,
                               evaluation_max_steps=self._pg.max_steps)
    self._parser.AddSaver(self._pg.slim_model)
    self._sess.run(self._parser.inits.values())
    self._parser.saver.restore(self._sess, self._pg.model_path)
def __init__(self,
             task_context,
             arg_prefix,
             hidden_layer_sizes,
             model_dir,
             model_path,
             in_corpus_name,
             out_corpus_name,
             batch_size,
             max_steps,
             use_slim_model=True):
  self.model_dir = model_dir
  self.task_context, self.in_name = self.RewriteContext(
      task_context, in_corpus_name)
  self.arg_prefix = arg_prefix
  self.graph = tf.Graph()
  self.in_corpus_name = in_corpus_name
  self.out_corpus_name = out_corpus_name
  with self.graph.as_default():
    self.sess = tf.Session()
    feature_sizes, domain_sizes, embedding_dims, num_actions = self.sess.run(
        gen_parser_ops.feature_size(task_context=self.task_context,
                                    arg_prefix=self.arg_prefix))
  self.feature_sizes = feature_sizes
  self.domain_sizes = domain_sizes
  self.embedding_dims = embedding_dims
  self.num_actions = num_actions
  self.hidden_layer_sizes = map(int, hidden_layer_sizes.split(','))
  self.batch_size = batch_size
  self.max_steps = max_steps
  self.use_slim_model = use_slim_model
  with self.graph.as_default():
    self.parser = graph_builder.GreedyParser(
        self.num_actions,
        self.feature_sizes,
        self.domain_sizes,
        self.embedding_dims,
        self.hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=self.arg_prefix)
    self.parser.AddEvaluation(self.task_context,
                              self.batch_size,
                              corpus_name=self.in_corpus_name,
                              evaluation_max_steps=self.max_steps)
    self.parser.AddSaver(self.use_slim_model)
    self.sess.run(self.parser.inits.values())
    self.parser.saver.restore(self.sess,
                              os.path.join(self.model_dir, model_path))
    self.parser.AddEvaluation(self.task_context,
                              self.batch_size,
                              corpus_name=self.in_corpus_name,
                              evaluation_max_steps=self.max_steps)
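# Hedged usage sketch (the wrapper's class name is not shown in the snippet;
# `SyntaxNetModel` and the paths/corpus names below are placeholders).
# hidden_layer_sizes is the comma-separated string the constructor splits.
model = SyntaxNetModel(
    task_context='models/syntaxnet/context.pbtxt',
    arg_prefix='brain_tagger',
    hidden_layer_sizes='64',
    model_dir='models/syntaxnet',
    model_path='tagger-params',
    in_corpus_name='stdin',
    out_corpus_name='stdout-conll',
    batch_size=32,
    max_steps=1000)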
def setUp(self):
  # Creates a task context with the correct testing paths.
  initial_task_context = os.path.join(
      FLAGS.test_srcdir, "syntaxnet/testdata/context.pbtxt")
  self._task_context = os.path.join(FLAGS.test_tmpdir, "context.pbtxt")
  with open(initial_task_context, "r") as fin:
    with open(self._task_context, "w") as fout:
      fout.write(fin.read().replace("SRCDIR", FLAGS.test_srcdir)
                 .replace("OUTPATH", FLAGS.test_tmpdir))

  # Creates necessary term maps.
  with self.test_session() as sess:
    gen_parser_ops.lexicon_builder(task_context=self._task_context,
                                   corpus_name="training-corpus").run()
    self._num_features, self._num_feature_ids, _, self._num_actions = (
        sess.run(gen_parser_ops.feature_size(task_context=self._task_context,
                                             arg_prefix="brain_parser")))
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))

  # Rewrite context.
  RewriteContext()

  # Creates necessary term maps.
  if FLAGS.compute_lexicon:
    logging.info('Computing lexicon...')
    with tf.Session(FLAGS.tf_master) as sess:
      gen_parser_ops.lexicon_builder(task_context=OutputPath('context'),
                                     corpus_name=FLAGS.training_corpus).run()
  with tf.Session(FLAGS.tf_master) as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                    arg_prefix=FLAGS.arg_prefix))

  # Well formed and projectivize.
  if FLAGS.projectivize_training_set:
    logging.info('Preprocessing...')
    with tf.Session(FLAGS.tf_master) as sess:
      source, last = gen_parser_ops.document_source(
          task_context=OutputPath('context'),
          batch_size=FLAGS.batch_size,
          corpus_name=FLAGS.training_corpus)
      sink = gen_parser_ops.document_sink(
          task_context=OutputPath('context'),
          corpus_name='projectivized-training-corpus',
          documents=gen_parser_ops.projectivize_filter(
              gen_parser_ops.well_formed_filter(
                  source, task_context=OutputPath('context')),
              task_context=OutputPath('context')))
      while True:
        tf_last, _ = sess.run([last, sink])
        if tf_last:
          break

  logging.info('Training...')
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
def __init__(self, cfg):
  super(ProcessorSyntaxNet, self).__init__()

  self.parser_ = None
  self.task_context_ = RewriteContext(task_context_file)
  self.sess_ = tf.Session()
  self.cfg_ = cfg

  with open(self.cfg_.custom_file_path, 'w') as f:
    pass
  self.fdescr_ = open(self.cfg_.custom_file_path, 'r')

  hidden_layer_sizes = map(int, self.cfg_.hidden_layer_str.split(','))

  with tf.variable_scope(self.cfg_.variable_scope):
    feature_sizes, domain_sizes, embedding_dims, num_actions = self.sess_.run(
        gen_parser_ops.feature_size(task_context=self.task_context_,
                                    arg_prefix=self.cfg_.arg_prefix))
    self.parser_ = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=self.cfg_.arg_prefix,
        beam_size=self.cfg_.beam_size,
        max_steps=self.cfg_.max_steps)
    self.parser_.AddEvaluation(
        self.task_context_,
        self.cfg_.batch_size,
        corpus_name=self.cfg_.input_str,
        evaluation_max_steps=self.cfg_.max_steps)
    self.parser_.AddSaver(self.cfg_.slim_model)
    self.sess_.run(self.parser_.inits.values())
    self.parser_.saver.restore(self.sess_, self.cfg_.model_path)

  self.parse(self.cfg_.init_line)
def MakeGraph(self, max_steps=10, beam_size=2, batch_size=1, **kwargs):
  """Constructs a structured learning graph."""
  assert max_steps > 0, 'Empty network not supported.'
  logging.info('MakeGraph + %s', kwargs)

  with self.test_session(graph=tf.Graph()) as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=self._task_context))
    embedding_dims = [8, 8, 8]
    hidden_layer_sizes = []
    learning_rate = 0.01
    builder = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        seed=1,
        max_steps=max_steps,
        beam_size=beam_size,
        gate_gradients=True,
        use_locking=True,
        use_averaging=False,
        check_parameters=False,
        **kwargs)
    builder.AddTraining(self._task_context,
                        batch_size,
                        learning_rate=learning_rate,
                        decay_steps=1000,
                        momentum=0.9,
                        corpus_name='training-corpus')
    builder.AddEvaluation(self._task_context,
                          batch_size,
                          evaluation_max_steps=25,
                          corpus_name=None)
    builder.training['inits'] = tf.group(*builder.inits.values(),
                                         name='inits')
    return builder
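# Hedged usage sketch inside the enclosing tf.test.TestCase subclass
# (assumption: setUp prepared self._task_context as in the setUp snippet
# above; the 'inits' group is the one attached on MakeGraph's last line).
builder = self.MakeGraph(max_steps=10, beam_size=2, batch_size=1)
with self.test_session() as sess:
  sess.run(builder.training['inits'])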
def EvalForever(sess):
  """Builds and evaluates a network."""
  task_context = FLAGS.task_context
  if FLAGS.resource_dir:
    task_context = RewriteContext(task_context)
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context,
                                  arg_prefix=FLAGS.arg_prefix))
  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  while True:
    if not Eval(sess, parser, task_context):
      break
def RewriteContext(task_context_file):
  # NOTE: this snippet arrived truncated; the function header and the two
  # lines building `context` are reconstructed to match the standard
  # SyntaxNet pattern (assumptions: a task_spec_pb2 import and a module-level
  # resource_dir are available).
  context = task_spec_pb2.TaskSpec()
  with open(task_context_file) as fin:
    text_format.Merge(fin.read(), context)
  for resource in context.input:
    for part in resource.part:
      if part.file_pattern != '-':
        part.file_pattern = os.path.join(resource_dir, part.file_pattern)
  with tempfile.NamedTemporaryFile(delete=False) as fout:
    fout.write(str(context))
  return fout.name


sess = tf.Session()
task_context = RewriteContext(context_path)
feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
    gen_parser_ops.feature_size(task_context=task_context,
                                arg_prefix=tagger_arg_prefix))

hidden_layer_sizes = map(int, tagger_hidden_layer_sizes.split(','))
tagger = structured_graph_builder.StructuredGraphBuilder(
    num_actions,
    feature_sizes,
    domain_sizes,
    embedding_dims,
    hidden_layer_sizes,
    gate_gradients=True,
    arg_prefix=tagger_arg_prefix,
    beam_size=beam_size,
    max_steps=max_steps)
tagger.AddEvaluation(task_context,
                     batch_size,
                     corpus_name=input_style,
                     evaluation_max_steps=max_steps)
def Eval(sess):
  """Builds and evaluates a network."""
  task_context = FLAGS.task_context
  if FLAGS.resource_dir:
    task_context = RewriteContext(task_context)
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context,
                                  arg_prefix=FLAGS.arg_prefix))
  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  parser.AddEvaluation(task_context,
                       FLAGS.batch_size,
                       corpus_name=FLAGS.input,
                       evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)

  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=task_context,
                                      corpus_name=FLAGS.output)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])
    if len(tf_documents):
      logging.info('Processed %d documents', len(tf_documents))
      num_documents += len(tf_documents)
      sess.run(sink, feed_dict={sink_documents: tf_documents})
    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
  logging.info('Total processed documents: %d', num_documents)
  if num_tokens > 0:
    eval_metric = 100.0 * num_correct / num_tokens
    logging.info('num correct tokens: %d', num_correct)
    logging.info('total tokens: %d', num_tokens)
    logging.info('Seconds elapsed in evaluation: %.2f, '
                 'eval metric: %.2f%%', time.time() - t, eval_metric)
def GetFeatureSize(task_context, arg_prefix):
  with tf.variable_scope("fs_" + arg_prefix):
    with tf.Session() as sess:
      return sess.run(
          gen_parser_ops.feature_size(task_context=task_context,
                                      arg_prefix=arg_prefix))
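# Hedged usage sketch for GetFeatureSize (the context path is a placeholder;
# the op returns the four values in the order unpacked throughout this file).
feature_sizes, domain_sizes, embedding_dims, num_actions = GetFeatureSize(
    'models/syntaxnet/context.pbtxt', 'brain_parser')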
def _perform_action(action=None):
  arg_prefix = action
  task_context = task_context_path
  if action == "brain_tagger":
    hidden_layer_sizes = [64]
    model_path = tagger_params_path
    output = 'output-to-file'
    input = 'input-from-file'
  elif action == "brain_parser":
    hidden_layer_sizes = [512, 512]
    model_path = parser_params_path
    output = 'output-to-file-conll'
    input = 'input-from-file-conll'
  else:
    raise Exception("Do not recognize action %s" % action)

  with tf.Session() as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=arg_prefix))

  beam_size = 8
  max_steps = 1000
  batch_size = 1024
  slim_model = True
  parser = structured_graph_builder.StructuredGraphBuilder(
      num_actions,
      feature_sizes,
      domain_sizes,
      embedding_dims,
      hidden_layer_sizes,
      gate_gradients=True,
      arg_prefix=arg_prefix,
      beam_size=beam_size,
      max_steps=max_steps)
  parser.AddEvaluation(task_context,
                       batch_size,
                       corpus_name=input,
                       evaluation_max_steps=max_steps)

  with tf.Session() as sess:
    parser.AddSaver(slim_model)
    sess.run(parser.inits.values())
    parser.saver.restore(sess, model_path)

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=task_context,
                                        corpus_name=output)
    t = time.time()
    num_epochs = None
    num_tokens = 0
    num_correct = 0
    num_documents = 0
    while True:
      tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
          parser.evaluation['epochs'],
          parser.evaluation['eval_metrics'],
          parser.evaluation['documents'],
      ])
      if len(tf_documents):
        logging.info('Processed %d documents', len(tf_documents))
        num_documents += len(tf_documents)
        sess.run(sink, feed_dict={sink_documents: tf_documents})
      num_tokens += tf_eval_metrics[0]
      num_correct += tf_eval_metrics[1]
      if num_epochs is None:
        num_epochs = tf_eval_epochs
      elif num_epochs < tf_eval_epochs:
        break
    logging.info('Total processed documents: %d', num_documents)
    if num_tokens > 0:
      eval_metric = 100.0 * num_correct / num_tokens
      logging.info('num correct tokens: %d', num_correct)
      logging.info('total tokens: %d', num_tokens)
      logging.info('Seconds elapsed in evaluation: %.2f, '
                   'eval metric: %.2f%%', time.time() - t, eval_metric)