Example 1
  def fill_from_resources(self, resource_path, tf_master=''):
    """Fills in feature sizes and vocabularies using SyntaxNet lexicon.

    Must be called before the spec is ready to be used to build TensorFlow
    graphs. Requires a SyntaxNet lexicon built at the resource_path. Using the
    lexicon, this will call the SyntaxNet custom ops to return the number of
    features and vocabulary sizes based on the FML specifications and the
    lexicons. It will also compute the number of actions of the transition
    system.

    This will often CHECK-fail if the spec doesn't correspond to a valid
    transition system or feature setup.

    Args:
      resource_path: Path to the lexicon.
      tf_master: TensorFlow master executor (string, defaults to '' to use the
        local instance).
    """
    check.IsTrue(
        self.spec.transition_system.registered_name,
        'Set a transition system before calling fill_from_resources().')

    context = lexicon.create_lexicon_context(resource_path)
    for key, value in self.spec.transition_system.parameters.iteritems():
      context.parameter.add(name=key, value=value)

    context.parameter.add(
        name='brain_parser_embedding_dims',
        value=';'.join(
            [str(x.embedding_dim) for x in self.spec.fixed_feature]))
    context.parameter.add(
        name='brain_parser_features',
        value=';'.join([x.fml for x in self.spec.fixed_feature]))
    context.parameter.add(
        name='brain_parser_predicate_maps',
        value=';'.join(['' for x in self.spec.fixed_feature]))
    context.parameter.add(
        name='brain_parser_embedding_names',
        value=';'.join([x.name for x in self.spec.fixed_feature]))
    context.parameter.add(
        name='brain_parser_transition_system',
        value=self.spec.transition_system.registered_name)

    # Propagate information from SyntaxNet C++ backends into the DRAGNN
    # self.spec.
    with tf.Session(tf_master) as sess:
      feature_sizes, domain_sizes, _, num_actions = sess.run(
          gen_parser_ops.feature_size(task_context_str=str(context)))
      self.spec.num_actions = int(num_actions)
      for i in xrange(len(feature_sizes)):
        self.spec.fixed_feature[i].size = int(feature_sizes[i])
        self.spec.fixed_feature[i].vocabulary_size = int(domain_sizes[i])

    for i in xrange(len(self.spec.linked_feature)):
      self.spec.linked_feature[i].size = len(
          self.spec.linked_feature[i].fml.split(' '))

    for resource in context.input:
      self.spec.resource.add(name=resource.name).part.add(
          file_pattern=resource.part[0].file_pattern)
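The method above is the DRAGNN-side wrapper around the SyntaxNet feature_size op: it assembles a lexicon task context, runs the op once, and copies the feature counts, vocabulary sizes and action count back into the ComponentSpec. A minimal usage sketch follows; the ComponentSpecBuilder configuration, corpus path and lexicon directory are assumptions for illustration (based on the DRAGNN tutorials), not taken from the snippet itself.

# Hypothetical usage sketch; paths and feature/transition choices are placeholders.
from dragnn.python import lexicon
from dragnn.python import spec_builder

lexicon_dir = '/tmp/lexicon'           # assumed lexicon output directory
training_corpus = '/tmp/train.conll'   # assumed CoNLL training corpus
lexicon.build_lexicon(lexicon_dir, training_corpus)  # build the SyntaxNet lexicon

parser = spec_builder.ComponentSpecBuilder('parser')
parser.set_network_unit(name='FeedForwardNetwork', hidden_layer_sizes='256')
parser.set_transition_system(name='arc-standard')
parser.add_fixed_feature(name='words', fml='input.word', embedding_dim=64)

# Queries the SyntaxNet ops and fills in sizes/vocabularies on parser.spec.
parser.fill_from_resources(lexicon_dir, tf_master='')
print(parser.spec.num_actions)
print([f.vocabulary_size for f in parser.spec.fixed_feature])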
Example 2
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=FLAGS.task_context,
                                    arg_prefix=FLAGS.arg_prefix))

  with tf.Session() as sess:
    EvalForever(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
Example 3
    def __init__(self, action=None):
        self._sess = tf.Session()
        self._variable_scope = action.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(6))
        arg_prefix = action
        task_context = task_context_path
        print("_init: 0")
        if action == "brain_tagger":
            hidden_layer_sizes = [64]
            model_path = tagger_params_path
            output = 'output-to-file'
            input = 'input-from-file'
        elif action == "brain_parser":
            hidden_layer_sizes = [512, 512]
            model_path = parser_params_path
            output = 'output-to-file-conll'
            input = 'input-from-file-conll'
        else:
            raise Exception("Do not recognize action %s" % action)

        print("_init: 1")
        with tf.variable_scope(self._variable_scope):
            feature_sizes, domain_sizes, embedding_dims, num_actions = self._sess.run(
                gen_parser_ops.feature_size(task_context=task_context,
                                            arg_prefix=arg_prefix))
            print("_init: 2")
            beam_size = 8
            max_steps = 1000
            batch_size = 1024
            slim_model = True

            self._parser = structured_graph_builder.StructuredGraphBuilder(
                num_actions,
                feature_sizes,
                domain_sizes,
                embedding_dims,
                hidden_layer_sizes,
                gate_gradients=True,
                arg_prefix=arg_prefix,
                beam_size=beam_size,
                max_steps=max_steps)

            print("_init: 3")
            self._parser.AddEvaluation(task_context,
                                       batch_size,
                                       corpus_name=input,
                                       evaluation_max_steps=max_steps)
            print("_init: 4")
            # with tf.Session() as sess:
            self._sess.run(self._parser.inits.values())
            self._parser.AddSaver(slim_model)
            self._parser.saver.restore(self._sess, model_path)

            self._task_context = task_context
            self._output = 'stdout-conll'  #output
        print("_init: Done")
Example 4
def Parse(sess, text):
    """Parses the text"""
    task_context = TASK_CONTEXT
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix="brain_parser"))
    t = time.time()
    hidden_layer_sizes = [512, 512]

    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix="brain_parser",
        beam_size=8,
        max_steps=1000)

    parser.AddEvaluation(task_context,
                         1024,
                         corpus_name="direct-conll",
                         value=text,
                         evaluation_max_steps=1000)

    parser.AddSaver(True)
    sess.run(parser.inits.values())
    parser.saver.restore(sess, MODEL_BASE + "parser-params")

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.variable_sink(sink_documents,
                                        corpus_name="stdout-conll",
                                        task_context=task_context)
    t = time.time()
    num_epochs = None
    num_tokens = 0
    num_correct = 0
    num_documents = 0
    while True:
        tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
            parser.evaluation['epochs'],
            parser.evaluation['eval_metrics'],
            parser.evaluation['documents'],
        ])
        logging.info("TF DOCUMENTS: %s" % tf_documents)
        if len(tf_documents):
            num_documents += len(tf_documents)
            result = sess.run(sink, feed_dict={sink_documents: tf_documents})
            return result

        num_tokens += tf_eval_metrics[0]
        num_correct += tf_eval_metrics[1]
        if num_epochs is None:
            num_epochs = tf_eval_epochs
        elif num_epochs < tf_eval_epochs:
            break
Example 5
def Eval(sess):
  """Builds and evaluates a network."""
  logging.info('***************%s', FLAGS.arg_prefix)
  task_context = FLAGS.task_context
  task_context = RewriteContext(task_context)
  logging.info(task_context)

  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context[0],
                                  arg_prefix=FLAGS.arg_prefix))

  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)

  for c in task_context:
    parser.AddEvaluation(c,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)


    parser.AddSaver(FLAGS.slim_model)
    sess.run(parser.inits.values())
    parser.saver.restore(sess, FLAGS.model_path)

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=c,
                                        corpus_name=FLAGS.output)

    run_parser(sess, parser, sink, sink_documents)
Example 6
    def __init__(self, processconfig):
        """Builds and evaluates a network."""
        self._sess = tf.Session()
        self._pg = processconfig
        self.stdout_file_path = os.path.join(
            os.path.dirname(self._pg.custom_file),
            'stdout.tmp')  # File where syntaxnet output will be written
        self.task_context = self._pg.task_context
        if self._pg.resource_dir:
            self.task_context = RewriteContext(self.task_context,
                                               self._pg.resource_dir)

        # Create an empty custom tmp file
        with open(self._pg.custom_file, 'w') as f:
            pass
        self.fdescr_ = open(self._pg.custom_file, 'r')
        self.fdescr_.close()

        with tf.variable_scope(self._pg.variable_scope):
            feature_sizes, domain_sizes, embedding_dims, num_actions = self._sess.run(
                gen_parser_ops.feature_size(task_context=self.task_context,
                                            arg_prefix=self._pg.arg_prefix))

            if self._pg.graph_builder_ == 'greedy':
                self._parser = graph_builder.GreedyParser(
                    num_actions,
                    feature_sizes,
                    domain_sizes,
                    embedding_dims,
                    self._pg.hidden_layer_sizes,
                    gate_gradients=True,
                    arg_prefix=self._pg.arg_prefix)
            else:
                self._parser = structured_graph_builder.StructuredGraphBuilder(
                    num_actions,
                    feature_sizes,
                    domain_sizes,
                    embedding_dims,
                    self._pg.hidden_layer_sizes,
                    gate_gradients=True,
                    arg_prefix=self._pg.arg_prefix,
                    beam_size=self._pg.beam_size,
                    max_steps=self._pg.max_steps)
            self._parser.AddEvaluation(self.task_context,
                                       self._pg.batch_size,
                                       corpus_name=self._pg.input_,
                                       evaluation_max_steps=self._pg.max_steps)
            self._parser.AddSaver(self._pg.slim_model)
            self._sess.run(self._parser.inits.values())
            self._parser.saver.restore(self._sess, self._pg.model_path)
Example 7
 def __init__(self,
              task_context,
              arg_prefix,
              hidden_layer_sizes,
              model_dir,
              model_path,
              in_corpus_name,
              out_corpus_name,
              batch_size,
              max_steps,
              use_slim_model=True):
     self.model_dir = model_dir
     self.task_context, self.in_name = self.RewriteContext(
         task_context, in_corpus_name)
     self.arg_prefix = arg_prefix
     self.graph = tf.Graph()
     self.in_corpus_name = in_corpus_name
     self.out_corpus_name = out_corpus_name
     with self.graph.as_default():
         self.sess = tf.Session()
         feature_sizes, domain_sizes, embedding_dims, num_actions = self.sess.run(
             gen_parser_ops.feature_size(task_context=self.task_context,
                                         arg_prefix=self.arg_prefix))
     self.feature_sizes = feature_sizes
     self.domain_sizes = domain_sizes
     self.embedding_dims = embedding_dims
     self.num_actions = num_actions
     self.hidden_layer_sizes = map(int, hidden_layer_sizes.split(','))
     self.batch_size = batch_size
     self.max_steps = max_steps
     self.use_slim_model = use_slim_model
     with self.graph.as_default():
         self.parser = graph_builder.GreedyParser(
             self.num_actions,
             self.feature_sizes,
             self.domain_sizes,
             self.embedding_dims,
             self.hidden_layer_sizes,
             gate_gradients=True,
             arg_prefix=self.arg_prefix)
         self.parser.AddEvaluation(self.task_context,
                                   self.batch_size,
                                   corpus_name=self.in_corpus_name,
                                   evaluation_max_steps=self.max_steps)
         self.parser.AddSaver(self.use_slim_model)
         self.sess.run(self.parser.inits.values())
         self.parser.saver.restore(self.sess,
                                   os.path.join(self.model_dir, model_path))
         self.parser.AddEvaluation(self.task_context,
                                   self.batch_size,
                                   corpus_name=self.in_corpus_name,
                                   evaluation_max_steps=self.max_steps)
Example 8
    def setUp(self):
        # Creates a task context with the correct testing paths.
        initial_task_context = os.path.join(FLAGS.test_srcdir, "syntaxnet/" "testdata/context.pbtxt")
        self._task_context = os.path.join(FLAGS.test_tmpdir, "context.pbtxt")
        with open(initial_task_context, "r") as fin:
            with open(self._task_context, "w") as fout:
                fout.write(fin.read().replace("SRCDIR", FLAGS.test_srcdir).replace("OUTPATH", FLAGS.test_tmpdir))

        # Creates necessary term maps.
        with self.test_session() as sess:
            gen_parser_ops.lexicon_builder(task_context=self._task_context, corpus_name="training-corpus").run()
            self._num_features, self._num_feature_ids, _, self._num_actions = sess.run(
                gen_parser_ops.feature_size(task_context=self._task_context, arg_prefix="brain_parser")
            )
Example 9
  def setUp(self):
    # Creates a task context with the correct testing paths.
    initial_task_context = os.path.join(FLAGS.test_srcdir,
                                        'syntaxnet/'
                                        'testdata/context.pbtxt')
    self._task_context = os.path.join(FLAGS.test_tmpdir, 'context.pbtxt')
    with open(initial_task_context, 'r') as fin:
      with open(self._task_context, 'w') as fout:
        fout.write(fin.read().replace('SRCDIR', FLAGS.test_srcdir)
                   .replace('OUTPATH', FLAGS.test_tmpdir))

    # Creates necessary term maps.
    with self.test_session() as sess:
      gen_parser_ops.lexicon_builder(task_context=self._task_context,
                                     corpus_name='training-corpus').run()
      self._num_features, self._num_feature_ids, _, self._num_actions = (
          sess.run(gen_parser_ops.feature_size(task_context=self._task_context,
                                               arg_prefix='brain_parser')))
Example 10
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))

  # Rewrite context.
  RewriteContext()

  # Creates necessary term maps.
  if FLAGS.compute_lexicon:
    logging.info('Computing lexicon...')
    with tf.Session(FLAGS.tf_master) as sess:
      gen_parser_ops.lexicon_builder(task_context=OutputPath('context'),
                                     corpus_name=FLAGS.training_corpus).run()
  with tf.Session(FLAGS.tf_master) as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                    arg_prefix=FLAGS.arg_prefix))

  # Well formed and projectivize.
  if FLAGS.projectivize_training_set:
    logging.info('Preprocessing...')
    with tf.Session(FLAGS.tf_master) as sess:
      source, last = gen_parser_ops.document_source(
          task_context=OutputPath('context'),
          batch_size=FLAGS.batch_size,
          corpus_name=FLAGS.training_corpus)
      sink = gen_parser_ops.document_sink(
          task_context=OutputPath('context'),
          corpus_name='projectivized-training-corpus',
          documents=gen_parser_ops.projectivize_filter(
              gen_parser_ops.well_formed_filter(source,
                                                task_context=OutputPath(
                                                    'context')),
              task_context=OutputPath('context')))
      while True:
        tf_last, _ = sess.run([last, sink])
        if tf_last:
          break

  logging.info('Training...')
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
Example 11
def main(unused_argv):
    logging.set_verbosity(logging.INFO)
    if not gfile.IsDirectory(OutputPath('')):
        gfile.MakeDirs(OutputPath(''))

    # Rewrite context.
    RewriteContext()

    # Creates necessary term maps.
    if FLAGS.compute_lexicon:
        logging.info('Computing lexicon...')
        with tf.Session(FLAGS.tf_master) as sess:
            gen_parser_ops.lexicon_builder(
                task_context=OutputPath('context'),
                corpus_name=FLAGS.training_corpus).run()
    with tf.Session(FLAGS.tf_master) as sess:
        feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
            gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                        arg_prefix=FLAGS.arg_prefix))

    # Well formed and projectivize.
    if FLAGS.projectivize_training_set:
        logging.info('Preprocessing...')
        with tf.Session(FLAGS.tf_master) as sess:
            source, last = gen_parser_ops.document_source(
                task_context=OutputPath('context'),
                batch_size=FLAGS.batch_size,
                corpus_name=FLAGS.training_corpus)
            sink = gen_parser_ops.document_sink(
                task_context=OutputPath('context'),
                corpus_name='projectivized-training-corpus',
                documents=gen_parser_ops.projectivize_filter(
                    gen_parser_ops.well_formed_filter(
                        source, task_context=OutputPath('context')),
                    task_context=OutputPath('context')))
            while True:
                tf_last, _ = sess.run([last, sink])
                if tf_last:
                    break

    logging.info('Training...')
    with tf.Session(FLAGS.tf_master) as sess:
        Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
Example 12
    def __init__(self, cfg):
        super(ProcessorSyntaxNet, self).__init__()

        self.parser_ = None
        self.task_context_ = RewriteContext(task_context_file)
        self.sess_ = tf.Session()
        self.cfg_ = cfg

        with open(self.cfg_.custom_file_path, 'w') as f:
            pass

        self.fdescr_ = open(self.cfg_.custom_file_path, 'r')

        hidden_layer_sizes = map(int, self.cfg_.hidden_layer_str.split(','))

        with tf.variable_scope(self.cfg_.variable_scope):
            feature_sizes, domain_sizes, embedding_dims, num_actions = self.sess_.run(
                gen_parser_ops.feature_size(task_context=self.task_context_,
                                            arg_prefix=self.cfg_.arg_prefix))

            self.parser_ = structured_graph_builder.StructuredGraphBuilder(
                num_actions,
                feature_sizes,
                domain_sizes,
                embedding_dims,
                hidden_layer_sizes,
                gate_gradients=True,
                arg_prefix=self.cfg_.arg_prefix,
                beam_size=self.cfg_.beam_size,
                max_steps=self.cfg_.max_steps)

            self.parser_.AddEvaluation(
                self.task_context_,
                self.cfg_.batch_size,
                corpus_name=self.cfg_.input_str,
                evaluation_max_steps=self.cfg_.max_steps)

            self.parser_.AddSaver(self.cfg_.slim_model)
            self.sess_.run(self.parser_.inits.values())
            self.parser_.saver.restore(self.sess_, self.cfg_.model_path)

            self.parse(self.cfg_.init_line)
Example 13
  def MakeGraph(self,
                max_steps=10,
                beam_size=2,
                batch_size=1,
                **kwargs):
    """Constructs a structured learning graph."""
    assert max_steps > 0, 'Empty network not supported.'

    logging.info('MakeGraph + %s', kwargs)

    with self.test_session(graph=tf.Graph()) as sess:
      feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
          gen_parser_ops.feature_size(task_context=self._task_context))
    embedding_dims = [8, 8, 8]
    hidden_layer_sizes = []
    learning_rate = 0.01
    builder = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        seed=1,
        max_steps=max_steps,
        beam_size=beam_size,
        gate_gradients=True,
        use_locking=True,
        use_averaging=False,
        check_parameters=False,
        **kwargs)
    builder.AddTraining(self._task_context,
                        batch_size,
                        learning_rate=learning_rate,
                        decay_steps=1000,
                        momentum=0.9,
                        corpus_name='training-corpus')
    builder.AddEvaluation(self._task_context,
                          batch_size,
                          evaluation_max_steps=25,
                          corpus_name=None)
    builder.training['inits'] = tf.group(*builder.inits.values(), name='inits')
    return builder
Example 14
def EvalForever(sess):
    """Builds and evaluates a network."""
    task_context = FLAGS.task_context
    if FLAGS.resource_dir:
        task_context = RewriteContext(task_context)
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=FLAGS.arg_prefix))

    t = time.time()
    hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
    logging.info(
        'Building training network with parameters: feature_sizes: %s '
        'domain_sizes: %s', feature_sizes, domain_sizes)
    if FLAGS.graph_builder == 'greedy':
        parser = graph_builder.GreedyParser(num_actions,
                                            feature_sizes,
                                            domain_sizes,
                                            embedding_dims,
                                            hidden_layer_sizes,
                                            gate_gradients=True,
                                            arg_prefix=FLAGS.arg_prefix)
    else:
        parser = structured_graph_builder.StructuredGraphBuilder(
            num_actions,
            feature_sizes,
            domain_sizes,
            embedding_dims,
            hidden_layer_sizes,
            gate_gradients=True,
            arg_prefix=FLAGS.arg_prefix,
            beam_size=FLAGS.beam_size,
            max_steps=FLAGS.max_steps)
    while True:
        if not Eval(sess, parser, task_context):
            break
Example 15
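# Note: this snippet begins mid-way through a RewriteContext() helper; the
# opening lines (creating the `context` proto and opening the task context
# file as `fin`) are cut off in the listing.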
        text_format.Merge(fin.read(), context)
    for resource in context.input:
        for part in resource.part:
            if part.file_pattern != '-':
                part.file_pattern = os.path.join(resource_dir,
                                                 part.file_pattern)
    with tempfile.NamedTemporaryFile(delete=False) as fout:
        fout.write(str(context))
        return fout.name


sess = tf.Session()

task_context = RewriteContext(context_path)
feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
    gen_parser_ops.feature_size(task_context=task_context,
                                arg_prefix=tagger_arg_prefix))
hidden_layer_sizes = map(int, tagger_hidden_layer_sizes.split(','))
tagger = structured_graph_builder.StructuredGraphBuilder(
    num_actions,
    feature_sizes,
    domain_sizes,
    embedding_dims,
    hidden_layer_sizes,
    gate_gradients=True,
    arg_prefix=tagger_arg_prefix,
    beam_size=beam_size,
    max_steps=max_steps)
tagger.AddEvaluation(task_context,
                     batch_size,
                     corpus_name=input_style,
                     evaluation_max_steps=max_steps)
Example 16
def Eval(sess):
    """Builds and evaluates a network."""
    task_context = FLAGS.task_context
    if FLAGS.resource_dir:
        task_context = RewriteContext(task_context)
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=FLAGS.arg_prefix))

    t = time.time()
    hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
    LOGGING.info(
        'Building training network with parameters: feature_sizes: %s '
        'domain_sizes: %s', feature_sizes, domain_sizes)
    if FLAGS.graph_builder == 'greedy':
        parser = graph_builder.GreedyParser(num_actions,
                                            feature_sizes,
                                            domain_sizes,
                                            embedding_dims,
                                            hidden_layer_sizes,
                                            gate_gradients=True,
                                            arg_prefix=FLAGS.arg_prefix)
    else:
        parser = structured_graph_builder.StructuredGraphBuilder(
            num_actions,
            feature_sizes,
            domain_sizes,
            embedding_dims,
            hidden_layer_sizes,
            gate_gradients=True,
            arg_prefix=FLAGS.arg_prefix,
            beam_size=FLAGS.beam_size,
            max_steps=FLAGS.max_steps)
    parser.AddEvaluation(task_context,
                         FLAGS.batch_size,
                         corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)

    parser.AddSaver(FLAGS.slim_model)
    sess.run(parser.inits.values())
    parser.saver.restore(sess, FLAGS.model_path)

    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=task_context,
                                        corpus_name=FLAGS.output)
    t = time.time()
    num_epochs = None
    num_tokens = 0
    num_correct = 0
    num_documents = 0
    while True:
        tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
            parser.evaluation['epochs'],
            parser.evaluation['eval_metrics'],
            parser.evaluation['documents'],
        ])

        if len(tf_documents):
            LOGGING.info('Processed %d documents', len(tf_documents))
            num_documents += len(tf_documents)
            sess.run(sink, feed_dict={sink_documents: tf_documents})

        num_tokens += tf_eval_metrics[0]
        num_correct += tf_eval_metrics[1]
        if num_epochs is None:
            num_epochs = tf_eval_epochs
        elif num_epochs < tf_eval_epochs:
            break

    LOGGING.info('Total processed documents: %d', num_documents)
    if num_tokens > 0:
        eval_metric = 100.0 * num_correct / num_tokens
        LOGGING.info('num correct tokens: %d', num_correct)
        LOGGING.info('total tokens: %d', num_tokens)
        LOGGING.info(
            'Seconds elapsed in evaluation: %.2f, '
            'eval metric: %.2f%%',
            time.time() - t, eval_metric)
Example 17
def Eval(sess):
  """Builds and evaluates a network."""
  task_context = FLAGS.task_context
  if FLAGS.resource_dir:
    task_context = RewriteContext(task_context)
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context,
                                  arg_prefix=FLAGS.arg_prefix))

  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  parser.AddEvaluation(task_context,
                       FLAGS.batch_size,
                       corpus_name=FLAGS.input,
                       evaluation_max_steps=FLAGS.max_steps)

  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)

  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=task_context,
                                      corpus_name=FLAGS.output)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])

    if len(tf_documents):
      logging.info('Processed %d documents', len(tf_documents))
      num_documents += len(tf_documents)
      sess.run(sink, feed_dict={sink_documents: tf_documents})

    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break

  logging.info('Total processed documents: %d', num_documents)
  if num_tokens > 0:
    eval_metric = 100.0 * num_correct / num_tokens
    logging.info('num correct tokens: %d', num_correct)
    logging.info('total tokens: %d', num_tokens)
    logging.info('Seconds elapsed in evaluation: %.2f, '
                 'eval metric: %.2f%%', time.time() - t, eval_metric)
Example 18
def GetFeatureSize(task_context, arg_prefix):
    with tf.variable_scope("fs_" + arg_prefix):
        with tf.Session() as sess:
            return sess.run(
                gen_parser_ops.feature_size(task_context=task_context,
                                            arg_prefix=arg_prefix))
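As in the other snippets, the op returns four values in a fixed order. A short, hypothetical call of the helper above (the task context path is a placeholder):

# Hypothetical call; '/tmp/context.pbtxt' stands in for a real task context file.
feature_sizes, domain_sizes, embedding_dims, num_actions = GetFeatureSize(
    '/tmp/context.pbtxt', 'brain_parser')
# feature_sizes, domain_sizes and embedding_dims are per-feature-group arrays;
# num_actions is the size of the transition system's action space.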
Example 19
def _perform_action(action=None):
    arg_prefix = action
    task_context = task_context_path

    if action == "brain_tagger":
        hidden_layer_sizes = [64]
        model_path = tagger_params_path
        output = 'output-to-file'
        input = 'input-from-file'
    elif action == "brain_parser":
        hidden_layer_sizes = [512, 512]
        model_path = parser_params_path
        output = 'output-to-file-conll'
        input = 'input-from-file-conll'
    else:
        raise Exception("Do not recognize action %s" % action)

    with tf.Session() as sess:
        feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
            gen_parser_ops.feature_size(task_context=task_context,
                                        arg_prefix=arg_prefix))

    beam_size = 8
    max_steps = 1000
    batch_size = 1024
    slim_model = True

    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        gate_gradients=True,
        arg_prefix=arg_prefix,
        beam_size=beam_size,
        max_steps=max_steps)

    parser.AddEvaluation(task_context,
                         batch_size,
                         corpus_name=input,
                         evaluation_max_steps=max_steps)

    with tf.Session() as sess:

        parser.AddSaver(slim_model)
        sess.run(parser.inits.values())
        parser.saver.restore(sess, model_path)

        sink_documents = tf.placeholder(tf.string)
        sink = gen_parser_ops.document_sink(sink_documents,
                                            task_context=task_context,
                                            corpus_name=output)
        t = time.time()
        num_epochs = None
        num_tokens = 0
        num_correct = 0
        num_documents = 0
        while True:
            tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
                parser.evaluation['epochs'],
                parser.evaluation['eval_metrics'],
                parser.evaluation['documents'],
            ])

            if len(tf_documents):
                logging.info('Processed %d documents', len(tf_documents))
                num_documents += len(tf_documents)
                sess.run(sink, feed_dict={sink_documents: tf_documents})

            num_tokens += tf_eval_metrics[0]
            num_correct += tf_eval_metrics[1]
            if num_epochs is None:
                num_epochs = tf_eval_epochs
            elif num_epochs < tf_eval_epochs:
                break

        logging.info('Total processed documents: %d', num_documents)
        if num_tokens > 0:
            eval_metric = 100.0 * num_correct / num_tokens
            logging.info('num correct tokens: %d', num_correct)
            logging.info('total tokens: %d', num_tokens)
            logging.info(
                'Seconds elapsed in evaluation: %.2f, '
                'eval metric: %.2f%%',
                time.time() - t, eval_metric)