예제 #1
0
    def setUp(self):
        # Creates a task context with the correct testing paths.
        initial_task_context = os.path.join(FLAGS.test_srcdir, "syntaxnet/" "testdata/context.pbtxt")
        self._task_context = os.path.join(FLAGS.test_tmpdir, "context.pbtxt")
        with open(initial_task_context, "r") as fin:
            with open(self._task_context, "w") as fout:
                fout.write(fin.read().replace("SRCDIR", FLAGS.test_srcdir).replace("OUTPATH", FLAGS.test_tmpdir))

        # Creates necessary term maps.
        with self.test_session() as sess:
            gen_parser_ops.lexicon_builder(task_context=self._task_context, corpus_name="training-corpus").run()
            self._num_features, self._num_feature_ids, _, self._num_actions = sess.run(
                gen_parser_ops.feature_size(task_context=self._task_context, arg_prefix="brain_parser")
            )
예제 #2
0
  def setUp(self):
    # Creates a task context with the correct testing paths.
    initial_task_context = os.path.join(FLAGS.test_srcdir,
                                        'syntaxnet/'
                                        'testdata/context.pbtxt')
    self._task_context = os.path.join(FLAGS.test_tmpdir, 'context.pbtxt')
    with open(initial_task_context, 'r') as fin:
      with open(self._task_context, 'w') as fout:
        fout.write(fin.read().replace('SRCDIR', FLAGS.test_srcdir)
                   .replace('OUTPATH', FLAGS.test_tmpdir))

    # Creates necessary term maps.
    with self.test_session() as sess:
      gen_parser_ops.lexicon_builder(task_context=self._task_context,
                                     corpus_name='training-corpus').run()
      self._num_features, self._num_feature_ids, _, self._num_actions = (
          sess.run(gen_parser_ops.feature_size(task_context=self._task_context,
                                               arg_prefix='brain_parser')))
예제 #3
0
  def setUp(self):
    # Creates a task context with the correct testing paths.
    initial_task_context = os.path.join(FLAGS.test_srcdir,
                                        'syntaxnet/'
                                        'testdata/context.pbtxt')
    self._task_context = os.path.join(FLAGS.test_tmpdir, 'context.pbtxt')
    with open(initial_task_context, 'r') as fin:
      with open(self._task_context, 'w') as fout:
        fout.write(fin.read().replace('SRCDIR', FLAGS.test_srcdir)
                   .replace('OUTPATH', FLAGS.test_tmpdir))

    # Creates necessary term maps.
    with self.test_session() as sess:
      gen_parser_ops.lexicon_builder(task_context=self._task_context,
                                     corpus_name='training-corpus').run()
      self._num_features, self._num_feature_ids, _, self._num_actions = (
          sess.run(gen_parser_ops.feature_size(task_context=self._task_context,
                                               arg_prefix='brain_parser')))
예제 #4
0
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))

  # Rewrite context.
  RewriteContext()

  # Creates necessary term maps.
  if FLAGS.compute_lexicon:
    logging.info('Computing lexicon...')
    with tf.Session(FLAGS.tf_master) as sess:
      gen_parser_ops.lexicon_builder(task_context=OutputPath('context'),
                                     corpus_name=FLAGS.training_corpus).run()
  with tf.Session(FLAGS.tf_master) as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                    arg_prefix=FLAGS.arg_prefix))

  # Well formed and projectivize.
  if FLAGS.projectivize_training_set:
    logging.info('Preprocessing...')
    with tf.Session(FLAGS.tf_master) as sess:
      source, last = gen_parser_ops.document_source(
          task_context=OutputPath('context'),
          batch_size=FLAGS.batch_size,
          corpus_name=FLAGS.training_corpus)
      sink = gen_parser_ops.document_sink(
          task_context=OutputPath('context'),
          corpus_name='projectivized-training-corpus',
          documents=gen_parser_ops.projectivize_filter(
              gen_parser_ops.well_formed_filter(source,
                                                task_context=OutputPath(
                                                    'context')),
              task_context=OutputPath('context')))
      while True:
        tf_last, _ = sess.run([last, sink])
        if tf_last:
          break

  logging.info('Training...')
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
예제 #5
0
def main(unused_argv):
    logging.set_verbosity(logging.INFO)
    if not gfile.IsDirectory(OutputPath('')):
        gfile.MakeDirs(OutputPath(''))

    # Rewrite context.
    RewriteContext()

    # Creates necessary term maps.
    if FLAGS.compute_lexicon:
        logging.info('Computing lexicon...')
        with tf.Session(FLAGS.tf_master) as sess:
            gen_parser_ops.lexicon_builder(
                task_context=OutputPath('context'),
                corpus_name=FLAGS.training_corpus).run()
    with tf.Session(FLAGS.tf_master) as sess:
        feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
            gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                        arg_prefix=FLAGS.arg_prefix))

    # Well formed and projectivize.
    if FLAGS.projectivize_training_set:
        logging.info('Preprocessing...')
        with tf.Session(FLAGS.tf_master) as sess:
            source, last = gen_parser_ops.document_source(
                task_context=OutputPath('context'),
                batch_size=FLAGS.batch_size,
                corpus_name=FLAGS.training_corpus)
            sink = gen_parser_ops.document_sink(
                task_context=OutputPath('context'),
                corpus_name='projectivized-training-corpus',
                documents=gen_parser_ops.projectivize_filter(
                    gen_parser_ops.well_formed_filter(
                        source, task_context=OutputPath('context')),
                    task_context=OutputPath('context')))
            while True:
                tf_last, _ = sess.run([last, sink])
                if tf_last:
                    break

    logging.info('Training...')
    with tf.Session(FLAGS.tf_master) as sess:
        Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
예제 #6
0
def build_lexicon(output_path,
                  training_corpus_path,
                  tf_master='',
                  training_corpus_format='conll-sentence',
                  morph_to_pos=False,
                  **kwargs):
    """Constructs a SyntaxNet lexicon at the given path.

  Args:
    output_path: Location to construct the lexicon.
    training_corpus_path: Path to CONLL formatted training data.
    tf_master: TensorFlow master executor (string, defaults to '' to use the
      local instance).
    training_corpus_format: Format of the training corpus (defaults to CONLL;
      search for REGISTER_SYNTAXNET_DOCUMENT_FORMAT for other formats).
    morph_to_pos: Whether to serialize morph attributes to the tag field,
      combined with category and fine POS tag.
    **kwargs: Forwarded to the LexiconBuilder op.
  """
    context = create_lexicon_context(output_path)
    if morph_to_pos:
        context.parameter.add(name='join_category_to_pos', value='true')
        context.parameter.add(name='add_pos_as_attribute', value='true')
        context.parameter.add(name='serialize_morph_to_pos', value='true')

    # Add the training data to the context.
    resource = context.input.add()
    resource.name = 'corpus'
    resource.record_format.extend([training_corpus_format])
    part = resource.part.add()
    part.file_pattern = training_corpus_path

    # Run the lexicon builder op.
    with tf.Session(tf_master) as sess:
        sess.run(
            gen_parser_ops.lexicon_builder(task_context_str=str(context),
                                           corpus_name='corpus',
                                           **kwargs))
예제 #7
0
파일: lexicon.py 프로젝트: ALISCIFP/models
def build_lexicon(output_path,
                  training_corpus_path,
                  tf_master='',
                  training_corpus_format='conll-sentence',
                  morph_to_pos=False,
                  **kwargs):
  """Constructs a SyntaxNet lexicon at the given path.

  Args:
    output_path: Location to construct the lexicon.
    training_corpus_path: Path to CONLL formatted training data.
    tf_master: TensorFlow master executor (string, defaults to '' to use the
      local instance).
    training_corpus_format: Format of the training corpus (defaults to CONLL;
      search for REGISTER_SYNTAXNET_DOCUMENT_FORMAT for other formats).
    morph_to_pos: Whether to serialize morph attributes to the tag field,
      combined with category and fine POS tag.
    **kwargs: Forwarded to the LexiconBuilder op.
  """
  context = create_lexicon_context(output_path)
  if morph_to_pos:
    context.parameter.add(name='join_category_to_pos', value='true')
    context.parameter.add(name='add_pos_as_attribute', value='true')
    context.parameter.add(name='serialize_morph_to_pos', value='true')

  # Add the training data to the context.
  resource = context.input.add()
  resource.name = 'corpus'
  resource.record_format.extend([training_corpus_format])
  part = resource.part.add()
  part.file_pattern = training_corpus_path

  # Run the lexicon builder op.
  with tf.Session(tf_master) as sess:
    sess.run(
        gen_parser_ops.lexicon_builder(
            task_context_str=str(context), corpus_name='corpus', **kwargs))
예제 #8
0
 def BuildLexicon(self):
   with self.test_session():
     gen_parser_ops.lexicon_builder(
         task_context=self.context_file,
         lexicon_max_char_ngram_length=2,
         lexicon_char_ngram_mark_boundaries=True).run()
 def BuildLexicon(self):
     with self.test_session():
         gen_parser_ops.lexicon_builder(
             task_context=self.context_file,
             lexicon_max_char_ngram_length=2,
             lexicon_char_ngram_mark_boundaries=True).run()
예제 #10
0
 def BuildLexicon(self):
   with self.test_session():
     gen_parser_ops.lexicon_builder(task_context=self.context_file).run()
 def BuildLexicon(self):
     with self.test_session():
         gen_parser_ops.lexicon_builder(
             task_context=self.context_file).run()