def setUp(self):
  # Creates a task context with the correct testing paths.
  initial_task_context = os.path.join(FLAGS.test_srcdir,
                                      'syntaxnet/'
                                      'testdata/context.pbtxt')
  self._task_context = os.path.join(FLAGS.test_tmpdir, 'context.pbtxt')
  with open(initial_task_context, 'r') as fin:
    with open(self._task_context, 'w') as fout:
      fout.write(fin.read().replace('SRCDIR', FLAGS.test_srcdir)
                 .replace('OUTPATH', FLAGS.test_tmpdir))

  # Creates necessary term maps.
  with self.test_session() as sess:
    gen_parser_ops.lexicon_builder(task_context=self._task_context,
                                   corpus_name='training-corpus').run()
    self._num_features, self._num_feature_ids, _, self._num_actions = (
        sess.run(gen_parser_ops.feature_size(task_context=self._task_context,
                                             arg_prefix='brain_parser')))
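# For reference, a sketch of the kind of stanza that setUp() rewrites in
# testdata/context.pbtxt. This is illustrative, not the actual test file:
# SyntaxNet task contexts are TaskSpec text protos, and the SRCDIR/OUTPATH
# placeholders are what the .replace() calls above substitute with real
# test paths:
#
#   input {
#     name: 'training-corpus'
#     record_format: 'conll-sentence'
#     Part {
#       file_pattern: 'SRCDIR/syntaxnet/testdata/mini-training-set'
#     }
#   }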
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))

  # Rewrite context.
  RewriteContext()

  # Creates necessary term maps.
  if FLAGS.compute_lexicon:
    logging.info('Computing lexicon...')
    with tf.Session(FLAGS.tf_master) as sess:
      gen_parser_ops.lexicon_builder(task_context=OutputPath('context'),
                                     corpus_name=FLAGS.training_corpus).run()
  with tf.Session(FLAGS.tf_master) as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                    arg_prefix=FLAGS.arg_prefix))

  # Filter out ill-formed trees and projectivize the training set.
  if FLAGS.projectivize_training_set:
    logging.info('Preprocessing...')
    with tf.Session(FLAGS.tf_master) as sess:
      source, last = gen_parser_ops.document_source(
          task_context=OutputPath('context'),
          batch_size=FLAGS.batch_size,
          corpus_name=FLAGS.training_corpus)
      sink = gen_parser_ops.document_sink(
          task_context=OutputPath('context'),
          corpus_name='projectivized-training-corpus',
          documents=gen_parser_ops.projectivize_filter(
              gen_parser_ops.well_formed_filter(
                  source, task_context=OutputPath('context')),
              task_context=OutputPath('context')))
      # Drain the corpus: run the sink until the source reports the last batch.
      while True:
        tf_last, _ = sess.run([last, sink])
        if tf_last:
          break

  logging.info('Training...')
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
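# A minimal sketch of how a main() like the one above is typically wired up
# as a TF 1.x script entry point. The flag names match those referenced in
# main(); the defaults and help strings here are illustrative assumptions,
# not the real trainer's values:
import tensorflow as tf

flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_string('tf_master', '', 'TensorFlow execution engine to connect to.')
flags.DEFINE_string('training_corpus', 'training-corpus', 'Corpus to train on.')
flags.DEFINE_string('arg_prefix', 'brain_parser', 'Prefix for task parameters.')
flags.DEFINE_integer('batch_size', 32, 'Number of sentences per batch.')
flags.DEFINE_bool('compute_lexicon', False, 'Whether to build the term maps.')
flags.DEFINE_bool('projectivize_training_set', True,
                  'Whether to projectivize non-projective trees.')

if __name__ == '__main__':
  tf.app.run()  # Parses flags, then invokes main(unused_argv).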
def build_lexicon(output_path,
                  training_corpus_path,
                  tf_master='',
                  training_corpus_format='conll-sentence',
                  morph_to_pos=False,
                  **kwargs):
  """Constructs a SyntaxNet lexicon at the given path.

  Args:
    output_path: Location to construct the lexicon.
    training_corpus_path: Path to CONLL formatted training data.
    tf_master: TensorFlow master executor (string, defaults to '' to use the
      local instance).
    training_corpus_format: Format of the training corpus (defaults to CONLL;
      search for REGISTER_SYNTAXNET_DOCUMENT_FORMAT for other formats).
    morph_to_pos: Whether to serialize morph attributes to the tag field,
      combined with category and fine POS tag.
    **kwargs: Forwarded to the LexiconBuilder op.
  """
  context = create_lexicon_context(output_path)

  if morph_to_pos:
    context.parameter.add(name='join_category_to_pos', value='true')
    context.parameter.add(name='add_pos_as_attribute', value='true')
    context.parameter.add(name='serialize_morph_to_pos', value='true')

  # Add the training data to the context.
  resource = context.input.add()
  resource.name = 'corpus'
  resource.record_format.extend([training_corpus_format])
  part = resource.part.add()
  part.file_pattern = training_corpus_path

  # Run the lexicon builder op.
  with tf.Session(tf_master) as sess:
    sess.run(
        gen_parser_ops.lexicon_builder(
            task_context_str=str(context), corpus_name='corpus', **kwargs))
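# Usage sketch for build_lexicon() above. The paths are hypothetical
# placeholders; the call writes the lexicon term maps under output_path by
# running the LexiconBuilder op on the given corpus:
build_lexicon(
    output_path='/tmp/lexicon',
    training_corpus_path='/tmp/data/train.conll',
    tf_master='',  # '' runs against the local TensorFlow instance.
    training_corpus_format='conll-sentence')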
def BuildLexicon(self):
  with self.test_session():
    gen_parser_ops.lexicon_builder(
        task_context=self.context_file,
        lexicon_max_char_ngram_length=2,
        lexicon_char_ngram_mark_boundaries=True).run()
def BuildLexicon(self):
  with self.test_session():
    gen_parser_ops.lexicon_builder(task_context=self.context_file).run()
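# The two BuildLexicon helpers above differ only in the character n-gram
# attributes forwarded to the LexiconBuilder op: the first also builds a
# character n-gram map (n-grams up to length 2, with token boundaries
# marked), while the second builds only the default term maps. A sketch
# parameterizing the same knobs (the attribute names come from the first
# helper; the defaults here are illustrative):
def BuildCharNgramLexicon(self, max_length=2, mark_boundaries=False):
  with self.test_session():
    gen_parser_ops.lexicon_builder(
        task_context=self.context_file,
        lexicon_max_char_ngram_length=max_length,
        lexicon_char_ngram_mark_boundaries=mark_boundaries).run()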