def __init__(self, action=None):
  self._sess = tf.Session()
  # Build a unique variable-scope name. Note that `action` is the join
  # separator, so the scope name interleaves the action string with six
  # random characters.
  self._variable_scope = action.join(
      random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
  arg_prefix = action
  task_context = task_context_path
  print("_init: 0")
  if action == "brain_tagger":
    hidden_layer_sizes = [64]
    model_path = tagger_params_path
    output = 'output-to-file'
    input = 'input-from-file'
  elif action == "brain_parser":
    hidden_layer_sizes = [512, 512]
    model_path = parser_params_path
    output = 'output-to-file-conll'
    input = 'input-from-file-conll'
  else:
    raise ValueError("Do not recognize action %s" % action)
  print("_init: 1")
  with tf.variable_scope(self._variable_scope):
    feature_sizes, domain_sizes, embedding_dims, num_actions = self._sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=arg_prefix))
    print("_init: 2")
    beam_size = 8
    max_steps = 1000
    batch_size = 1024
    slim_model = True
    self._parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions, feature_sizes, domain_sizes, embedding_dims,
        hidden_layer_sizes, gate_gradients=True, arg_prefix=arg_prefix,
        beam_size=beam_size, max_steps=max_steps)
    print("_init: 3")
    self._parser.AddEvaluation(task_context, batch_size, corpus_name=input,
                               evaluation_max_steps=max_steps)
    print("_init: 4")
    self._sess.run(self._parser.inits.values())
    self._parser.AddSaver(slim_model)
    self._parser.saver.restore(self._sess, model_path)
    self._task_context = task_context
    self._output = 'stdout-conll'  # `output` above is currently unused.
    print("_init: Done")
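
# A minimal usage sketch for the constructor above. The class name
# `SyntaxNetWrapper` is an assumption (the snippet only shows __init__);
# the two action strings come from the code above.
if __name__ == '__main__':
  tagger = SyntaxNetWrapper(action='brain_tagger')   # POS tagging graph
  parser = SyntaxNetWrapper(action='brain_parser')   # dependency parsing graph
  # Each instance owns its own tf.Session and a uniquely named variable
  # scope, so both graphs can live in one process without name collisions.
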
def Parse(sess, text):
  """Parses the given text and returns the serialized result."""
  task_context = TASK_CONTEXT
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context,
                                  arg_prefix="brain_parser"))
  hidden_layer_sizes = [512, 512]
  parser = structured_graph_builder.StructuredGraphBuilder(
      num_actions, feature_sizes, domain_sizes, embedding_dims,
      hidden_layer_sizes, gate_gradients=True, arg_prefix="brain_parser",
      beam_size=8, max_steps=1000)
  # Feed the text directly into the evaluation pipeline instead of reading
  # from a file-backed corpus.
  parser.AddEvaluation(task_context, 1024, corpus_name="direct-conll",
                       value=text, evaluation_max_steps=1000)
  parser.AddSaver(True)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, MODEL_BASE + "parser-params")
  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.variable_sink(sink_documents,
                                      corpus_name="stdout-conll",
                                      task_context=task_context)
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])
    logging.info("TF DOCUMENTS: %s", tf_documents)
    if len(tf_documents):
      num_documents += len(tf_documents)
      # Return the first non-empty batch of parsed documents.
      return sess.run(sink, feed_dict={sink_documents: tf_documents})
    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
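
# Hedged driver for Parse() above: TASK_CONTEXT and MODEL_BASE must already
# point at a SyntaxNet task context and model directory, and the
# "direct-conll" corpus is assumed to accept one token per line, CoNLL
# style. The sample text is illustrative only.
if __name__ == '__main__':
  sample = 'John\nsaw\nthe\ncat\n\n'  # hypothetical pre-tokenized input
  with tf.Session() as sess:
    print(Parse(sess, sample))
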
def Eval(sess):
  """Builds and evaluates a network."""
  logging.info('***************%s', FLAGS.arg_prefix)
  task_context = RewriteContext(FLAGS.task_context)
  logging.info(task_context)
  # RewriteContext returns a list of contexts here; query the feature sizes
  # from the first one.
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context[0],
                                  arg_prefix=FLAGS.arg_prefix))
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions, feature_sizes,
                                        domain_sizes, embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions, feature_sizes, domain_sizes, embedding_dims,
        hidden_layer_sizes, gate_gradients=True, arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size, max_steps=FLAGS.max_steps)
  for c in task_context:
    parser.AddEvaluation(c, FLAGS.batch_size, corpus_name=FLAGS.input,
                         evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)
  sink_documents = tf.placeholder(tf.string)
  # Note: `c` is the last context left over from the loop above.
  sink = gen_parser_ops.document_sink(sink_documents, task_context=c,
                                      corpus_name=FLAGS.output)
  run_parser(sess, parser, sink, sink_documents)
def __init__(self, processconfig):
  """Builds and evaluates a network."""
  self._sess = tf.Session()
  self._pg = processconfig
  # File where syntaxnet output will be written.
  self.stdout_file_path = os.path.join(
      os.path.dirname(self._pg.custom_file), 'stdout.tmp')
  self.task_context = self._pg.task_context
  if self._pg.resource_dir:
    self.task_context = RewriteContext(self.task_context,
                                       self._pg.resource_dir)
  # Create (truncate) the custom tmp file, then verify it is readable.
  with open(self._pg.custom_file, 'w') as f:
    pass
  self.fdescr_ = open(self._pg.custom_file, 'r')
  self.fdescr_.close()
  with tf.variable_scope(self._pg.variable_scope):
    feature_sizes, domain_sizes, embedding_dims, num_actions = self._sess.run(
        gen_parser_ops.feature_size(task_context=self.task_context,
                                    arg_prefix=self._pg.arg_prefix))
    if self._pg.graph_builder_ == 'greedy':
      self._parser = graph_builder.GreedyParser(
          num_actions, feature_sizes, domain_sizes, embedding_dims,
          self._pg.hidden_layer_sizes, gate_gradients=True,
          arg_prefix=self._pg.arg_prefix)
    else:
      self._parser = structured_graph_builder.StructuredGraphBuilder(
          num_actions, feature_sizes, domain_sizes, embedding_dims,
          self._pg.hidden_layer_sizes, gate_gradients=True,
          arg_prefix=self._pg.arg_prefix, beam_size=self._pg.beam_size,
          max_steps=self._pg.max_steps)
    self._parser.AddEvaluation(self.task_context, self._pg.batch_size,
                               corpus_name=self._pg.input_,
                               evaluation_max_steps=self._pg.max_steps)
    self._parser.AddSaver(self._pg.slim_model)
    self._sess.run(self._parser.inits.values())
    self._parser.saver.restore(self._sess, self._pg.model_path)
def MakeGraph(self, max_steps=10, beam_size=2, batch_size=1, **kwargs):
  """Constructs a structured learning graph."""
  assert max_steps > 0, 'Empty network not supported.'
  logging.info('MakeGraph + %s', kwargs)
  with self.test_session(graph=tf.Graph()) as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=self._task_context))
  # Override the real embedding dimensions with small test-sized ones, and
  # use no hidden layers.
  embedding_dims = [8, 8, 8]
  hidden_layer_sizes = []
  learning_rate = 0.01
  builder = structured_graph_builder.StructuredGraphBuilder(
      num_actions,
      feature_sizes,
      domain_sizes,
      embedding_dims,
      hidden_layer_sizes,
      seed=1,
      max_steps=max_steps,
      beam_size=beam_size,
      gate_gradients=True,
      use_locking=True,
      use_averaging=False,
      check_parameters=False,
      **kwargs)
  builder.AddTraining(self._task_context,
                      batch_size,
                      learning_rate=learning_rate,
                      decay_steps=1000,
                      momentum=0.9,
                      corpus_name='training-corpus')
  builder.AddEvaluation(self._task_context,
                        batch_size,
                        evaluation_max_steps=25,
                        corpus_name=None)
  builder.training['inits'] = tf.group(*builder.inits.values(), name='inits')
  return builder
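
# A sketch of how a test case might drive MakeGraph above, modeled on the
# training keys used elsewhere in this code ('inits', 'cost', 'train_op');
# the test name and assertion are illustrative, not from the original.
def testTrainingStepRuns(self):
  with self.test_session(graph=tf.Graph()) as sess:
    builder = self.MakeGraph(batch_size=1, beam_size=2, max_steps=10)
    sess.run(builder.training['inits'])
    cost, _ = sess.run(
        [builder.training['cost'], builder.training['train_op']])
    # One structured training step should produce a non-negative cost.
    self.assertGreaterEqual(cost, 0.0)
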
def __init__(self, cfg):
  super(ProcessorSyntaxNet, self).__init__()
  self.parser_ = None
  self.task_context_ = RewriteContext(task_context_file)
  self.sess_ = tf.Session()
  self.cfg_ = cfg
  # Create an empty custom file and keep a read descriptor on it.
  with open(self.cfg_.custom_file_path, 'w') as f:
    pass
  self.fdescr_ = open(self.cfg_.custom_file_path, 'r')
  hidden_layer_sizes = map(int, self.cfg_.hidden_layer_str.split(','))
  with tf.variable_scope(self.cfg_.variable_scope):
    feature_sizes, domain_sizes, embedding_dims, num_actions = self.sess_.run(
        gen_parser_ops.feature_size(task_context=self.task_context_,
                                    arg_prefix=self.cfg_.arg_prefix))
    self.parser_ = structured_graph_builder.StructuredGraphBuilder(
        num_actions, feature_sizes, domain_sizes, embedding_dims,
        hidden_layer_sizes, gate_gradients=True,
        arg_prefix=self.cfg_.arg_prefix, beam_size=self.cfg_.beam_size,
        max_steps=self.cfg_.max_steps)
    self.parser_.AddEvaluation(self.task_context_,
                               self.cfg_.batch_size,
                               corpus_name=self.cfg_.input_str,
                               evaluation_max_steps=self.cfg_.max_steps)
    self.parser_.AddSaver(self.cfg_.slim_model)
    self.sess_.run(self.parser_.inits.values())
    self.parser_.saver.restore(self.sess_, self.cfg_.model_path)
  # Warm up the pipeline with an initial line.
  self.parse(self.cfg_.init_line)
def EvalForever(sess, num_actions, feature_sizes, domain_sizes, embedding_dims):
  """Builds a network and evaluates it in a loop.

  Args:
    sess: tensorflow session to use.
    num_actions: number of possible golden actions.
    feature_sizes: size of each feature vector.
    domain_sizes: number of possible feature ids in each feature vector.
    embedding_dims: embedding dimension for each feature group.
  """
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions, feature_sizes,
                                        domain_sizes, embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions, feature_sizes, domain_sizes, embedding_dims,
        hidden_layer_sizes, gate_gradients=True, arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size, max_steps=FLAGS.max_steps)
  task_context = FLAGS.task_context
  # Keep evaluating until Eval() reports there is nothing left to process.
  while True:
    if not Eval(sess, parser, task_context):
      break
def EvalForever(sess):
  """Builds a network and evaluates it in a loop."""
  task_context = FLAGS.task_context
  if FLAGS.resource_dir:
    task_context = RewriteContext(task_context)
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context,
                                  arg_prefix=FLAGS.arg_prefix))
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions, feature_sizes,
                                        domain_sizes, embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions, feature_sizes, domain_sizes, embedding_dims,
        hidden_layer_sizes, gate_gradients=True, arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size, max_steps=FLAGS.max_steps)
  while True:
    if not Eval(sess, parser, task_context):
      break
  fout.write(str(context))
  return fout.name


sess = tf.Session()
task_context = RewriteContext(context_path)
feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
    gen_parser_ops.feature_size(task_context=task_context,
                                arg_prefix=tagger_arg_prefix))
hidden_layer_sizes = map(int, tagger_hidden_layer_sizes.split(','))
tagger = structured_graph_builder.StructuredGraphBuilder(
    num_actions, feature_sizes, domain_sizes, embedding_dims,
    hidden_layer_sizes, gate_gradients=True, arg_prefix=tagger_arg_prefix,
    beam_size=beam_size, max_steps=max_steps)
tagger.AddEvaluation(task_context, batch_size, corpus_name=input_style,
                     evaluation_max_steps=max_steps)
tagger.AddSaver(slim_model)
sess.run(tagger.inits.values())
tagger.saver.restore(sess, tagger_model_path)
sink_documents = tf.placeholder(tf.string)
sink = gen_parser_ops.document_sink(sink_documents,
def Eval(sess):
  """Builds and evaluates a network."""
  task_context = FLAGS.task_context
  if FLAGS.resource_dir:
    task_context = RewriteContext(task_context)
  feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
      gen_parser_ops.feature_size(task_context=task_context,
                                  arg_prefix=FLAGS.arg_prefix))
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  LOGGING.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions, feature_sizes,
                                        domain_sizes, embedding_dims,
                                        hidden_layer_sizes,
                                        gate_gradients=True,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions, feature_sizes, domain_sizes, embedding_dims,
        hidden_layer_sizes, gate_gradients=True, arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size, max_steps=FLAGS.max_steps)
  parser.AddEvaluation(task_context, FLAGS.batch_size, corpus_name=FLAGS.input,
                       evaluation_max_steps=FLAGS.max_steps)
  parser.AddSaver(FLAGS.slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, FLAGS.model_path)
  sink_documents = tf.placeholder(tf.string)
  sink = gen_parser_ops.document_sink(sink_documents,
                                      task_context=task_context,
                                      corpus_name=FLAGS.output)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  num_documents = 0
  while True:
    tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
        parser.evaluation['epochs'],
        parser.evaluation['eval_metrics'],
        parser.evaluation['documents'],
    ])
    if len(tf_documents):
      LOGGING.info('Processed %d documents', len(tf_documents))
      num_documents += len(tf_documents)
      sess.run(sink, feed_dict={sink_documents: tf_documents})
    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
  LOGGING.info('Total processed documents: %d', num_documents)
  if num_tokens > 0:
    eval_metric = 100.0 * num_correct / num_tokens
    LOGGING.info('num correct tokens: %d', num_correct)
    LOGGING.info('total tokens: %d', num_tokens)
    LOGGING.info('Seconds elapsed in evaluation: %.2f, '
                 'eval metric: %.2f%%', time.time() - t, eval_metric)
def Build(sess, document_source, FLAGS):
  """Builds a sub-network, which will be either the tagger or the parser.

  Args:
    sess: tensorflow session to use.
    document_source: the input of serialized document objects to process.
    FLAGS: dictionary of configuration parameters:
      num_actions: number of possible golden actions.
      feature_sizes: size of each feature vector.
      domain_sizes: number of possible feature ids in each feature vector.
      embedding_dims: embedding dimension for each feature group.
      hidden_layer_sizes: comma-separated list of hidden layer sizes.
      arg_prefix: prefix for context parameters.
      beam_size: number of slots for beam parsing.
      max_steps: max number of steps to take.
      task_context: path to a task context with inputs and parameters for
        feature extractors.
      input: name of the context input to read data from.
      graph_builder: 'greedy' or 'structured'.
      batch_size: number of sentences to process in parallel.
      slim_model: whether to expect only averaged variables.
      model_path: path to model parameters.

  Returns:
    The tensor which will contain the serialized document objects.
  """
  task_context = FLAGS["task_context"]
  arg_prefix = FLAGS["arg_prefix"]
  num_actions = FLAGS["num_actions"]
  feature_sizes = FLAGS["feature_sizes"]
  domain_sizes = FLAGS["domain_sizes"]
  embedding_dims = FLAGS["embedding_dims"]
  hidden_layer_sizes = map(int, FLAGS["hidden_layer_sizes"].split(','))
  beam_size = FLAGS["beam_size"]
  max_steps = FLAGS["max_steps"]
  batch_size = FLAGS["batch_size"]
  corpus_name = FLAGS["input"]
  slim_model = FLAGS["slim_model"]
  model_path = FLAGS["model_path"]
  # Note: despite the `graph_builder` parameter, only the structured builder
  # is constructed here.
  parser = structured_graph_builder.StructuredGraphBuilder(
      num_actions, feature_sizes, domain_sizes, embedding_dims,
      hidden_layer_sizes, gate_gradients=True, arg_prefix=arg_prefix,
      beam_size=beam_size, max_steps=max_steps)
  parser.AddEvaluation(task_context, batch_size, corpus_name=corpus_name,
                       evaluation_max_steps=max_steps,
                       document_source=document_source)
  parser.AddSaver(slim_model)
  sess.run(parser.inits.values())
  parser.saver.restore(sess, model_path)
  return parser.evaluation['documents']
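
# Sketch of chaining two Build() calls: AddEvaluation accepts a
# document_source and Build() returns the evaluated documents tensor, so the
# tagger's output documents can become the parser's input. The helper name
# and the two FLAGS dicts are assumptions for illustration.
def _build_pipeline(sess, document_source, tagger_flags, parser_flags):
  tagged = Build(sess, document_source, tagger_flags)
  parsed = Build(sess, tagged, parser_flags)
  return parsed
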
def Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims):
  """Builds and trains the network.

  Args:
    sess: tensorflow session to use.
    num_actions: number of possible golden actions.
    feature_sizes: size of each feature vector.
    domain_sizes: number of possible feature ids in each feature vector.
    embedding_dims: embedding dimension to use for each feature group.
  """
  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  if FLAGS.graph_builder == 'greedy':
    parser = graph_builder.GreedyParser(num_actions,
                                        feature_sizes,
                                        domain_sizes,
                                        embedding_dims,
                                        hidden_layer_sizes,
                                        seed=int(FLAGS.seed),
                                        gate_gradients=True,
                                        averaging_decay=FLAGS.averaging_decay,
                                        arg_prefix=FLAGS.arg_prefix)
  else:
    parser = structured_graph_builder.StructuredGraphBuilder(
        num_actions,
        feature_sizes,
        domain_sizes,
        embedding_dims,
        hidden_layer_sizes,
        seed=int(FLAGS.seed),
        gate_gradients=True,
        averaging_decay=FLAGS.averaging_decay,
        arg_prefix=FLAGS.arg_prefix,
        beam_size=FLAGS.beam_size,
        max_steps=FLAGS.max_steps)
  task_context = OutputPath('context')
  if FLAGS.word_embeddings is not None:
    parser.AddPretrainedEmbeddings(0, FLAGS.word_embeddings, task_context)
  corpus_name = ('projectivized-training-corpus'
                 if FLAGS.projectivize_training_set else FLAGS.training_corpus)
  parser.AddTraining(task_context,
                     FLAGS.batch_size,
                     learning_rate=FLAGS.learning_rate,
                     momentum=FLAGS.momentum,
                     decay_steps=FLAGS.decay_steps,
                     corpus_name=corpus_name)
  parser.AddEvaluation(task_context,
                       FLAGS.batch_size,
                       corpus_name=FLAGS.tuning_corpus)
  parser.AddSaver(FLAGS.slim_model)

  # Save graph.
  if FLAGS.output_path:
    with gfile.FastGFile(OutputPath('graph'), 'w') as f:
      f.write(sess.graph_def.SerializeToString())

  logging.info('Initializing...')
  num_epochs = 0
  cost_sum = 0.0
  num_steps = 0
  best_eval_metric = 0.0
  sess.run(parser.inits.values())

  if FLAGS.pretrained_params is not None:
    logging.info('Loading pretrained params from %s', FLAGS.pretrained_params)
    feed_dict = {'save/Const:0': FLAGS.pretrained_params}
    targets = []
    for node in sess.graph_def.node:
      if (node.name.startswith('save/Assign') and
          node.input[0] in FLAGS.pretrained_params_names.split(',')):
        logging.info('Loading %s with op %s', node.input[0], node.name)
        targets.append(node.name)
    sess.run(targets, feed_dict=feed_dict)

  logging.info('Training...')
  while num_epochs < FLAGS.num_epochs:
    tf_epochs, tf_cost, _ = sess.run([
        parser.training['epochs'], parser.training['cost'],
        parser.training['train_op']
    ])
    num_epochs = tf_epochs
    num_steps += 1
    cost_sum += tf_cost
    if num_steps % FLAGS.report_every == 0:
      logging.info('Epochs: %d, num steps: %d, '
                   'seconds elapsed: %.2f, avg cost: %.2f, ', num_epochs,
                   num_steps, time.time() - t, cost_sum / FLAGS.report_every)
      cost_sum = 0.0
    if num_steps % FLAGS.checkpoint_every == 0:
      best_eval_metric = Eval(sess, parser, num_steps, best_eval_metric)
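
# Train() above delegates checkpointing to Eval(); a hedged sketch of that
# contract, modeled on the evaluation loops elsewhere in this file. The
# OutputPath('model') checkpoint name is an assumption.
def Eval(sess, parser, num_steps, best_eval_metric):
  """Evaluates the tuning corpus and checkpoints if the metric improved."""
  num_epochs = None
  num_tokens, num_correct = 0, 0
  while True:
    tf_eval_epochs, tf_eval_metrics = sess.run(
        [parser.evaluation['epochs'], parser.evaluation['eval_metrics']])
    num_tokens += tf_eval_metrics[0]
    num_correct += tf_eval_metrics[1]
    if num_epochs is None:
      num_epochs = tf_eval_epochs
    elif num_epochs < tf_eval_epochs:
      break
  eval_metric = 100.0 * num_correct / num_tokens if num_tokens else 0.0
  if eval_metric > best_eval_metric:
    parser.saver.save(sess, OutputPath('model'))
  return max(eval_metric, best_eval_metric)
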
def _perform_action(action=None):
  arg_prefix = action
  task_context = task_context_path
  if action == "brain_tagger":
    hidden_layer_sizes = [64]
    model_path = tagger_params_path
    output = 'output-to-file'
    input = 'input-from-file'
  elif action == "brain_parser":
    hidden_layer_sizes = [512, 512]
    model_path = parser_params_path
    output = 'output-to-file-conll'
    input = 'input-from-file-conll'
  else:
    raise ValueError("Do not recognize action %s" % action)
  with tf.Session() as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=task_context,
                                    arg_prefix=arg_prefix))
  beam_size = 8
  max_steps = 1000
  batch_size = 1024
  slim_model = True
  parser = structured_graph_builder.StructuredGraphBuilder(
      num_actions, feature_sizes, domain_sizes, embedding_dims,
      hidden_layer_sizes, gate_gradients=True, arg_prefix=arg_prefix,
      beam_size=beam_size, max_steps=max_steps)
  parser.AddEvaluation(task_context, batch_size, corpus_name=input,
                       evaluation_max_steps=max_steps)
  # A fresh session is opened to restore the model and run the evaluation.
  with tf.Session() as sess:
    parser.AddSaver(slim_model)
    sess.run(parser.inits.values())
    parser.saver.restore(sess, model_path)
    sink_documents = tf.placeholder(tf.string)
    sink = gen_parser_ops.document_sink(sink_documents,
                                        task_context=task_context,
                                        corpus_name=output)
    t = time.time()
    num_epochs = None
    num_tokens = 0
    num_correct = 0
    num_documents = 0
    while True:
      tf_eval_epochs, tf_eval_metrics, tf_documents = sess.run([
          parser.evaluation['epochs'],
          parser.evaluation['eval_metrics'],
          parser.evaluation['documents'],
      ])
      if len(tf_documents):
        logging.info('Processed %d documents', len(tf_documents))
        num_documents += len(tf_documents)
        sess.run(sink, feed_dict={sink_documents: tf_documents})
      num_tokens += tf_eval_metrics[0]
      num_correct += tf_eval_metrics[1]
      if num_epochs is None:
        num_epochs = tf_eval_epochs
      elif num_epochs < tf_eval_epochs:
        break
    logging.info('Total processed documents: %d', num_documents)
    if num_tokens > 0:
      eval_metric = 100.0 * num_correct / num_tokens
      logging.info('num correct tokens: %d', num_correct)
      logging.info('total tokens: %d', num_tokens)
      logging.info('Seconds elapsed in evaluation: %.2f, '
                   'eval metric: %.2f%%', time.time() - t, eval_metric)