コード例 #1
0
ファイル: pos_tagger.py プロジェクト: Sandy4321/deepnlp-1
 def _init_pos_model(self, session):
     """Create the POS tagger model (once) and load or initialize its parameters.

     The configuration named ``self.name`` is loaded from
     ``self.model_config_path`` and forced to ``batch_size = 1`` and
     ``num_steps = 1``. If at least one file matches
     ``self.ckpt_path + '.data*'``, the global variables whose name contains
     the model variable scope are restored from ``self.ckpt_path``; otherwise
     all global variables are initialized.

     Args:
         session: TensorFlow session used to restore or initialize variables.
     """
     # Initialize config
     config_dict = load_config(self.model_config_path)
     config = get_config(config_dict, self.name)
     config.batch_size = 1
     config.num_steps = 1  # iterate one token at a time
     model_var_scope = get_model_var_scope(self.var_scope, self.name)
     print("NOTICE: Input POS Model Var Scope Name '%s'" % model_var_scope)
     # Build the graph only when self.model does not exist yet
     if self.model is None:
         # `reuse` must be passed by keyword: the second positional parameter
         # of tf.variable_scope is `default_name`, not `reuse`.
         with tf.variable_scope(model_var_scope, reuse=tf.AUTO_REUSE):
             self.model = pos_model.POSTagger(
                 is_training=False,
                 config=config)  # save object after is_training
     # Load the specific .data* ckpt file, pattern: 'pos.ckpt.data*'
     if glob.glob(self.ckpt_path + '.data*'):
         print("NOTICE: Loading model parameters from %s" % self.ckpt_path)
         all_vars = tf.global_variables()
         # Restore only the variables that live under this model's scope.
         model_vars = [
             k for k in all_vars if model_var_scope in k.name.split("/")
         ]
         tf.train.Saver(model_vars).restore(session, self.ckpt_path)
     else:
         print(
             "NOTICE: Model not found, Try to run method: deepnlp.download(module='pos', name='%s')"
             % self.name)
         print("NOTICE: Created with fresh parameters.")
         session.run(tf.global_variables_initializer())
コード例 #2
0
 def _init_ner_model(self, session, ckpt_path):
     """Create the NER tagger model (once) and load or initialize its parameters.

     The configuration for ``self.name`` is forced to ``batch_size = 1`` and
     ``num_steps = 1``. If at least one file matches ``ckpt_path + '.data*'``,
     the global variables whose name contains the model variable scope are
     restored from ``ckpt_path``; otherwise all global variables are
     initialized.

     Args:
         session: TensorFlow session used to restore or initialize variables.
         ckpt_path: Checkpoint path prefix (without the ``.data*`` suffix).

     Raises:
         ValueError: If no configuration exists for ``self.name``.
     """
     # Initialize config
     config = ner_model.get_config(self.name)
     if config is None:
         # Fail with a clear message instead of an AttributeError on the
         # attribute assignments below.
         raise ValueError(
             "Input model name %s has no configuration..." % self.name)
     config.batch_size = 1
     config.num_steps = 1  # iterate one token at a time
     model_var_scope = get_model_var_scope(self.var_scope, self.name)
     print("NOTICE: Input NER Model Var Scope Name '%s'" % model_var_scope)
     # Build the graph only when self.model does not exist yet
     if self.model is None:
         with tf.variable_scope(model_var_scope, reuse=tf.AUTO_REUSE):
             # NOTE(review): built with is_training=True although this looks
             # like an inference path (batch_size = 1, weights restored from
             # a checkpoint) - confirm this is intended.
             self.model = ner_model.NERTagger(
                 is_training=True,
                 config=config)  # save object after is_training
     # Update model parameters; pattern: 'ner.ckpt.data*'
     if glob.glob(ckpt_path + '.data*'):
         print("NOTICE: Loading model parameters from %s" % ckpt_path)
         all_vars = tf.global_variables()
         # Restore only the variables under this model's scope,
         # e.g. ner_var_scope_zh
         model_vars = [
             k for k in all_vars if model_var_scope in k.name.split("/")
         ]
         tf.train.Saver(model_vars).restore(session, ckpt_path)
     else:
         print(
             "NOTICE: Model not found, Try to run method: deepnlp.download(module='ner', name='%s')"
             % self.name)
         print("NOTICE: Created with fresh parameters.")
         session.run(tf.global_variables_initializer())
コード例 #3
0
def main(_):
  """Train the NER tagger and report train/dev perplexity per epoch and final test perplexity.

  Data is loaded from FLAGS.ner_data_path, the model is restored from the
  checkpoint state in FLAGS.ner_train_dir when one exists (otherwise freshly
  initialized), and the graph definition is written to
  './models/ner_graph.pbtxt' before training starts.

  Args:
    _: Unused positional argument (presumably argv from the TensorFlow app
      runner - confirm against the caller).

  Raises:
    ValueError: If FLAGS.ner_data_path is empty.
  """
  if not FLAGS.ner_data_path:
    raise ValueError("No data files found in 'data_path' folder")

  raw_data = reader.load_data(FLAGS.ner_data_path)
  train_word, train_tag, dev_word, dev_tag, test_word, test_tag, vocabulary = raw_data

  config = get_config(FLAGS.ner_lang)

  # Separate config object for the test model: one token per step.
  eval_config = get_config(FLAGS.ner_lang)
  eval_config.batch_size = 1
  eval_config.num_steps = 1

  model_var_scope = get_model_var_scope(FLAGS.ner_scope_name, FLAGS.ner_lang)

  with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)
    # The training model is built first and must be allowed to CREATE the
    # variables; with reuse=True tf.get_variable would fail because nothing
    # exists in the scope yet.
    with tf.variable_scope(model_var_scope, reuse=None, initializer=initializer):
      m = NERTagger(is_training=True, config=config)
    # Validation and test models share the variables created above.
    with tf.variable_scope(model_var_scope, reuse=True, initializer=initializer):
      mvalid = NERTagger(is_training=False, config=config)
      mtest = NERTagger(is_training=False, config=eval_config)

    # CheckPoint State
    ckpt = tf.train.get_checkpoint_state(FLAGS.ner_train_dir)
    if ckpt:
      print("Loading model parameters from %s" % ckpt.model_checkpoint_path)
      m.saver.restore(session, tf.train.latest_checkpoint(FLAGS.ner_train_dir))
    else:
      print("Created model with fresh parameters.")
      session.run(tf.global_variables_initializer())

    # write the graph out for further use e.g. C++ API call
    tf.train.write_graph(session.graph_def, './models/', 'ner_graph.pbtxt', as_text=True)   # output is text

    # One shared no-op for evaluation runs; creating a new op every epoch
    # would keep adding nodes to the graph.
    eval_op = tf.no_op()

    for i in range(config.max_max_epoch):
      # Learning rate stays constant for the first max_epoch epochs, then decays.
      lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)

      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
      train_perplexity = run_epoch(session, m, train_word, train_tag, m.train_op,
                                   verbose=True)
      print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
      valid_perplexity = run_epoch(session, mvalid, dev_word, dev_tag, eval_op)
      print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

    test_perplexity = run_epoch(session, mtest, test_word, test_tag, eval_op)
    print("Test Perplexity: %.3f" % test_perplexity)
コード例 #4
0
 def _init_model(self, session):
     """Build the parser model if it is missing, then restore or initialize variables.

     Args:
         session: TensorFlow session used to restore or initialize variables.
     """
     parser_config = get_config(load_config(self.model_config_path), self.name)
     scope_name = get_model_var_scope(self.var_scope, self.name)
     print ("NOTICE: Initializing model var scope '%s'" % scope_name)

     # The graph is created only once per instance.
     if self.model is None:
         with tf.variable_scope(scope_name, reuse = tf.AUTO_REUSE):
             self.model = parse_model.NNParser(config=parser_config)

     # Checkpoint data files follow the pattern 'parser.ckpt.data*'.
     checkpoint_files = glob.glob(self.ckpt_path + '.data*')
     if not checkpoint_files:
         print("NOTICE: Model not found, Try to run method: deepnlp.download('parse')")
         print("NOTICE: Created with fresh parameters.")
         session.run(tf.global_variables_initializer())
         return

     print("NOTICE: Loading model parameters from %s" % self.ckpt_path)
     # Restore only the variables whose name contains the model scope.
     scoped_vars = []
     for variable in tf.global_variables():
         if scope_name in variable.name.split("/"):
             scoped_vars.append(variable)
     saver = tf.train.Saver(scoped_vars)
     saver.restore(session, self.ckpt_path)
コード例 #5
0
ファイル: parse_model.py プロジェクト: Sandy4321/deepnlp-1
def main(_):
    """Train the transition-based parser and report train/dev perplexity per epoch.

    Data is loaded from FLAGS.parse_data_path, the configuration from
    FLAGS.parse_model_config_path, and the model is restored from the
    checkpoint state in FLAGS.parse_train_dir when one exists (the directory
    is created if missing); otherwise variables are freshly initialized.

    Args:
        _: Unused positional argument (presumably argv from the TensorFlow
            app runner - confirm against the caller).
    """
    # Load Data
    raw_data = reader.load_data(FLAGS.parse_data_path)
    train_sents, train_trees, dev_sents, dev_trees, vocab_dict, pos_dict, label_dict, feature_tpl = raw_data # items in ids
    # Load Config
    config_dict = load_config(FLAGS.parse_model_config_path)
    config = get_config(config_dict, FLAGS.parse_lang)
    model_var_scope = get_model_var_scope(FLAGS.parse_scope_name, FLAGS.parse_lang)
    print ("NOTICE: Parsing model variable scope is %s" % model_var_scope)

    # Everything that uses `session` must stay inside this `with` block:
    # leaving it closes the session and later run/restore calls would fail.
    with tf.Session() as session:
        # AUTO_REUSE lets the first build create the variables; reuse=True
        # would require them to exist already.
        with tf.variable_scope(model_var_scope, reuse=tf.AUTO_REUSE):
            m = NNParser(config=config)

        # CheckPoint State
        if not os.path.exists(FLAGS.parse_train_dir):
            os.makedirs(FLAGS.parse_train_dir)

        ckpt = tf.train.get_checkpoint_state(FLAGS.parse_train_dir)
        if ckpt:
            print("Loading model parameters from %s" % ckpt.model_checkpoint_path)
            m.saver.restore(session, tf.train.latest_checkpoint(FLAGS.parse_train_dir))
        else:
            print("Created model with fresh parameters.")
            session.run(tf.global_variables_initializer())

        # Loop-invariant values: saved example instance path and a single
        # shared no-op for evaluation (avoids adding a new op every epoch).
        instance_path = os.path.join(FLAGS.parse_data_path, "instance")
        eval_op = tf.no_op()

        # NOTE(review): the original comment says the train dataset should be
        # generated only once, yet generate_examples is called every epoch -
        # confirm whether it can be hoisted out of the loop.
        # max_tree_num = 8000 # for memory saving
        for i in range(config.max_max_epoch):
            # Learning rate stays constant for the first max_epoch epochs, then decays.
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            print("Epoch: %d Learning rate: %.4f" % (i + 1, session.run(m.lr)))
            # Training: Check training set perplexity
            train_X, train_Y = transition_system.generate_examples(train_sents, train_trees, label_dict, feature_tpl, instance_path, is_train = True)
            train_iter = transition_system.iter_examples(train_X, train_Y, m.batch_size)
            train_perplexity = run_epoch(session, m, m.train_op, train_iter)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            # Dev: Check development set perplexity
            dev_X, dev_Y = transition_system.generate_examples(dev_sents, dev_trees, label_dict, feature_tpl, instance_path, is_train = False)
            dev_iter = transition_system.iter_examples(dev_X, dev_Y, m.batch_size)
            dev_perplexity = run_epoch(session, m, eval_op, dev_iter, is_train = False)
            print("Epoch: %d Dev Perplexity: %.3f" % (i + 1, dev_perplexity))