def _init_pos_model(self, session):
    """Create the POS tagger model and load or initialize its parameters.

    Builds ``pos_model.POSTagger`` with ``is_training=False`` (only when
    ``self.model`` is still ``None``) inside the variable scope returned by
    ``get_model_var_scope(self.var_scope, self.name)``. If files matching
    ``self.ckpt_path + '.data*'`` exist, the variables belonging to that
    scope are restored from ``self.ckpt_path``; otherwise every global
    variable is initialized from scratch.

    Args:
        session: TensorFlow session used to restore or initialize variables.
    """
    # Initialize config; the tagger consumes one token per step.
    config_dict = load_config(self.model_config_path)
    config = get_config(config_dict, self.name)
    config.batch_size = 1
    config.num_steps = 1

    model_var_scope = get_model_var_scope(self.var_scope, self.name)
    print("NOTICE: Input POS Model Var Scope Name '%s'" % model_var_scope)

    # Build the model graph only if this instance has not built it yet.
    if self.model is None:
        # `reuse` has to be passed by keyword: the second positional
        # parameter of tf.variable_scope is `default_name`, so a positional
        # tf.AUTO_REUSE would silently leave variable reuse disabled.
        with tf.variable_scope(model_var_scope, reuse=tf.AUTO_REUSE):
            self.model = pos_model.POSTagger(is_training=False, config=config)

    # Checkpoint data files follow the pattern 'pos.ckpt.data*'.
    if glob.glob(self.ckpt_path + '.data*'):
        print("NOTICE: Loading model parameters from %s" % self.ckpt_path)
        all_vars = tf.global_variables()
        # Restore only variables whose name has this model's scope as one
        # of its '/'-separated components.
        model_vars = [
            k for k in all_vars if model_var_scope in k.name.split("/")
        ]
        tf.train.Saver(model_vars).restore(session, self.ckpt_path)
    else:
        print(
            "NOTICE: Model not found, Try to run method: deepnlp.download(module='pos', name='%s')"
            % self.name)
        print("NOTICE: Created with fresh parameters.")
        session.run(tf.global_variables_initializer())
def _init_ner_model(self, session, ckpt_path):
    """Create the NER tagger model and load or initialize its parameters.

    Builds ``ner_model.NERTagger`` (only when ``self.model`` is still
    ``None``) inside the variable scope returned by
    ``get_model_var_scope(self.var_scope, self.name)``. If files matching
    ``ckpt_path + '.data*'`` exist, the variables belonging to that scope
    are restored from ``ckpt_path``; otherwise every global variable is
    initialized from scratch.

    Args:
        session: TensorFlow session used to restore or initialize variables.
        ckpt_path: Checkpoint path prefix to restore the parameters from.

    Raises:
        ValueError: If no configuration exists for ``self.name``.
    """
    # Initialize config
    config = ner_model.get_config(self.name)
    if config is None:
        print("WARNING: Input model name %s has no configuration..." % self.name)
        # Fail with a clear message instead of the AttributeError that
        # setting attributes on None would produce just below.
        raise ValueError(
            "Input model name %s has no configuration" % self.name)
    config.batch_size = 1
    config.num_steps = 1  # iterate one token at a time

    model_var_scope = get_model_var_scope(self.var_scope, self.name)
    print("NOTICE: Input NER Model Var Scope Name '%s'" % model_var_scope)

    # Build the model graph only if this instance has not built it yet.
    if self.model is None:
        with tf.variable_scope(model_var_scope, reuse=tf.AUTO_REUSE):
            # NOTE(review): is_training=True is kept as in the original even
            # though this model is used for tagging; confirm against
            # NERTagger whether inference should use is_training=False.
            self.model = ner_model.NERTagger(is_training=True, config=config)

    # Checkpoint data files follow the pattern 'ner.ckpt.data*'.
    if glob.glob(ckpt_path + '.data*'):
        print("NOTICE: Loading model parameters from %s" % ckpt_path)
        all_vars = tf.global_variables()
        # Restore only variables whose name has this model's scope as one
        # of its '/'-separated components, e.g. ner_var_scope_zh.
        model_vars = [
            k for k in all_vars if model_var_scope in k.name.split("/")
        ]
        tf.train.Saver(model_vars).restore(session, ckpt_path)
    else:
        print(
            "NOTICE: Model not found, Try to run method: deepnlp.download(module='ner', name='%s')"
            % self.name)
        print("NOTICE: Created with fresh parameters.")
        session.run(tf.global_variables_initializer())
def main(_):
    """Train the NER tagger and print train/dev/test perplexities.

    Loads the data set from ``FLAGS.ner_data_path``, builds a training
    model plus validation and test models that share its variables,
    restores the latest checkpoint from ``FLAGS.ner_train_dir`` when one
    exists (otherwise initializes all variables), writes the graph
    definition to ``./models/ner_graph.pbtxt`` and then runs
    ``config.max_max_epoch`` training epochs.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``FLAGS.ner_data_path`` is empty.
    """
    if not FLAGS.ner_data_path:
        raise ValueError("No data files found in 'data_path' folder")

    raw_data = reader.load_data(FLAGS.ner_data_path)
    train_word, train_tag, dev_word, dev_tag, test_word, test_tag, vocabulary = raw_data

    config = get_config(FLAGS.ner_lang)
    # The test model processes a single token per step.
    eval_config = get_config(FLAGS.ner_lang)
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    model_var_scope = get_model_var_scope(FLAGS.ner_scope_name, FLAGS.ner_lang)
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        # The training model is built first and has to be allowed to CREATE
        # the variables: reuse=True here would make tf.get_variable fail in
        # a fresh graph because nothing exists to be reused yet.
        with tf.variable_scope(model_var_scope, reuse=tf.AUTO_REUSE,
                               initializer=initializer):
            m = NERTagger(is_training=True, config=config)
        # Validation and test models share the variables created above.
        with tf.variable_scope(model_var_scope, reuse=True,
                               initializer=initializer):
            mvalid = NERTagger(is_training=False, config=config)
            mtest = NERTagger(is_training=False, config=eval_config)

        # CheckPoint State
        ckpt = tf.train.get_checkpoint_state(FLAGS.ner_train_dir)
        if ckpt:
            print("Loading model parameters from %s" % ckpt.model_checkpoint_path)
            m.saver.restore(session, tf.train.latest_checkpoint(FLAGS.ner_train_dir))
        else:
            print("Created model with fresh parameters.")
            session.run(tf.global_variables_initializer())

        # Write the graph out for further use, e.g. C++ API call.
        tf.train.write_graph(session.graph_def, './models/', 'ner_graph.pbtxt',
                             as_text=True)  # output is text

        # Create the evaluation no-op once so that the graph does not grow
        # by one extra node per epoch.
        eval_op = tf.no_op()

        for i in range(config.max_max_epoch):
            # The learning rate stays constant for the first max_epoch
            # epochs and decays exponentially afterwards.
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, train_word, train_tag,
                                         m.train_op, verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, dev_word, dev_tag,
                                         eval_op)
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, test_word, test_tag, eval_op)
        print("Test Perplexity: %.3f" % test_perplexity)
def _init_model(self, session):
    """Build the parser model graph and populate its variables in `session`.

    The ``parse_model.NNParser`` graph is constructed only when
    ``self.model`` is still ``None``. Parameters are restored from
    ``self.ckpt_path`` if files matching ``self.ckpt_path + '.data*'``
    exist; otherwise all global variables are initialized from scratch.

    Args:
        session: TensorFlow session used to restore or initialize variables.
    """
    raw_cfg = load_config(self.model_config_path)
    parser_cfg = get_config(raw_cfg, self.name)
    scope_name = get_model_var_scope(self.var_scope, self.name)
    print("NOTICE: Initializing model var scope '%s'" % scope_name)

    # Create the graph only once per instance.
    if self.model is None:
        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            self.model = parse_model.NNParser(config=parser_cfg)

    # Checkpoint data files follow the pattern 'parser.ckpt.data*'.
    checkpoint_files = glob.glob(self.ckpt_path + '.data*')
    if not checkpoint_files:
        print("NOTICE: Model not found, Try to run method: deepnlp.download('parse')")
        print("NOTICE: Created with fresh parameters.")
        session.run(tf.global_variables_initializer())
        return

    print("NOTICE: Loading model parameters from %s" % self.ckpt_path)
    # Only restore variables whose name has the parser scope as one of its
    # '/'-separated components.
    scoped_vars = []
    for variable in tf.global_variables():
        if scope_name in variable.name.split("/"):
            scoped_vars.append(variable)
    saver = tf.train.Saver(scoped_vars)
    saver.restore(session, self.ckpt_path)
def main(_):
    """Train the neural dependency parser and print train/dev perplexities.

    Loads the data set from ``FLAGS.parse_data_path``, builds an
    ``NNParser`` model, restores the latest checkpoint from
    ``FLAGS.parse_train_dir`` when one exists (otherwise initializes all
    variables; the directory is created if missing) and then runs
    ``config.max_max_epoch`` epochs over the training and development
    examples.

    Args:
        _: Unused positional argument.
    """
    # Load Data (items are ids)
    raw_data = reader.load_data(FLAGS.parse_data_path)
    train_sents, train_trees, dev_sents, dev_trees, vocab_dict, pos_dict, label_dict, feature_tpl = raw_data

    # Load Config
    config_dict = load_config(FLAGS.parse_model_config_path)
    config = get_config(config_dict, FLAGS.parse_lang)

    model_var_scope = get_model_var_scope(FLAGS.parse_scope_name, FLAGS.parse_lang)
    print("NOTICE: Parsing model variable scope is %s" % model_var_scope)

    with tf.Session() as session:
        # This is the first (and only) model built in the scope, so it must
        # be allowed to CREATE its variables: reuse=True would make
        # tf.get_variable fail because nothing exists to be reused yet.
        with tf.variable_scope(model_var_scope, reuse=tf.AUTO_REUSE):
            m = NNParser(config=config)

        # CheckPoint State
        if not os.path.exists(FLAGS.parse_train_dir):
            os.makedirs(FLAGS.parse_train_dir)

        ckpt = tf.train.get_checkpoint_state(FLAGS.parse_train_dir)
        if ckpt:
            print("Loading model parameters from %s" % ckpt.model_checkpoint_path)
            m.saver.restore(session, tf.train.latest_checkpoint(FLAGS.parse_train_dir))
        else:
            print("Created model with fresh parameters.")
            session.run(tf.global_variables_initializer())

        # Loop-invariant values, computed once: the path of the saved
        # example instances and the no-op used for evaluation (creating it
        # per epoch would add a new graph node every iteration).
        instance_path = os.path.join(FLAGS.parse_data_path, "instance")
        eval_op = tf.no_op()

        for i in range(config.max_max_epoch):
            # The learning rate stays constant for the first max_epoch
            # epochs and decays exponentially afterwards.
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            print("Epoch: %d Learning rate: %.4f" % (i + 1, session.run(m.lr)))

            # Training: check training set perplexity.
            # NOTE(review): generate_examples is still called every epoch to
            # preserve the existing behaviour; if it is deterministic it
            # could be hoisted out of the loop - confirm against
            # transition_system.
            train_X, train_Y = transition_system.generate_examples(
                train_sents, train_trees, label_dict, feature_tpl,
                instance_path, is_train=True)
            train_iter = transition_system.iter_examples(train_X, train_Y, m.batch_size)
            train_perplexity = run_epoch(session, m, m.train_op, train_iter)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))

            # Dev: check development set perplexity.
            dev_X, dev_Y = transition_system.generate_examples(
                dev_sents, dev_trees, label_dict, feature_tpl,
                instance_path, is_train=False)
            dev_iter = transition_system.iter_examples(dev_X, dev_Y, m.batch_size)
            dev_perplexity = run_epoch(session, m, eval_op, dev_iter, is_train=False)
            print("Epoch: %d Dev Perplexity: %.3f" % (i + 1, dev_perplexity))