def train(self):
    self._init_dataset_maker(False)

    train_graph = tf.Graph()
    with train_graph.as_default():
        train_char_mapping_tensor, train_label_mapping_tensor = DatasetMaker.make_mapping_table_tensor()
        train_dataset = DatasetMaker.make_dataset(
            train_char_mapping_tensor, train_label_mapping_tensor,
            self.train_data, FLAGS.batch_size, "train", 1, 0)
        self.global_step = tf.train.get_or_create_global_step()
        train_iter = tf.data.Iterator.from_structure(
            train_dataset.output_types, train_dataset.output_shapes)
        train_init_op = train_iter.make_initializer(train_dataset)
        train_model = TrainModel(train_iter, FLAGS, self.global_step)
        self.train_summary_op = train_model.merge_train_summary_op

    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_char_mapping_tensor, eval_label_mapping_tensor = DatasetMaker.make_mapping_table_tensor()
        valid_dataset = DatasetMaker.make_dataset(
            eval_char_mapping_tensor, eval_label_mapping_tensor,
            self.valid_data, FLAGS.batch_size, "eval", 1, 0)
        tf.logging.info("The part 1/1 Validation dataset is prepared!")
        test_dataset = DatasetMaker.make_dataset(
            eval_char_mapping_tensor, eval_label_mapping_tensor,
            self.test_data, FLAGS.batch_size, "eval", 1, 0)
        tf.logging.info("The part 1/1 Test dataset is prepared!")

        eval_iter = tf.data.Iterator.from_structure(
            valid_dataset.output_types, valid_dataset.output_shapes)
        valid_init_op = eval_iter.make_initializer(valid_dataset)
        test_init_op = eval_iter.make_initializer(test_dataset)
        eval_model = EvalModel(eval_iter, FLAGS)

    train_session = self._create_session(train_graph)
    tf.logging.info("Created model with fresh parameters.")
    print_flags(FLAGS)
    save_flags(FLAGS, os.path.join(self.root_path, "config.pkl"))
    with train_session.graph.as_default():
        train_session.run(tf.global_variables_initializer())
    train_session.run(train_char_mapping_tensor.init)
    #train_session.run(train_gram2_mapping_tensor.init)
    #train_session.run(train_gram3_mapping_tensor.init)
    train_session.run(train_label_mapping_tensor.init)
    train_session.run(train_init_op)

    eval_session = self._create_session(eval_graph)
    eval_session.run(eval_char_mapping_tensor.init)
    #eval_session.run(eval_gram2_mapping_tensor.init)
    #eval_session.run(eval_gram3_mapping_tensor.init)
    eval_session.run(eval_label_mapping_tensor.init)

    tf.logging.info("Start training")
    loss = []
    steps_per_epoch = self.train_data_num // FLAGS.batch_size  # how many batches in an epoch
    for i in range(FLAGS.max_epoch):
        for j in range(steps_per_epoch):
            step, loss_value = train_model.train(train_session)
            loss.append(loss_value)
            if step % FLAGS.check_step == 0:
                iteration = step // steps_per_epoch + 1
                tf.logging.info(
                    "iteration:{} step:{}/{}, cross entropy loss:{:>9.6f}".format(
                        iteration, step % steps_per_epoch, steps_per_epoch, np.mean(loss)))
                loss = []
            if step % FLAGS.eval_step == 0:
                tf.logging.info(
                    "Evaluate Validation Dataset and Test Dataset in step: {}".format(step))
                train_model.saver.save(
                    train_session, os.path.join(self.log_dir, "temp_model.ckpt"))
                tf.logging.info("Saving model parameters in {}".format(
                    os.path.join(self.log_dir, "temp_model.ckpt")))
                eval_model.saver.restore(
                    eval_session, os.path.join(self.log_dir, "temp_model.ckpt"))
                tf.logging.info("Loading model from {}".format(
                    os.path.join(self.log_dir, "temp_model.ckpt")))
                validation_accuracy = self._eval_performance(
                    eval_session, eval_model, "validation", valid_init_op)
                test_accuracy = self._eval_performance(
                    eval_session, eval_model, "test", test_init_op)
                eval_model.save_dev_test_summary(
                    self.summary_writer, eval_session,
                    validation_accuracy, test_accuracy, step)
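# A minimal sketch of the _eval_performance helper called above, which is not
# shown in this snippet. It assumes a hypothetical eval_model.evaluate(session)
# that returns (correct_count, batch_count) per batch; the real EvalModel API
# may differ.
def _eval_performance(self, session, eval_model, name, iter_init_op):
    session.run(iter_init_op)  # switch the shared eval iterator to this dataset
    correct, total = 0, 0
    while True:
        try:
            batch_correct, batch_total = eval_model.evaluate(session)
            correct += batch_correct
            total += batch_total
        except tf.errors.OutOfRangeError:
            break  # one full pass over the eval dataset is finished
    accuracy = correct / max(total, 1)
    tf.logging.info("{} accuracy: {:.4f}".format(name, accuracy))
    return accuracy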
def run_training(train_model, train_sess, x, y_):
    with train_model.graph.as_default():
        train_model.model.train(train_sess, x, y_)


def get_reconstruct(train_model, train_sess, x):
    with train_model.graph.as_default():
        return train_model.model.reconstruct(train_sess, x)


if __name__ == '__main__':
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    n_samples = mnist.train.num_examples
    batch_size = 128
    epoch_num = 60

    tr_model = TrainModel()
    tr_sess = tf.Session(target='', graph=tr_model.graph)
    initialize(tr_model, tr_sess)

    n_batches = int(n_samples / batch_size)
    x_sample = mnist.test.next_batch(100)[0]
    plt.figure(figsize=(8, 12))

    step = 0
    for epoch in range(epoch_num):
        for _ in range(n_batches):
            x, y = mnist.train.next_batch(batch_size)
            run_training(tr_model, tr_sess, x, x)  # autoencoder: target equals input
            step += 1
            if step % 10 != 0:
                continue
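            # Hedged continuation (assumption): the snippet is truncated after
            # the step check; the x_sample batch and plt.figure set up above
            # suggest a periodic reconstruction plot like this. The subplot
            # layout and filename are guesses.
            x_reconstruct = get_reconstruct(tr_model, tr_sess, x_sample)
            for i in range(5):
                plt.subplot(5, 2, 2 * i + 1)
                plt.imshow(x_sample[i].reshape(28, 28), cmap='gray')
                plt.title("input")
                plt.subplot(5, 2, 2 * i + 2)
                plt.imshow(x_reconstruct[i].reshape(28, 28), cmap='gray')
                plt.title("reconstruction")
            plt.savefig("reconstruction_step_{}.png".format(step))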
# beam size
beam_size = 5
# segment (token) to id mapping
segment_to_int = vocab_to_int
# inference mode
infer_mode = 'beam_search'

train_graph = tf.Graph()
infer_graph = tf.Graph()
checkpoints_path = "model2/"

train_sess = tf.Session(graph=train_graph)
infer_sess = tf.Session(graph=infer_graph)

with train_graph.as_default():
    train_model = TrainModel(vocab_size, embedding_size, num_units, num_layers,
                             max_target_sequence_length, batch_size,
                             max_gradient_norm, learning_rate, learning_rate_decay)
    train_model.saver = tf.train.import_meta_graph("checkpoints-999.meta")
    train_model.saver.restore(train_sess, tf.train.latest_checkpoint(checkpoints_path))

'''
with train_graph.as_default():
    train_model = TrainModel(vocab_size, embedding_size, num_units, num_layers,
                             max_target_sequence_length, batch_size,
                             max_gradient_norm, learning_rate, learning_rate_decay)
    initializer = tf.global_variables_initializer()
'''

with infer_graph.as_default():
    infer_model = InferenceModel(vocab_size, embedding_size, num_units, num_layers,
                                 max_target_sequence_length, infer_batch_size,
                                 beam_size, segment_to_int, infer_mode)
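# Hedged sketch (assumption): hand the restored training weights to the
# inference graph through the checkpoint on disk. Assumes InferenceModel
# builds identically named variables so a plain Saver can restore them.
with infer_graph.as_default():
    infer_saver = tf.train.Saver()
infer_saver.restore(infer_sess, tf.train.latest_checkpoint(checkpoints_path))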
def train(self):
    if self.job_name == "ps":
        with tf.device("/cpu:0"):
            self.server.join()
        return

    self._init_dataset_maker(False)
    train_init_op = None
    valid_init_op = None
    test_init_op = None
    with tf.device(
            tf.train.replica_device_setter(
                worker_device=self.worker_prefix, cluster=self.cluster)):
        self.global_step = tf.train.get_or_create_global_step()
        if self.job_name == "worker":
            train_dataset = DatasetMaker.make_dataset(
                self.train_data, FLAGS.batch_size, "train",
                self.num_worker, self.task_index)
            tf.logging.info(
                "The part {}/{} Training dataset is prepared!".format(
                    self.task_index + 1, self.num_worker))
            train_iter = tf.data.Iterator.from_structure(
                train_dataset.output_types, train_dataset.output_shapes)
            train_init_op = train_iter.make_initializer(train_dataset)
            train_model = TrainModel(train_iter, FLAGS, self.global_step)
        elif self.job_name == "chief":
            # build same train graph to synchronize model parameters
            train_dataset = DatasetMaker.make_dataset(
                self.train_data, FLAGS.batch_size, "train",
                self.num_worker, self.task_index)
            train_iter = tf.data.Iterator.from_structure(
                train_dataset.output_types, train_dataset.output_shapes)
            train_model = TrainModel(train_iter, FLAGS, self.global_step)
            self.train_summary_op = train_model.merge_train_summary_op

            # build a test graph of the same structure but a different name scope;
            # restore the model from the train checkpoint so validation never
            # updates the training parameters
            eval_graph = tf.Graph()
            with eval_graph.as_default():
                valid_dataset = DatasetMaker.make_dataset(
                    self.valid_data, FLAGS.batch_size, "eval", 1, 0)
                tf.logging.info("The part 1/1 Validation dataset is prepared!")
                test_dataset = DatasetMaker.make_dataset(
                    self.test_data, FLAGS.batch_size, "eval", 1, 0)
                tf.logging.info("The part 1/1 Test dataset is prepared!")

                eval_iter = tf.data.Iterator.from_structure(
                    valid_dataset.output_types, valid_dataset.output_shapes)
                valid_init_op = eval_iter.make_initializer(valid_dataset)
                test_init_op = eval_iter.make_initializer(test_dataset)
                eval_model = EvalModel(eval_iter, FLAGS, "eval_graph")

    with self._create_session_wrapper(retries=10) as sess:
        try:
            if self.job_name == "worker":
                DatasetMaker.init_mapping_table_tensor(sess)
                sess.run(train_init_op)
                step = 0
                while not sess.should_stop():
                    global_step_val, loss_value = train_model.train(sess)
                    if (step + 1) % self.check_step == 0:
                        epoch = (global_step_val * FLAGS.batch_size) // self.train_data_num
                        tf.logging.info(
                            "Job-{}:Worker-{}-----Epoch:{}-Local_Step/Global_Step:{}/{}:Loss is {:.2f}"
                            .format(self.job_name, self.task_index, epoch,
                                    step, global_step_val, loss_value))
                    step += 1
            elif self.job_name == "chief":
                tf.logging.info("Created model with fresh parameters.")
                self._print_flags(FLAGS)
                sess.run(tf.global_variables_initializer())
                DatasetMaker.init_mapping_table_tensor(sess)
                # record top N model's performance
                while True:
                    time.sleep(2)
                    global_step_val = sess.run(self.global_step)
                    if (global_step_val + 1) % self.eval_step == 0:
                        tf.logging.info(
                            "Evaluate Validation Dataset and Test Dataset in step: {}"
                            .format(global_step_val))
                        train_model.saver.save(
                            sess, self.log_dir, latest_filename="temp",
                            global_step=self.global_step)
                        ckpt = tf.train.get_checkpoint_state(
                            self.log_dir, latest_filename="temp")
                        tf.logging.info("Saving model parameters in {}".format(
                            ckpt.model_checkpoint_path))
                        eval_model.saver.restore(sess, ckpt.model_checkpoint_path)
                        tf.logging.info("Loading model from {}".format(
                            ckpt.model_checkpoint_path))
                        validation_accuracy = self._eval_performance(
                            sess, eval_model, "validation", valid_init_op)
                        test_accuracy = self._eval_performance(
                            sess, eval_model, "test", test_init_op)
                        eval_model.save_dev_test_summary(
                            self.summary_writer, sess,
                            validation_accuracy, test_accuracy, global_step_val)
        except tf.errors.OutOfRangeError as e:
            exc_info = traceback.format_exc()
            msg = 'Out of range error:{}\n{}'.format(e, exc_info)
            tf.logging.warn(msg)
            tf.logging.info('Done training -- step limit reached')
from generator import TrafficGenerator
from memory import Memory
from model import TrainModel
from training_simulation import Simulation  # assumed module path for the Simulation class used below
from visualization import Visualization
from utils import import_train_configuration, set_sumo, set_train_path

if __name__ == "__main__":
    config = import_train_configuration(config_file='training_settings.ini')
    sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps'])
    path = set_train_path(config['models_path_name'])

    Model = TrainModel(
        config['num_layers'],
        config['width_layers'],
        config['batch_size'],
        config['learning_rate'],
        input_dim=config['num_states'],
        output_dim=config['num_actions'])

    Memory = Memory(
        config['memory_size_max'],
        config['memory_size_min'])

    TrafficGen = TrafficGenerator(
        config['max_steps'],
        config['n_v_generated'])

    Visualization = Visualization(path, dpi=96)

    Simulation = Simulation(
        Model, Memory, TrafficGen, sumo_cmd,
        config['gamma'], config['max_steps'],
        config['green_duration'], config['yellow_duration'],
        config['num_states'], config['num_actions'],
        config['training_epochs'])
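    # Hedged sketch (assumption): the episode loop that usually follows this
    # setup. The 'total_episodes' config key and the run()/save_model() method
    # names are assumptions about this project's API.
    episode = 0
    while episode < config['total_episodes']:
        epsilon = 1.0 - episode / config['total_episodes']  # linear epsilon decay
        Simulation.run(episode, epsilon)
        episode += 1
    Model.save_model(path)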
def train(self):
    if self.job_name == "ps":
        with tf.device("/cpu:0"):
            self.server.join()
        return

    if not self.is_chief:
        time.sleep(20)
    self._init_dataset_maker(True)

    ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy(self.num_ps)
    with tf.device(
            tf.train.replica_device_setter(
                worker_device=self.worker_prefix,
                cluster=self.cluster,
                ps_strategy=ps_strategy)):
        self.global_step = tf.train.get_or_create_global_step()
        char_mapping_tensor, label_mapping_tensor = DatasetMaker.make_mapping_table_tensor()
        train_dataset = DatasetMaker.make_dataset(
            char_mapping_tensor, label_mapping_tensor, self.train_data,
            FLAGS.batch_size, "train", self.num_worker, self.task_index)
        tf.logging.info(
            "The part {}/{} Training dataset is prepared!".format(
                self.task_index + 1, self.num_worker))
        train_iter = tf.data.Iterator.from_structure(
            train_dataset.output_types, train_dataset.output_shapes)
        self.train_init_op = train_iter.make_initializer(train_dataset)

        train_model = TrainModel(train_iter, FLAGS, self.global_step)
        self.optimizer = train_model.optimizer
        self.train_summary_op = train_model.merge_train_summary_op

    with self._create_session_wrapper(retries=10) as sess:
        try:
            if self.job_name == "worker":
                step = 0
                while not sess.should_stop():
                    global_step_val, loss_value = train_model.train(sess)
                    if (step + 1) % self.check_step == 0:
                        epoch = ((step + 1) * FLAGS.batch_size) // self.train_data_num
                        tf.logging.info(
                            "Job-{}:Worker-{}-----Local_Step/Global_Step:{}/{}:Loss is {:.4f}"
                            .format(self.job_name, self.task_index,
                                    step, global_step_val, loss_value))
                        tf.logging.info(
                            "Epoch:{}-Processed {}/{} data".format(
                                epoch,
                                (step + 1) * FLAGS.batch_size % self.train_data_num,
                                self.train_data_num))
                    step += 1
            elif self.job_name == "chief":
                print_flags(FLAGS, True)
                save_flags(FLAGS, os.path.join(self.root_path, "config.pkl"), True)
                tf.logging.info("Waiting for training...")
                # record top N model's performance
                while True:
                    time.sleep(5)
                    global_step_val = sess.run(self.global_step)
                    tf.logging.info("Global step is {}".format(global_step_val))
        except tf.errors.OutOfRangeError as e:
            exc_info = traceback.format_exc()
            msg = 'Out of range error:{}\n{}'.format(e, exc_info)
            tf.logging.warn(msg)
            tf.logging.info('Done training -- step limit reached')
epochs = 50
# how often (in steps) to run inference
infer_step = 5
# beam size
beam_size = 5
# segment (token) to id mapping
segment_to_int = vocab_to_int
# inference mode
infer_mode = 'beam_search'

train_graph = tf.Graph()
infer_graph = tf.Graph()

with train_graph.as_default():
    train_model = TrainModel(vocab_size, embedding_size, num_units, num_layers,
                             max_target_sequence_length, batch_size,
                             max_gradient_norm, learning_rate)
    initializer = tf.global_variables_initializer()

with infer_graph.as_default():
    infer_model = InferenceModel(vocab_size, embedding_size, num_units, num_layers,
                                 max_target_sequence_length, infer_batch_size,
                                 beam_size, segment_to_int, infer_mode)

checkpoints_path = "model2/checkpoints"
train_sess = tf.Session(graph=train_graph)
infer_sess = tf.Session(graph=infer_graph)
train_sess.run(initializer)
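# Hedged sketch (assumption): the train/infer loop these settings point to.
# train_model.train(...), infer_model.infer(...), and the .saver attributes
# are assumptions about the two model classes.
for epoch in range(epochs):
    train_model.train(train_sess)  # one pass over the training batches
    if (epoch + 1) % infer_step == 0:
        # checkpoint the train graph, then load it into the inference graph
        train_model.saver.save(train_sess, checkpoints_path, global_step=epoch)
        infer_model.saver.restore(infer_sess,
                                  tf.train.latest_checkpoint("model2/"))
        infer_model.infer(infer_sess)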
from i_o import Dataset
from sgns import SGNS
from model import TrainModel
from cosine_sim import cosine_similarities
# import zipfile

de_corpora = "dataset/de_pud-ud-test.conllu"
tr_corpora = "dataset/tr_pud-ud-test.conllu"
en_corpora = "dataset/en_pud-ud-test.conllu"

if __name__ == "__main__":
    en_dataset = Dataset(en_corpora)
    en_data = SGNS(en_dataset)
    en_dependent_embeddings = TrainModel(en_data.dependent_train_set, en_data.word2id)
    print(en_dependent_embeddings.embeddings['transition'])
    en_linear_bow_embeddings = TrainModel(en_data.linear_bow_train_set, en_data.word2id)
    print(en_linear_bow_embeddings.embeddings['transition'])

    de_dataset = Dataset(de_corpora)
    de_data = SGNS(de_dataset)
    de_dependent_embeddings = TrainModel(de_data.dependent_train_set, de_data.word2id)
    print(de_dependent_embeddings.embeddings['übergangs'])
    de_linear_bow_embeddings = TrainModel(de_data.linear_bow_train_set, de_data.word2id)
    print(de_linear_bow_embeddings.embeddings['übergangs'])

    tr_dataset = Dataset(tr_corpora)
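    # Hedged continuation (assumption): the Turkish block presumably mirrors
    # the English and German ones above; no probe word is printed since the
    # original stops here.
    tr_data = SGNS(tr_dataset)
    tr_dependent_embeddings = TrainModel(tr_data.dependent_train_set, tr_data.word2id)
    tr_linear_bow_embeddings = TrainModel(tr_data.linear_bow_train_set, tr_data.word2id)

    # cosine_similarities is imported above but never used in the visible code;
    # a plausible call comparing the two English embedding spaces (the
    # signature is an assumption).
    print(cosine_similarities(en_dependent_embeddings.embeddings,
                              en_linear_bow_embeddings.embeddings))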
# Random agent hyperparameters
num_layers = 4
width_layers = 480
batch_size = 100
learning_rate = 0.001
training_epochs = 800
num_states = 320
num_actions = 4
memory_size_min = 600
memory_size_max = 50000

model = TrainModel(
    num_layers,
    width_layers,
    batch_size,
    learning_rate,
    input_dim=num_states,
    output_dim=num_actions
)

mem = Memory(
    memory_size_max,
    memory_size_min
)


@app.route('/initialize_agent', methods=['POST'])
def initialize_agent():
    params = request.get_json()  # parse the JSON body once
    model._num_layers = params['num_layers']
    model._width = params['width_layers']
    model._batch_size = params['batch_size']
    model._learning_rate = params['learning_rate']
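    # Hedged continuation (assumption): the visible route body ends without a
    # response; a Flask view must return one, so a minimal JSON acknowledgement
    # (assumes `jsonify` is imported from flask alongside `request`).
    return jsonify({'status': 'agent initialized'})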
class ModelUsage(object):
    def __init__(self, FLAGS):
        self.FLAGS = FLAGS
        self.FLAGS.ckpt_path = os.path.join(FLAGS.root_path, FLAGS.ckpt_path)
        self.FLAGS.summary_path = os.path.join(FLAGS.root_path, FLAGS.summary_path)
        self.FLAGS.log_path = os.path.join(FLAGS.root_path, FLAGS.log_path)
        self.FLAGS.logfile_path = os.path.join(self.FLAGS.log_path, "train.log")
        self.FLAGS.map_path = os.path.join(FLAGS.root_path, FLAGS.map_path)
        self.FLAGS.mapfile_path = os.path.join(self.FLAGS.map_path, "maps.pkl")
        self.FLAGS.vocab_path = os.path.join(FLAGS.root_path, FLAGS.vocab_path)
        self.FLAGS.vocabfile_path = os.path.join(FLAGS.vocab_path, "vocabulary.csv")
        self.FLAGS.config_path = os.path.join(FLAGS.root_path, FLAGS.config_path)
        self.FLAGS.configfile_path = os.path.join(self.FLAGS.config_path, "config_file")
        self.FLAGS.result_path = os.path.join(FLAGS.root_path, FLAGS.result_path)
        self.FLAGS.train_file = os.path.join(FLAGS.data_root_path, FLAGS.train_file)
        self.FLAGS.dev_file = os.path.join(FLAGS.data_root_path, FLAGS.dev_file)
        self.FLAGS.test_file = os.path.join(FLAGS.data_root_path, FLAGS.test_file)

    def config(self):
        config = OrderedDict()
        config["char_num"] = len(MappingInfo.char_to_id)
        config["char_dim"] = self.FLAGS.char_dim
        config["hidden_dim"] = self.FLAGS.hidden_dim
        config["rnn_layer_num"] = self.FLAGS.rnn_layer_num
        config["infer_num"] = self.FLAGS.infer_num
        config["batch_size"] = self.FLAGS.batch_size
        config["start_symbol_id"] = MappingInfo.char_to_id['<begin>']
        config["end_symbol_id"] = MappingInfo.char_to_id['</begin>']
        config["clip"] = self.FLAGS.clip
        config["use_train_sampling"] = self.FLAGS.use_train_sampling
        config["train_sample_prob"] = self.FLAGS.train_sample_prob
        config["dropout"] = self.FLAGS.dropout
        config["lr"] = self.FLAGS.lr
        config["zeros"] = self.FLAGS.zeros
        config["lower"] = self.FLAGS.lower
        config["summary_path"] = self.FLAGS.summary_path
        return config

    def infer(self, session, model, logger):
        sentence_list = model.infer(session)
        sentence = u"\n".join([u"".join(s) for s in sentence_list])
        logger.info(sentence)

    def evaluate(self, session, model, name, iter_init_op, logger):
        logger.info("evaluate:{}".format(name))
        session.run(iter_init_op)  # initialize dev or test iterator
        logger.info("iterator is switched to {}".format(name))
        perplexity = model.evaluate(session)
        logger.info("current {} perplexity score:{:>.3f}".format(name, perplexity))
        if name == "dev":
            self.train_session.run(
                tf.assign(self.train_model.dev_perplexity, perplexity))
            best_dev_perplexity = self.train_session.run(
                self.train_model.best_dev_perplexity)
            if perplexity < best_dev_perplexity:
                self.train_session.run(
                    tf.assign(self.train_model.best_dev_perplexity, perplexity))
                logger.info("new best dev perplexity score:{:>.3f}".format(perplexity))
            return (perplexity < best_dev_perplexity, perplexity)
        elif name == "test":
            self.train_session.run(
                tf.assign(self.train_model.test_perplexity, perplexity))
            best_test_perplexity = self.train_session.run(
                self.train_model.best_test_perplexity)
            if perplexity < best_test_perplexity:
                self.train_session.run(
                    tf.assign(self.train_model.best_test_perplexity, perplexity))
                logger.info("new best test perplexity score:{:>.3f}".format(perplexity))
            return (perplexity < best_test_perplexity, perplexity)

    def train(self):
        make_path(self.FLAGS)
        logger = get_logger(self.FLAGS.logfile_path)
        # build char-id mapping
        MappingInfo.char_mapping(self.FLAGS.train_file, self.FLAGS.zeros, self.FLAGS.lower)
        MappingInfo.save_map(self.FLAGS.mapfile_path, self.FLAGS.vocabfile_path, logger)
        # load config and print it
        if tf.gfile.Exists(self.FLAGS.configfile_path):
            config = load_config(self.FLAGS.configfile_path)
        else:
            config = self.config()
            save_config(config, self.FLAGS.configfile_path)
        print_config(config, logger)

        # calculate sentence num
        logger.info("Calculating sentence num in dataset")
        train_sentence_num = line_num_count(self.FLAGS.train_file)
        dev_sentence_num = line_num_count(self.FLAGS.dev_file)
        test_sentence_num = line_num_count(self.FLAGS.test_file)
        logger.info("{} / {} / {} sentences in train / dev / test.".format(
            train_sentence_num, dev_sentence_num, test_sentence_num))

        self.train_graph = tf.Graph()
        with self.train_graph.as_default():
            table_train = MappingInfo.make_table_tensor()
            # load data sets; use generator to avoid memory oversize
            train_dataset = dataset_from_file(self.FLAGS.train_file,
                                              self.FLAGS.zeros, self.FLAGS.lower,
                                              self.FLAGS.batch_size, None, table_train)
            logger.info("Train sentence dataset is initialized")
            # build iterator from dataset
            iter_train = tf.data.Iterator.from_structure(
                train_dataset.output_types, train_dataset.output_shapes)
            train_init_op = iter_train.make_initializer(train_dataset)
            self.train_model = TrainModel(config, iter_train)

        self.eval_graph = tf.Graph()
        with self.eval_graph.as_default():
            table_eval = MappingInfo.make_table_tensor()
            dev_dataset = dataset_from_file(self.FLAGS.dev_file,
                                            self.FLAGS.zeros, self.FLAGS.lower,
                                            self.FLAGS.batch_size, 1, table_eval)
            logger.info("Dev sentence dataset is initialized")
            test_dataset = dataset_from_file(self.FLAGS.test_file,
                                             self.FLAGS.zeros, self.FLAGS.lower,
                                             self.FLAGS.batch_size, 1, table_eval)
            logger.info("Test sentence dataset is initialized")
            iter_eval = tf.data.Iterator.from_structure(
                train_dataset.output_types, train_dataset.output_shapes)
            dev_init_op = iter_eval.make_initializer(dev_dataset)
            test_init_op = iter_eval.make_initializer(test_dataset)
            eval_model = EvalModel(config, iter_eval)

        self.infer_graph = tf.Graph()
        with self.infer_graph.as_default():
            infer_model = InferModel(config)

        # limit GPU memory
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        #tf_config.log_device_placement = True
        steps_per_epoch = train_sentence_num // config["batch_size"]  # how many batches in an epoch
        self.train_session = tf.Session(config=tf_config, graph=self.train_graph)
        eval_session = tf.Session(config=tf_config, graph=self.eval_graph)
        infer_session = tf.Session(config=tf_config, graph=self.infer_graph)

        logger.info("start training")
        create_model(self.train_session, self.train_model, self.FLAGS.ckpt_path, logger)
        self.train_session.run(table_train.init)
        self.train_session.run(train_init_op)
        eval_session.run(table_eval.init)

        loss = []
        lr = config["lr"]
        for i in range(self.FLAGS.max_epoch):
            for j in range(steps_per_epoch):
                step, batch_loss = self.train_model.train(self.train_session)
                loss.append(batch_loss)
                sample_prob = max(
                    0.3, config["train_sample_prob"] - 0.2 * step / steps_per_epoch)  # linearly decay sample prob
                self.train_session.run(
                    tf.assign(self.train_model.train_sample_prob, sample_prob))
                if step % self.FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info(
                        "iteration:{} step:{}/{}, NER loss:{:>9.6f}, Training Sample prob is now {:>4.2f}"
                        .format(iteration, step % steps_per_epoch,
                                steps_per_epoch, np.mean(loss), sample_prob))
                    loss = []
                if step % self.FLAGS.steps_eval == 0:
                    save_model(self.train_session, self.train_model,
                               self.FLAGS.ckpt_path, logger)
                    load_model(eval_session, eval_model, self.FLAGS.ckpt_path, logger)
                    best, current_perplexity = self.evaluate(
                        eval_session, eval_model, "dev", dev_init_op, logger)
                    if best:
                        save_model(self.train_session, self.train_model,
                                   self.FLAGS.best_ckpt_path, logger)
                    self.evaluate(eval_session, eval_model, "test", test_init_op, logger)
                    self.train_model.save_dev_test_summary(self.train_session)
                    load_model(infer_session, infer_model, self.FLAGS.ckpt_path, logger)
                    self.infer(infer_session, infer_model, logger)
            lr = max(0.0001, lr / 1.5)
            self.train_session.run(tf.assign(self.train_model.lr, lr))
            logger.info("Epoch {} is finished, rescale learning rate to {}".format(i, lr))

    def evaluate_line(self):
        config = load_config(self.FLAGS.configfile_path)
        logger = get_logger(self.FLAGS.logfile_path)
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        with tf.gfile.GFile(self.FLAGS.mapfile_path, "rb") as f:
            char_to_id, id_to_char = pickle.load(f)
        with tf.Session(config=tf_config) as sess:
            x = tf.placeholder(tf.string, shape=[1, N])
            dataset = dataset_from_string
            model = load_model(sess, InferModel, self.FLAGS.ckpt_path, config, logger)

    def run(self):
        if self.FLAGS.train:
            if self.FLAGS.clean:
                clean(self.FLAGS)
            if self.FLAGS.clean_map:
                clean_map(self.FLAGS)
            self.train()
        else:
            self.evaluate_line()
import schedule
import time
import logging

from model import TrainModel

handlers = [logging.StreamHandler()]
logging.basicConfig(handlers=handlers,
                    format='%(levelname)s:%(message)s',
                    level=logging.INFO)

if __name__ == '__main__':
    tm = TrainModel()
    schedule.every().second.do(tm.train_once)
    schedule.every().day.do(tm.train)
    while True:
        schedule.run_pending()
        time.sleep(1)