import datetime
import json
import os
import time

import tensorflow as tf

import data_helper
from fast_text import FastText  # project-local model class (module path assumed)


def train(config):
    print('parameters:')
    print(json.dumps(config, indent=4, ensure_ascii=False))

    # load data
    print('load data .....')
    X, y = data_helper.process_data(config)

    # make vocab
    print('make vocab .....')
    word_to_index, label_to_index = data_helper.generate_vocab(X, y, config)

    # padding data
    print('padding data .....')
    input_x, input_y = data_helper.padding(X, y, config, word_to_index, label_to_index)

    # split data
    print('split data .....')
    x_train, y_train, x_test, y_test, x_dev, y_dev = data_helper.split_data(input_x, input_y, config)
    print('length train: {}'.format(len(x_train)))
    print('length test: {}'.format(len(x_test)))
    print('length dev: {}'.format(len(x_dev)))

    print('training .....')
    with tf.Graph().as_default():
        sess_config = tf.ConfigProto(
            allow_soft_placement=config['allow_soft_placement'],
            log_device_placement=config['log_device_placement']
        )
        with tf.Session(config=sess_config) as sess:
            fast_text = FastText(config)

            # training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(config['learning_rate'])
            grads_and_vars = optimizer.compute_gradients(fast_text.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram('{}/grad/hist'.format(v.name), g)
                    sparsity_summary = tf.summary.scalar('{}/grad/sparsity'.format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # output dir for models and summaries
            timestamp = str(int(time.time()))
            outdir = os.path.abspath(os.path.join(os.path.curdir, 'runs', timestamp))
            print('writing to {}'.format(outdir))

            # summary for loss and accuracy
            loss_summary = tf.summary.scalar('loss', fast_text.loss)
            acc_summary = tf.summary.scalar('accuracy', fast_text.accuracy)

            # train summary
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(outdir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # dev summary
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(outdir, 'summaries', 'dev')
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # checkpoint directory
            checkpoint_dir = os.path.abspath(os.path.join(outdir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model.bin')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=config['num_checkpoints'])

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    fast_text.input_x: x_batch,
                    fast_text.input_y: y_batch,
                    # NOTE: feeding the training keep prob here is an assumption; the original
                    # omitted it while dev_step below feeds 1.0 explicitly.
                    fast_text.dropout_keep_prob: config['dropout_keep_prob'],
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, fast_text.loss, fast_text.accuracy],
                    feed_dict=feed_dict
                )
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                feed_dict = {
                    fast_text.input_x: x_batch,
                    fast_text.input_y: y_batch,
                    fast_text.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, fast_text.loss, fast_text.accuracy],
                    feed_dict=feed_dict
                )
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # generate batches
            batches = data_helper.generate_batchs(x_train, y_train, config)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config['evaluate_every'] == 0:
                    print('Evaluation:')
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                if current_step % config['checkpoint_every'] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print('save model checkpoint to {}'.format(path))

            # test accuracy
            test_accuracy = sess.run(
                [fast_text.accuracy],
                feed_dict={
                    fast_text.input_x: x_test,
                    fast_text.input_y: y_test,
                    fast_text.dropout_keep_prob: 1.0
                })
            print('Test dataset accuracy: {}'.format(test_accuracy))
import datetime
import os

import tensorflow as tf

import data_helper
from fast_text import FastText  # project-local model class (module path assumed)

# NOTE: `config` is referenced below but never defined in this script; it is assumed to
# be a dict provided elsewhere in the project (e.g. loaded from a config file).


def train():
    X_train, y_train, all_words = data_helper.preprocess_data('./mini_data/train.txt')
    word_to_idx, idx_to_word = data_helper.generator_vocab(X_train, './mini_data')
    X_train_digit = data_helper.padding(X_train, word_to_idx)

    with tf.Graph().as_default():
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            fasttext = FastText(seq_length=config["seq_length"],
                                num_class=config["num_class"],
                                vocab_size=config["vocab_size"],
                                embedding_size=config["embedding_size"])

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=config["learning_rate"])
            train_op = optimizer.minimize(fasttext.loss, global_step=global_step)

            loss_summary = tf.summary.scalar('loss', fasttext.loss)
            # summarise the scalar precision op (the original passed the non-scalar
            # `predictions` tensor here, which tf.summary.scalar cannot record)
            acc_summary = tf.summary.scalar('precision', fasttext.precision)

            time_stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
            out_dir = os.path.join("runs", time_stamp)

            # train summary
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, 'summary', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # dev summary
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, 'summary', 'dev')
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # checkpoint
            checkpoint_dir = os.path.join(out_dir, 'model')
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=config["max_to_keep"])

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    fasttext.input_x: x_batch,
                    fasttext.input_y: y_batch
                }
                _, step, summaries, loss = sess.run(
                    [train_op, global_step, train_summary_op, fasttext.loss],
                    feed_dict=feed_dict)
                train_summary_writer.add_summary(summaries, global_step=step)
                print("train_step: {}, loss: {}".format(step, loss))

            def dev_step(x_batch, y_batch, writer=None):
                feed_dict = {
                    fasttext.input_x: x_batch,
                    fasttext.input_y: y_batch,
                    fasttext.dropout_keep_prob: 1.0
                }
                step, summaries, loss = sess.run(
                    [global_step, dev_summary_op, fasttext.loss],
                    feed_dict=feed_dict)
                print("dev_step: {}, loss: {}".format(step, loss))
                if writer:
                    writer.add_summary(summaries, step)

            # generate batches
            # NOTE: x_dev/y_dev and x_test/y_test are never built in this script;
            # see the sketch after this function for one way to prepare them.
            batches = data_helper.generate_batchs(X_train_digit, y_train)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config["evaluate_every"] == 0:
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                if current_step % config["checkpoint_every"] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print('save model checkpoint to {}'.format(path))

            # test
            feed_dict = {
                fasttext.input_x: x_test,
                fasttext.input_y: y_test,
                fasttext.dropout_keep_prob: 1.0
            }
            test_precision, test_recall = sess.run(
                [fasttext.precision, fasttext.recall], feed_dict=feed_dict)
            print('test_precision: {}, test_recall: {}'.format(test_precision, test_recall))
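

# --- Illustrative sketch (assumption): preparing the dev/test tensors train() expects ---
# x_dev/y_dev and x_test/y_test are referenced above but never built. One plausible way to
# produce them, reusing only the helpers this script already uses for the training split,
# and assuming ./mini_data/dev.txt and ./mini_data/test.txt exist:
def load_eval_split(path, word_to_idx):
    # run an evaluation file through the same preprocessing and padding as the train split
    X_raw, y_raw, _ = data_helper.preprocess_data(path)
    X_digit = data_helper.padding(X_raw, word_to_idx)
    return X_digit, y_raw

# Inside train(), after word_to_idx has been built, the splits could then be created with:
#     x_dev, y_dev = load_eval_split('./mini_data/dev.txt', word_to_idx)
#     x_test, y_test = load_eval_split('./mini_data/test.txt', word_to_idx)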
import datetime
import os
import time

import tensorflow as tf

import data_helper
from text_rcnn import TextRCNN  # project-local model class (module path assumed)


def train(config):
    print('parameters:')
    print(config)

    # load data
    print('load data')
    X, y = data_helper.process_data(config)  # X = [[seq1], [seq2], ...], y = [label1, label2, ...]

    # make vocab
    print('make vocab...')
    word2index, label2index = data_helper.generate_vocab(X, y, config)

    # padding data
    print('padding data')
    input_x, input_y = data_helper.padding(X, y, config, word2index, label2index)

    # split data
    print('split data...')
    x_train, y_train, x_test, y_test, x_dev, y_dev = data_helper.split_data(input_x, input_y, config)
    print('length train: {}'.format(len(x_train)))
    print('length test: {}'.format(len(x_test)))
    print('length dev: {}'.format(len(x_dev)))

    print('training...')
    with tf.Graph().as_default():
        sess_config = tf.ConfigProto(
            allow_soft_placement=config['allow_soft_placement'],
            log_device_placement=config['log_device_placement'])
        with tf.Session(config=sess_config) as sess:
            rcnn = TextRCNN(config)

            # training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            train_op = tf.train.AdamOptimizer(config['learning_rate']).minimize(
                rcnn.loss, global_step=global_step)

            # output dir for models
            timestamp = str(int(time.time()))
            outdir = os.path.abspath(os.path.join(os.path.curdir, 'runs', timestamp))
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            print('writing to {}'.format(outdir))

            # checkpoint directory
            checkpoint_dir = os.path.abspath(os.path.join(outdir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=config['num_checkpoints'])

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    rcnn.input_x: x_batch,
                    rcnn.input_y: y_batch,
                    rcnn.dropout_keep_prob: config['dropout_keep_prob']
                }
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, rcnn.loss, rcnn.accuracy],
                    feed_dict=feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print('{}: step {}, loss {}, acc {}'.format(time_str, step, loss, accuracy))

            def dev_step(x_batch, y_batch):
                feed_dict = {
                    rcnn.input_x: x_batch,
                    rcnn.input_y: y_batch,
                    rcnn.dropout_keep_prob: 1.0
                }
                step, loss, accuracy = sess.run(
                    [global_step, rcnn.loss, rcnn.accuracy],
                    feed_dict=feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print('{}: step {}, loss {}, acc {}'.format(time_str, step, loss, accuracy))

            # generate batches
            batches = data_helper.generate_batchs(x_train, y_train, config)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config['evaluate_every'] == 0:
                    print('Evaluation:')
                    dev_step(x_dev, y_dev)
                if current_step % config['checkpoint_every'] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print('save model checkpoint to {}'.format(path))

            # test accuracy
            test_accuracy = sess.run(
                [rcnn.accuracy],
                feed_dict={
                    rcnn.input_x: x_test,
                    rcnn.input_y: y_test,
                    rcnn.dropout_keep_prob: 1.0
                })
            print('Test dataset accuracy: {}'.format(test_accuracy))
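

# --- Illustrative sketch (assumption): restoring a saved checkpoint for evaluation ---
# Not part of the original script. It rebuilds the TextRCNN graph with the same config and
# loads the latest weights written by the Saver above; `checkpoint_dir` is the
# runs/<timestamp>/checkpoints path printed during training, and `x_eval`/`y_eval` are
# padded inputs produced the same way as in train().
def evaluate_checkpoint(config, checkpoint_dir, x_eval, y_eval):
    with tf.Graph().as_default():
        with tf.Session() as sess:
            rcnn = TextRCNN(config)
            saver = tf.train.Saver(tf.global_variables())
            # restore the most recent checkpoint in the directory
            saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
            accuracy = sess.run(
                rcnn.accuracy,
                feed_dict={
                    rcnn.input_x: x_eval,
                    rcnn.input_y: y_eval,
                    rcnn.dropout_keep_prob: 1.0
                })
            return accuracy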
import datetime
import json
import os
import time

import tensorflow as tf

import data_helper
from bilstm_crf import BilstmCrf  # project-local model class (module path assumed)


def train(config):
    learning_rate = config['learning_rate']
    clip_grad = config['clip_grad']
    max_model_keep = config['max_model_keep']

    print('parameters:')
    print(json.dumps(config, indent=4, ensure_ascii=False))

    # load data
    print('load data .....')
    X, y = data_helper.process_data(config)

    # make vocab
    print('make vocab .....')
    word_to_index, label_to_index = data_helper.generate_vocab(X, y, config)
    config['num_tags'] = len(label_to_index)

    # padding data
    print('padding data .....')
    input_x, input_y, sequence_lengths = data_helper.padding(X, y, word_to_index, label_to_index)

    # split data
    print('split data .....')
    (x_train, y_train, sequence_length_train,
     x_test, y_test, sequence_length_test,
     x_dev, y_dev, sequence_length_dev) = data_helper.split_data(input_x, input_y,
                                                                 sequence_lengths, config)
    print('length train: {}'.format(len(x_train)))
    print('length test: {}'.format(len(x_test)))
    print('length dev: {}'.format(len(x_dev)))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            bilstm_crf = BilstmCrf(config)

            # training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate)

            # apply gradient clipping to avoid gradient explosion
            grads_and_vars = optimizer.compute_gradients(bilstm_crf.loss)
            grads_and_vars_clip = [[tf.clip_by_value(g, -clip_grad, clip_grad), v]
                                   for g, v in grads_and_vars]
            train_op = optimizer.apply_gradients(grads_and_vars_clip, global_step=global_step)

            # output dir for models and summaries
            timestamp = str(int(time.time()))
            outdir = os.path.abspath(os.path.join(os.path.curdir, 'runs', timestamp))
            print('writing to {} !!!'.format(outdir))

            # summary of loss
            tf.summary.scalar('loss', bilstm_crf.loss)

            # train summary
            train_summary_op = tf.summary.merge_all()
            train_summary_dir = os.path.join(outdir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # dev summary
            dev_summary_op = tf.summary.merge_all()
            dev_summary_dir = os.path.join(outdir, 'summaries', 'dev')
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # checkpoint dir
            checkpoint_dir = os.path.abspath(os.path.join(outdir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=max_model_keep)

            sess.run(tf.global_variables_initializer())

            def viterbi_decoder(logits, seq_len_list, transition_params):
                # decode the best tag sequence for every sentence in the batch
                label_list = []
                for logit, seq_len in zip(logits, seq_len_list):
                    viterbi_seq, _ = tf.contrib.crf.viterbi_decode(logit[:seq_len], transition_params)
                    label_list.append(viterbi_seq)
                return label_list

            def train_step(x_batch, y_batch, sequence_lengths):
                feed_dict = {
                    bilstm_crf.input_x: x_batch,
                    bilstm_crf.input_y: y_batch,
                    bilstm_crf.sequence_length: sequence_lengths,
                    bilstm_crf.dropout_keep_prob: config['dropout_keep_prob']
                }
                _, step, summaries, loss, transition_params, logits = sess.run(
                    [train_op, global_step, train_summary_op, bilstm_crf.loss,
                     bilstm_crf.transition_params, bilstm_crf.logits],
                    feed_dict=feed_dict
                )
                label_list = viterbi_decoder(logits, sequence_lengths, transition_params)
                acc, recall, f1 = data_helper.measure(y_batch, label_list, sequence_lengths)
                time_str = datetime.datetime.now().isoformat()
                print("training: {}: step {}, loss {:g}, acc {:.2f} recall {:.2f} f1 {:.2f}".format(
                    time_str, step, loss, acc, recall, f1))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, sequence_lengths, writer=None):
                feed_dict = {
                    bilstm_crf.input_x: x_batch,
                    bilstm_crf.input_y: y_batch,
                    bilstm_crf.sequence_length: sequence_lengths,
                    bilstm_crf.dropout_keep_prob: 1.0
                }
                step, summaries, loss, transition_params, logits = sess.run(
                    [global_step, dev_summary_op, bilstm_crf.loss,
                     bilstm_crf.transition_params, bilstm_crf.logits],
                    feed_dict=feed_dict
                )
                label_list = viterbi_decoder(logits, sequence_lengths, transition_params)
                acc, recall, f1 = data_helper.measure(y_batch, label_list, sequence_lengths)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, f1 {:.2f}".format(time_str, step, loss, f1))
                if writer:
                    writer.add_summary(summaries, step)

            # generate batches
            batches = data_helper.generate_batchs(x_train, y_train, sequence_length_train, config)
            for batch in batches:
                x_batch, y_batch, sequence_length_batch = zip(*batch)
                train_step(x_batch, y_batch, sequence_length_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config['evaluate_every'] == 0:
                    print('Evaluation:')
                    dev_step(x_dev, y_dev, sequence_length_dev, writer=dev_summary_writer)
                if current_step % config['checkpoint_every'] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print('save model checkpoint to {}'.format(path))
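

# --- Example usage (illustrative sketch, not part of the original script) ---
# Only the keys this train() reads directly are listed; data_helper and BilstmCrf are likely
# to need additional keys (data path, embedding size, hidden size, batch size, ...), and
# every value here is an assumed placeholder. 'num_tags' is filled in by train() itself from
# the label vocabulary.
if __name__ == '__main__':
    example_config = {
        'learning_rate': 1e-3,
        'clip_grad': 5.0,
        'max_model_keep': 5,
        'dropout_keep_prob': 0.5,
        'evaluate_every': 100,
        'checkpoint_every': 100,
    }
    train(example_config)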