def __init__(self):
    # Load model config
    config = load_config(FLAGS)
    config_proto = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement,
        gpu_options=tf.GPUOptions(allow_growth=True))
    # Load LM for re-ranking
    self.re_ranking = FLAGS.lm_ranking
    if self.re_ranking:
        self.load_lm_model()
    self.sess = tf.Session(config=config_proto)
    # Create seq2seq model instance
    self.model = Seq2SeqModel(config, 'predict')
    # Create saver
    # Using var_list = None returns the list of all saveable variables
    saver = tf.train.Saver(var_list=None)
    # Reload existing checkpoint
    load_model(self.sess, self.model, saver)
def main(_):
    data_path = 'data/new-dataset-cornell-length10-filter1-vocabSize40000.pkl'
    word2id, id2word, trainingSamples = load_dataset(data_path)
    hparam = Config()
    hparam.is_training = False
    with tf.Session() as sess:
        model = Seq2SeqModel(hparam, word2id)
        ckpt = tf.train.get_checkpoint_state(hparam.save_path)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Restoring model parameters from %s." % ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(model.init)
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            batch = sentence_preprocess(sentence, word2id)
            outputs = model.infer_session(sess, batch)
            predicted_ids = outputs["predicted_ids"]
            out_sents = [id2word[idx] for idx in predicted_ids[0][0].tolist()]
            print(" ".join(out_sents))
            # Prompt for the next sentence without a trailing newline
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
def create_model(session, restore_only=False):
    # With a bidirectional encoder, the decoder state size should be
    # 2x the encoder state size
    is_training = tf.placeholder(dtype=tf.bool, name='is_training')
    # Build a fresh cell per layer; reusing one cell instance across layers
    # would share its weights and is rejected by newer TF 1.x releases
    encoder_cell = MultiRNNCell([LSTMCell(64) for _ in range(5)])
    decoder_cell = MultiRNNCell([LSTMCell(128) for _ in range(5)])
    model = Seq2SeqModel(encoder_cell=encoder_cell,
                         decoder_cell=decoder_cell,
                         vocab_size=wiki.vocab_size,
                         embedding_size=300,
                         attention=True,
                         bidirectional=True,
                         is_training=is_training,
                         device=args.device,
                         debug=False)
    saver = tf.train.Saver(tf.global_variables(),
                           keep_checkpoint_every_n_hours=1)
    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    if checkpoint:
        print("Reading model parameters from %s" % checkpoint.model_checkpoint_path)
        saver.restore(session, checkpoint.model_checkpoint_path)
    elif restore_only:
        raise FileNotFoundError("Cannot restore model")
    else:
        print("Created model with fresh parameters")
        session.run(tf.global_variables_initializer())
    tf.get_default_graph().finalize()
    return model, saver
def __init__(self, gpu_number=0):
    # Load model config
    config = load_config(FLAGS)
    config_proto = tf.ConfigProto(
        # allow_soft_placement=FLAGS.allow_soft_placement,
        # log_device_placement=FLAGS.log_device_placement,
        gpu_options=tf.GPUOptions(allow_growth=True)
        # , visible_device_list=str(gpu_number))
    )
    self.graphpre = tf.Graph()
    self.sess = tf.Session(graph=self.graphpre, config=config_proto)
    with self.sess.as_default():
        with self.graphpre.as_default():
            # Build the model
            self.model = Seq2SeqModel(config, 'predict')
            # Create saver
            # Using var_list = None returns the list of all saveable variables
            saver = tf.train.Saver(var_list=None)
            # Reload existing checkpoint
            load_model(self.sess, self.model, saver)
    self.planner = Planner()
    print("poetry is ok!")
def create_model(sess, data, args, embed):
    with tf.variable_scope(args.name):
        model = Seq2SeqModel(data, args, embed)
        model.print_parameters()
        latest_dir = '%s/checkpoint_latest' % args.model_dir
        best_dir = '%s/checkpoint_best' % args.model_dir
        if tf.train.get_checkpoint_state(latest_dir) and args.restore == "last":
            print("Reading model parameters from %s" % latest_dir)
            model.latest_saver.restore(sess, tf.train.latest_checkpoint(latest_dir))
        elif tf.train.get_checkpoint_state(best_dir) and args.restore == "best":
            print("Reading model parameters from %s" % best_dir)
            model.best_saver.restore(sess, tf.train.latest_checkpoint(best_dir))
        else:
            print("Created model with fresh parameters.")
            global_variable = [gv for gv in tf.global_variables()
                               if args.name in gv.name]
            sess.run(tf.variables_initializer(global_variable))
    return model
def run():
    """Train the seq2seq model on the preprocessed Q/A data."""
    idx_q, idx_a = data_process.load_qa_data()
    metadata = data_process.load_metadata()
    (trainX, trainY), (testX, testY) = data_process.split_dataset(idx_q, idx_a)
    xseq_len = trainX.shape[-1]
    yseq_len = trainY.shape[-1]
    batch_size = 32
    xvocab_size = len(metadata['idx2w'])
    yvocab_size = xvocab_size
    emb_dim = 512
    model = Seq2SeqModel(xseq_len=xseq_len,
                         yseq_len=yseq_len,
                         x_vocab_size=xvocab_size,
                         y_vocab_size=yvocab_size,
                         emb_dim=emb_dim,
                         num_layers=2,
                         ckpt_path='./model_ckp',
                         metadata=metadata,
                         batch_size=batch_size,
                         hook=data_process.upload_to_google_drive)
    model.train((trainX, trainY), (testX, testY))
def self_test():
    """Test the translation model."""
    with tf.Session() as sess:
        print("Self-test for neural translation model.")
        # Create model with vocabularies of 10, 2 small buckets, 2 layers of 32.
        model = Seq2SeqModel(10, 10, [(3, 3), (6, 6)], 32, 2, 5.0, 4, 0.3, 0.99,
                             num_samples=8)
        sess.run(tf.global_variables_initializer())
        # Fake data set for both the (3, 3) and (6, 6) bucket.
        data_set = ([([1, 1], [2, 2]), ([3, 3], [4]), ([5], [6])],
                    [([1, 1, 1, 1, 1], [2, 2, 2, 2, 2]), ([3, 3, 3], [5, 6])])
        # Train the fake model for 500 steps (bucket 0 is always selected here).
        for i in range(500):
            bucket_id = random.choice([0])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                data_set, bucket_id)
            _, loss, _, summary = model.step(sess, encoder_inputs, decoder_inputs,
                                             target_weights, bucket_id, False)
            if i % 100 == 0:
                print('step %d, loss: %.4f' % (i, loss))
                print('bucket_id: %d' % bucket_id)
                print('encoder_inputs\n', encoder_inputs)
                print('decoder_inputs\n', decoder_inputs)
                print('target_weights\n', target_weights)
def load_model(session, config):
    model = Seq2SeqModel(config, 'decode')
    if tf.train.checkpoint_exists(FLAGS.model_path):
        print('Reloading model parameters..')
        model.restore(session, FLAGS.model_path)
    else:
        raise ValueError('No such file: [{}]'.format(FLAGS.model_path))
    return model
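# Illustrative usage sketch for load_model above (not part of the original code).
# It assumes a load_config(FLAGS) helper like the ones used by the __init__
# snippets in this collection.
def decode():
    config = load_config(FLAGS)
    with tf.Session() as sess:
        model = load_model(sess, config)
        # ... run inference with the restored decode-mode model here ...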
def main(_):
    data_path = 'data/new-dataset-cornell-length10-filter1-vocabSize40000.pkl'
    word2id, id2word, trainingSamples = load_dataset(data_path)
    hparam = Config()
    with tf.Session() as sess:
        model = Seq2SeqModel(hparam, word2id)
        ckpt = tf.train.get_checkpoint_state(hparam.save_path)
        if FLAGS.resume and ckpt and tf.train.checkpoint_exists(
                ckpt.model_checkpoint_path):
            print("Restoring model parameters from %s." % ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(model.init)
        train_writer = tf.summary.FileWriter(hparam.save_path, graph=sess.graph)
        for epoch in range(hparam.num_epoch):
            print("Starting Epoch {}/{}:".format(epoch, hparam.num_epoch))
            batches = get_batches(trainingSamples, hparam.batch_size)
            total_loss = 0.0
            total_count = 0
            for nextBatch in tqdm(batches, desc="training"):
                outputs = model.train_session(sess, nextBatch)
                loss = outputs["loss"]
                summary = outputs["summary"]
                step = outputs["step"]
                train_writer.add_summary(summary, step)
                total_loss += loss
                total_count += 1
                if step % hparam.display_per_step == 0:
                    perplexity = math.exp(float(total_loss / total_count)) \
                        if total_loss / total_count < 300 else float('inf')
                    tqdm.write(" Step %d | Per-word Loss %.4f | Perplexity %.4f"
                               % (step, total_loss / total_count, perplexity))
                    checkpoint_path = os.path.join(hparam.save_path, hparam.model_name)
                    model.saver.save(sess, checkpoint_path)
                    tqdm.write("\n")
            tqdm.write(" Epoch %d | Per-word Loss %.4f | Perplexity %.4f"
                       % (epoch, total_loss / total_count, perplexity))
            tqdm.write("\n")
def main():
    args = setup_args()
    outfile = args.out_dir + args.comment
    f_out = open(outfile, 'w')

    # Write meta-info about the particular run into the master file before each run
    timestr = time.strftime("%Y%m%d-%H%M%S")
    f = open(master_meta_info_file, 'a+')
    f.write(timestr + " #### " + args.comment + " ##### " + str(args) + "\n")
    f.close()

    hole_feature_filename = args.out_dir + "hole_features_" + args.comment
    dataset = getData(args.hole_window_size,
                      args.num_files * args.num_of_holes_per_file,
                      args.dataset_type, args.sup_window_size,
                      args.num_sup_tokens, args.num_of_holes_per_file,
                      args.sup_def, args.method)

    # Get the size of the vocabulary
    vocab_size, encoder = get_vocab_size()
    model = Seq2SeqModel(vocab_size, bias_init=None)

    if args.load_model:
        y = tf.reshape(tf.Variable(1, dtype=tf.int32), (1, 1))
        model(y, y, False)
        model.load_weights(args.model_load_dir).expect_partial()  # to suppress warnings
        print("Loaded Weights from: ", args.model_load_dir)

    size = args.num_files * args.num_of_holes_per_file
    bar = tqdm(total=size)
    print("Evaluating " + args.dataset_type + " Data.......")
    subword_loss, token_loss, error, hole_features = evaluate(
        model, dataset, args.method, bar, args.inner_learning_rate,
        args.sup_batch_size, args.num_of_updates)
    bar.close()

    print(args.dataset_type + " Statistics..........")
    f_out.write(args.dataset_type + " Statistics..........")
    print("Token Cross-Entropy = {:.4f} ".format(token_loss))
    print("{:.4f} confidence error over mean cross-entropy = {:.4f}".format(
        CONFIDENCE_INTERVAL, error))
    f_out.write("Token Cross-Entropy = {:.4f} ".format(token_loss))
    f_out.write("{:.4f} confidence error over mean cross-entropy = {:.4f}".format(
        CONFIDENCE_INTERVAL, error))
    f_out.flush()

    with open(hole_feature_filename, 'wb') as f:
        pickle.dump(hole_features, f)
def main(_):
    vocab_path = FLAGS.vocab_path
    model_dir = FLAGS.model_dir
    hidden_size = FLAGS.hidden_size
    attention_model = FLAGS.attention_model
    extra_decode_length = FLAGS.extra_decode_length
    beam_width = FLAGS.beam_width
    alpha = FLAGS.alpha
    decode_batch_size = FLAGS.decode_batch_size
    src_max_length = FLAGS.src_max_length
    source_text_filename = FLAGS.source_text_filename
    target_text_filename = FLAGS.target_text_filename
    translation_output_filename = FLAGS.translation_output_filename

    # seq2seq model
    subtokenizer = tokenization.restore_subtokenizer_from_vocab_files(vocab_path)
    vocab_size = subtokenizer.vocab_size
    model = Seq2SeqModel(vocab_size,
                         hidden_size,
                         attention_model=attention_model,
                         dropout_rate=0.0,
                         extra_decode_length=extra_decode_length,
                         beam_width=beam_width,
                         alpha=alpha)

    ckpt = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    if latest_ckpt is None:
        raise ValueError('No checkpoint is found in %s' % model_dir)
    print('Loaded latest checkpoint ', latest_ckpt)
    ckpt.restore(latest_ckpt).expect_partial()

    # Build evaluator
    evaluator = SequenceTransducerEvaluator(
        model, subtokenizer, decode_batch_size, src_max_length)

    # Translate input sequences, and optionally evaluate BLEU score if
    # groundtruth target sequences are provided
    if target_text_filename is not None:
        case_insensitive_score, case_sensitive_score = evaluator.evaluate(
            source_text_filename, target_text_filename, translation_output_filename)
        print('BLEU(case insensitive): %f' % case_insensitive_score)
        print('BLEU(case sensitive): %f' % case_sensitive_score)
    else:
        evaluator.translate(source_text_filename, translation_output_filename)
        print('Inference mode: no groundtruth translations.\nTranslations written '
              'to file "%s"' % translation_output_filename)
def create_model(session, feed_previous):
    model = Seq2SeqModel(data_size=FLAGS.data_size,
                         time_len=FLAGS.time_len,
                         unit_size=FLAGS.unit_size,
                         num_layers=FLAGS.num_layers,
                         batch_size=FLAGS.batch_size,
                         learning_rate=FLAGS.learning_rate,
                         feed_previous=feed_previous)
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        # Restore the pre-trained model if there is one
        print("Reading model parameters from {}".format(ckpt.model_checkpoint_path))
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print('Created model with initialization')
        session.run(tf.global_variables_initializer())
    return model
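# Illustrative note (not from the original code): in legacy seq2seq models,
# feed_previous=False usually means teacher forcing (the decoder sees ground-truth
# inputs) and feed_previous=True feeds the previous prediction back in for
# inference; this sketch assumes that convention holds for this Seq2SeqModel.
with tf.Session() as sess:
    train_model = create_model(sess, feed_previous=False)  # training graph
    # ... training steps ...
# For sampling/inference, a separate graph and session would call
# create_model(sess, feed_previous=True).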
def load_model(sess, forward_only):
    model = Seq2SeqModel(vocab_size, rnn_size, num_layers, buckets, batch_size,
                         max_gradient_norm, num_samples, learning_rate,
                         forward_only)
    checkpoint = tf.train.get_checkpoint_state(train_path)
    if checkpoint and tf.train.checkpoint_exists(checkpoint.model_checkpoint_path):
        model.saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Loaded model parameters from %s." % train_path)
    else:
        sess.run(tf.global_variables_initializer())
        print("Created a new model.")
    return model
def create_model(): print("creating seq2seq model..") # prepare dataset source_vocab_size = c.getint('MODEL', 'SOURCE_VOCAB_SIZE') target_vocab_size = c.getint('MODEL', 'TRAGET_VOCAB_SIZE') buckets = ast.literal_eval(c.get('MODEL', 'BUCKETS')) size = c.getint('MODEL', 'SIZE') num_layers = c.getint('MODEL', 'NUM_LAYERS') max_gradient_norm = c.getint('MODEL', 'MAX_GRADIENT_NORM') batch_size = c.getint('MODEL', 'BATCH_SIZE') learning_rate = c.getint('MODEL', 'LEARNING_RATE') learning_rate_decay_factor = c.getint('MODEL', 'LEARNING_RATE_DECAY_FACTOR') # build seq2seq model model = Seq2SeqModel(source_vocab_size, target_vocab_size, buckets, size, num_layers, max_gradient_norm, batch_size, learning_rate, learning_rate_decay_factor) return model
def main():
    hidden_size = 256
    embedding_dim = 300
    pretrained_embeddings = None
    max_len = 20
    min_count = 2
    max_grad_norm = 5
    val_len = 10000
    weight_decay = 0.00001
    model_filename_1 = '/home/mattd/pycharm/encoder/models3/Baseline'
    model_filename_2 = '/home/mattd/pycharm/encoder/models3/Attention'
    eng_fr_filename = '/home/okovaleva/projects/forced_apart/autoencoder/data' \
                      '/train_1M.txt'

    dataset = SentenceDataset(eng_fr_filename, max_len, min_count)
    vocab_size = len(dataset.vocab)
    padding_idx = dataset.vocab[SentenceDataset.PAD_TOKEN]
    init_idx = dataset.vocab[SentenceDataset.INIT_TOKEN]

    # Baseline seq2seq model
    model = Seq2SeqModel(pretrained_embeddings, hidden_size, padding_idx,
                         init_idx, max_len, vocab_size, embedding_dim)
    parameters = list(model.parameters())
    optimizer = torch.optim.Adam(parameters, amsgrad=True,
                                 weight_decay=weight_decay)
    model, optimizer, lowest_loss, description, last_epoch, \
        train_loss_1, val_loss_1 = load_checkpoint(model_filename_1, model, optimizer)

    # Attention seq2seq model
    model = Seq2SeqModelAttention(pretrained_embeddings, hidden_size, padding_idx,
                                  init_idx, max_len, vocab_size, embedding_dim)
    parameters = list(model.parameters())
    optimizer = torch.optim.Adam(parameters, amsgrad=True,
                                 weight_decay=weight_decay)
    model, optimizer, lowest_loss, description, last_epoch, \
        train_loss_2, val_loss_2 = load_checkpoint(model_filename_2, model, optimizer)

    plot_data(train_loss_1, val_loss_1)
    plot_data(train_loss_2, val_loss_2)
def create_model(session, FLAGS):
    config = FLAGS.flag_values_dict()
    model = Seq2SeqModel(config, 'train')
    ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print('Reloading model parameters..')
        model.restore(session, ckpt.model_checkpoint_path)
    else:
        if not os.path.exists(FLAGS.model_dir):
            os.makedirs(FLAGS.model_dir)
        print('Created new model parameters..')
        session.run(tf.global_variables_initializer())
    return model
def load_model(sess):
    model = Seq2SeqModel(vocab_size, rnn_size, encoder_layers, decoder_layers,
                         attention_depth, max_gradient_norm, learning_rate)
    checkpoint = tf.train.get_checkpoint_state(train_path)
    if checkpoint and tf.train.checkpoint_exists(checkpoint.model_checkpoint_path):
        model.saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Loaded model parameters from %s." % train_path)
    else:
        # No checkpoint found: initialize all variables for a fresh model
        sess.run(tf.global_variables_initializer())
        print("Created a new model.")
    return model
def generate(query):
    parser = argparse.ArgumentParser()
    parser.add_argument("--vocab_file", type=str, default="data/vocab.txt",
                        help="Vocabulary dictionary")
    parser.add_argument("--word2vec_file", type=str, default=None,
                        help="Chinese wiki word2vec")
    parser.add_argument("--vocab_size", type=int, default=50001,
                        help="Vocabulary size")
    parser.add_argument("--embedding_dim", type=int, default=256,
                        help="Dimensionality of the word embeddings")
    parser.add_argument("--rnn_size", type=int, default=256,
                        help="Hidden units of the RNN layer")
    parser.add_argument("--num_layers", type=int, default=4,
                        help="Number of RNN layers")
    parser.add_argument("--batch_size", type=int, default=1,
                        help="Minibatch size")
    args, _ = parser.parse_known_args()

    vocab_dict = utils.load_vocab(args.vocab_file)
    index2word = dict(zip(vocab_dict.values(), vocab_dict.keys()))
    query = list(jieba.cut(query))
    query2id = np.array(utils.encoder_transform(query, vocab_dict))
    query2id = query2id.reshape((1, len(query)))

    checkpoint_dir = os.path.abspath(os.path.join(os.path.curdir, "checkpoints"))
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            model = Seq2SeqModel(vocab_size=args.vocab_size,
                                 embedding_dim=args.embedding_dim,
                                 rnn_size=args.rnn_size,
                                 num_layers=args.num_layers,
                                 batch_size=args.batch_size,
                                 word2vec_path=args.word2vec_file,
                                 vocab_path=args.vocab_file,
                                 training=False)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_file)

            # Decode greedily, one token at a time, until EOS is produced
            decoder_inputs = np.array([[vocab_dict[EOS]]], dtype=np.int32)
            predictions = [-1]
            while predictions[-1] != vocab_dict[EOS]:
                feed_dict = {model.encoder_inputs: query2id,
                             model.decoder_inputs: decoder_inputs}
                predictions = sess.run(model.prediction, feed_dict=feed_dict)
                next_word = np.array([[predictions[-1]]])
                decoder_inputs = np.hstack((decoder_inputs, next_word))
                if len(predictions) > 100:
                    break
            answer = [index2word[index] for index in predictions][:-1]
            return "".join(answer)
def create_model(session, mode='train'):
    """Create translation model and initialize or load parameters in session."""
    print('building model...')
    model = Seq2SeqModel(FLAGS.src_vocab_size, FLAGS.tgt_vocab_size, _buckets,
                         FLAGS.rnn_size, FLAGS.num_layers,
                         FLAGS.max_gradient_norm, FLAGS.batch_size,
                         FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
                         FLAGS.num_samples, mode)
    print('seq2seq model built')
    ckpt = tf.train.get_checkpoint_state(FLAGS.model_path)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.global_variables_initializer())
    return model
def test_seq2seq():
    num_layers = 13
    enc_seq_length = num_layers * 2
    dec_seq_length = num_layers * 3
    batchsize = 3
    enc_vocab_size = 6
    dec_vocab_size = 3
    enc_data = np.random.randint(0, enc_vocab_size,
                                 size=(batchsize, enc_seq_length), dtype=np.int32)
    dec_data = np.random.randint(0, dec_vocab_size,
                                 size=(batchsize, dec_seq_length), dtype=np.int32)
    skip_mask = np.ones_like(enc_data).astype(np.float32)
    skip_mask[:, :1] = 0
    skip_mask[0, :2] = 0
    skip_mask[1, :4] = 0
    skip_mask[2, :7] = 0
    model = Seq2SeqModel(enc_vocab_size,
                         dec_vocab_size,
                         ndim_embedding=30,
                         num_layers=num_layers,
                         ndim_h=3,
                         pooling="fo",
                         zoneout=False,
                         wgain=1,
                         densely_connected=True)
    ht = model.encode(enc_data, skip_mask)
    Y = model.decode(dec_data, ht)
    model.reset_decoder_state()
    # Check that step-by-step decoding reproduces the full-sequence decode
    for t in range(dec_seq_length):
        y = model.decode_one_step(dec_data[:, :t + 1], ht).data
        target = np.swapaxes(
            np.reshape(Y.data, (batchsize, -1, dec_vocab_size)), 1, 2)
        target = np.reshape(np.swapaxes(target[:, :, t, None], 1, 2),
                            (batchsize, -1))
        assert np.sum((y - target) ** 2) == 0
        print("t = {} OK".format(t))
def tf_session(self):
    with tf.Session() as session:
        model = Seq2SeqModel(
            encoder_cell=LSTMCell(self.hidden_units_encoder),
            decoder_cell=LSTMCell(self.hidden_units_encoder),
            vocab_size=self.vocab_size,
            attention=True,
            bidirectional=True,
            debug=False
        )
        # Word embedding initialiser
        W = tf.Variable(tf.constant(0.0, shape=[self.vocab_size, self.embedding_dim]),
                        trainable=False, name="W")
        embedding_placeholder = tf.placeholder(tf.float32,
                                               [self.vocab_size, self.embedding_dim])
        embedding_init = W.assign(embedding_placeholder)
        req_embedded = tf.nn.embedding_lookup(W, self.X)
        session.run(tf.global_variables_initializer())
def predict():
    with tf.Session() as sess:
        model = Seq2SeqModel(flags, mode='predict', beam_search=True)
        ckpt = tf.train.get_checkpoint_state(flags.model_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Reloading model parameters...')
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            raise ValueError('No such file: [{}]'.format(flags.model_dir))
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            batch = sentence2enco(sentence, model.word2id)
            predict_ids = model.infer(sess, batch)
            predict_ids_seq(predict_ids, model.id2word, model.beam_size)
            # Prompt for the next sentence
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
def __init__(self):
    # Load model config
    config = load_config(FLAGS)
    config_proto = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement,
        gpu_options=tf.GPUOptions(allow_growth=True))
    print("config_proto: {}".format(config_proto))
    self.sess = tf.Session(config=config_proto)
    # Build the model
    self.model = Seq2SeqModel(config, 'predict')
    # Create saver
    # Using var_list = None returns the list of all saveable variables
    saver = tf.train.Saver(var_list=None)
    # Reload existing checkpoint
    load_model(self.sess, self.model, saver)
def prepare_to_chat():
    global enc_vocab, inv_dec_vocab, model, saver, sess, output_file
    _, enc_vocab = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.dec'))
    model = Seq2SeqModel(True, batch_size=1)
    model.build_graph()
    saver = tf.train.Saver()
    print("Setting up sess ...")
    sess = tf.Session()
    _check_restore_parameters(sess, saver)
    print("Opening output file ...")
    output_file = open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
def run():
    """Train the attention seq2seq model on the preprocessed Q/A data."""
    idx_q, idx_a = data_process.load_qa_data()
    metadata = data_process.load_metadata()
    (trainX, trainY), (testX, testY) = data_process.split_dataset(idx_q, idx_a)
    xseq_len = trainX.shape[-1]
    yseq_len = trainY.shape[-1]
    batch_size = 16
    xvocab_size = len(metadata['idx2w'])
    yvocab_size = xvocab_size
    emb_dim = 128
    print('xseq_len : {}'.format(xseq_len))
    print('yseq_len : {}'.format(yseq_len))
    print('batch_size : {}'.format(batch_size))
    print('xvocab_size : {}'.format(xvocab_size))
    print('yvocab_size : {}'.format(yvocab_size))
    print('emb_dim : {}'.format(emb_dim))
    print('train : {}'.format(len(trainX)))
    print('test : {}'.format(len(testX)))
    model_seq = Seq2SeqModel(
        xseq_len=xseq_len,
        yseq_len=yseq_len,
        x_vocab_size=xvocab_size,
        y_vocab_size=yvocab_size,
        emb_dim=emb_dim,
        num_layers=2,
        ckpt_path='./model_ckp',
        metadata=metadata,
        batch_size=batch_size,
        mtype='attention',
        # hook=data_process.upload_to_google_drive
    )
    model_seq.train((idx_q, idx_a), (testX, testY))
def __init__(self):
    self.hp = HyperParameter()
    sources = load_and_cut_data(self.hp.sources_txt)
    targets = load_and_cut_data(self.hp.targets_txt)
    self.sources_data, self.targets_data, self.word_to_id, self.id_to_word = \
        create_dic_and_map(sources, targets)
    self.model = Seq2SeqModel(
        rnn_size=self.hp.rnn_size,
        num_layers=self.hp.num_layers,
        embedding_size=self.hp.embedding_size,
        word_to_id=self.word_to_id,
        mode='predict',
        learning_rate=self.hp.learning_rate,
        use_attention=True,
        beam_search=True,
        beam_size=self.hp.beam_size,
        encoder_state_merge_method=self.hp.encoder_state_merge_method,
        max_gradient_norm=5.0)
def __init__(self):
    # Load model config
    config = dict()
    # config = {'cangtou_data': False, 'rev_data': True, 'align_data': True,
    #  'prev_data': True, 'align_word2vec': True, 'cell_type': 'lstm',
    #  'attention_type': 'bahdanau', 'hidden_units': 128, 'depth': 4,
    #  'embedding_size': 128, 'num_encoder_symbols': 30000,
    #  'num_decoder_symbols': 30000, 'vocab_size': 6000, 'use_residual': True,
    #  'attn_input_feeding': False, 'use_dropout': True, 'dropout_rate': 0.3,
    #  'learning_rate': 0.0002, 'max_gradient_norm': 1.0, 'batch_size': 64,
    #  'max_epochs': 10000, 'max_load_batches': 20, 'max_seq_length': 50,
    #  'display_freq': 100, 'save_freq': 100, 'valid_freq': 1150000,
    #  'optimizer': 'adam', 'model_dir': 'model', 'summary_dir': 'model/summary',
    #  'model_name': 'translate.ckpt', 'shuffle_each_epoch': True,
    #  'sort_by_length': True, 'use_fp16': False, 'bidirectional': True,
    #  'train_mode': 'ground_truth', 'sampling_probability': 0.1,
    #  'start_token': 0, 'end_token': 5999, 'allow_soft_placement': True,
    #  'log_device_placement': False, 'rev_data': True, 'align_data': True,
    #  'prev_data': True, 'align_word2vec': True, 'beam_width': 1,
    #  'decode_batch_size': 80, 'write_n_best': False, 'max_decode_step': 500,
    #  'model_path': None, 'model_dir': None, 'predict_mode': 'greedy',
    #  'decode_input': 'data/newstest2012.bpe.de',
    #  'decode_output': 'data/newstest2012.bpe.de.trans'}
    config = load_config(FLAGS)
    # print("config", config)
    config_proto = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement,
        gpu_options=tf.GPUOptions(allow_growth=True))
    self.sess = tf.Session(config=config_proto)
    # Build the model
    self.model = Seq2SeqModel(config, 'predict')
    # Create saver
    # Using var_list = None returns the list of all saveable variables
    saver = tf.train.Saver(var_list=None)
    # Reload existing checkpoint
    load_model(self.sess, self.model, saver)
def train(train_path='',
          model_dir='',
          save_src_vocab_path='',
          save_trg_vocab_path='',
          embedding_dim=256,
          batch_size=64,
          epochs=4,
          maxlen=400,
          hidden_dim=1024,
          gpu_id=0):
    # source_texts, target_texts = create_dataset(train_path, None)
    source_texts, target_texts = create_dataset(train_path, 10)  # create_dataset_singleLine(train_path, None)
    print(source_texts[-1])
    print(target_texts[-1])

    source_seq, source_word2id = tokenize(source_texts, maxlen)
    target_seq, target_word2id = tokenize(target_texts, maxlen)
    save_word_dict(source_word2id, save_src_vocab_path)
    save_word_dict(target_word2id, save_trg_vocab_path)
    # Show length
    # print(source_seq[-1])
    # print(target_seq[-1])

    # Calculate max_length of the source and target tensors
    max_length_target, max_length_source = max_length(target_seq), max_length(source_seq)
    print(max_length_source, max_length_target)
    print(len(source_seq), len(target_seq))
    steps_per_epoch = len(source_seq) // batch_size
    print(steps_per_epoch)

    dataset = tf.data.Dataset.from_tensor_slices(
        (source_seq, target_seq)).shuffle(len(source_seq))
    dataset = dataset.batch(batch_size, drop_remainder=True)
    example_source_batch, example_target_batch = next(iter(dataset))

    # Build model
    model = Seq2SeqModel(source_word2id,
                         target_word2id,
                         embedding_dim=embedding_dim,
                         hidden_dim=hidden_dim,
                         batch_size=batch_size,
                         maxlen=maxlen,
                         checkpoint_path=model_dir,
                         gpu_id=gpu_id)
    # Train
    model.train(example_source_batch, dataset, steps_per_epoch, epochs=epochs)

    # Evaluate one sentence
    sentence = "例 如 病 人 必 须 在 思 想 清 醒 时 。"
    result, sentence, attention_plot = model.evaluate(sentence)
    print('Input: %s' % sentence)
    print('Predicted translation: {}'.format(result))
func_file.close()
print('func_list=', len(func_list))

# Build vocabularies
vocab, embed, vocab_topic, topic_pos, func_pos = build_vocab(
    FLAGS.data_dir, data_train, stop_list, func_list)
print('num_topic_vocab=', len(vocab_topic))
print('num_func_vocab=', len(func_pos))

# Training mode
if FLAGS.is_train:
    model = Seq2SeqModel(FLAGS.symbols,
                         FLAGS.embed_units,
                         FLAGS.units,
                         is_train=True,
                         vocab=vocab,
                         topic_pos=topic_pos,
                         func_pos=func_pos,
                         embed=embed,
                         full_kl_step=FLAGS.full_kl_step)
    if FLAGS.log_parameters:
        model.print_parameters()
    if tf.train.get_checkpoint_state(FLAGS.train_dir):
        print("Reading model parameters from %s" % FLAGS.train_dir)
        model.saver.restore(sess, tf.train.latest_checkpoint(FLAGS.train_dir))
        model.symbol2index.init.run()
    else:
        print("Created model with fresh parameters.")
    'Save a model checkpoint every this many iterations')
tf.app.flags.DEFINE_string('model_dir', 'model/',
                           'Path to save model checkpoints')
tf.app.flags.DEFINE_string('model_name', 'chatbot.ckpt',
                           'File name used for model checkpoints')
FLAGS = tf.app.flags.FLAGS

data_path = 'data/dataset-cornell-length10-filter1-vocabSize40000.pkl'
word2id, id2word, trainingSamples = loadDataset(data_path)

with tf.Session() as sess:
    model = Seq2SeqModel(FLAGS.rnn_size,
                         FLAGS.num_layers,
                         FLAGS.embedding_size,
                         FLAGS.learning_rate,
                         word2id,
                         mode='train',
                         use_attention=True,
                         use_beam_search=True,
                         beam_size=5,
                         max_gradient_norm=5.0)
    # Continue training from a saved model if one exists; otherwise start fresh
    ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        # Alternatively: latest_model = tf.train.latest_checkpoint(FLAGS.model_dir)
        latest_model = ckpt.model_checkpoint_path
        print('Reloading model parameters, latest_model: ' + latest_model)
        model.saver.restore(sess, latest_model)
    else:
        print('Created new model parameters..')
        sess.run(tf.global_variables_initializer())