def predict(sentences):
    # Load graph
    g = TransformerDecoder(is_training=False)
    print("Graph loaded")

    # Load data, sorted by sentence length so batches are roughly uniform
    X, sources, actual_lengths = load_test_data2(sentences)
    sorted_lengths = np.argsort(actual_lengths)
    X = X[sorted_lengths]
    print(X.shape)
    src2idx, idx2src = load_source_vocab()
    tgt2idx, idx2tgt = load_target_vocab()

    # Start session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    with tf.Session(graph=g.graph, config=config) as sess:
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
        print("Restored!")

        batch_size = hp.batch_size
        num_batches = math.ceil(len(X) / batch_size)
        print("Number of batches:", num_batches)
        Y_preds = np.zeros_like(X) + 2
        for i in tqdm(range(num_batches), desc="Inference: "):
            indices = np.arange(i * batch_size,
                                min((i + 1) * batch_size, len(X)))
            # Long sentences are decoded in overlapping windows of hp.maxlen
            # tokens, advancing by (hp.maxlen - hp.offset) tokens per step.
            max_steps = math.ceil(
                (np.max(actual_lengths[indices]) - hp.offset)
                / (hp.maxlen - hp.offset))
            max_steps = max(1, max_steps)
            for step in range(max_steps):
                end = min(step * (hp.maxlen - hp.offset) + hp.maxlen,
                          max(X.shape[1], hp.maxlen))
                start = end - hp.maxlen
                x = X[indices, start:end]
                _preds = sess.run(g.preds, {g.x: x, g.dropout: False})
                if step > 0:
                    # In the overlap region, keep the first half from the
                    # previous window and the rest from the current one.
                    Y_preds[indices, start + hp.offset // 2:end] = \
                        _preds[:, hp.offset // 2:]
                else:
                    Y_preds[indices, start:end] = _preds

        # Undo the length sort so predictions line up with `sources`
        Y_preds = Y_preds[np.argsort(sorted_lengths)]
        result = ""
        for source, preds, actual_length in zip(sources, Y_preds,
                                                actual_lengths):
            # Pass rare/special source tokens through unchanged; otherwise
            # emit the predicted target token.
            formatted_pred = [
                idx2tgt[idx] if src2idx.get(source[id], 1) > 8 else source[id]
                for id, idx in enumerate(preds[:actual_length])
            ]
            result = result + " " + " ".join(formatted_pred)
    return result
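
# A minimal usage sketch for predict() — hedged: this assumes the vocab
# files, hyperparams, and a trained checkpoint under hp.logdir are in place,
# and that load_test_data2 accepts a list of unaccented sentence strings.
if __name__ == '__main__':
    print(predict(["xin chao"]))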
def accent_comment(comment):
    # The frozen inference graph lives at a fixed path relative to this
    # package (unlike the CLI script, which takes it as an argument).
    frozen_model_filename = curdir + "/vnaccent/infer/infer.pb"
    batch_size = 1
    maxlen = 35

    # We use our "load_graph" function
    graph = load_graph(frozen_model_filename)
    src2idx, idx2src = load_source_vocab()
    tgt2idx, idx2tgt = load_target_vocab()

    # for op in graph.get_operations():
    #     print(op.name)
    preds = graph.get_tensor_by_name('prefix/ToInt32:0')
    x = graph.get_tensor_by_name('prefix/Placeholder:0')
    y = graph.get_tensor_by_name('prefix/Placeholder_1:0')

    with tf.Session(graph=graph) as sess:
        result = np.zeros((batch_size, maxlen), np.int32)
        input_sent = (comment + " . </s>").split()
        feed_x = [src2idx.get(word.lower(), 1) for word in input_sent]
        feed_x = np.expand_dims(
            np.lib.pad(feed_x, [0, maxlen - len(feed_x)], 'constant'), 0)
        # Greedy decoding: fill in one target position per pass
        for j in range(maxlen):
            _preds = sess.run(preds, {x: feed_x, y: result})
            result[:, j] = _preds[:, j]
        result = result[0]
        # print('Input : ', comment)
        raw_output = [idx2tgt[idx] for idx in result[result != 3]]
        # Unknown-token aligning: copy source words the model mapped to <unk>
        # (1 is the default id in src2idx.get above; 3 presumably marks </s>)
        for idx, token in enumerate(feed_x[0]):
            if token == 3:
                break
            if token == 1:
                raw_output[idx] = input_sent[idx]
            if input_sent[idx].istitle():
                raw_output[idx] = raw_output[idx].title()
        return ' '.join(raw_output[:raw_output.index(".")])
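
# A minimal usage sketch for accent_comment() — hedged: this assumes curdir
# is defined at module level and the frozen graph exists at
# vnaccent/infer/infer.pb. The input must fit within maxlen (35 tokens,
# including the appended " . </s>").
if __name__ == '__main__':
    print(accent_comment("xin chao"))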
def __init__(self, is_training=True):
    self.graph = tf.Graph()
    with self.graph.as_default():
        self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen))
        self.dropout = tf.placeholder(tf.bool, shape=())
        if is_training:
            self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen))

        # Load vocabulary
        src2idx, idx2src = load_source_vocab()
        tgt2idx, idx2tgt = load_target_vocab()

        # Decoder
        with tf.variable_scope("decoder"):
            ## Embedding
            self.dec, self.lookup_table = embedding(
                self.x,
                vocab_size=len(src2idx),
                num_units=hp.hidden_units,
                zero_pad=False,
                pretrained=False,
                of="src",
                scope="src_embeddings")
            self.dec += positional_encoding(self.dec, hp.maxlen)

            ## Dropout
            self.dec = tf.layers.dropout(self.dec,
                                         rate=hp.dropout_rate,
                                         training=self.dropout)

            ## Blocks: two non-causal self-attention layers per block, so
            ## every output position can attend to the whole input sentence
            for i in range(hp.num_blocks):
                with tf.variable_scope("num_blocks_{}".format(i)):
                    self.dec = multihead_attention(
                        queries=self.dec,
                        keys=self.dec,
                        values=self.dec,
                        num_heads=hp.num_heads,
                        dropout_rate=hp.dropout_rate,
                        training=self.dropout,
                        causality=False,
                        scope="vanilla_attention")
                    self.dec = multihead_attention(
                        queries=self.dec,
                        keys=self.dec,
                        values=self.dec,
                        num_heads=hp.num_heads,
                        dropout_rate=hp.dropout_rate,
                        training=self.dropout,
                        causality=False,
                        scope="vanilla_attention_2")
                    ## Feed Forward
                    self.dec = feedforward(
                        self.dec,
                        num_units=[4 * hp.hidden_units, hp.hidden_units])

        # Final linear projection
        self.logits = tf.layers.dense(
            tf.reshape(self.dec, [-1, hp.hidden_units]), len(tgt2idx))
        self.logits = tf.reshape(self.logits, [-1, hp.maxlen, len(tgt2idx)])
        self.preds = tf.to_int32(tf.argmax(self.logits, axis=-1))

        if is_training:
            # Accuracy over non-special target positions
            self.istarget = tf.to_float(tf.greater(self.y, src2idx[":"]))
            self.acc = tf.reduce_sum(
                tf.to_float(
                    tf.equal(tf.reshape(self.preds, (-1, self.y.shape[1])),
                             self.y)) * self.istarget) / (
                                 tf.reduce_sum(self.istarget))

            # Cross-entropy loss, masked so <pad> positions don't contribute
            ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.y)
            nonpadding = tf.to_float(tf.not_equal(
                self.y, tgt2idx["<pad>"]))  # 0: <pad>
            self.loss = tf.reduce_sum(
                tf.reshape(ce, (-1, self.y.shape[1])) * nonpadding) / (
                    tf.reduce_sum(nonpadding) + 1e-7)
            self.mean_loss = self.loss

            self.global_step = tf.train.get_or_create_global_step()
            self.lr = noam_scheme(hp.lr, self.global_step, hp.warmup_steps)
            self.optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = self.optimizer.minimize(
                self.loss, global_step=self.global_step)
            # Variant that freezes the embedding tables
            self.train_op_noembeddings = self.optimizer.minimize(
                self.loss,
                global_step=self.global_step,
                var_list=[
                    var for var in tf.trainable_variables()
                    if ("src_embeddings" not in var.name
                        and "tgt_embeddings" not in var.name)
                ])

            tf.summary.scalar('lr', self.lr)
            tf.summary.scalar("loss", self.loss)
            tf.summary.scalar("global_step", self.global_step)
            self.summaries = tf.summary.merge_all()
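
# A minimal training-loop sketch for TransformerDecoder — hedged:
# get_batches() is a hypothetical helper standing in for the repo's real
# data pipeline; it is assumed to yield (x, y) int arrays of shape
# [batch, hp.maxlen].
def train_sketch():
    g = TransformerDecoder(is_training=True)
    with tf.Session(graph=g.graph) as sess:
        sess.run(tf.global_variables_initializer())
        for x_batch, y_batch in get_batches():  # hypothetical data source
            _, loss, step = sess.run(
                [g.train_op, g.loss, g.global_step],
                {g.x: x_batch, g.y: y_batch, g.dropout: True})
            if step % 100 == 0:
                print("step {}: loss {:.4f}".format(step, loss))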
#!/home/gdmlab/anaconda3/bin/python
# coding: utf-8
import tensorflow as tf

from data_load import load_source_vocab, load_target_vocab
from hyperparams import Hyperparams as hp

source_char2idx, source_idx2char = load_source_vocab()
target_char2idx, target_idx2char = load_target_vocab()

word = 'commonfdasyuiwenkda'
word_int = [source_char2idx.get(char, 0) for char in word]  # 0 means <UNK>

graph = tf.Graph()
with tf.Session(graph=graph) as sess:
    loader = tf.train.import_meta_graph(hp.checkpoint + '.meta')
    loader.restore(sess, hp.checkpoint)

    inputs = graph.get_tensor_by_name('inputs:0')
    logits = graph.get_tensor_by_name('predictions:0')
    source_sequence_length = graph.get_tensor_by_name(
        'source_sequence_length:0')
    target_sequence_length = graph.get_tensor_by_name(
        'target_sequence_length:0')

    answer_logits = sess.run(
        logits, {
            inputs: [word_int] * hp.batch_size,
            source_sequence_length: [len(word_int)] * hp.batch_size,
            target_sequence_length: [len(word_int)] * hp.batch_size
        })
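
# Hedged: the layout of answer_logits depends on how 'predictions:0' was
# exported; assuming it is an int id matrix of shape [batch, time], the
# first row can be decoded back to characters like this:
print(''.join(target_idx2char.get(i, '?') for i in answer_logits[0]))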
def eval():
    # Load graph
    g = TransformerDecoder(is_training=False)
    print("Graph loaded")

    # Load data, sorted by sentence length
    X, sources, actual_lengths = load_test_data()
    sorted_lengths = np.argsort(actual_lengths)
    X = X[sorted_lengths]
    print(X.shape)
    src2idx, idx2src = load_source_vocab()
    tgt2idx, idx2tgt = load_target_vocab()

    # Start session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    with tf.Session(graph=g.graph, config=config) as sess:
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
        print("Restored!")

        ## Inference
        if not os.path.exists('results'):
            os.mkdir('results')
        with codecs.open("results/{}.txt".format(hp.logdir), "w",
                         "utf-8") as fout:
            batch_size = hp.batch_size
            num_batches = math.ceil(len(X) / batch_size)
            Y_preds = np.zeros_like(X) + 2
            for i in tqdm(range(num_batches), desc="Inference: "):
                indices = np.arange(i * batch_size,
                                    min((i + 1) * batch_size, len(X)))
                # Decode long sentences in overlapping windows, exactly as
                # in predict() above.
                max_steps = math.ceil(
                    (np.max(actual_lengths[indices]) - hp.offset)
                    / (hp.maxlen - hp.offset))
                max_steps = max(1, max_steps)
                for step in range(max_steps):
                    end = min(step * (hp.maxlen - hp.offset) + hp.maxlen,
                              X.shape[1])
                    start = end - hp.maxlen
                    x = X[indices, start:end]
                    _preds = sess.run(g.preds, {g.x: x, g.dropout: False})
                    if step > 0:
                        Y_preds[indices, start + hp.offset // 2:end] = \
                            _preds[:, hp.offset // 2:]
                    else:
                        Y_preds[indices, start:end] = _preds

            # Undo the length sort so predictions line up with `sources`
            Y_preds = Y_preds[np.argsort(sorted_lengths)]
            for source, preds, actual_length in zip(sources, Y_preds,
                                                    actual_lengths):
                formatted_pred = [
                    idx2tgt[idx] if src2idx.get(source[id], 1) > 8
                    else source[id]
                    for id, idx in enumerate(preds[:actual_length])
                ]
                fout.write(" ".join(formatted_pred) + "\n")

    # Report parameter and op counts for the restored model
    model_vars = tf.trainable_variables()
    slim.model_analyzer.analyze_vars(model_vars, print_info=True)
    slim.model_analyzer.analyze_ops(g.graph, print_info=True)
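
# Worked example of the overlapping-window arithmetic used above — hedged:
# the numbers assume hp.maxlen = 35 and hp.offset = 10, for illustration
# only. A 60-token sentence needs ceil((60 - 10) / (35 - 10)) = 2 windows:
#   step 0: end = 35, start = 0   -> write predictions for positions [0, 35)
#   step 1: end = 60, start = 25  -> write positions [25 + 10//2, 60) = [30, 60)
# so positions [25, 30) keep step-0 output and [30, 35) take step-1 output,
# splitting the overlap down the middle to reduce window-edge effects.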
# Let's allow the user to pass the filename as an argument
parser = argparse.ArgumentParser()
parser.add_argument("--frozen_model_filename",
                    default="./model/frozen_graph.pb",
                    type=str,
                    help="Frozen model file to import")
parser.add_argument("--batch_size", default=1, type=int)
parser.add_argument("--input", default='xin chao', type=str)
parser.add_argument("--maxlen", default=35, type=int)
args = parser.parse_args()

# We use our "load_graph" function
graph = load_graph(args.frozen_model_filename)
src2idx, idx2src = load_source_vocab()
tgt2idx, idx2tgt = load_target_vocab()

# for op in graph.get_operations():
#     print(op.name)
preds = graph.get_tensor_by_name('prefix/ToInt32:0')
x = graph.get_tensor_by_name('prefix/Placeholder:0')
y = graph.get_tensor_by_name('prefix/Placeholder_1:0')

with tf.Session(graph=graph) as sess:
    result = np.zeros((args.batch_size, args.maxlen), np.int32)
    input_sent = (args.input + " . </s>").split()
    feed_x = [src2idx.get(word.lower(), 1) for word in input_sent]
    feed_x = np.expand_dims(
        np.lib.pad(feed_x, [0, args.maxlen - len(feed_x)], 'constant'), 0)
    # Greedy decoding: fill in one target position per pass
    for j in range(args.maxlen):
        _preds = sess.run(preds, {x: feed_x, y: result})
        result[:, j] = _preds[:, j]
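
# Example invocation — hedged: "infer_frozen.py" is a hypothetical name for
# this script; the vocab files and frozen graph must already exist.
#   python infer_frozen.py --frozen_model_filename ./model/frozen_graph.pb \
#       --input "xin chao" --maxlen 35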