model_helper.py (forked from efikarra/rnn-crf-tf)
import collections
import time

import tensorflow as tf

import iterator_utils
import vocab_utils

class TrainModel(collections.namedtuple("TrainModel", ("graph", "model", "iterator"))):
    pass


class EvalModel(collections.namedtuple(
        "EvalModel", ("graph", "model", "input_file_placeholder", "output_file_placeholder", "iterator"))):
    pass


class InferModel(collections.namedtuple(
        "InferModel", ("graph", "model", "input_file_placeholder", "iterator"))):
    pass

def create_train_model(model_creator, hparams, input_path, target_path, mode):
    graph = tf.Graph()
    with graph.as_default(), tf.container("train"):
        input_vocab_table = vocab_utils.create_vocab_table(hparams.vocab_path)
        input_dataset = tf.contrib.data.TextLineDataset(input_path)
        output_dataset = tf.contrib.data.TextLineDataset(target_path)
        iterator = iterator_utils.get_iterator(input_dataset, output_dataset, input_vocab_table,
                                               batch_size=hparams.batch_size,
                                               random_seed=hparams.random_seed,
                                               pad=hparams.pad, input_max_len=hparams.input_max_len)
        model = model_creator(hparams, mode, iterator, input_vocab_table=input_vocab_table,
                              reverse_input_vocab_table=None)
    return TrainModel(graph, model, iterator)
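
# A hedged usage sketch (defined here but never called from this module) of
# how a TrainModel is typically driven. The file names and the hparams.out_dir
# field are illustrative stand-ins, and it is an assumption that the iterator
# returned by iterator_utils.get_iterator exposes an `initializer` op, as in
# similar NMT codebases.
def _example_train_setup(model_creator, hparams):
    train_model = create_train_model(model_creator, hparams,
                                     "train.input.txt", "train.target.txt",
                                     tf.contrib.learn.ModeKeys.TRAIN)
    # Each helper builds its own tf.Graph, so the session must be created on it.
    sess = tf.Session(graph=train_model.graph)
    create_or_load_model(train_model.model, sess, "train", hparams.out_dir)
    sess.run(train_model.iterator.initializer)
    return train_model, sess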

def create_eval_model(model_creator, hparams, mode):
    graph = tf.Graph()
    with graph.as_default(), tf.container("eval"):
        # Create a table that maps words to vocabulary ids.
        input_vocab_table = vocab_utils.create_vocab_table(hparams.vocab_path)
        # The dataset files are placeholders that are fed with concrete file
        # names at evaluation time, so the same trained model can be evaluated
        # on different datasets.
        input_file_placeholder = tf.placeholder(shape=(), dtype=tf.string)
        input_dataset = tf.contrib.data.TextLineDataset(input_file_placeholder)
        output_file_placeholder = tf.placeholder(shape=(), dtype=tf.string)
        output_dataset = tf.contrib.data.TextLineDataset(output_file_placeholder)
        iterator = iterator_utils.get_iterator(input_dataset, output_dataset, input_vocab_table,
                                               batch_size=hparams.eval_batch_size,
                                               random_seed=hparams.random_seed,
                                               pad=hparams.pad, input_max_len=hparams.input_max_len)
        model = model_creator(hparams, mode, iterator, input_vocab_table=input_vocab_table,
                              reverse_input_vocab_table=None)
    return EvalModel(graph, model, input_file_placeholder, output_file_placeholder, iterator)
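
# A hedged sketch of how the eval placeholders above are meant to be fed: the
# iterator is (re)initialized with concrete file paths and then drained by
# run_batch_evaluation (defined later in this file). The file names are
# hypothetical, and the iterator's `initializer` op is an assumption.
def _example_evaluate(eval_model, session):
    session.run(eval_model.iterator.initializer,
                feed_dict={eval_model.input_file_placeholder: "dev.input.txt",
                           eval_model.output_file_placeholder: "dev.target.txt"})
    return run_batch_evaluation(eval_model.model, session)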

def create_infer_model(model_creator, hparams, mode):
    graph = tf.Graph()
    with graph.as_default(), tf.container("predict"):
        input_vocab_table = vocab_utils.create_vocab_table(hparams.vocab_path)
        # As in the eval model, the input file is fed at prediction time.
        input_file_placeholder = tf.placeholder(shape=(), dtype=tf.string)
        input_dataset = tf.contrib.data.TextLineDataset(input_file_placeholder)
        iterator = iterator_utils.get_iterator_infer(input_dataset, input_vocab_table,
                                                     batch_size=hparams.predict_batch_size,
                                                     random_seed=hparams.random_seed,
                                                     pad=hparams.pad,
                                                     input_max_len=hparams.input_max_len)
        model = model_creator(hparams, mode, iterator, input_vocab_table=input_vocab_table,
                              reverse_input_vocab_table=None)
    return InferModel(graph, model, input_file_placeholder, iterator)
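
# The same pattern for inference, sketched under the same assumptions: only
# the input file is fed, since there are no target labels at prediction time.
# The file name is hypothetical.
def _example_infer_init(infer_model, session):
    session.run(infer_model.iterator.initializer,
                feed_dict={infer_model.input_file_placeholder: "test.input.txt"})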

def create_embeddings(vocab_size, emb_size, emb_trainable, emb_pretrain, dtype=tf.float32):
    with tf.variable_scope("embeddings", dtype=dtype):
        emb_placeholder = None
        emb_init = None
        embedding = tf.get_variable("embedding", [vocab_size, emb_size], dtype, trainable=emb_trainable)
        if emb_pretrain:
            # Pretrained weights are fed through a placeholder and copied into
            # the variable with an assign op, so the weights are not baked
            # into the graph definition.
            emb_placeholder = tf.placeholder(tf.float32, [vocab_size, emb_size])
            emb_init = embedding.assign(emb_placeholder)
    return embedding, emb_init, emb_placeholder
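
# A minimal sketch of the pretrained-embedding path: when emb_pretrain is
# True, weights (e.g., a NumPy array of shape [vocab_size, emb_size]; its
# source is outside this module) are pushed into the variable by running the
# returned assign op once, mirroring what create_or_load_model does below.
def _example_load_pretrained_embeddings(session, vocab_size, emb_size, weights):
    embedding, emb_init, emb_placeholder = create_embeddings(
        vocab_size, emb_size, emb_trainable=False, emb_pretrain=True)
    session.run(tf.global_variables_initializer())
    session.run(emb_init, feed_dict={emb_placeholder: weights})
    return embedding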

def _single_cell(unit_type, num_units, forget_bias, in_to_hidden_dropout):
    # Cell type.
    if unit_type == "lstm":
        single_cell = tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=forget_bias)
    elif unit_type == "gru":
        single_cell = tf.contrib.rnn.GRUCell(num_units)
    elif unit_type == "layer_norm_lstm":
        single_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(num_units, forget_bias=forget_bias,
                                                            layer_norm=True)
    elif unit_type == "rnn":
        single_cell = tf.contrib.rnn.BasicRNNCell(num_units)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)
    # Input-to-hidden dropout (dropout rate = 1 - keep_prob).
    if in_to_hidden_dropout > 0.0:
        single_cell = tf.contrib.rnn.DropoutWrapper(
            cell=single_cell, input_keep_prob=(1.0 - in_to_hidden_dropout))
    return single_cell

def _cell_list(unit_type, num_units, num_layers, forget_bias, in_to_hidden_dropout, mode):
    """Create a list of RNN cells."""
    # Dropout is only applied during training; it is disabled for eval/infer.
    if mode != tf.contrib.learn.ModeKeys.TRAIN:
        in_to_hidden_dropout = 0.0
    cell_list = []
    for i in range(num_layers):
        single_cell = _single_cell(
            unit_type=unit_type,
            num_units=num_units,
            forget_bias=forget_bias,
            in_to_hidden_dropout=in_to_hidden_dropout)
        cell_list.append(single_cell)
    return cell_list

def create_rnn_cell(unit_type, num_units, num_layers, forget_bias, in_to_hidden_dropout, mode):
    cell_list = _cell_list(unit_type=unit_type,
                           num_units=num_units,
                           num_layers=num_layers,
                           forget_bias=forget_bias,
                           in_to_hidden_dropout=in_to_hidden_dropout,
                           mode=mode)
    if len(cell_list) == 1:  # Single layer.
        return cell_list[0]
    else:  # Multiple layers.
        return tf.contrib.rnn.MultiRNNCell(cell_list)
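
# A minimal sketch of create_rnn_cell: two LSTM layers with 20% input dropout
# at training time come back wrapped in a MultiRNNCell; with num_layers=1 the
# bare cell would be returned instead. The sizes are illustrative only.
def _example_build_cell():
    return create_rnn_cell(unit_type="lstm", num_units=128, num_layers=2,
                           forget_bias=1.0, in_to_hidden_dropout=0.2,
                           mode=tf.contrib.learn.ModeKeys.TRAIN)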

def gradient_clip(gradients, max_gradient_norm):
    # If the global norm, i.e., sqrt(sum(l2norm(g)**2 for g in gradients)),
    # exceeds max_gradient_norm, every gradient is scaled down by the ratio
    # max_gradient_norm / global_norm; otherwise the gradients are unchanged.
    clipped_gradients, gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
    gradient_norm_summary = [tf.summary.scalar("grad_norm", gradient_norm)]
    # Also log the global norm of the clipped gradients.
    gradient_norm_summary.append(tf.summary.scalar("clipped_gradient",
                                                   tf.global_norm(clipped_gradients)))
    return clipped_gradients, gradient_norm_summary, gradient_norm
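
# A hedged sketch of the usual call site for gradient_clip: compute gradients,
# clip by global norm, then apply. The loss tensor, the optimizer choice, and
# the learning rate are assumptions about the calling model, not this file.
def _example_clipped_train_op(loss, max_gradient_norm=5.0, learning_rate=0.001):
    params = tf.trainable_variables()
    gradients = tf.gradients(loss, params)
    clipped_gradients, gradient_norm_summary, _ = gradient_clip(
        gradients, max_gradient_norm=max_gradient_norm)
    opt = tf.train.AdamOptimizer(learning_rate)
    train_op = opt.apply_gradients(zip(clipped_gradients, params))
    return train_op, gradient_norm_summary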

def run_batch_evaluation(model, session):
    batch_count = 0.0
    loss = 0.0
    while True:
        try:
            batch_loss, batch_size = model.eval(session)
            loss += batch_loss
            batch_count += 1
        except tf.errors.OutOfRangeError:
            # The iterator is exhausted: the whole dataset has been seen once.
            break
    # Average the loss over the number of batches.
    loss /= batch_count
    return loss

def load_model(model, session, name, ckpt):
    start_time = time.time()
    # Initialize all read-only tables of the graph, e.g., vocabulary tables.
    session.run(tf.tables_initializer())
    model.saver.restore(session, ckpt)
    print("loaded %s model parameters from %s, time %.2fs" % (name, ckpt, time.time() - start_time))
    return model

def create_or_load_model(model, session, name, model_dir, input_emb_weights=None):
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    if latest_ckpt:
        model = load_model(model, session, name, latest_ckpt)
    else:
        start_time = time.time()
        # Initialize all global variables in the graph, e.g., the model's weights.
        session.run(tf.global_variables_initializer())
        # Initialize all read-only tables of the graph, e.g., vocabulary tables.
        session.run(tf.tables_initializer())
        if input_emb_weights is not None:
            session.run(model.input_emb_init,
                        feed_dict={model.input_emb_placeholder: input_emb_weights})
        print("created model %s with new parameters, time %.2fs" % (name, time.time() - start_time))
    return model

def add_summary(summary_writer, tag, value):
    """Add a new summary to the current summary_writer."""
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
    # global_step is the optional step value recorded with the summary.
    summary_writer.add_summary(summary, global_step=None)
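
# A hedged usage sketch for add_summary: recording a Python-side scalar (e.g.,
# the dev loss returned by run_batch_evaluation) without any graph ops. The
# tag name is illustrative.
def _example_log_dev_loss(summary_writer, dev_loss):
    add_summary(summary_writer, "dev_loss", dev_loss)
    summary_writer.flush()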