class TextEmbedding(tf.train.Checkpoint):

    def __init__(self, returns_dict=False):
        embeddings = [
            ("", [0, 0, 0, 0]),  # OOV items are mapped to this embedding.
            ("hello world", [1, 2, 3, 4]),
            ("pair-programming", [5, 5, 5, 5]),
        ]
        keys = tf.constant([item[0] for item in embeddings], dtype=tf.string)
        indices = tf.constant(list(range(len(embeddings))), dtype=tf.int64)
        tbl_init = KeyValueTensorInitializer(keys, indices)
        self.table = HashTable(tbl_init, 0)
        self.weights = tf.Variable(
            list([item[1] for item in embeddings]), dtype=tf.float32)
        self.variables = [self.weights]
        self.trainable_variables = self.variables
        self._returns_dict = returns_dict

    @tf.function(input_signature=[
        tf.TensorSpec(dtype=tf.string, name="text", shape=[None])
    ])
    def __call__(self, text_tensor):
        indices_tensor = self.table.lookup(text_tensor)
        embedding_tensor = tf.gather(self.weights, indices_tensor)
        return dict(outputs=embedding_tensor) if self._returns_dict else embedding_tensor
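# Usage sketch (not part of the original snippet) for the TextEmbedding class above,
# assuming TF2 eager execution and that HashTable / KeyValueTensorInitializer refer to
# tf.lookup.StaticHashTable / tf.lookup.KeyValueTensorInitializer; the export path is
# purely illustrative.
import tensorflow as tf

module = TextEmbedding(returns_dict=False)
# "hello world" hits its row in the table; an unseen string falls back to index 0 (OOV).
print(module(tf.constant(["hello world", "never seen"])).numpy())
# -> [[1. 2. 3. 4.]
#     [0. 0. 0. 0.]]
tf.saved_model.save(module, "/tmp/text_embedding_module")  # hypothetical export path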
def __init__(self, mode, config):
    self._mode = mode
    self.config = config

    if self._mode != Mode.Predict:
        self.input = tf.placeholder(name='inputs', dtype=tf.int32, shape=[None, None])
        self.pos_input = tf.placeholder(name='pos_input', dtype=tf.int32, shape=[None])
        self.morph_input = tf.placeholder(name='morph_input', dtype=tf.int32,
                                          shape=[None, config.max_morph_tags])
    else:
        k, v = list(zip(*config.dec_char_numberer.value2num.items()))
        i = KeyValueTensorInitializer(v, k, key_dtype=tf.int64, value_dtype=tf.string)
        self.dec2char = HashTable(i, default_value="<UNK>")

        self.enc_chars = self.lookup(config.enc_char_numberer)
        self.morph_tags = self.lookup(config.morph_numberer)
        self.pos_tags = self.lookup(config.pos_numberer)

        self.input_plc = tf.placeholder(name='inputs', dtype=tf.string, shape=[None, None])
        self.pos_plc = tf.placeholder(name='pos_input', dtype=tf.string, shape=[None])
        self.morph_plc = tf.placeholder(name='morph_input', dtype=tf.string,
                                        shape=[None, config.max_morph_tags])

        self.input = self.enc_chars.lookup(self.input_plc)
        self.pos_input = self.pos_tags.lookup(self.pos_plc)
        self.morph_input = self.morph_tags.lookup(self.morph_plc)

    self.input_lengths = tf.placeholder(name='input_lengths', dtype=tf.int32, shape=[None])

    self.enc_character_embeddings = tf.get_variable(
        'enc_character_embeddings',
        shape=[self.config.enc_vocab_size, self.config.char_embedding_size])
    self.dec_character_embeddings = tf.get_variable(
        'dec_character_embeddings',
        shape=[self.config.dec_vocab_size, self.config.char_embedding_size])
    self.pos_embeddings = tf.get_variable(
        'pos_embeddings',
        shape=[self.config.pos_vocab_size, self.config.pos_embedding_size])
    self.morph_embeddings = tf.get_variable(
        'morph_embeddings',
        shape=[self.config.morph_vocab_size, self.config.morph_embedding_size])

    if self._mode != Mode.Train:
        self.start_tokens = tf.tile([config.start_idx], [tf.shape(self.input)[0]])

    if self._mode != Mode.Predict:
        # Decoder lengths are word length + 2, since sequences are <bow> + word + <eow>.
        self.dec_input = tf.placeholder(name='dec_in', dtype=tf.int32, shape=[None, None])
        # Exclude the start token from the targets for the loss computation:
        # we feed the start token and do not want to predict it.
        self.decoder_targets = self.dec_input[:, 1:]
        self.dec_lengths = tf.placeholder(name='dec_lengths', dtype=tf.int32, shape=[None])
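# Hedged, self-contained sketch of the id-to-character reverse table built in the
# Predict branch above; the value2num mapping here is a made-up stand-in for
# config.dec_char_numberer.value2num, and the same HashTable/KeyValueTensorInitializer
# imports as the surrounding code are assumed.
value2num = {'a': 0, 'b': 1, '<eow>': 2}
k, v = list(zip(*value2num.items()))
i = KeyValueTensorInitializer(v, k, key_dtype=tf.int64, value_dtype=tf.string)
dec2char = HashTable(i, default_value="<UNK>")
chars = dec2char.lookup(tf.constant([0, 1, 5], dtype=tf.int64))

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(chars))  # [b'a' b'b' b'<UNK>'] -- unknown ids fall back to "<UNK>"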
def text_module_fn():
    embeddings = [
        ("", [0, 0, 0, 0]),  # OOV items are mapped to this embedding.
        ("hello world", [1, 2, 3, 4]),
        ("pair-programming", [5, 5, 5, 5]),
    ]
    keys = tf.constant([item[0] for item in embeddings], dtype=tf.string)
    indices = tf.constant(list(range(len(embeddings))), dtype=tf.int64)
    tbl_init = KeyValueTensorInitializer(keys, indices)
    table = HashTable(tbl_init, 0)
    weights_initializer = tf.cast(
        tf.constant(list([item[1] for item in embeddings])), tf.float32)
    weights = tf_v1.get_variable(
        "weights", dtype=tf.float32, initializer=weights_initializer)
    text_tensor = tf_v1.placeholder(dtype=tf.string, name="text", shape=[None])
    indices_tensor = table.lookup(text_tensor)
    embedding_tensor = tf.gather(weights, indices_tensor)
    hub.add_signature(inputs=text_tensor, outputs=embedding_tensor)
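# Usage sketch (an assumption, not from the original file) showing how text_module_fn
# would typically be wrapped with the TF1-style Hub API; tf_v1 is assumed to be
# tf.compat.v1, and the input strings are illustrative.
spec = hub.create_module_spec(text_module_fn)

with tf_v1.Graph().as_default():
    embed = hub.Module(spec)
    embeddings = embed(["hello world", "something unknown"])
    with tf_v1.Session() as sess:
        sess.run([tf_v1.global_variables_initializer(), tf_v1.tables_initializer()])
        print(sess.run(embeddings))  # the unknown string maps to the OOV row [0, 0, 0, 0]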
def input_fn():
    with tf.variable_scope("input_fn"), tf.device("/cpu:0"):
        caption_dataset = Dataset.from_tensor_slices(list(captions))
        filename_dataset = Dataset.from_tensor_slices(list(filenames))

        table_init = KeyValueTensorInitializer(
            list(self.word_to_idx.keys()),
            list(self.word_to_idx.values()),
            key_dtype=tf.string,
            value_dtype=tf.int32)
        table = HashTable(table_init, default_value=0)

        def split_sentence(sentence):
            words = tf.string_split(tf.reshape(sentence, (1,))).values
            words = tf.concat([
                tf.constant(["<START>"]),
                words,
                tf.constant(["<END>"]),
            ], axis=0)
            return table.lookup(words)

        index_dataset = caption_dataset.map(split_sentence, num_threads=8)

        def decode_image(filename):
            image = tf.image.decode_jpeg(tf.read_file(filename), channels=3)
            # image = tf.image.resize_images(image, [224, 224])
            image = tf.to_float(image)
            return image

        image_dataset = filename_dataset.map(decode_image, num_threads=8)

        caption_structure = {
            "raw": caption_dataset,
            "index": index_dataset,
        }

        return image_dataset, caption_structure
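# A minimal consumption sketch for input_fn above (assumptions: it runs in the scope where
# input_fn is defined, Dataset behaves like the tf.contrib.data-era API used above, and the
# batch size and padded shapes are illustrative, not from the original code).
image_ds, caption_structure = input_fn()
paired = Dataset.zip((image_ds, caption_structure["index"]))
paired = paired.padded_batch(32, padded_shapes=([None, None, 3], [None]))
iterator = paired.make_initializable_iterator()
images, caption_ids = iterator.get_next()

with tf.Session() as sess:
    sess.run([tf.tables_initializer(), iterator.initializer])
    batch_images, batch_ids = sess.run([images, caption_ids])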
def __init__(self, args, session, updates=None):
    self.args = args
    self.sess = session

    # updates
    if not updates:
        updates = 0
    self.updates = updates
    self.global_step = tf.get_variable(
        'global_step', shape=(), dtype=tf.float32,
        initializer=tf.constant_initializer(updates), trainable=False)
    self.step = tf.assign_add(self.global_step, 1)

    # placeholders
    table = HashTable(
        TextFileIdTableInitializer(
            filename=os.path.join(args.output_dir, 'vocab.txt')),
        default_value=Vocab.unk())
    self.q1_string = tf.placeholder(tf.string, [None, None], name='q1_str')
    self.q2_string = tf.placeholder(tf.string, [None, None], name='q2_str')
    self.q1 = tf.placeholder_with_default(table.lookup(self.q1_string),
                                          [None, None], name='q1')
    self.q2 = tf.placeholder_with_default(table.lookup(self.q2_string),
                                          [None, None], name='q2')
    self.q1_len = tf.placeholder(tf.int32, [None], name='q1_len')
    self.q2_len = tf.placeholder(tf.int32, [None], name='q2_len')
    self.y = tf.placeholder(tf.int32, [None], name='y')
    self.dropout_keep_prob = tf.placeholder(tf.float32, (), name='dropout_keep_prob')
    q1_mask = tf.expand_dims(tf.sequence_mask(self.q1_len, dtype=tf.float32), dim=-1)
    q2_mask = tf.expand_dims(tf.sequence_mask(self.q2_len, dtype=tf.float32), dim=-1)

    devices = self.get_available_gpus() or ['/device:CPU:0']
    if not args.multi_gpu:
        devices = devices[:1]
    if len(devices) == 1:
        splits = 1
    else:
        splits = [tf.shape(self.q1)[0] // len(devices)] * (len(devices) - 1) + [-1]  # handle uneven split
    q1 = tf.split(self.q1, splits)
    q2 = tf.split(self.q2, splits)
    q1_mask = tf.split(q1_mask, splits)
    q2_mask = tf.split(q2_mask, splits)
    y = tf.split(self.y, splits)

    # network
    self.network = Network(args)

    # optimizer
    lr = tf.get_variable('lr', shape=(), dtype=tf.float32,
                         initializer=tf.constant_initializer(args.lr), trainable=False)
    lr_next = tf.cond(
        self.global_step < args.lr_warmup_steps,
        true_fn=lambda: args.min_lr + (args.lr - args.min_lr) / max(1, args.lr_warmup_steps) * self.global_step,
        false_fn=lambda: tf.maximum(
            args.min_lr,
            args.lr * args.lr_decay_rate ** tf.floor(
                (self.global_step - args.lr_warmup_steps) / args.lr_decay_steps)))
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                         tf.assign(lr, lr_next, name='update_lr'))
    self.lr = lr
    self.opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=args.beta1, beta2=args.beta2)

    # training graph
    tower_names = ['tower-{}'.format(i) for i in range(len(devices))] if len(devices) > 1 else ['']
    tower_logits = []
    tower_grads = []
    summaries = []
    loss = 0
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        for i, device in enumerate(devices):
            with tf.device(device):
                with tf.name_scope(tower_names[i]) as scope:
                    logits = self.network(q1[i], q2[i], q1_mask[i], q2_mask[i],
                                          self.dropout_keep_prob)
                    tower_logits.append(logits)
                    loss = self.get_loss(logits, y[i])
                    tf.get_variable_scope().reuse_variables()
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                    grads = self.opt.compute_gradients(loss)
                    tower_grads.append(grads)

    gradients = []
    variables = []
    for grad_and_vars in zip(*tower_grads):
        if grad_and_vars[0][0] is None:
            msg = 'WARNING: trainable variable {} receives no grad.\n'.format(
                grad_and_vars[0][1].op.name)
            sys.stderr.write(msg)
            continue
        grad = tf.stack([g for g, _ in grad_and_vars])
        grad = tf.reduce_mean(grad, 0)
        v = grad_and_vars[0][1]  # use the first tower's pointer to the (shared) variable
        gradients.append(grad)
        variables.append(v)
    gradients, self.gnorm = tf.clip_by_global_norm(gradients, self.args.grad_clipping)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = self.opt.apply_gradients(zip(gradients, variables))

    logits = tf.concat(tower_logits, 0)
    self.prob = tf.nn.softmax(logits, dim=1, name='prob')
    self.pred = tf.argmax(input=logits, axis=1, name='pred')
    self.loss = tf.identity(loss, name='loss')
    summaries.append(tf.summary.scalar('training/lr', lr))
    summaries.append(tf.summary.scalar('training/gnorm', self.gnorm))
    summaries.append(tf.summary.scalar('training/loss', self.loss))

    # add summary
    self.summary = tf.summary.merge(summaries)

    # saver
    self.saver = tf.train.Saver(
        [var for var in tf.global_variables() if 'Adam' not in var.name],
        max_to_keep=args.max_checkpoints)
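# A self-contained illustration (not from the original repo) of the lookup table built
# above: TextFileIdTableInitializer maps each line of vocab.txt to its line number, and
# out-of-vocabulary tokens fall back to the default value; the tokens and the default -1
# below are assumptions for demonstration only.
import tensorflow as tf
from tensorflow.contrib.lookup import HashTable, TextFileIdTableInitializer

with open('vocab.txt', 'w') as f:
    f.write('the\nquick\nbrown\nfox\n')

table = HashTable(TextFileIdTableInitializer(filename='vocab.txt'), default_value=-1)
ids = table.lookup(tf.constant([['the', 'brown'], ['fox', 'zebra']]))

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(ids))  # [[0 2] [3 -1]] -- the unseen token gets the default id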
def __init__(self, hparam, export=False, with_char=False):
    self.hparam = hparam
    self.Embedding = None
    self.embed_matrix = None
    self.init_embedding = None

    if export:
        table = HashTable(
            TextFileIdTableInitializer(
                filename='assets/vocab', key_column_index=0, value_column_index=1,
                vocab_size=None, delimiter='\t', name='table'),
            default_value=0)
        self.q1_string = tf.placeholder(tf.string, [None, self.hparam.seq_length],
                                        name='string_input1')
        self.q2_string = tf.placeholder(tf.string, [None, self.hparam.seq_length],
                                        name='string_input2')
        self.premise = table.lookup(self.q1_string)
        self.hypothesis = table.lookup(self.q2_string)
        if with_char:
            char_table = HashTable(
                TextFileIdTableInitializer(
                    filename='assets/char_vocab', key_column_index=0, value_column_index=1,
                    vocab_size=None, delimiter='\t', name='char_table'),
                default_value=0)
            self.ch1_string = tf.placeholder(
                tf.string, [None, self.hparam.char_seq_length], name='char_string_input1')
            self.ch2_string = tf.placeholder(
                tf.string, [None, self.hparam.char_seq_length], name='char_string_input2')
            self.ch1 = char_table.lookup(self.ch1_string)
            self.ch2 = char_table.lookup(self.ch2_string)
        else:
            self.ch1, self.ch2 = None, None
    else:
        self.premise = tf.placeholder(tf.int32, [None, self.hparam.seq_length], 'premise')
        self.hypothesis = tf.placeholder(tf.int32, [None, self.hparam.seq_length], 'hypothesis')
        self.ch1, self.ch2 = None, None

    self.premise_mask, self.hypothesis_mask = None, None
    self.y = None
    self.pred = None
    self.logits = None
    self.dropout_keep_prob = None
    self.loss = None
    self.train_op = None
    self.is_training = None
    self.output_w = None
    self.output_b = None
    self.lsf_q1, self.lsf_q2 = None, None
    self.random_size = None
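# Sketch (an assumption, not from the original project) of the 'assets/vocab' layout
# implied by key_column_index=0 / value_column_index=1 with a tab delimiter: unlike the
# line-number-based vocab.txt example earlier, each line carries an explicit id as
# "<token>\t<integer id>"; the tokens and ids below are made up for illustration.
import os
import tensorflow as tf
from tensorflow.contrib.lookup import HashTable, TextFileIdTableInitializer

os.makedirs('assets', exist_ok=True)
with open('assets/vocab', 'w') as f:
    f.write('<pad>\t0\nhello\t7\nworld\t8\n')

vocab_table = HashTable(
    TextFileIdTableInitializer(filename='assets/vocab', key_column_index=0,
                               value_column_index=1, delimiter='\t'),
    default_value=0)

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(vocab_table.lookup(tf.constant(['hello', 'oov']))))  # [7 0]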
def lookup(self, numberer):
    k, v = list(zip(*numberer.value2num.items()))
    i = KeyValueTensorInitializer(k, v)
    return HashTable(i, default_value=numberer.unknown_idx)
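# Hypothetical usage of lookup() above; FakeNumberer is only a stand-in for whatever
# object exposes value2num and unknown_idx in the original code, and "model" stands for
# an instance of the class that defines lookup().
class FakeNumberer(object):
    def __init__(self):
        self.value2num = {'NOUN': 1, 'VERB': 2}
        self.unknown_idx = 0

pos_table = model.lookup(FakeNumberer())
pos_ids = pos_table.lookup(tf.constant(['NOUN', 'ADJ']))

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(pos_ids))  # [1 0] -- unseen tags map to unknown_idx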
def __init__(self, args, session, updates=None):
    self.args = args
    self.sess = session

    # updates
    if not updates:
        updates = 0
    # tf.get_variable() makes the variable shared. If tf.Variable() were used, the system
    # would treat two variables with the same name as two different variables.
    self.global_step = tf.get_variable(
        'global_step', shape=(), dtype=tf.float32,
        initializer=tf.constant_initializer(updates), trainable=False)
    self.step = tf.assign_add(self.global_step, 1)  # update self.global_step by adding 1 to it

    # placeholders
    table = HashTable(
        TextFileIdTableInitializer(
            filename=os.path.join(args.output_dir, 'vocab.txt')),
        default_value=Vocab.unk())  # string-to-id table, one key-value pair per line
    self.q1_string = tf.placeholder(tf.string, [None, None], name='q1_str')
    self.q2_string = tf.placeholder(tf.string, [None, None], name='q2_str')
    self.q1 = tf.placeholder_with_default(table.lookup(self.q1_string),
                                          [None, None], name='q1')
    self.q2 = tf.placeholder_with_default(table.lookup(self.q2_string),
                                          [None, None], name='q2')
    self.q1_len = tf.placeholder(tf.int32, [None], name='q1_len')
    self.q2_len = tf.placeholder(tf.int32, [None], name='q2_len')
    self.y = tf.placeholder(tf.int32, [None], name='y')
    self.dropout_keep_prob = tf.placeholder(tf.float32, (), name='dropout_keep_prob')
    # sequence_mask returns a mask tensor marking the first N positions of each row.
    q1_mask = tf.expand_dims(tf.sequence_mask(self.q1_len, dtype=tf.float32), dim=-1)
    q2_mask = tf.expand_dims(tf.sequence_mask(self.q2_len, dtype=tf.float32), dim=-1)

    devices = self.get_available_gpus() or ['/device:CPU:0']
    if not args.multi_gpu:
        devices = devices[:1]
    if len(devices) == 1:
        splits = 1
    else:
        splits = [tf.shape(self.q1)[0] // len(devices)] * (len(devices) - 1) + [-1]  # handle uneven split
    q1 = tf.split(self.q1, splits)
    q2 = tf.split(self.q2, splits)
    q1_mask = tf.split(q1_mask, splits)
    q2_mask = tf.split(q2_mask, splits)
    y = tf.split(self.y, splits)

    # network
    self.network = Network(args)

    # optimizer
    lr = tf.get_variable('lr', shape=(), dtype=tf.float32,
                         initializer=tf.constant_initializer(args.lr), trainable=False)
    lr_next = tf.cond(
        self.global_step < args.lr_warmup_steps,
        true_fn=lambda: args.min_lr + (args.lr - args.min_lr) / max(1, args.lr_warmup_steps) * self.global_step,
        false_fn=lambda: tf.maximum(
            args.min_lr,
            args.lr * args.lr_decay_rate ** tf.floor(
                (self.global_step - args.lr_warmup_steps) / args.lr_decay_steps)))
    # Within a graph, different kinds of resources can be managed through collections.
    # tf.GraphKeys.UPDATE_OPS is a collection of ops (operations executed when the graph
    # runs, such as multiplications or ReLUs), not of variables.
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                         tf.assign(lr, lr_next, name='update_lr'))
    self.lr = lr
    self.opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=args.beta1, beta2=args.beta2)

    # training graph
    tower_names = ['tower-{}'.format(i) for i in range(len(devices))] if len(devices) > 1 else ['']
    tower_logits = []
    tower_grads = []
    summaries = []
    loss = 0
    # When the system detects that a previously defined variable is requested, sharing is
    # enabled; otherwise a new variable is created.
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        for i, device in enumerate(devices):
            with tf.device(device):
                with tf.name_scope(tower_names[i]) as scope:
                    logits = self.network(q1[i], q2[i], q1_mask[i], q2_mask[i],
                                          self.dropout_keep_prob)
                    tower_logits.append(logits)
                    loss = self.get_loss(logits, y[i])
                    tf.get_variable_scope().reuse_variables()
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                    grads = self.opt.compute_gradients(loss)
                    tower_grads.append(grads)

    gradients = []
    variables = []
    for grad_and_vars in zip(*tower_grads):  # zip(*list): turn a list of rows into a list of columns
        if grad_and_vars[0][0] is None:
            msg = 'WARNING: trainable variable {} receives no grad.\n'.format(
                grad_and_vars[0][1].op.name)
            sys.stderr.write(msg)  # write the warning to stderr
            continue
        grad = tf.stack([g for g, _ in grad_and_vars])
        grad = tf.reduce_mean(grad, 0)
        v = grad_and_vars[0][1]  # use the 1st tower's pointer to the (shared) variable
        gradients.append(grad)
        variables.append(v)
    gradients, self.gnorm = tf.clip_by_global_norm(gradients, self.args.grad_clipping)
    """
    tf.clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
    t_list is the list of gradient tensors and clip_norm is the clipping ratio (the same
    idea as clip_gradient above). The function returns the clipped gradient tensors and
    the global norm over all tensors, which guarantees that within one update step the
    sum of squares of all weight gradients stays within the configured bound.
    """

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    """
    tf.control_dependencies(control_inputs) takes a list of Operations or Tensors and
    returns a context manager that controls the dependencies of the ops defined inside
    it: those ops depend on the ops in control_inputs and only run after the ops in
    control_inputs have executed.
    """
    with tf.control_dependencies(update_ops):  # run update_ops first
        self.train_op = self.opt.apply_gradients(zip(gradients, variables))

    logits = tf.concat(tower_logits, 0)
    self.prob = tf.nn.softmax(logits, dim=1, name='prob')
    self.pred = tf.argmax(input=logits, axis=1, name='pred')
    self.loss = tf.identity(loss, name='loss')
    summaries.append(tf.summary.scalar('training/lr', lr))
    summaries.append(tf.summary.scalar('training/gnorm', self.gnorm))
    summaries.append(tf.summary.scalar('training/loss', self.loss))

    # add summary
    self.summary = tf.summary.merge(summaries)

    # saver
    self.saver = tf.train.Saver(
        [var for var in tf.global_variables() if 'Adam' not in var.name],
        max_to_keep=args.max_checkpoints)
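# A tiny numeric illustration (toy values, not from the model) of the
# tf.clip_by_global_norm behaviour described in the comments above.
import tensorflow as tf

grads = [tf.constant([3.0, 4.0]), tf.constant([0.0, 12.0])]
clipped, global_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)

with tf.Session() as sess:
    print(sess.run(global_norm))  # 13.0 = sqrt(3^2 + 4^2 + 12^2)
    print(sess.run(clipped))      # every gradient is scaled by 5 / 13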