def _get_weights(self, hidden_dim=None): """Create or get concatenated embedding or softmax variable. Args: hidden_dim: dim of the variable. Defaults to self._body_input_depth Returns: a list of self._num_shards Tensors. """ if hidden_dim is None: hidden_dim = self._body_input_depth num_shards = self._model_hparams.symbol_modality_num_shards shards = [] for i in xrange(num_shards): shard_size = (self._vocab_size // num_shards) + ( 1 if i < self._vocab_size % num_shards else 0) var_name = "weights_%d" % i shards.append( tf.get_variable( var_name, [shard_size, hidden_dim], initializer=tf.random_normal_initializer(0.0, hidden_dim**-0.5))) if num_shards == 1: ret = shards[0] else: ret = tf.concat(shards, 0) # Convert ret to tensor. if not context.in_eager_mode(): ret = eu.convert_gradient_to_tensor(ret) return ret
def _get_weights(self, src_mat_np, vocab_size, emb_trainable=True):
  """Build the sharded source embedding variable from a pretrained numpy matrix."""
  num_shards = self._model_hparams.symbol_modality_num_shards
  shards = []
  pos = 0
  for i in range(num_shards):
    shard_size = (vocab_size // num_shards) + (
        1 if i < vocab_size % num_shards else 0)
    var_name = "weights_%d" % i
    # Slice the pretrained matrix for this shard and use it as the initial
    # value; the variable shape is inferred from the constant.
    src_mat_np_shard = src_mat_np[pos:pos + shard_size]
    initializer = tf.constant(src_mat_np_shard, dtype=tf.float32)
    pos += shard_size
    src_mat_shard = tf.get_variable(
        var_name, initializer=initializer, trainable=emb_trainable)
    shards.append(src_mat_shard)
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  ret = expert_utils.convert_gradient_to_tensor(ret)
  return ret
def _get_weights(self, hidden_dim=None): """Create or get concatenated embedding or softmax variable. Args: hidden_dim: dim of the variable. Defaults to self._body_input_depth Returns: a list of self._num_shards Tensors. """ if hidden_dim is None: hidden_dim = self._body_input_depth num_shards = self._model_hparams.symbol_modality_num_shards shards = [] for i in range(num_shards): shard_size = (self._vocab_size // num_shards) + ( 1 if i < self._vocab_size % num_shards else 0) var_name = "weights_%d" % i shards.append( tf.get_variable(var_name, [shard_size, hidden_dim], initializer=tf.random_normal_initializer( 0.0, hidden_dim**-0.5))) if num_shards == 1: ret = shards[0] else: ret = tf.concat(shards, 0) # Convert ret to tensor. if not tf.contrib.eager.in_eager_mode(): ret = eu.convert_gradient_to_tensor(ret) return ret
def _get_weights(self, use_pretrain=False, dim=None):
  """Create the embedding or softmax variable, optionally from pretrained embeddings.

  Args:
    use_pretrain: whether to initialize from pretrained embeddings (accepted
      but unused in this variant).
    dim: dim of the variable. Defaults to self._body_input_depth.

  Returns:
    a Tensor with shape [self._vocab_size, dim], built by concatenating
    self._num_shards variable shards.
  """
  if dim is None:
    dim = self._body_input_depth
  num_shards = self._model_hparams.symbol_modality_num_shards
  shards = []
  for i in range(num_shards):
    shard_size = (self._vocab_size // num_shards) + (
        1 if i < self._vocab_size % num_shards else 0)
    var_name = "weights_%d" % i
    shards.append(
        tf.get_variable(
            var_name, [shard_size, dim],
            initializer=tf.random_normal_initializer(0.0, dim**-0.5)))
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  ret = eu.convert_gradient_to_tensor(ret)
  return ret
def _get_weights(self, dim=None):
  """Create the embedding/softmax variable, initialized from pretrained embeddings.

  Returns:
    a Tensor with shape [self._vocab_size, dim], built by concatenating
    self._num_shards variable shards.
  """
  if dim is None:
    dim = self._body_input_depth
  # Load the pretrained embedding matrix (pickled numpy array).
  with open(pretrained_emb, 'rb') as emb_file:
    emb = pkl.load(emb_file, encoding='latin1')
  emb = np.asarray(emb, dtype=np.float32)
  num_shards = self._model_hparams.symbol_modality_num_shards
  shards = []
  start_ind = 0
  for i in range(num_shards):
    shard_size = (self._vocab_size // num_shards) + (
        1 if i < self._vocab_size % num_shards else 0)
    var_name = "weights_%d" % i
    init = emb[start_ind:start_ind + shard_size, :]
    start_ind += shard_size
    shards.append(tf.get_variable(var_name, initializer=init))
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  ret = eu.convert_gradient_to_tensor(ret)
  return ret
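# A minimal sketch (assumption, not from the original) of how the pickle file
# referenced by `pretrained_emb` above could be written: a float32 numpy array
# of shape [vocab_size, depth], so that per-shard row slicing and tf.concat
# along axis 0 reproduce the full table. Assumes `import numpy as np` and
# `import pickle as pkl`, as used above; the helper name is hypothetical.
def _write_pretrained_emb_sketch(path, vocab_size, depth):
  emb = np.random.normal(0.0, depth**-0.5,
                         size=(vocab_size, depth)).astype(np.float32)
  with open(path, "wb") as f:
    pkl.dump(emb, f)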
def _embed_tgt(self, y, emb_mat, name, reuse=None, to_squeeze=True):
  """Look up target embeddings for ids y in emb_mat and zero out padding."""
  with tf.variable_scope(name, reuse=reuse):
    if to_squeeze:
      y = tf.squeeze(y, axis=3)
    emb_mat = expert_utils.convert_gradient_to_tensor(emb_mat)
    ret = tf.gather(emb_mat, y)
    if self._model_hparams.multiply_embedding_mode == "sqrt_depth":
      ret *= self._body_input_depth**0.5
    if to_squeeze:
      # Zero out embeddings of padding symbols (id 0).
      ret *= tf.expand_dims(tf.to_float(tf.not_equal(y, 0)), -1)
    return ret
def _embed_src(self, x, emb_mat, name, reuse=None, to_squeeze=True):
  """Look up source embeddings for ids x in emb_mat and zero out padding."""
  with tf.variable_scope(name, reuse=reuse):
    # x: [None, None, 1, 1]
    if to_squeeze:
      x = tf.squeeze(x, axis=3)  # ==> [None, None, 1]
    emb_mat = expert_utils.convert_gradient_to_tensor(emb_mat)
    ret = tf.gather(emb_mat, x)  # ==> [None, None, 1, 4, 300]
    if self._model_hparams.multiply_embedding_mode == "sqrt_depth":
      ret *= self._body_input_depth**0.5
    if to_squeeze:
      # Zero out embeddings of padding symbols (id 0).
      pads = tf.expand_dims(tf.to_float(tf.not_equal(x, 0)), -1)
      pads = tf.expand_dims(pads, -1)  # ==> [None, None, 1, 1, 1]
      ret *= pads  # ==> [None, None, 1, 4, 300]
    return ret
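# A minimal standalone sketch (assumption; the helper name is hypothetical) of
# the lookup pattern in _embed_src above: the embedding table itself appears
# factored, with num_factors vectors of size depth per symbol (4 x 300 in the
# printed shapes), so a single gather returns all factors, and padding ids (0)
# are zeroed out with a twice-expanded broadcast mask.
def _factored_embed_sketch(emb_mat, x, depth):
  # emb_mat: [vocab_size, num_factors, depth]; x: [batch, length, 1] int ids.
  ret = tf.gather(emb_mat, x) * depth**0.5  # [batch, length, 1, num_factors, depth]
  mask = tf.to_float(tf.not_equal(x, 0))    # [batch, length, 1]
  mask = tf.expand_dims(tf.expand_dims(mask, -1), -1)  # [batch, length, 1, 1, 1]
  return ret * mask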
def _get_weights(self, vocab_size):
  '''Create or get concatenated embedding or softmax variable.

  Returns:
    a Tensor with shape [vocab_size, self._body_input_depth], built by
    concatenating self._num_shards variable shards.
  '''
  num_shards = self._model_hparams.symbol_modality_num_shards
  shards = []
  for i in range(num_shards):
    shard_size = (vocab_size // num_shards) + (
        1 if i < vocab_size % num_shards else 0)
    emb_mat_init = tf.random_normal_initializer(
        0.0, self._body_input_depth**-0.5)
    emb_mat = tf.get_variable(
        "weights_%d" % i, [shard_size, self._body_input_depth],
        initializer=emb_mat_init)
    shards.append(emb_mat)
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  ret = expert_utils.convert_gradient_to_tensor(ret)
  return ret
def _get_weights(self):
  """Create or get concatenated embedding or softmax variable.

  Returns:
    a Tensor with shape [self._vocab_size, self._body_input_depth], built by
    concatenating self._num_shards variable shards.
  """
  num_shards = self._model_hparams.symbol_modality_num_shards
  shards = []
  for i in range(num_shards):
    shard_size = (self._vocab_size // num_shards) + (
        1 if i < self._vocab_size % num_shards else 0)
    var_name = "weights_%d" % i
    shards.append(
        tf.get_variable(
            var_name, [shard_size, self._body_input_depth],
            initializer=tf.random_normal_initializer(
                0.0, self._body_input_depth**-0.5)))
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  ret = eu.convert_gradient_to_tensor(ret)
  return ret
def _get_weights_top(self, tgt_mat_np, vocab_size, emb_trainable=True):
  """Build the sharded softmax (top) variable from a pretrained numpy matrix."""
  num_shards = self._model_hparams.symbol_modality_num_shards
  shards = []
  pos = 0
  for i in range(num_shards):
    shard_size = (vocab_size // num_shards) + (
        1 if i < vocab_size % num_shards else 0)
    var_name = "weights_%d" % i
    tgt_mat_np_shard = tgt_mat_np[pos:pos + shard_size]
    # Callable initializer that returns the pretrained shard; the slice is
    # bound as a default argument so each shard keeps its own values.
    initializer = (lambda shape, dtype=tf.float32, partition_info=None,
                   value=tgt_mat_np_shard: value)
    pos += shard_size
    tgt_mat_shard = tf.get_variable(
        var_name,
        initializer=initializer,
        trainable=emb_trainable,
        shape=tgt_mat_np_shard.shape,
        dtype=tf.float32)
    shards.append(tgt_mat_shard)
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  ret = expert_utils.convert_gradient_to_tensor(ret)
  return ret
def _get_weights(self, hidden_dim=None, quantize_embedding=None,
                 prune_embedding=None):
  """Create or get concatenated embedding or softmax variable.

  Args:
    hidden_dim: dim of the variable. Defaults to self._body_input_depth.
    quantize_embedding: whether to store weights as indices into a learned
      codebook. Defaults to self._model_hparams.quantize_embedding.
    prune_embedding: whether to apply a binary pruning mask to the weights.
      Defaults to self._model_hparams.prune_embedding.

  Returns:
    a Tensor with shape [self._vocab_size, hidden_dim], built by concatenating
    self._num_shards shards.
  """
  if hidden_dim is None:
    hidden_dim = self._body_input_depth
  if quantize_embedding is None:
    quantize_embedding = self._model_hparams.quantize_embedding
  if prune_embedding is None:
    prune_embedding = self._model_hparams.prune_embedding
  if quantize_embedding:
    codebook_size = self._model_hparams.quantize_codes
    codebook = tf.get_variable(
        "codebook",
        shape=[codebook_size],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(0.0, 0.5),
        trainable=True)
    tf.add_to_collection('codebooks', codebook)
  num_shards = self._model_hparams.symbol_modality_num_shards
  shards = []
  for i in range(num_shards):
    shard_size = (self._vocab_size // num_shards) + (
        1 if i < self._vocab_size % num_shards else 0)
    if quantize_embedding:
      # Weights are stored as integer indices into the shared codebook.
      weight_idxs = tf.get_variable(
          "weight_idxs_%d" % i, [shard_size, hidden_dim],
          dtype=tf.int32,
          initializer=tf.random_uniform_initializer(
              0, codebook_size, dtype=tf.int32),
          trainable=False)
      tf.add_to_collection('idxs', weight_idxs)
      weights = tf.gather(codebook, weight_idxs)
    else:
      weights = tf.get_variable(
          "weights_%d" % i, [shard_size, hidden_dim],
          initializer=tf.random_normal_initializer(0.0, hidden_dim**-0.5),
          trainable=True)
    if prune_embedding:
      # Non-trainable binary mask that zeroes out pruned weights; exposed via
      # the 'masks' collection.
      mask = tf.get_variable(
          "weights_mask_%d" % i, [shard_size, hidden_dim],
          dtype=tf.bool,
          initializer=tf.ones_initializer(tf.bool),
          trainable=False)
      tf.add_to_collection('masks', mask)
      weights = tf.where(mask, weights, tf.zeros_like(weights))
    # Append the weights to the current shards.
    shards.append(weights)
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  # Convert ret to tensor.
  if not context.in_eager_mode():
    ret = eu.convert_gradient_to_tensor(ret)
  return ret
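# A minimal sketch (assumption; the helper name and usage are hypothetical) of
# how the 'codebooks' and 'idxs' collections registered above might be
# consumed, e.g. re-assigning each quantized weight entry to its nearest
# codebook value after the codebook has been updated.
def _nearest_codebook_idxs_sketch(codebook, weights):
  # codebook: [codebook_size] float; weights: [shard_size, hidden_dim] float.
  # Broadcast to [shard_size, hidden_dim, codebook_size] distances, then pick
  # the closest codebook entry per weight.
  dist = tf.abs(tf.expand_dims(weights, -1) - tf.reshape(codebook, [1, 1, -1]))
  return tf.argmin(dist, axis=-1, output_type=tf.int32)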