def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    self.embeddings1 = get_token_embeddings(self.hp.vocab_size,
                                            self.hp.d_model,
                                            scope=0,
                                            zero_pad=True)
    self.embeddings2 = get_token_embeddings(self.hp.vocab_size,
                                            self.hp.d_model,
                                            scope=1,
                                            zero_pad=True)

def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    # Token embeddings: map each character/token of the sentence to be translated
    # into the target vocabulary space.
    self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                           self.hp.d_model,
                                           zero_pad=True)

def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = _load_vocab(hp.vocab)
    self.vocab_len = len(self.token2idx)
    self.embeddings = get_token_embeddings(self.vocab_len,
                                           self.hp.d_model,
                                           zero_pad=True)

def __init__(self, hp, word2idx, idx2word):
    self.hp = hp
    self.token2idx, self.idx2token = word2idx, idx2word
    self.d_model = hp.d_model
    self.embeddings = get_token_embeddings(self.hp.word,
                                           self.hp.d_model,
                                           zero_pad=True)

def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token, self.hp.vocab_size = load_vocab(hp.vocab)
    self.embd = None
    if self.hp.preembedding:
        self.embd = loadGloVe(self.hp.vec_path)
    self.embeddings = get_token_embeddings(self.embd, self.hp.vocab_size,
                                           self.hp.d_model, zero_pad=False)
    self.input_sup = tf.placeholder(tf.int32, [None, self.hp.maxlen], name="input_sup")
    self.input_ori = tf.placeholder(tf.int32, [None, self.hp.maxlen], name="input_ori")
    self.input_aug = tf.placeholder(tf.int32, [None, self.hp.maxlen], name="input_aug")
    self.sup_len = tf.placeholder(tf.int32, [None])
    self.ori_len = tf.placeholder(tf.int32, [None])
    self.aug_len = tf.placeholder(tf.int32, [None])
    self.truth = tf.placeholder(tf.int32, [None, self.hp.num_class], name="truth")
    self.is_training = tf.placeholder(tf.bool, shape=None, name="is_training")
    self.model = True
    # self.logits_sup, self.logits_ori, self.logits_aug = self._logits_op()
    self.loss = self._loss_op()
    self.acc = self._acc_op()
    self.global_step = self._globalStep_op()
    self.train = self._training_op()

def __init__(self, context):
    self.context = context
    self.token2idx, self.idx2token = load_vocab(context.vocab)
    vocab_size = len(self.token2idx)
    # The embedding dimension here does not have to be d_model; any other size works.
    self.embeddings = get_token_embeddings(vocab_size,
                                           self.context.d_ff,
                                           zero_pad=False)

def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                           self.hp.d_model,
                                           zero_pad=True)
    print(self.embeddings)

def __init__(self, hp):
    self.hp = hp
    self.en_token2idx, self.en_idx2token = load_vocab(hp.en_vocab)
    self.ch_token2idx, self.ch_idx2token = load_vocab(hp.ch_vocab)
    self.en_embeddings, self.ch_embeddings = get_token_embeddings(
        self.hp.en_vocab_size, self.hp.ch_vocab_size, self.hp.d_model,
        zero_pad=True)

def __init__(self, hp):
    self.hp = hp
    # Inference previously used the wrong vocabulary: it must be the target-language
    # vocab, not the source-language one (this mistake cost four days of debugging).
    # The dev vocab was also considered instead of the train vocab, but train works
    # as well since it essentially covers dev; the smaller dev vocab can raise a KeyError.
    self.token2idx, self.idx2token = load_vocab(hp.vocab1)
    self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                           self.hp.d_model,
                                           zero_pad=True)
    print('embeddings size =', self.hp.vocab_size)

def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                           self.hp.d_model,
                                           zero_pad=True)
    self.input_x = tf.placeholder(dtype=tf.int32, shape=(None, None), name="input_x")
    self.decoder_input = tf.placeholder(dtype=tf.int32, shape=(None, None), name="decoder_input")
    self.target = tf.placeholder(dtype=tf.int32, shape=(None, None), name="target")
    self.is_training = tf.placeholder(dtype=tf.bool, name="is_training")

    # encoder
    self.encoder_hidden = self.encode(self.input_x, training=self.is_training)
    # decoder
    self.logits = self.decode(self.decoder_input, self.encoder_hidden, training=self.is_training)
    self.y_hat = tf.to_int32(tf.argmax(self.logits, axis=-1), name="y_predict_v2")

    # loss
    self.smoothing_y = label_smoothing(tf.one_hot(self.target, depth=self.hp.vocab_size))
    self.ce_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits,
                                                              labels=self.smoothing_y)
    nonpadding = tf.to_float(tf.not_equal(self.target, self.token2idx["<pad>"]))
    self.loss = tf.reduce_sum(self.ce_loss * nonpadding) / (tf.reduce_sum(nonpadding) + 1e-7)

    # optimize
    self.global_step = tf.train.get_or_create_global_step()
    self.lr = noam_scheme(self.hp.lr, self.global_step, self.hp.warmup_steps)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.minimize(self.loss, global_step=self.global_step)

    # tensorboard
    tf.summary.scalar('lr', self.lr)
    tf.summary.scalar("loss", self.loss)
    tf.summary.scalar("global_step", self.global_step)
    self.summaries = tf.summary.merge_all()

    # predict part
    self.y_predict = tf.identity(self.greedy_search(), name="y_predict")

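# Usage sketch (an assumption, not part of the source above): the constructor exposes
# placeholders plus train_op / loss / global_step, so one training step can be driven
# through feed_dict roughly as below. The class name "Transformer" and the dummy batch
# arrays are hypothetical; hp is assumed to be the same hyper-parameter object.
import numpy as np
import tensorflow as tf

model = Transformer(hp)
xs = np.random.randint(1, hp.vocab_size, size=(8, 20))      # dummy source token ids
dec_in = np.random.randint(1, hp.vocab_size, size=(8, 20))  # dummy shifted decoder inputs
ys = np.random.randint(1, hp.vocab_size, size=(8, 20))      # dummy target token ids
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss_val, step = sess.run(
        [model.train_op, model.loss, model.global_step],
        feed_dict={model.input_x: xs,
                   model.decoder_input: dec_in,
                   model.target: ys,
                   model.is_training: True})
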
def __init__(self, hp, inj_type=None, quant_min_max=None, inj_layer=None):
    self.hp = hp
    self.inj_type = inj_type
    self.quant_min_max = quant_min_max
    self.inj_layer = inj_layer
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                           self.hp.d_model,
                                           zero_pad=True)

def __init__(self, inputs, labels, dropout, token2idx, idx2token):
    self.inputs = inputs
    self.labels = labels
    self.dropout = dropout
    self.token2idx = token2idx
    self.idx2token = idx2token
    self.embeddings = get_token_embeddings(len(self.token2idx),
                                           hp.HIDDEN_SIZE,
                                           zero_pad=True)
    self.logits = self.time_encode(inputs)
    self.optimize, self.loss = self.train(self.inputs, self.labels)

def encode(self, x, training=True):
    '''
    Returns
    memory: encoder outputs. (N, T1, d_model)
    '''
    scopes = []
    outputs = []
    with tf.variable_scope("embeddings", reuse=tf.AUTO_REUSE):
        self.token2idx, self.idx2token = load_vocab(self.hp.vocab)
        self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                               self.hp.d_model,
                                               zero_pad=True)
        scopes.append(tf.get_variable_scope().name)
        outputs.append(self.embeddings)
    with tf.variable_scope("encoder_embedding_lookup", reuse=tf.AUTO_REUSE):
        # src_masks
        src_masks = tf.math.equal(x, 0)  # (N, T1)
        # embedding
        enc = tf.nn.embedding_lookup(self.embeddings, x)  # (N, T1, d_model)
        enc *= self.hp.d_model**0.5  # scale
        enc += positional_encoding(enc, self.hp.maxlen1)
        enc = tf.layers.dropout(enc, self.hp.dropout_rate, training=training)
        scopes.append(tf.get_variable_scope().name)
        outputs.append(enc)
    ## Blocks
    for i in range(self.hp.num_blocks):
        with tf.variable_scope("encoder_num_blocks_{}".format(i), reuse=tf.AUTO_REUSE):
            # self-attention
            enc = multihead_attention(queries=enc,
                                      keys=enc,
                                      values=enc,
                                      key_masks=src_masks,
                                      num_heads=self.hp.num_heads,
                                      dropout_rate=self.hp.dropout_rate,
                                      training=training,
                                      causality=False)
            # feed forward
            enc = ff(enc, num_units=[self.hp.d_ff, self.hp.d_model])
            scopes.append(tf.get_variable_scope().name)
            outputs.append(enc)
    memory = enc
    return memory, src_masks, outputs, scopes

def __init__(self, hp, word2idx, idx2word):
    self.hp = hp
    self.token2idx, self.idx2token = word2idx, idx2word
    self.d_model = hp.d_model
    self.embeddings = get_token_embeddings(self.hp.word,
                                           self.hp.d_model,
                                           zero_pad=True)
    self.lstm1 = tf.nn.rnn_cell.BasicLSTMCell(hp.d_model, state_is_tuple=True)
    self.lstm2 = tf.nn.rnn_cell.BasicLSTMCell(hp.d_model, state_is_tuple=True)
    if hp.lstm_type == 'bi':
        self.lstmb = tf.nn.rnn_cell.BasicLSTMCell(hp.d_model, state_is_tuple=True)

def _inputEncodingBlock(self, scope):
    """
    :param scope: scope name
    embeded_left, embeded_right: tensor with shape (batch_size, seq_length, embedding_size)
    :return:
    a_bar: tensor with shape (batch_size, seq_length, 2 * hidden_size)
    b_bar: tensor with shape (batch_size, seq_length, 2 * hidden_size)
    """
    with tf.device('/cpu:0'):
        self.Embedding = get_token_embeddings(self.n_vocab,
                                              self.embedding_size,
                                              "embedding",
                                              zero_pad=True)
        # add positional encodings
        self.embeded_left = tf.nn.embedding_lookup(self.Embedding, self.premise)
        self.embeded_left = self._encode(self.embeded_left)
        self.embeded_right = tf.nn.embedding_lookup(self.Embedding, self.hypothesis)
        self.embeded_right = self._encode(self.embeded_right)
        print_shape('embeded_left', self.embeded_left)
        print_shape('embeded_right', self.embeded_right)

    with tf.variable_scope(scope):
        # a_bar = BiLSTM(a, i)  (1)
        # b_bar = BiLSTM(b, i)  (2)
        outputsPremise, finalStatePremise = self._biLSTMBlock(
            self.embeded_left, self.hidden_size, 'biLSTM', self.seq_length)
        outputsHypothesis, finalStateHypothesis = self._biLSTMBlock(
            self.embeded_right, self.hidden_size, 'biLSTM', self.seq_length, isReuse=True)
        a_bar = tf.concat(outputsPremise, axis=2)
        b_bar = tf.concat(outputsHypothesis, axis=2)
        print_shape('a_bar', a_bar)
        print_shape('b_bar', b_bar)
        return a_bar, b_bar

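# Sketch of the _biLSTMBlock helper called above (an assumption; the actual helper is
# not shown in this listing). It only needs to return (outputs, final_states) from a
# bidirectional LSTM so that concatenating outputs along axis 2 yields the
# (batch_size, seq_length, 2 * hidden_size) tensors a_bar and b_bar of equations (1)/(2).
def _biLSTMBlock(self, inputs, num_units, scope, seq_len=None, isReuse=False):
    with tf.variable_scope(scope, reuse=isReuse):
        fw_cell = tf.nn.rnn_cell.LSTMCell(num_units)
        bw_cell = tf.nn.rnn_cell.LSTMCell(num_units)
        # outputs is a (forward, backward) tuple; each element is (batch, seq_len, num_units).
        # seq_len is accepted for interface compatibility; per-example lengths could be
        # forwarded via sequence_length= if available as an int32 vector.
        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, inputs, dtype=tf.float32)
        return outputs, states
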
def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(os.path.join(hp.data_dir, hp.vocab))
    self.steps = []
    if self.hp.fac_embed:
        self.embeddings1, self.embeddings2 = get_factorized_token_embeddings(
            self.hp.vocab_size,
            self.hp.d_embed,
            self.hp.d_model,
            zero_pad=True,
            normalized=self.hp.norm_embedding,
            ortho=self.hp.ortho_embedding)
    else:
        self.embeddings = get_token_embeddings(
            self.hp.vocab_size,
            self.hp.d_model,
            zero_pad=True,
            normalized=self.hp.norm_embedding,
            ortho=self.hp.ortho_embedding)

def _init_embeddings(self):
    if self._embeddings is None:
        if self._embedding_dim is None and self._context.embedding_dims is None:
            logging.info("%s embedding is not initialized", self._name)
            return
        logging.info("%s embedding is being initialized", self._name)
        self._embeddings = []
        self._token2idxs = []
        self._idx2tokens = []
        cnt = 0
        for i, vocab in enumerate(self._context.vocabs.split(":")):
            if self._context.embedded_indices is not None:
                if i not in self._context.embedded_indices:
                    continue
            token2idx, idx2token = load_vocab(vocab)
            self._token2idxs.append(token2idx)
            self._idx2tokens.append(idx2token)
            vocab_size = len(token2idx)
            dim = self._embedding_dim
            if self._context.embedding_dims is not None:
                dim = self._context.embedding_dims[cnt]
            assert dim is not None
            embedding = get_token_embeddings(
                vocab_size,
                dim,
                zero_pad=False,
                name="{}_{}".format(self._name, self._context.embedding_name[i]))
            self._embeddings.append(embedding)
            cnt += 1
        logging.info("%s initialized %s embeddings", self._name, cnt)
        # CHECK
        assert self._token2idxs is not None and self._idx2tokens is not None
        assert len(self._embeddings) == len(self._token2idxs) and \
            len(self._token2idxs) == len(self._idx2tokens)
        for i in range(len(self._embeddings)):
            assert self._embeddings[i].shape[0] == len(self._token2idxs[i]), \
                "%s != %s" % (self._embeddings[i].shape[0], len(self._token2idxs[i]))

def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token, self.hp.vocab_size = load_vocab(hp.vocab)
    self.embd = None
    if self.hp.preembedding:
        self.embd = loadGloVe(self.hp.vec_path)
    self.embeddings = get_token_embeddings(self.embd, self.hp.vocab_size,
                                           self.hp.d_model, zero_pad=False)
    self.x = tf.placeholder(tf.int32, [None, None], name="text_x")
    self.y = tf.placeholder(tf.int32, [None, None], name="text_y")
    self.x_len = tf.placeholder(tf.int32, [None])
    self.y_len = tf.placeholder(tf.int32, [None])
    self.truth = tf.placeholder(tf.int32, [None, self.hp.num_class], name="truth")
    self.logits = self._logits_op()
    self.loss = self._loss_op()
    self.acc = self._acc_op()
    self.global_step = self._globalStep_op()
    self.train = self._training_op()

def __init__(self, training=True):
    self.graph = tf.Graph()
    with self.graph.as_default():
        if training:
            self.x, self.y, self.num_batch = get_batch()
        else:
            self.x = tf.placeholder(tf.int32, shape=(None, hp.max_len))
            self.y = tf.placeholder(tf.int32, shape=(None, hp.max_len))

        self.decoder_inputs = tf.concat(
            (tf.ones_like(self.y[:, :1]) * 2, self.y[:, :-1]), -1)

        de2idx, idx2de = load_data.load_vocab('./preprocessed/de.vocab.tsv')
        en2idx, idx2en = load_data.load_vocab('./preprocessed/en.vocab.tsv')
        self.embedding = get_token_embeddings(len(de2idx), hp.hidden_units, zero_pad=True)

        with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE):
            self.enc = tf.nn.embedding_lookup(self.embedding, self.x)
            # scale
            self.enc *= hp.hidden_units**0.5
            # positional encoding
            self.enc += positional_encoding(self.enc)
            self.enc = tf.layers.dropout(self.enc, hp.dropout_rate, training=training)
            for i in range(hp.num_blocks):
                with tf.variable_scope('num_blocks_{}'.format(i), reuse=tf.AUTO_REUSE):
                    self.enc = multihead_attention(queries=self.enc,
                                                   keys=self.enc,
                                                   values=self.enc,
                                                   num_heads=hp.num_heads,
                                                   dropout_rate=hp.dropout_rate,
                                                   training=training,
                                                   causality=False)
                    self.enc = ff(self.enc, num_units=[hp.d_ff, hp.hidden_units])

        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
            self.dec = tf.nn.embedding_lookup(self.embedding, self.decoder_inputs)
            self.dec *= hp.hidden_units**0.5
            self.dec += positional_encoding(self.dec)
            self.dec = tf.layers.dropout(self.dec, hp.dropout_rate, training=training)
            for i in range(hp.num_blocks):
                with tf.variable_scope('num_block_{}'.format(i), reuse=tf.AUTO_REUSE):
                    self.dec = multihead_attention(queries=self.dec,
                                                   keys=self.dec,
                                                   values=self.dec,
                                                   num_heads=hp.num_heads,
                                                   dropout_rate=hp.dropout_rate,
                                                   training=training,
                                                   causality=True,
                                                   scope='self_attention')
                    self.dec = multihead_attention(queries=self.dec,
                                                   keys=self.enc,
                                                   values=self.enc,
                                                   num_heads=hp.num_heads,
                                                   dropout_rate=hp.dropout_rate,
                                                   training=training,
                                                   causality=False,
                                                   scope='vanilla_attention')
                    self.dec = ff(self.dec, num_units=[hp.d_ff, hp.hidden_units])

        self.logits = tf.layers.dense(self.dec, len(en2idx))
        self.preds = tf.to_int32(tf.arg_max(self.logits, dimension=-1))
        self.istarget = tf.to_float(tf.not_equal(self.y, 0))
        self.acc = tf.reduce_sum(
            tf.to_float(tf.equal(self.preds, self.y)) * self.istarget) / (tf.reduce_sum(self.istarget))
        tf.summary.scalar('acc', self.acc)

        if training:
            self.y_smoothed = label_smoothing(tf.one_hot(self.y, depth=len(en2idx)))
            self.loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.y_smoothed)
            self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / tf.reduce_sum(self.istarget)
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                    beta1=0.9,
                                                    beta2=0.98,
                                                    epsilon=1e-8)
            self.train_op = self.optimizer.minimize(self.mean_loss,
                                                    global_step=self.global_step)
            tf.summary.scalar('mean_loss', self.mean_loss)
            self.merged = tf.summary.merge_all()

def __init__(self, hp, train_mode=False):
    self.hp = hp
    self.train_mode = train_mode
    self.token2idx, self.idx2token = load_vocab(hp.vocab_filename)
    self.embeddings = get_token_embeddings(self.hp.VOCAB_SIZE,
                                           self.hp.d_model,
                                           zero_pad=True)

def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    # Weights of the embedding layer; do the two different languages share one embedding table?
    self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                           self.hp.d_model,
                                           zero_pad=True)

def __init__(self, hp, zero_pad=True):
    self.embedding = get_token_embeddings(hp.vocab_size,
                                          hp.num_units,
                                          zero_pad=zero_pad)
    self.num_units = hp.num_units
    self.hp = hp

def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab_path)
    self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                           self.hp.num_units,
                                           zero_pad=True)

def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    # The embedding matrix has shape [vocab_size, d_model] and is randomly initialized;
    # when zero_pad=True, its first row (the padding id) is all zeros.
    self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                           self.hp.d_model,
                                           zero_pad=True)

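# Sketch of a get_token_embeddings helper consistent with the comment above (an
# assumption about the shared helper, not the verbatim source): it creates a trainable
# [vocab_size, num_units] matrix and, with zero_pad=True, keeps row 0 (the padding id)
# fixed at zero by concatenating a zero row in front of the trainable rows.
import tensorflow as tf

def get_token_embeddings(vocab_size, num_units, zero_pad=True, scope="shared_weight_matrix"):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        embeddings = tf.get_variable("weight_mat",
                                     dtype=tf.float32,
                                     shape=(vocab_size, num_units),
                                     initializer=tf.glorot_uniform_initializer())
        if zero_pad:
            # Row 0 becomes the all-zero vector, so padding tokens embed to zeros.
            embeddings = tf.concat((tf.zeros(shape=[1, num_units]),
                                    embeddings[1:, :]), axis=0)
    return embeddings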