def decode_data(session, offset, size, encode_input, decode_input):
    """Greedy-decode one batch and return the predictions for its first example.

    A decoding copy of the seq2seq graph is built with ``feed_previous=True``
    under the current variable scope with ``reuse=True``, so it reads the
    variables that already exist in that scope instead of creating new ones.

    NOTE: every call adds a new set of decoder ops to the default graph.

    Args:
        session: Session used to evaluate the decoder outputs.
        offset: Forwarded to ``get_batch_data``.
        size: Forwarded to ``get_batch_data``.
        encode_input: Encoder-side data forwarded to ``get_batch_data``.
        decode_input: Decoder-side data forwarded to ``get_batch_data``.

    Returns:
        A list with one entry per decoder output: the arg-max over axis 1 of
        that output, taken for the first row of the batch.
    """
    # Decoding graph: shares variables with the graph already built in this scope.
    with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                       reuse=True):
        outputs, states = embedding_rnn_seq2seq(encoder_placeholders,
                                                decoder_placeholders,
                                                cell,
                                                num_encoder_symbols,
                                                num_decoder_symbols,
                                                embedding_size,
                                                output_projection=None,
                                                feed_previous=True)

    test_encoder_inputs, test_decoder_inputs = get_batch_data(
        offset, size, encode_input, decode_input, encoder_length,
        decoder_length)
    feed_dict_test = generate_feed_dict(test_encoder_inputs,
                                        test_decoder_inputs, pad_index)

    # The feed_dict is required here as well.  Use the session that was passed
    # in (the original referenced a name `sess` that is not a parameter) and
    # evaluate all steps in one run instead of one run per decoder step.
    step_values = session.run(outputs, feed_dict=feed_dict_test)
    return [np.argmax(values, axis=1)[0] for values in step_values]
def _generator(self, x):
    """Build the seq2seq generator graph and return its decoder outputs.

    All ops are created under the ``"generator"`` variable scope.  The
    encoder is fed through ``seq_length`` int32 placeholders; the decoder
    input is a zero "GO" step followed by the encoder steps without the last
    one.  The decoder outputs are also stored on ``self.decode_outputs``.

    Args:
        x: Not used by this method.

    Returns:
        The list of decoder outputs produced by ``embedding_rnn_seq2seq``.
    """
    with tf.variable_scope("generator") as scope:
        # Hard-coded model dimensions.
        seq_length, batch_size, vocab_size = 5, 64, 7
        embedding_dim, memory_dim = 50, 100

        encoder_steps = []
        for t in range(seq_length):
            encoder_steps.append(
                tf.placeholder(tf.int32, shape=(None, ), name="inp%i" % t))

        label_steps = []
        for t in range(seq_length):
            label_steps.append(
                tf.placeholder(tf.int32, shape=(None, ), name="labels%i" % t))

        # Uniform weight of 1.0 for every label position (not used further
        # inside this method).
        step_weights = []
        for label_step in label_steps:
            step_weights.append(tf.ones_like(label_step, dtype=tf.float32))

        # "GO" step (all zeros) followed by the encoder steps minus the last.
        go_step = tf.zeros_like(encoder_steps[0], dtype=np.int32, name="GO")
        decoder_steps = [go_step] + encoder_steps[:-1]

        # Created but not consumed by the seq2seq call below.
        initial_memory = tf.zeros((batch_size, memory_dim))

        gru_cell = core_rnn_cell.GRUCell(memory_dim)
        seq2seq_result = legacy_seq2seq.embedding_rnn_seq2seq(
            encoder_steps, decoder_steps, gru_cell, vocab_size, vocab_size,
            embedding_dim)
        self.decode_outputs, decode_memory = seq2seq_result
        return self.decode_outputs
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
    """Run ``embedding_rnn_seq2seq`` with this model's configuration.

    ``cell`` and ``self`` are taken from the enclosing scope.

    Args:
        encoder_inputs: Encoder inputs forwarded to the seq2seq call.
        decoder_inputs: Decoder inputs forwarded to the seq2seq call.
        do_decode: Passed as ``feed_previous``.

    Returns:
        Whatever ``seq2seq.embedding_rnn_seq2seq`` returns.
    """
    model_options = dict(
        num_encoder_symbols=self.vocab_size_encoder,
        num_decoder_symbols=self.vocab_size_decoder,
        output_projection=self.output_projection,
        embedding_size=self.cfg.embedding_size,
        feed_previous=do_decode,
    )
    return seq2seq.embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                                         **model_options)
def seq2seq(x, y, opt, prefix='', feed_previous=False, is_reuse=None,
            is_tied=True):
    """Build an LSTM seq2seq model and return its loss, samples and logits.

    ``x`` is reversed along axis 1 and both ``x`` and ``y`` are unstacked
    along axis 1 into per-step lists before being handed to the seq2seq
    helper.

    Args:
        x: Encoder input; presumably [batch, length] token ids -- TODO confirm.
        y: Decoder input; presumably [batch, length] token ids whose first
            step is the GO symbol -- TODO confirm.
        opt: Options object; ``n_hid``, ``n_words`` and ``embed_size`` are read.
        prefix: Prepended to the ``'lstm_seq2seq'`` variable-scope name.
        feed_previous: Forwarded to the seq2seq helper.
        is_reuse: ``reuse`` flag of the variable scope.
        is_tied: Selects ``embedding_tied_rnn_seq2seq`` when true, otherwise
            ``embedding_rnn_seq2seq``.

    Returns:
        Tuple ``(loss, syn_sents, logits)``: the sequence loss of
        ``outputs[:-1]`` against ``y[1:]`` with unit weights, the per-step
        arg-max stacked along axis 1, and the list of decoder outputs.
    """
    encoder_steps = tf.unstack(tf.reverse(x, axis=[1]), axis=1)
    decoder_steps = tf.unstack(y, axis=1)

    # NOTE(review): the original indentation was lost; the seq2seq call is
    # kept inside the variable scope so that `is_reuse` applies to it -- confirm.
    with tf.variable_scope(prefix + 'lstm_seq2seq', reuse=is_reuse):
        cell = tf.contrib.rnn.LSTMCell(opt.n_hid)
        shared_args = dict(encoder_inputs=encoder_steps,
                           decoder_inputs=decoder_steps,
                           cell=cell,
                           feed_previous=feed_previous,
                           embedding_size=opt.embed_size)
        if is_tied:
            outputs, _ = embedding_tied_rnn_seq2seq(num_symbols=opt.n_words,
                                                    **shared_args)
        else:
            outputs, _ = embedding_rnn_seq2seq(
                num_encoder_symbols=opt.n_words,
                num_decoder_symbols=opt.n_words,
                **shared_args)

    # The decoder outputs are used directly as logits (no extra projection).
    logits = outputs
    syn_sents = tf.stack([math_ops.argmax(step, 1) for step in logits], 1)

    # Output at step t is scored against the decoder input at step t + 1.
    targets = decoder_steps[1:]
    unit_weights = [tf.cast(tf.ones_like(target), tf.float32)
                    for target in targets]
    loss = sequence_loss(outputs[:-1], targets, unit_weights)
    return loss, syn_sents, logits
def Generate(self):
    """Build the generator graph and return its per-step output as float64.

    ``self.context`` and ``self.dec_inp`` are split along axis 1 into
    ``seq_length`` steps and fed to ``embedding_rnn_seq2seq`` with
    ``feed_previous=True``.  The decoder outputs are then projected with
    ``self.weight`` / ``self.bias`` and reshaped to
    ``[batch_size, seq_length, vocab_size]``.

    The debug prints use the call form so the method parses on Python 2 and
    Python 3 alike.

    Returns:
        A ``tf.float64`` tensor: the raw logits when ``self.flag`` is
        ``'pretrain'``, otherwise their softmax.
    """
    # NOTE(review): the original indentation was lost; the whole body is kept
    # inside the "gen" scope (reuse=True) -- confirm against the original.
    with tf.variable_scope("gen", reuse=True):
        # NOTE(review): these two lines read a bare `args` while the rest of
        # the method reads `self.args` -- confirm both are the same object.
        self.weight = tf.get_variable(
            'weight_gen',
            initializer=tf.truncated_normal([args.rnn_size, args.vocab_size],
                                            stddev=0.1),
            dtype=tf.float32)
        self.bias = tf.get_variable(
            'bias_gen',
            initializer=tf.constant(0.1, shape=[args.vocab_size]),
            dtype=tf.float32,
            trainable=False)

        print(self.context.shape)
        inputs = tf.split(self.context, self.args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        print(self.dec_inp[0].shape)
        dec_inp = tf.split(self.dec_inp, self.args.seq_length, 1)
        dec_inp = [tf.squeeze(input_, [1]) for input_ in dec_inp]

        # Keyword names spell out what the original positional arguments were
        # bound to.  NOTE(review): num_decoder_symbols=rnn_size and
        # embedding_size=seq_length look unusual -- confirm they are intended.
        outputs, _ = legacy_seq2seq.embedding_rnn_seq2seq(
            inputs,
            dec_inp,
            self.cell_fn,
            num_encoder_symbols=self.args.vocab_size,
            num_decoder_symbols=self.args.rnn_size,
            embedding_size=self.args.seq_length,
            feed_previous=True,
            output_projection=None)
        print(len(outputs))
        print(outputs[0].shape)

        output_ = tf.reshape(tf.concat(outputs, 1), [-1, self.args.rnn_size])
        print(output_.shape)

        logits = tf.matmul(tf.cast(output_, tf.float32),
                           self.weight) + self.bias
        print(logits.shape)
        logits = tf.reshape(logits, [
            self.args.batch_size, self.args.seq_length, self.args.vocab_size
        ])
        probs = tf.nn.softmax(logits)

        if self.flag != 'pretrain':
            return tf.cast(probs, dtype=tf.float64)
        return tf.cast(logits, dtype=tf.float64)
def seq2seq(x, y, opt, prefix='', feed_previous=False, is_reuse=None,
            is_tied=True):
    """Construct the LSTM encoder/decoder and its training loss.

    Args:
        x: Encoder input; reversed along axis 1, then unstacked along axis 1.
        y: Decoder input; unstacked along axis 1.
        opt: Options object providing ``n_hid``, ``n_words``, ``embed_size``.
        prefix: Prefix of the ``'lstm_seq2seq'`` variable-scope name.
        feed_previous: Forwarded to the seq2seq helper.
        is_reuse: ``reuse`` argument of the variable scope.
        is_tied: When true the tied-embedding helper is used; otherwise the
            helper with separate encoder/decoder symbol counts.

    Returns:
        ``(loss, syn_sents, logits)`` -- the sequence loss, the arg-max
        symbols stacked along axis 1, and the per-step decoder outputs.
    """
    reversed_x = tf.reverse(x, axis=[1])
    enc_steps = tf.unstack(reversed_x, axis=1)
    dec_steps = tf.unstack(y, axis=1)

    # NOTE(review): the original indentation was lost; the model is built
    # inside the variable scope so that `is_reuse` takes effect -- confirm.
    with tf.variable_scope(prefix + 'lstm_seq2seq', reuse=is_reuse):
        lstm = tf.contrib.rnn.LSTMCell(opt.n_hid)
        if not is_tied:
            outputs, _ = embedding_rnn_seq2seq(
                encoder_inputs=enc_steps,
                decoder_inputs=dec_steps,
                cell=lstm,
                feed_previous=feed_previous,
                num_encoder_symbols=opt.n_words,
                num_decoder_symbols=opt.n_words,
                embedding_size=opt.embed_size)
        else:
            outputs, _ = embedding_tied_rnn_seq2seq(
                encoder_inputs=enc_steps,
                decoder_inputs=dec_steps,
                cell=lstm,
                feed_previous=feed_previous,
                num_symbols=opt.n_words,
                embedding_size=opt.embed_size)

    logits = outputs
    per_step_argmax = []
    for step_logits in logits:
        per_step_argmax.append(math_ops.argmax(step_logits, 1))
    syn_sents = tf.stack(per_step_argmax, 1)

    # Each output is compared with the *next* decoder input; the last output
    # has no target and is dropped.
    shifted_targets = dec_steps[1:]
    ones = []
    for target in shifted_targets:
        ones.append(tf.cast(tf.ones_like(target), tf.float32))
    loss = sequence_loss(outputs[:-1], shifted_targets, ones)

    return loss, syn_sents, logits
def _step_placeholders(dtype, name_template):
    """Return one rank-1 placeholder per decoder step, named ``name_template % step``."""
    return [
        tf.placeholder(dtype, shape=[None], name=name_template % step)
        for step in range(decoder_length)
    ]


# Decoder inputs, targets and per-target loss weights: one placeholder per step.
decoder_placeholders = _step_placeholders(tf.int32, "decoder_%d")
target_placeholders = _step_placeholders(tf.int32, "target_%d")
target_weights_placeholders = _step_placeholders(tf.float32,
                                                 "decoder_weight_%d")

# Training graph: the decoder reads the fed decoder inputs (feed_previous=False).
outputs, states = embedding_rnn_seq2seq(
    encoder_inputs=encoder_placeholders,
    decoder_inputs=decoder_placeholders,
    cell=cell,
    num_encoder_symbols=num_encoder_symbols,
    num_decoder_symbols=num_decoder_symbols,
    embedding_size=embedding_size,
    output_projection=None,
    feed_previous=False)

loss = sequence_loss(outputs, target_placeholders,
                     target_weights_placeholders)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

# In[87]:

# GRUCell experiment: start again from an empty default graph.
tf.reset_default_graph()
RNN_CELL_TYPE = 'GRUCell'
learning_rate = 1.0
# Decoder input: fed through its own placeholders, one per time step.
dec_inp = [
    tf.placeholder(tf.int32, shape=(None, ), name="de_inp%i" % t)
    for t in range(seq_length)
]

# Initial memory value for recurrence (not passed to the seq2seq call below).
prev_mem = tf.zeros((batch_size, memory_dim))

cell = LSTMCell(memory_dim)

dec_outputs, dec_memory = embedding_rnn_seq2seq(enc_inp,
                                                dec_inp,
                                                cell,
                                                vocab_size + 1,
                                                vocab_size + 2,
                                                embedding_size=embedding_dim,
                                                feed_previous=False)

# The 4th positional parameter of sequence_loss is `average_across_timesteps`,
# not a vocabulary size; the stale `vocab_size` argument only acted as a
# truthy flag, so it is dropped and the default (True) is used.
loss = sequence_loss(dec_outputs, labels, weights)

tf.summary.scalar("loss", loss)

# Norm of element 1 of the final decoder state.
# NOTE(review): presumably the `h` part of the LSTM state tuple -- confirm.
magnitude = tf.sqrt(tf.reduce_sum(tf.square(dec_memory[1])))
tf.summary.scalar("magnitude at t=1", magnitude)

#summary_op = tf.merge_all_summaries()

learning_rate = 0.05
momentum = 0.9
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
train_op = optimizer.minimize(loss)

# tf.initialize_all_variables() is deprecated; this is its replacement.
sess.run(tf.global_variables_initializer())
def def_model(hparams):
    """Reset the default graph, build the seq2seq model and return its handles.

    Args:
        hparams: Hyper-parameter object; ``enc_sentence_length``,
            ``dec_sentence_length``, ``hidden_size``, ``enc_vocab_size``,
            ``dec_vocab_size`` and ``enc_emb_size`` are read.

    Returns:
        dict mapping names to the placeholders, the predictions tensor, the
        loss tensor and the training op.
    """
    tf.reset_default_graph()

    encoder_index_input = tf.placeholder(
        tf.int64,
        shape=[None, hparams.enc_sentence_length],
        name='input_sentence')
    # Raw-string input; intended for end-to-end (sentence in) usage, which is
    # not wired into the graph here.
    encoder_sentence_input = tf.placeholder(dtype=tf.string, name='input')
    decoder_index_input = tf.placeholder(
        tf.int64,
        shape=[None, hparams.dec_sentence_length + 1],
        name='output_sentences')

    # batch-major => time-major
    enc_inputs_t = tf.transpose(encoder_index_input, [1, 0])
    dec_inputs_t = tf.transpose(decoder_index_input, [1, 0])

    rnn_cell = BasicRNNCell(hparams.hidden_size)

    with tf.variable_scope("embedding_rnn_seq2seq"):
        encoder_steps = tf.unstack(enc_inputs_t)  # list, one entry per step
        decoder_steps = tf.unstack(dec_inputs_t)  # list, one entry per step
        dec_outputs, dec_last_state = embedding_rnn_seq2seq(
            encoder_inputs=encoder_steps,
            decoder_inputs=decoder_steps,
            cell=rnn_cell,
            # +2 for the added <s> and <unk> symbols
            num_encoder_symbols=hparams.enc_vocab_size + 2,
            # +3 for the added <s>, <unk> and <pad> symbols
            num_decoder_symbols=hparams.dec_vocab_size + 3,
            embedding_size=hparams.enc_emb_size,
            feed_previous=True)

    # predictions: arg-max over the last axis, transposed back to batch-major.
    stacked_outputs = tf.stack(dec_outputs)
    best_symbols = tf.argmax(stacked_outputs, axis=-1)
    predictions = tf.transpose(best_symbols, [1, 0])

    # The one-hot decoder inputs serve as labels for the stacked outputs.
    labels = tf.one_hot(dec_inputs_t, hparams.dec_vocab_size + 3)
    logits = tf.stack(dec_outputs)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels,
                                                               logits=logits)
    loss = tf.reduce_mean(cross_entropy)

    training_op = tf.train.RMSPropOptimizer(
        learning_rate=0.0001).minimize(loss)

    return {
        # input sentence
        "encoder_sentence_input": encoder_sentence_input,
        # output sentence
        # NOTE(review): this maps to the *encoder* sentence placeholder; no
        # decoder sentence placeholder exists in this function -- confirm.
        "decoder_sentence_input": encoder_sentence_input,
        # indices of the input sentence
        "encoder_index_input": encoder_index_input,
        # indices of the output sentence
        "decoder_index_input": decoder_index_input,
        # model output
        "predictions": predictions,
        "loss": loss,
        "training_op": training_op
    }
def create_network(self):
    """Build the TFLearn seq2seq network and store it on ``self``.

    Sets ``self.seq2seq_model``, ``self.n_input_symbols``,
    ``self.n_output_symbols``, ``self.net`` and ``self.model``.  The network
    is always built in training mode (``feed_previous`` is False) with the
    ``"embedding_attention"`` model.

    Raises:
        Exception: if ``self.seq2seq_model`` names an unknown model.
    """
    self.seq2seq_model = "embedding_attention"
    mode = "train"
    GO_VALUE = self.out_max_int + 1

    self.net = tflearn.input_data(shape=[None, self.in_seq_len],
                                  dtype=tf.int32,
                                  name="XY")

    # Encoder inputs: list of self.in_seq_len tensors, each of shape [-1].
    encoder_slice = tf.slice(self.net, [0, 0], [-1, self.in_seq_len],
                             name="enc_in")
    encoder_inputs = tf.unstack(encoder_slice, axis=1)

    # Decoder inputs: list of self.out_seq_len tensors, sliced from column 0
    # of the same input tensor.
    decoder_slice = tf.slice(self.net, [0, 0], [-1, self.out_seq_len],
                             name="dec_in")
    decoder_inputs = tf.unstack(decoder_slice, axis=1)

    # Put GO first and drop the last decoder input.
    go_input = tf.multiply(
        tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
    decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]

    feed_previous = mode != "train"

    self.n_input_symbols = self.in_max_int + 1  # default: integers 0..9
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol

    # Three stacked cells, each obtained from a separate getCell(128) call.
    cell = rnn.MultiRNNCell([self.getCell(128) for _ in range(3)])

    if self.seq2seq_model not in ("embedding_rnn", "embedding_attention"):
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                        self.seq2seq_model)

    common_args = dict(num_encoder_symbols=self.n_input_symbols,
                       num_decoder_symbols=self.n_output_symbols,
                       embedding_size=1000,
                       feed_previous=feed_previous)
    if self.seq2seq_model == "embedding_rnn":
        model_outputs, states = seq2seq.embedding_rnn_seq2seq(
            encoder_inputs, decoder_inputs, cell, **common_args)
    else:
        model_outputs, states = seq2seq.embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_heads=4,
            initial_state_attention=False,
            **common_args)

    # Lets TFLearn know what to save and restore.
    collection_name = tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model"
    tf.add_to_collection(collection_name, model_outputs)

    # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
    self.net = tf.stack(model_outputs, axis=1)

    # Placeholder for the target variable (i.e. the trainY input).
    with tf.name_scope("TargetsData"):
        targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                 dtype=tf.int32,
                                 name="Y")

    self.net = tflearn.regression(self.net,
                                  placeholder=targetY,
                                  optimizer='adam',
                                  learning_rate=0.001,
                                  loss=self.sequence_loss,
                                  metric=self.accuracy,
                                  name="Y")
    self.model = tflearn.DNN(self.net)
def build_graph(self):
    """Reset the default graph and build the training and testing seq2seq graphs.

    Creates and stores on ``self``: ``enc_ip``, ``labels``, ``dec_ip``,
    ``keep_prob``, ``decode_outputs`` / ``decode_states`` (training graph),
    ``decode_outputs_test`` / ``decode_states_test`` (same parameters, with
    ``feed_previous=True``), ``loss`` and ``train_op``.
    """
    tf.reset_default_graph()

    # Encoder inputs: list of index placeholders of length xseq_len.
    self.enc_ip = [
        tf.placeholder(shape=[None, ], dtype=tf.int64,
                       name='ei_{}'.format(t))
        for t in range(self.xseq_len)
    ]

    # Labels that represent the real outputs.
    # NOTE(review): these reuse the 'ei_{}' name pattern of the encoder
    # inputs; the name is kept unchanged here -- confirm it is intentional.
    self.labels = [
        tf.placeholder(shape=[None, ], dtype=tf.int64,
                       name='ei_{}'.format(t))
        for t in range(self.yseq_len)
    ]

    # Decoder inputs: 'GO' + [y1, y2, ... y_t-1]
    self.dec_ip = [
        tf.zeros_like(self.enc_ip[0], dtype=tf.int64, name='GO')
    ] + self.labels[:-1]

    # Keep probability of the dropout applied to every cell's output.
    self.keep_prob = tf.placeholder(tf.float32)

    def make_cell():
        """Return a fresh dropout-wrapped LSTM cell for one layer."""
        return DropoutWrapper(BasicLSTMCell(self.emb_dim,
                                            state_is_tuple=True),
                              output_keep_prob=self.keep_prob)

    # Stack cells together: n-layered model.  Every layer gets its own cell
    # object; `[cell] * n` would place the *same* object in every layer.
    stacked_lstm = MultiRNNCell(
        [make_cell() for _ in range(self.num_layers)], state_is_tuple=True)

    # One scope for both graphs so the testing model shares the training
    # model's parameters.
    with tf.variable_scope('decoder') as scope:
        # Training model.
        self.decode_outputs, self.decode_states = embedding_rnn_seq2seq(
            self.enc_ip, self.dec_ip, stacked_lstm, self.xvocab_size,
            self.yvocab_size, self.emb_dim)

        # Share parameters with everything built below in this scope.
        scope.reuse_variables()

        # Testing model: the output of the previous time step is fed as
        # input to the next time step.
        self.decode_outputs_test, self.decode_states_test = embedding_rnn_seq2seq(
            self.enc_ip, self.dec_ip, stacked_lstm, self.xvocab_size,
            self.yvocab_size, self.emb_dim, feed_previous=True)

    # Training loss with a uniform weight of 1.0 for every label.
    # TODO : add parameter hint
    loss_weights = [
        tf.ones_like(label, dtype=tf.float32) for label in self.labels
    ]
    # The 4th positional parameter of sequence_loss is
    # `average_across_timesteps`; the stale vocabulary-size argument only
    # acted as a truthy flag, so it is dropped (the default is True).
    self.loss = sequence_loss(self.decode_outputs, self.labels, loss_weights)

    # Train op to minimize the loss.
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.lr).minimize(self.loss)
def train(batch_size):
    """Build the seq2seq model, train it and periodically evaluate it.

    The model is trained to reproduce its encoder input (the loss targets are
    ``enc_inp``).  Training summaries are written every step; every 500 steps
    the model is evaluated for each entry of ``test_index`` and its summary is
    written to the matching test writer.

    The session and all summary writers are closed when training finishes or
    fails, so buffered events are flushed to disk.

    Args:
        batch_size: Not used by this function.
    """
    # using embedding_rnn_seq2seq model
    dec_outputs, _ = legacy_seq2seq.embedding_rnn_seq2seq(
        encoder_inputs=enc_inp,
        decoder_inputs=dec_inp,
        cell=set_cell(),
        num_encoder_symbols=word_size,
        num_decoder_symbols=word_size,
        embedding_size=embedding_dim)

    # calc loss
    loss = legacy_seq2seq.sequence_loss(dec_outputs, enc_inp, weights)

    # using optimizer
    optimizer = set_optimizer(starter_learning_rate).minimize(loss)

    # define session and summary operation
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf.summary.scalar('loss', loss)
        summary_op = tf.summary.merge_all()

        train_writer = tf.summary.FileWriter(logs_path + r'/train',
                                             sess.graph)
        test_writer = [
            tf.summary.FileWriter(logs_path + suffix)
            for suffix in (r'/test_10', r'/test_20', r'/test_30',
                           r'/test_50')
        ]

        try:
            # training
            for step in range(iterations):
                feed_dict, _ = set_feed_dict(train_length)
                _, train_loss, summary = sess.run(
                    [optimizer, loss, summary_op], feed_dict)
                train_writer.add_summary(summary, step)

                if step % 10 == 0:
                    print("itrations: %d, train_loss: %.5f." %
                          (step, train_loss),
                          end='\r')

                # testing
                if step % 500 == 0:
                    for i in test_index:
                        # test length
                        test_len = test_len_list[i]
                        feed_dict, X_batch = set_feed_dict(test_len)
                        dec_outputs_batch, test_loss, summary = sess.run(
                            [dec_outputs, loss, summary_op], feed_dict)
                        test_writer[i].add_summary(summary, step)
                        test_writer[i].flush()

                        testing_acc = cal_acc(X_batch, dec_outputs_batch,
                                              test_len)
                        print(
                            "test:%d/%d, itrations: %d, test_loss: %.5f, test_acc: %.5f%%."
                            % (test_len, seq_length, step, test_loss,
                               testing_acc * 100.0))

                        # NOTE(review): the original indentation was lost;
                        # the matrix dump is assumed to run once per test
                        # length -- confirm.
                        if output_matrix:
                            print(
                                "----------------------------------matrix encode---------------------------------"
                            )
                            # `row` (not `i`) so the test index above is not
                            # shadowed.
                            for row in range(4):
                                print(X_batch[row])
                            print(
                                "----------------------------------matrix decode---------------------------------"
                            )
                            Y_batch = np.array(dec_outputs_batch).argmax(
                                axis=2)
                            for row in range(4):
                                print(Y_batch[row])
                            print(
                                '--------------------------------------------------------------------------------'
                            )
        finally:
            train_writer.close()
            for writer in test_writer:
                writer.close()
# Uniform loss weight of 1.0 for every label position.
weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels]

# Decoder input: prepend some "GO" token and drop the final
# token of the encoder input
dec_inp = ([tf.zeros_like(enc_inp[0], dtype=np.int32, name="GO")] +
           enc_inp[:-1])

# Initial memory value for recurrence (not passed to the seq2seq call below).
prev_mem = tf.zeros((batch_size, memory_dim))

cell = single_cell()

dec_outputs, dec_memory = legacy_seq2seq.embedding_rnn_seq2seq(
    enc_inp, dec_inp, cell, vocab_size, vocab_size, embedding_size=memory_dim)

# The 4th positional parameter of legacy_seq2seq.sequence_loss is
# `average_across_timesteps`, not a vocabulary size; the stale `vocab_size`
# argument only acted as a truthy flag, so it is dropped (the default is True).
loss = legacy_seq2seq.sequence_loss(dec_outputs, labels, weights)

print("scalar summary:", tf.summary.scalar("loss", loss))

# Norm of element 1 of the final decoder state.
magnitude = tf.sqrt(tf.reduce_sum(tf.square(dec_memory[1])))
print(tf.summary.scalar("magnitude at t=1", magnitude))

summary_op = tf.summary.merge_all()  # formerly merge_all_summaries()
print("all summaries:", summary_op)

# We build the optimizer
def model(self,
          mode="train",
          num_layers=2,
          cell_size=32,
          cell_type="BasicLSTMCell",
          embedding_size=20,
          learning_rate=0.0001,
          tensorboard_verbose=0,
          checkpoint_path=None):
    """Build the seq2seq network and wrap it in a ``tflearn.DNN``.

    The input tensor "XY" holds the encoder inputs in its first
    ``self.in_seq_len`` columns and the decoder inputs in the following
    ``self.out_seq_len`` columns.

    Args:
        mode: ``"train"`` or ``"predict"``; in any mode other than
            ``"train"`` the decoder is built with ``feed_previous=True``.
        num_layers: Number of stacked RNN cells.
        cell_size: Number of units passed to each cell.
        cell_type: Name of the cell class looked up on ``core_rnn_cell``.
        embedding_size: Embedding size passed to the seq2seq function.
        learning_rate: Learning rate passed to ``tflearn.regression``.
        tensorboard_verbose: Forwarded to ``tflearn.DNN``.
        checkpoint_path: Forwarded to ``tflearn.DNN``; defaults to
            ``s2s_checkpoint.tfl`` inside ``self.data_dir`` when that is set.

    Returns:
        The ``tflearn.DNN`` model.

    Raises:
        AssertionError: if ``mode`` is not ``"train"`` or ``"predict"``.
        Exception: if ``self.seq2seq_model`` names an unknown model.
    """
    assert mode in ["train", "predict"]

    checkpoint_path = checkpoint_path or (
        "%s%ss2s_checkpoint.tfl" %
        (self.data_dir or "", "/" if self.data_dir else ""))

    # Unique integer value used to trigger decoder outputs in the seq2seq RNN.
    GO_VALUE = self.out_max_int + 1

    network = tflearn.input_data(
        shape=[None, self.in_seq_len + self.out_seq_len],
        dtype=tf.int32,
        name="XY")

    # Encoder inputs: list of self.in_seq_len elements, each [-1].
    encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len],
                              name="enc_in")
    encoder_inputs = tf.unstack(encoder_inputs, axis=1)

    # Decoder inputs: list of self.out_seq_len elements, each [-1].
    decoder_inputs = tf.slice(network, [0, self.in_seq_len],
                              [-1, self.out_seq_len],
                              name="dec_in")
    decoder_inputs = tf.unstack(decoder_inputs, axis=1)

    # Insert the "GO" symbol as the first decoder input; drop the last one.
    go_input = tf.multiply(
        tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
    decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]

    feed_previous = not (mode == "train")

    self.n_input_symbols = self.in_max_int + 1  # default: integers 0..9
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol

    def make_cell():
        """Return a fresh cell of the requested type for one layer."""
        return getattr(core_rnn_cell, cell_type)(cell_size,
                                                 state_is_tuple=True)

    if num_layers == 1:
        cell = make_cell()
    else:
        # Every layer gets its own cell object; `[cell] * n` would place the
        # *same* object in every layer.
        cell = core_rnn_cell.MultiRNNCell(
            [make_cell() for _ in range(num_layers)])

    if self.seq2seq_model == "embedding_rnn":
        model_outputs, states = legacy_seq2seq.embedding_rnn_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            feed_previous=feed_previous)
    elif self.seq2seq_model == "embedding_attention":
        model_outputs, states = legacy_seq2seq.embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            num_heads=1,
            initial_state_attention=False,
            feed_previous=feed_previous)
    else:
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                        self.seq2seq_model)

    # For TFLearn to know what to save and restore.
    tf.add_to_collection(
        tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model", model_outputs)

    # model_outputs is a list with one tensor per decoder input; stacked to
    # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols].
    network = tf.stack(model_outputs, axis=1)

    # Placeholder for the target variable (i.e. the trainY input).
    with tf.name_scope("TargetsData"):
        targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                 dtype=tf.int32,
                                 name="Y")

    network = tflearn.regression(network,
                                 placeholder=targetY,
                                 optimizer='adam',
                                 learning_rate=learning_rate,
                                 loss=self.sequence_loss,
                                 metric=self.accuracy,
                                 name="Y")

    model = tflearn.DNN(network,
                        tensorboard_verbose=tensorboard_verbose,
                        checkpoint_path=checkpoint_path)
    return model
for t in range(seq_length) ] labels = [ tf.placeholder(tf.int32, shape=(None, ), name="labels%i" % t) for t in range(seq_length) ] weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels] decode_input = ([tf.zeros_like(encode_input[0], dtype=np.int32, name="GO")] + encode_input[:-1]) previous_memory = tf.zeros((batch_size, memory_dim)) cell = core_rnn_cell.GRUCell(memory_dim) decode_outputs, decode_memory = legacy_seq2seq.embedding_rnn_seq2seq( encode_input, decode_input, cell, vocab_size, vocab_size, embedding_dim) loss = legacy_seq2seq.sequence_loss(decode_outputs, labels, weights, vocab_size) tf.summary.scalar("loss", loss) manitude = tf.sqrt(tf.reduce_sum(tf.square(decode_memory[1]))) tf.summary.scalar("manitude at t=1", manitude) summary_op = tf.summary.merge_all() learning_rate = 0.05 momentum = 0.9 optimizer = tf.train.MomentumOptimizer(learning_rate, momentum) train_op = optimizer.minimize(loss)
for labels_t in labels] decode_input = ([ tf.zeros_like( tensor=encode_input[0], dtype=np.int32, name="GO")] + encode_input[:-1]) previous_memory = tf.zeros(shape=(batch_size, memory_dim)) cell = core_rnn_cell.GRUCell(num_units=memory_dim) decode_outputs, decode_memory = legacy_seq2seq.embedding_rnn_seq2seq( encoder_inputs=encode_input, decoder_inputs=decode_input, cell=cell, num_encoder_symbols=vocab_size, num_decoder_symbols=vocab_size, embedding_size=embedding_dim) loss = legacy_seq2seq.sequence_loss( logits=decode_outputs, targets=labels, weights=weights) tf.summary.scalar("loss", loss) manitude = tf.sqrt(tf.reduce_sum(tf.square(decode_memory[1]))) tf.summary.scalar("manitude at t=1", manitude) summary_op = tf.summary.merge_all()
def build_model(self):
    """Build the seq2seq graph: cells, placeholders, decoder and loss/outputs.

    Stores on ``self``: ``cell``, ``encoder_inputs``, ``decoder_inputs``,
    ``decoder_targets``, ``decoder_weights`` and then either ``outputs``
    (when ``self.args.test`` is not None) or ``loss`` (otherwise).
    """
    outputProjection = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if 0 < self.args.softmaxSamples < self.text_data.getVocabularySize():
        # getVocabularySize must be *called*: the projection shape needs the
        # integer vocabulary size, not the bound method object.
        outputProjection = ProjectionOp(
            (self.text_data.getVocabularySize(), self.args.hidden_size),
            scope='softmax_projection',
            dtype=tf.float32)

        # NOTE(review): this helper is defined but not passed to
        # sequence_loss below -- confirm whether it should be.
        def sampledSoftmax(labels, inputs):
            # Add one dimension (nb of true classes, here 1).
            labels = tf.reshape(labels, [-1, 1])

            # We need to compute the sampled_softmax_loss using 32 bit floats
            # to avoid numerical instabilities.
            localWt = tf.cast(outputProjection.W_t, tf.float32)
            localB = tf.cast(outputProjection.b, tf.float32)
            localInputs = tf.cast(inputs, tf.float32)

            return tf.cast(
                tf.nn.sampled_softmax_loss(
                    localWt,  # Should have shape [num_classes, dim]
                    localB,
                    labels,
                    localInputs,
                    # The number of classes to randomly sample per batch
                    self.args.softmaxSamples,
                    # The number of classes
                    self.text_data.getVocabularySize()),
                tf.float32)

    # Define the multi-layer RNN cell: one fresh dropout-wrapped LSTM per layer.
    def create_cell():
        cell = tf.nn.rnn_cell.LSTMCell(self.args.hidden_size)
        cell = tf.nn.rnn_cell.DropoutWrapper(
            cell, input_keep_prob=1.0, output_keep_prob=self.args.dropout)
        return cell

    self.cell = tf.nn.rnn_cell.MultiRNNCell(
        [create_cell() for _ in range(self.args.rnn_layers)])

    # Define the placeholders: one per time step.
    with tf.name_scope("encoder_placeholder"):
        self.encoder_inputs = [
            tf.placeholder(tf.int32, [None, ])
            for _ in range(self.args.maxLengthEnco)
        ]
    with tf.name_scope("decoder_placeholder"):
        self.decoder_inputs = [
            tf.placeholder(tf.int32, [None, ], name='decoder_inputs')
            for _ in range(self.args.maxLengthDeco)
        ]
        self.decoder_targets = [
            tf.placeholder(tf.int32, [None, ], name='decoder_targets')
            for _ in range(self.args.maxLengthDeco)
        ]
        self.decoder_weights = [
            tf.placeholder(tf.float32, [None, ], name='decoder_weights')
            for _ in range(self.args.maxLengthDeco)
        ]

    decoder_output, state = embedding_rnn_seq2seq(
        self.encoder_inputs,
        self.decoder_inputs,
        self.cell,
        self.num_encoder_symbols,
        self.num_decoder_symbols,
        self.args.embedding_size,
        output_projection=None,
        feed_previous=bool(self.args.test),
        dtype=None,
        scope=None)

    # For testing only
    if self.args.test is not None:
        if not outputProjection:
            self.outputs = decoder_output
        else:
            self.outputs = [
                outputProjection(output) for output in decoder_output
            ]
    else:
        self.loss = sequence_loss(logits=decoder_output,
                                  targets=self.decoder_targets,
                                  weights=self.decoder_weights)
        tf.summary.scalar('loss', self.loss)  # Keep track of the cost

    print("模型构建完毕")
# <Model> enc_in = tf.placeholder(tf.int32, shape=[batch_size, input_max_len]) labels = tf.placeholder(tf.int32, shape=[batch_size, output_max_len]) dec_in = tf.placeholder(tf.int32, shape=[batch_size, output_max_len]) enc_in2 = tf.unstack(enc_in, axis=1) labels2 = tf.unstack(labels, axis=1) dec_in2 = tf.unstack(dec_in, axis=1) with tf.variable_scope('decoder'): cell = tf.contrib.rnn.GRUCell(num_units) decode_outputs, decode_states = seq2seq.embedding_rnn_seq2seq( enc_in2, dec_in2, cell, vocab_size, vocab_size, embed_dim, output_projection=None, feed_previous=False) with tf.variable_scope('decoder', reuse=True): cell = tf.contrib.rnn.GRUCell(num_units) decode_outputs_t, decode_states_t = seq2seq.embedding_rnn_seq2seq( enc_in2, dec_in2, cell, vocab_size, vocab_size, embed_dim, output_projection=None,