def main():
    global simi_log
    global server_port
    global thread_num
    global word_vec_dim
    # global top_num_simi
    global work_data_dir
    global work_src_file
    global work_src_matrix
    global work_test_matrix
    cp = ConfigParser.SafeConfigParser()
    cp.read('conf_aysimi_skipthought.conf')
    server_port = cp.get('server', 'port')
    thread_num = int(cp.get('simi_calc', 'thread_num'))
    # word_vec_dim = int(cp.get('simi_calc', 'word_vec_dim'))
    word_vec_dim = FLAGS.num_units
    # top_num_simi = int(cp.get('simi_calc', 'top_num_simi'))
    work_data_dir = cp.get('simi_calc', 'work_data_dir')
    work_src_file = cp.get('simi_calc', 'work_src_file')
    work_src_matrix = cp.get('simi_calc', 'work_src_matrix')
    work_test_matrix = cp.get('simi_calc', 'work_test_matrix')
    simi_log = FinalLogger('aysimi_skipthought.log')
    init_tf_model()
    simi_log.info('---start anyou simi skipthought server---')
    application.listen(server_port)
    tornado.ioloop.IOLoop.instance().start()
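# A minimal sketch of the conf_aysimi_skipthought.conf file that main() reads.
# Section and option names are taken from the cp.get(...) calls above; the
# values shown are illustrative assumptions, not real deployment settings.
#
#   [server]
#   port = 8888
#
#   [simi_calc]
#   thread_num = 4
#   work_data_dir = ./data/
#   work_src_file = src.txt
#   work_src_matrix = src_matrix.npy
#   work_test_matrix = test_matrix.npy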
def main():
    global simi_log
    global work_data_dir
    global thread_num
    global word_vec_dim
    global class_server_url
    global seg_a_word
    global minshi_firstlist
    global minshi_nodemap
    global minshi_label_map
    global xingshi_firstlist
    global xingshi_nodemap
    global xingshi_label_map
    simi_log = FinalLogger('aysimi_skipthought_close_beta.log')
    seg_a_word = SegCNAWord()
    cp = ConfigParser.SafeConfigParser()
    cp.read('conf_aysimi_skipthought_close.conf')
    server_port = cp.get('server', 'port')
    work_data_dir = cp.get('simi_calc', 'work_data_dir')
    thread_num = int(cp.get('simi_calc', 'thread_num'))
    word_vec_dim = FLAGS.num_units
    class_server_url = cp.get('class_server', 'server_url')
    node.loadConfig(work_data_dir + 'AY_minshi.xml', minshi_firstlist,
                    minshi_nodemap, minshi_label_map)
    node.loadConfig(work_data_dir + 'AY_xingshi.xml', xingshi_firstlist,
                    xingshi_nodemap, xingshi_label_map)
    init_tf_model()
    simi_log.info('---anyou similar skipthought close beta start server---')
    application.listen(server_port)
    tornado.ioloop.IOLoop.instance().start()
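# The close-beta variant reads conf_aysimi_skipthought_close.conf, which adds
# a [class_server] section on top of the [server]/[simi_calc] layout sketched
# earlier. A minimal sketch, again with illustrative values:
#
#   [server]
#   port = 8889
#
#   [simi_calc]
#   work_data_dir = ./data/
#   thread_num = 4
#
#   [class_server]
#   server_url = http://localhost:9000/classify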
                continue
            prev_predict, next_predict = sess.run(
                [skip_thought_model.prev_predict_logits,
                 skip_thought_model.next_predict_logits],
                feed_dict=eval_feed_dict(skip_thought_model, pred_batch))
            train_log.info('%d, %s', l, '------')
            for pred_i in prev_predict:
                pred_str = ''
                for pred_j in pred_i:
                    pred_str += text_data.vocab.index2word[pred_j] + ','
                train_log.info(pred_str)
            for next_i in next_predict:
                next_str = ''
                for next_j in next_i:
                    next_str += text_data.vocab.index2word[next_j] + ','
                train_log.info(next_str)
        # save session
        skip_thought_model.saver.save(sess,
                                      FLAGS.checkpoint_dir + 'model.ckpt',
                                      global_step=i)
    train_log.info('Elapsed time: ' + str(time.time() - start_time))


if __name__ == '__main__':
    train_log = FinalLogger('skip_thought_train.log')
    train_log.info('start')
    tf.app.run()
    train_log.info('ok')
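# The eval_feed_dict helper called above is not shown in this file. A plausible
# sketch follows, assuming the batch is the 5-tuple yielded by
# TextData.triples_data_iterator and that each Batch exposes the padded index
# matrix, per-line lengths, and mask weights. The attribute names .data,
# .seq_lengths, and .weights are hypothetical; the real Batch class may differ.
def eval_feed_dict_sketch(model, batch):
    enc_inp, prev_inp, prev_targ, next_inp, next_targ = batch
    return {
        model.curr_source_data: enc_inp.data,
        model.curr_source_seq_len: enc_inp.seq_lengths,
        model.prev_target_data_input: prev_inp.data,
        model.prev_target_data_output: prev_targ.data,
        model.prev_target_mask: prev_targ.weights,
        model.prev_target_seq_len: prev_targ.seq_lengths,
        model.next_target_data_input: next_inp.data,
        model.next_target_data_output: next_targ.data,
        model.next_target_mask: next_targ.weights,
        model.next_target_seq_len: next_targ.seq_lengths,
    }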
class TextData:
    NUM_LINE_TRIPLES = 3
    ONE_LINE_TOKEN = '<one>'

    def __init__(self, fname, line_min_words=5,
                 line_process_fn=lambda x: x.strip(),
                 max_vocab_size=100000, max_len=100, verbose=10000):
        """Class for reading text data and making batches.

        Args:
            fname (str): File with data.
            line_min_words (int): Minimum number of words for a sentence
                to be kept.
            line_process_fn (callable): Line processing function (str -> str).
                Use it for lemmatization, stopword removal and the like.
                Default: lambda x: x.strip()
            max_vocab_size (int): Maximum vocabulary size. The most frequent
                words are kept.
            max_len (int): Maximum sentence length used for padding and
                truncation.
            verbose (int): Verbosity level when reading data.
        """
        self.verbose = verbose
        self._logger = FinalLogger(
            os.path.dirname(__file__) + '/text_data.log')
        self.fname = fname
        self.line_min_words = line_min_words
        self.max_len = max_len
        self.max_vocab_size = max_vocab_size
        self.line_process_fn = line_process_fn
        self._check_args()

        self.vocab = Vocab()
        self.total_lines = None
        self.prev_sent = None
        self.curr_sent = None
        self.next_sent = None
        # test
        # self.len_over_100 = 0
        # self.num_sents = 0
        # self.len_sents = 0
        self._build_vocabulary_and_stats()

    def _check_args(self):
        import os
        assert self.max_vocab_size > 0
        assert os.path.isfile(self.fname)

    def _build_vocabulary_and_stats(self):
        """Builds vocabulary, calculates maximum length and total number of
        lines in the file.
        """
        with open(self.fname) as f:
            # self.vocab = Vocab()
            self.total_lines = 0
            for line in f:
                tokens = self._tok_line(line)
                # tmp_max_len = max(map(len, map(self._tok_line,
                #     seg_sentence(line)))) + 2  # 2 = len([<go>, <eos>])
                # self.len_over_100 += len(filter(
                #     lambda x: x >= 100,
                #     map(len, map(self._tok_line, seg_sentence(line)))))
                # self.num_sents += len(map(self._tok_line, seg_sentence(line)))
                # self.len_sents += sum(map(len, map(self._tok_line,
                #     seg_sentence(line))))
                # if tmp_max_len > self.max_len:
                #     self.max_len = tmp_max_len
                if not tokens:
                    continue
                self.vocab.add_words(tokens)
                self.total_lines += 1
                if self.total_lines % self.verbose == 0:
                    self._logger.info('Read\t{0} lines.'.format(
                        self.total_lines))
        self.vocab.cut_by_freq(self.max_vocab_size)
        self._logger.info('Read\t{0} lines.'.format(self.total_lines))
        self._logger.info('Done building vocab: %d and stats.',
                          len(self.vocab))

    def pro_triples_data(self, batch_size):
        """Generates triples data: reads lines from the file and encodes
        words.
        """
        self.prev_sent = list()
        self.curr_sent = list()
        self.next_sent = list()
        with open(self.fname) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                seg_line = seg_sentence(line, self.line_min_words)
                if not seg_line:
                    continue
                triples = self.make_lines_triples(seg_line)
                self.prev_sent.extend(triples[0])
                self.curr_sent.extend(triples[1])
                self.next_sent.extend(triples[2])
                if len(self.curr_sent) < batch_size:
                    continue
                # TODO: Optimize batch-data initialization.
                for data_iter in self.triples_data_iterator(
                        self.prev_sent, self.curr_sent, self.next_sent,
                        max_len=self.max_len, batch_size=batch_size):
                    yield data_iter
                self.prev_sent = list()
                self.curr_sent = list()
                self.next_sent = list()

    def pro_tuple_data(self, out_file_name, batch_size=1):
        """Generates one-tuple data: reads lines from the given file and
        encodes words.
""" if not out_file_name or not os.path.exists(out_file_name): return curr_sent = [] with open(out_file_name) as f: for line in f: line = line.strip() if not line: continue seg_line = seg_sentence(line, self.line_min_words) if not seg_line: continue curr_sent.extend(filter(lambda x: x, seg_line)) for data_iter in self.lines_curr_iterator( curr_sent, batch_size=batch_size): yield data_iter yield TextData.ONE_LINE_TOKEN curr_sent = [] def _tok_line(self, line): """Tokenizes raw line. Args: line (str): Raw line. Returns: tokens (list of str): List of tokens. """ if not line or not isinstance(line, str): return return self.line_process_fn(line).split() def encode_line(self, line, with_eos=False, with_go=False): """Encodes raw line to list of word indices. Applies ``line_process_fn`` before encoding. Args: line (str): Raw lines. with_eos (bool): Whether to append eos_value at the end or not. with_go (bool): Whether to append go_token in the beginning of line or not. Returns: encoded (list of ints): Encoded line. """ tokens = self._tok_line(line) encoded = self.vocab.encode_words(tokens, with_eos, with_go) return encoded def encode_lines(self, lines, with_eos=False, with_go=False): """Encodes raw lines to list of word indices. Applies ``line_process_fn`` for each line. Args: lines (list of str): List of raw lines. with_eos (bool): Whether to append eos_value at the end of each line or not. with_go (bool): Whether to append go_token in the beginning of each line or not. Returns: encoded (list of list of ints): List of encoded lines. """ encoded = [self.encode_line(line, with_eos, with_go) for line in lines] return encoded def decode_line(self, encoded_line): return self.vocab.decode_idxs(encoded_line) def make_batch(self, encoded_lines, max_len=None): """Makes `Batch` instance based on `encoded_lines`. Args: encoded_lines (list of list of int): List of encoded lines. Encoded lines can be obtained via ``encode_lines`` or ``encode_line`` methods. max_len (int): If not None, lines will be padded up to max_len with vocab.pad_value. Otherwise, lines will be padded using maximum length of line in ``encoded_lines``. Returns: batch (Batch): Batch instance. """ if not max_len: max_len = min(max(map(len, encoded_lines)), self.max_len) encoded_lines = [line[:max_len] for line in encoded_lines] padded_lines = utils.pad_sequences(encoded_lines, max_len, self.vocab.pad_value) batch = Batch(padded_lines, self.vocab.pad_value, self.vocab.go_value, self.vocab.eos_value) return batch @staticmethod def _make_triples_for_paragraph(paragraph): """Generate prev, curr, next lists based on paragraph. """ if len(paragraph) < TextData.NUM_LINE_TRIPLES: return [], [], [] prev = paragraph[:-2] curr = paragraph[1:-1] next = paragraph[2:] return prev, curr, next def make_triples(self, lines): """Returns prev, curr, next lists based on lines. Context is not shared between different paragraphs in text. So, last line in one paragraph will not be in context with first line in the next paragraph. Paragraphs must be separated by '\n\n' There will be asymmetric context for first and last lines. Args: lines (list of str): List of lines. 
        Returns:
            prev, curr, next (tuple of list of str)
        """
        idxs = [-1] + list(
            filter(
                None,
                [i if len(lines[i]) == 0 else None
                 for i in range(len(lines))])) + [len(lines)]
        all_prev, all_curr, all_next = [], [], []
        for start, end in zip(idxs[:-1], idxs[1:]):
            tmp_prev, tmp_curr, tmp_next = self._make_triples_for_paragraph(
                lines[start + 1:end])
            if tmp_prev == [] or tmp_curr == [] or tmp_next == []:
                continue
            all_prev.extend(tmp_prev)
            all_curr.extend(tmp_curr)
            all_next.extend(tmp_next)
        return all_prev, all_curr, all_next

    @staticmethod
    def make_lines_triples(lines):
        """Returns prev, curr, next lists based on lines."""
        lines = filter(lambda x: x, lines)
        if len(lines) < TextData.NUM_LINE_TRIPLES:
            return [], [], []
        # prev, curr, next
        return lines[:-2], lines[1:-1], lines[2:]

    def triples_data_iterator(self, prev_data, curr_data, next_data,
                              max_len, batch_size=64, shuffle=False):
        """Creates an iterator over (prev sentence, current sentence,
        next sentence) data. It is useful for training skip-thought vectors.

        Args:
            prev_data (list of lists of ints): List with raw previous lines.
                Lines can have different lengths.
            curr_data (list of lists of ints): List with raw lines which
                correspond to current sentences. Lines can have different
                lengths. They will be encoder inputs.
            next_data (list of lists of ints): List with raw next lines.
                Lines can have different lengths.
            max_len (int): Maximum length for padding previous and next
                sentences.
            batch_size (int): Size of batch.
            shuffle (bool): Whether to shuffle data.
        Yields:
            enc_inp, prev_inp, prev_targ, next_inp, next_targ (Batch)
        """
        if shuffle:
            indices = np.random.permutation(len(curr_data))
            curr_data = [curr_data[i] for i in indices]
            prev_data = [prev_data[i] for i in indices]
            next_data = [next_data[i] for i in indices]

        total_processed_examples = 0
        total_steps = int(np.ceil(len(curr_data) / float(batch_size)))
        for step in range(total_steps):
            batch_start = step * batch_size
            curr = curr_data[batch_start:batch_start + batch_size]
            prev = prev_data[batch_start:batch_start + batch_size]
            next = next_data[batch_start:batch_start + batch_size]
            if batch_start + batch_size > len(curr_data):
                # Pad the final short batch with randomly sampled examples.
                num_index = min(batch_size,
                                batch_start + batch_size - len(curr_data))
                data_index = random.sample(range(len(curr_data)), num_index)
                for i_index in data_index:
                    curr.append(curr_data[i_index])
                    prev.append(prev_data[i_index])
                    next.append(next_data[i_index])
            enc_inp = self.make_batch(self.encode_lines(curr))
            prev_inp = self.make_batch(
                self.encode_lines(prev, with_go=True), max_len)
            prev_targ = self.make_batch(
                self.encode_lines(prev, with_eos=True), max_len)
            next_inp = self.make_batch(
                self.encode_lines(next, with_go=True), max_len)
            next_targ = self.make_batch(
                self.encode_lines(next, with_eos=True), max_len)
            assert prev_inp.shape == prev_targ.shape == next_inp.shape == \
                next_targ.shape, (prev, curr, next)
            yield enc_inp, prev_inp, prev_targ, next_inp, next_targ
            total_processed_examples += len(curr)
            if total_processed_examples >= len(curr_data):
                break
        assert total_processed_examples >= len(curr_data), \
            'Expected {} and processed {}'.format(len(curr_data),
                                                  total_processed_examples)

    def lines_curr_iterator(self, curr_data, batch_size=1, shuffle=False):
        """Creates an iterator over current-sentence data. It is useful for
        predicting or encoding skip-thought vectors.
""" if shuffle: indices = np.random.permutation(len(curr_data)) curr_data = [curr_data[i] for i in indices] total_processed_examples = 0 total_steps = int(np.ceil(len(curr_data) / float(batch_size))) for step in range(total_steps): batch_start = step * batch_size curr = curr_data[batch_start:batch_start + batch_size] if batch_start + batch_size > len(curr_data): num_index = min(batch_size, batch_start + batch_size - len(curr_data)) data_index = random.sample(range(len(curr_data)), num_index) for i_index in data_index: curr.append(curr_data[i_index]) enc_inp = self.make_batch(self.encode_lines(curr)) assert curr yield enc_inp total_processed_examples += len(curr) if total_processed_examples >= len(curr_data): break assert total_processed_examples >= len(curr_data), \ 'Expected {} and processed {}'.format(len(curr_data), total_processed_examples)
class SkipThoughtModel(object):
    """Skip-thought model."""

    VOCAB_SIZE_THRESHOLD_CPU = 20000
    MAX_GRADIENT_NORM = 5.0
    LOG_FILE = 'skip_thought_model.log'

    def __init__(self, vocab_size, start_vocab, max_target_len, unit_type,
                 num_units, num_layers, dropout, embedding_size,
                 learning_rate, num_keep_ckpts):
        self.vocab_size = vocab_size  # src & tgt share vocab_size
        self.start_vocab = start_vocab  # start_vocab = ['<pad>', '<go>', '<eos>', '<unk>']
        self.max_target_len = max_target_len
        # net-parameters
        self.unit_type = unit_type
        self.num_units = num_units
        self.num_layers = num_layers
        self.dropout = dropout
        self.embedding_size = embedding_size
        self.learning_rate = learning_rate
        self.embedding_share = None
        # net-output-data
        self.curr_encoder_output = None
        self.curr_encoder_state = None
        self.prev_train_logits = None
        self.prev_predict_logits = None
        self.next_train_logits = None
        self.next_predict_logits = None
        self.loss = None
        self.gradients = None
        self.train_op = None
        # net-transit-data
        self.encoder_output = None
        self.encoder_state = None
        self.prev_train_decoder_output = None
        self.prev_predict_decoder_output = None
        self.next_train_decoder_output = None
        self.next_predict_decoder_output = None
        # init log
        self._logger = FinalLogger(self.LOG_FILE)
        # init devices
        self.num_gpus = 0
        self._init_device_gpus()
        # init placeholders
        self._init_placeholder()
        # init embeddings
        self._init_embeddings()
        # build graph
        self._build_graph()
        # compute and apply gradients
        self._build_train()
        # predict
        self._build_predict()
        # saver for training checkpoints
        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=num_keep_ckpts)

    def _init_device_gpus(self):
        """Init device GPU and CPU."""
        gpu_names = [
            x.name for x in device_lib.list_local_devices()
            if x.device_type == 'GPU'
        ]
        self.num_gpus = len(gpu_names)
        self._logger.info('{0} GPUs are detected : {1}'.format(
            self.num_gpus, gpu_names))

    def _init_placeholder(self):
        """Init prev_curr_next data placeholder."""
        self._logger.info('Init prev_curr_next data placeholder.')
        with tf.variable_scope('placeholders'):
            # curr input
            self.curr_source_data = tf.placeholder(tf.int32, [None, None],
                                                   name='curr_data')
            self.curr_source_seq_len = tf.placeholder(
                tf.int32, [None], name='curr_data_seq_len')
            self.batch_size = tf.size(self.curr_source_seq_len,
                                      name='batch_size')
            # prev target
            self.prev_target_data_input = tf.placeholder(
                tf.int32, [None, None], name='prev_targets_input')
            self.prev_target_data_output = tf.placeholder(
                tf.int32, [None, None], name='prev_targets_output')
            self.prev_target_mask = tf.placeholder(
                tf.float32, [None, None], name='prev_targets_mask')
            self.prev_target_seq_len = tf.placeholder(
                tf.int32, [None], name='prev_targets_seq_len')
            # next target
            self.next_target_data_input = tf.placeholder(
                tf.int32, [None, None], name='next_targets_input')
            self.next_target_data_output = tf.placeholder(
                tf.int32, [None, None], name='next_targets_output')
            self.next_target_mask = tf.placeholder(
                tf.float32, [None, None], name='next_targets_mask')
            self.next_target_seq_len = tf.placeholder(
                tf.int32, [None], name='next_targets_seq_len')

    def _build_cell(self, unit_type, num_units, num_layers, dropout):
        """Build cell."""
        cell_list = []
        for i in range(num_layers):
            single_cell = self._create_rnn_cell(
                unit_type=unit_type,
                num_units=num_units,
                dropout=dropout,
                device_str=self._get_device_str(i, self.num_gpus))
            cell_list.append(single_cell)
        if len(cell_list) == 1:
            return cell_list[0]
        else:  # multiple layers
            return tf.contrib.rnn.MultiRNNCell(cell_list)

    def _create_rnn_cell(self, unit_type, num_units, dropout,
                         device_str=None):
        """Create rnn single-cell."""
        # cell
        if unit_type == 'lstm':
            single_cell = tf.contrib.rnn.LSTMCell(
                num_units,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        elif unit_type == 'gru':
            single_cell = tf.contrib.rnn.GRUCell(num_units)
        else:
            raise ValueError('Unknown cell type %s!' % unit_type)
        # dropout wrapper
        if dropout > 0.0:
            single_cell = tf.contrib.rnn.DropoutWrapper(
                cell=single_cell,
                input_keep_prob=(1.0 - dropout),
                output_keep_prob=1.0)
        # device wrapper
        if device_str:
            single_cell = tf.contrib.rnn.DeviceWrapper(single_cell,
                                                       device_str)
        self._logger.info('  %s, device=%s' %
                          (type(single_cell).__name__, device_str))
        return single_cell

    @staticmethod
    def _get_device_str(device_id, num_gpus):
        """Return a device string for multi-GPU setup."""
        if num_gpus == 0:
            return '/cpu:0'
        device_str_output = '/gpu:%d' % (device_id % num_gpus)
        return device_str_output

    def _get_embed_device(self, vocab_size):
        """Get embed device."""
        if vocab_size < self.VOCAB_SIZE_THRESHOLD_CPU and self.num_gpus > 0:
            return '/gpu:0'
        else:
            return '/cpu:0'

    def _init_embeddings(self):
        """Init embedding."""
        # share vocab
        self._logger.info('Init embedding src_tgt_share.')
        with tf.device(self._get_embed_device(self.vocab_size)):
            self.embedding_share = tf.get_variable(
                'embedding_share', [self.vocab_size, self.embedding_size],
                dtype=tf.float32)
        self._logger.info('  %s, device=%s' %
                          (type(self.embedding_share).__name__,
                           self._get_embed_device(self.vocab_size)))

    def _build_encoder(self, enc_scope_name):
        """Network encoder."""
        self._logger.info('Build encoder.')
        with tf.variable_scope(enc_scope_name):
            # shape: [batch_size, max_time, embed_size]
            # encoder_embed_input = tf.contrib.layers.embed_sequence(
            #     self.curr_source_data, self.vocab_size, self.embedding_size)
            encoder_embed_input = tf.nn.embedding_lookup(
                self.embedding_share, self.curr_source_data)
            cell = self._build_cell(self.unit_type, self.num_units,
                                    self.num_layers, self.dropout)
            encoder_output, encoder_state = tf.nn.dynamic_rnn(
                cell,
                encoder_embed_input,
                sequence_length=self.curr_source_seq_len,
                dtype=tf.float32)
            return encoder_output, encoder_state

    def _build_decoder(self, encoder_output, encoder_state, target_data,
                       target_seq_len, dec_scope_name):
        """Network decoder."""
        self._logger.info('Build %s.', dec_scope_name)
        with tf.variable_scope(dec_scope_name):
            # decoder_embeddings = tf.Variable(
            #     tf.random_uniform([self.vocab_size, self.embedding_size]))
            cell = self._build_cell(self.unit_type, self.num_units,
                                    self.num_layers, self.dropout)
            # attention model
            cell, encoder_state = self._build_attention(
                encoder_output, encoder_state, cell)
            # output layer
            output_layer = Dense(
                self.vocab_size,
                use_bias=False,
                kernel_initializer=tf.truncated_normal_initializer(
                    mean=0.0, stddev=0.1))

            self._logger.info('  Build decoder train.')
            with tf.variable_scope(dec_scope_name + '_train'):
                # Data format of target_data: <GO>...<PAD>
                # shape: [batch_size, max_time, embed_size], type: float32.
                decoder_embed_input = tf.nn.embedding_lookup(
                    self.embedding_share, target_data)
                train_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=decoder_embed_input,
                    sequence_length=target_seq_len,
                    time_major=False)
                train_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell, train_helper, encoder_state, output_layer)
                train_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    train_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_len)

            self._logger.info('  Build decoder predict.')
            with tf.variable_scope(dec_scope_name + '_predict', reuse=True):
                # start_tokens = tf.tile(
                #     tf.constant([self.start_vocab.index('<go>')],
                #                 dtype=tf.int32),
                #     [self.batch_size], name='start_tokens')
                predict_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    self.embedding_share,
                    tf.fill([self.batch_size],
                            self.start_vocab.index('<go>')),
                    self.start_vocab.index('<eos>'))
                predict_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell, predict_helper, encoder_state, output_layer)
                predict_decoder_output, _, _ = \
                    tf.contrib.seq2seq.dynamic_decode(
                        predict_decoder,
                        impute_finished=True,
                        maximum_iterations=self.max_target_len)
        return train_decoder_output, predict_decoder_output

    def _build_attention(self, encoder_output, encoder_state, cell):
        """Attention."""
        # attention_states: [batch_size, max_time, num_units]
        # attention_states = tf.transpose(encoder_output, [1, 0, 2])
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
            self.num_units,
            encoder_output,
            memory_sequence_length=self.curr_source_seq_len)
        cell = tf.contrib.seq2seq.AttentionWrapper(
            cell, attention_mechanism, attention_layer_size=self.num_units)
        decoder_initial_state = cell.zero_state(
            self.batch_size, tf.float32).clone(cell_state=encoder_state)
        cell = tf.contrib.rnn.DeviceWrapper(
            cell, self._get_device_str(self.num_layers - 1, self.num_gpus))
        return cell, decoder_initial_state

    def _build_graph(self):
        """Build skip-thought model by seq2seq model."""
        self._logger.info('Build graph.')
        # curr_data encoder
        self.encoder_output, self.encoder_state = self._build_encoder(
            'encoder')
        # prev_data decoder
        self.prev_train_decoder_output, self.prev_predict_decoder_output = \
            self._build_decoder(self.encoder_output, self.encoder_state,
                                self.prev_target_data_input,
                                self.prev_target_seq_len, 'prev_decoder')
        # next_data decoder
        self.next_train_decoder_output, self.next_predict_decoder_output = \
            self._build_decoder(self.encoder_output, self.encoder_state,
                                self.next_target_data_input,
                                self.next_target_seq_len, 'next_decoder')

        self._logger.info('Compute loss.')
        # compute loss
        with tf.device(self._get_device_str(self.num_layers - 1,
                                            self.num_gpus)):
            # prev loss
            prev_train_logits = tf.identity(
                self.prev_train_decoder_output.rnn_output,
                name='prev_logits')
            prev_loss = self._compute_loss(self.prev_target_data_output,
                                           self.prev_target_mask,
                                           prev_train_logits)
            # next loss
            next_train_logits = tf.identity(
                self.next_train_decoder_output.rnn_output,
                name='next_logits')
            next_loss = self._compute_loss(self.next_target_data_output,
                                           self.next_target_mask,
                                           next_train_logits)
            # loss
            self.loss = prev_loss + next_loss

    def _compute_loss(self, target_output, target_mask, logits):
        """Compute optimization loss."""
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=target_output, logits=logits)
        loss = tf.reduce_sum(crossent * target_mask) / tf.to_float(
            self.batch_size)
        return loss

    def _build_train(self):
        """Train, compute and apply gradients."""
        self._logger.info('Build train.')
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_grads, _ = tf.clip_by_global_norm(gradients,
                                                  self.MAX_GRADIENT_NORM)
        opt = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = opt.apply_gradients(zip(clipped_grads, params))

    def _build_predict(self):
        """Predict output: curr_data encoder, prev_predict and next_predict
        data."""
        self._logger.info('Build predict.')
        with tf.device(self._get_device_str(self.num_layers - 1,
                                            self.num_gpus)):
            with tf.variable_scope('prev'):
                self.prev_train_logits = tf.identity(
                    self.prev_train_decoder_output.rnn_output, name='logits')
                self.prev_predict_logits = tf.identity(
                    self.prev_predict_decoder_output.sample_id,
                    name='predictions')
            with tf.variable_scope('next'):
                self.next_train_logits = tf.identity(
                    self.next_train_decoder_output.rnn_output, name='logits')
                self.next_predict_logits = tf.identity(
                    self.next_predict_decoder_output.sample_id,
                    name='predictions')
            with tf.variable_scope('curr'):
                self.curr_encoder_output = tf.identity(self.encoder_output,
                                                       name='output')
                self.curr_encoder_state = tf.identity(self.encoder_state,
                                                      name='state')
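# Construction sketch for SkipThoughtModel. All hyperparameter values below
# are illustrative assumptions; the training script derives the real ones
# from FLAGS and the TextData vocabulary.
def _skip_thought_model_sketch():
    model = SkipThoughtModel(
        vocab_size=100000,
        start_vocab=['<pad>', '<go>', '<eos>', '<unk>'],
        max_target_len=100,
        unit_type='gru',
        num_units=512,
        num_layers=2,
        dropout=0.2,
        embedding_size=300,
        learning_rate=0.001,
        num_keep_ckpts=5)
    return model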
        pass
    with open(FLAGS.pred_tgt_path, 'w') as f:
        pred_data = text_data.pro_tuple_data(
            FLAGS.pred_src_path, batch_size=FLAGS.pred_batch_size)
        for j, batch in enumerate(pred_data):
            if batch == text_data.ONE_LINE_TOKEN:
                f.write('\n'.encode('utf-8'))
                continue
            prev_predict, curr_state = sess.run(
                [skip_thought_model.prev_predict_logits,
                 skip_thought_model.curr_encoder_state],
                feed_dict=pred_feed_dict(skip_thought_model, batch))
            predict_log.info('%d, %s', j, '------')
            for pred_i in prev_predict:
                pred_str = ''
                for pred_j in pred_i:
                    pred_str += text_data.vocab.index2word[pred_j] + ','
                predict_log.info(pred_str)
            f.write((' '.join(map(str, curr_state[-1][-1])) + ' ')
                    .encode('utf-8'))
    predict_log.info('Elapsed time: ' + str(time.time() - start_time))


if __name__ == '__main__':
    predict_log = FinalLogger('skip_thought_pred.log')
    predict_log.info('start')
    tf.app.run()
    predict_log.info('ok')