import tensorflow as tf

# Project-local helper modules (exact import paths assumed from usage below).
import core
import nlp
import utils


def inference(params):
    embedding_size = params['embedding_size']
    vocab_size = params['vocab_size']
    sentence_len = params['num_words_before'] + params['num_words_after']
    embedding_wd = utils.get_dict_value(params, 'embedding_wd', 0.0)
    embedding_device = utils.get_dict_value(params, 'embedding_device', None)
    embedding_initializer = utils.get_dict_value(params, 'embedding_initializer', None)
    # default to 1.0 (keep everything); a default of 0.0 would zero out the
    # whole embedding matrix whenever the key is absent
    embedding_keep_prob = utils.get_dict_value(params, 'embedding_keep_prob', 1.0)
    print("USING EMBEDDING DEVICE %s" % embedding_device)
    if embedding_device is not None:
        with tf.device(embedding_device):
            embedding_matrix = nlp.variable_with_weight_decay(
                'embedding_matrix', [vocab_size, embedding_size],
                initializer=embedding_initializer, wd=embedding_wd)
    else:
        embedding_matrix = nlp.variable_with_weight_decay(
            'embedding_matrix', [vocab_size, embedding_size],
            initializer=embedding_initializer, wd=embedding_wd)
    if embedding_keep_prob is not None and embedding_keep_prob < 1.0:
        [embedding_matrix], _ = core.dropout([embedding_matrix], [embedding_keep_prob])
    input_sentence = tf.placeholder(tf.int32, [None, sentence_len], name='sentence')
    # name must be passed as a keyword: the third positional argument of
    # tf.nn.embedding_lookup is partition_strategy, not name
    emb_sentence = tf.nn.embedding_lookup(embedding_matrix, input_sentence,
                                          name='emb_sentence')
    enc_sentence, _ = sentence_encoder(emb_sentence, params)
    return enc_sentence, None
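# Usage sketch (hypothetical, not part of the original module). Assumes TF 1.x
# graph mode and that sentence_encoder is defined elsewhere in this project;
# the sizes are illustrative. Kept commented out so it never runs on import:
#
#   import numpy as np
#   params = {'embedding_size': 128, 'vocab_size': 50000,
#             'num_words_before': 5, 'num_words_after': 5}
#   enc_sentence, _ = inference(params)
#   with tf.Session() as sess:
#       sess.run(tf.global_variables_initializer())
#       ids = np.zeros((2, 10), dtype=np.int32)  # batch of 2 padded id rows
#       vec = sess.run(enc_sentence, feed_dict={'sentence:0': ids})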
def inference(params):
    embedding_size = params['embedding_size']
    sentence_len = params['num_before'] + params['num_after']
    embedding_wd = utils.get_dict_value(params, 'embedding_wd')
    embedding_device = utils.get_dict_value(params, 'embedding_device')
    embedding_initializer = utils.get_dict_value(params, 'embedding_initializer')
    embedding_keep_prob = utils.get_dict_value(params, 'embedding_keep_prob')
    word_embedding_size = utils.get_dict_value(params, 'word_embedding_size', embedding_size)
    vocab_size = utils.get_dict_value(params, 'vocab_size', 256)
    if embedding_device is not None:
        with tf.device(embedding_device):
            word_embedding_matrix = nlp.variable_with_weight_decay(
                'word_embedding_matrix', [vocab_size, word_embedding_size],
                initializer=embedding_initializer, wd=embedding_wd)
    else:
        word_embedding_matrix = nlp.variable_with_weight_decay(
            'word_embedding_matrix', [vocab_size, word_embedding_size],
            initializer=embedding_initializer, wd=embedding_wd)
    input_sentence = tf.placeholder(tf.int32, [None, sentence_len], name='sentence')
    # name passed as a keyword; the third positional slot is partition_strategy
    emb_sentence = tf.nn.embedding_lookup(word_embedding_matrix, input_sentence,
                                          name='emb_word')
    if embedding_keep_prob is not None and embedding_keep_prob < 1.0:
        [emb_sentence], _ = core.dropout([emb_sentence], [embedding_keep_prob])
    enc_sentence, _ = encoder(emb_sentence, params)
    return enc_sentence, None
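# Usage sketch (hypothetical; assumes encoder is defined elsewhere in this
# project). Note the defaults: without a 'vocab_size' key the table has 256
# rows (enough for a byte-level vocabulary), and 'word_embedding_size' falls
# back to 'embedding_size':
#
#   params = {'embedding_size': 64, 'num_before': 16, 'num_after': 16}
#   enc_sentence, _ = inference(params)  # builds a [256, 64] embedding table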
def inference(params):
    # batch_size = params['batch_size']
    num_steps = params['num_steps']
    cell_size = params['cell_size']
    vocab_size = params['vocab_size']
    num_layers = params['num_layers']
    cell_type = params['cell_type']
    # looked up from the enclosing graph; currently unused below
    is_training = tf.get_default_graph().get_tensor_by_name('is_training:0')
    embedding_wd = utils.get_dict_value(params, 'embedding_wd')
    # embedding_device = utils.get_dict_value(params, 'embedding_device')
    embedding_initializer = utils.get_dict_value(params, 'embedding_initializer')
    embedding_keep_prob = utils.get_dict_value(params, 'embedding_keep_prob')
    rnn_dropout_keep_prob = utils.get_dict_value(params, 'rnn_dropout_keep_prob', 1.0)
    cell_activation = utils.get_dict_value(params, 'cell_activation', None)
    embedding_matrix = nlp.variable_with_weight_decay(
        'embedding_matrix', [vocab_size, cell_size],
        initializer=embedding_initializer, wd=embedding_wd)
    words = tf.placeholder(tf.int32, [None, None], name='x')
    emb_words = tf.nn.embedding_lookup(embedding_matrix, words, name='emb_words')

    # add dropout if needed
    if embedding_keep_prob is not None and embedding_keep_prob < 1.0:
        [emb_words], _ = core.dropout([emb_words], [embedding_keep_prob])

    def make_cell():
        # one RNN cell of the configured type, with optional recurrent dropout
        if cell_type == 'GRU':
            new_cell = tf.contrib.rnn.GRUCell(cell_size)
        elif cell_type == 'BlockLSTM':
            new_cell = tf.contrib.rnn.LSTMBlockCell(cell_size)
        else:
            new_cell = tf.contrib.rnn.BasicLSTMCell(cell_size, activation=cell_activation)
        if rnn_dropout_keep_prob < 1.00:
            [new_cell], _ = core.rnn_dropout([new_cell], [rnn_dropout_keep_prob])
        return new_cell

    if num_layers > 1:
        cell = tf.contrib.rnn.MultiRNNCell(
            [make_cell() for _ in range(num_layers)], state_is_tuple=True)
    else:
        cell = make_cell()

    """ # change for GRU. not yet working!
    if cell_type == 'GRU':
        state_placeholder = tf.placeholder(tf.float32, [num_layers, None, cell_size], name='state')
        l = tf.unstack(state_placeholder, axis=0)
        state = tf.reshape(state_placeholder, [None, num_layers, cell_size])
    else:
        state_placeholder = tf.placeholder(tf.float32, [num_layers, 2, None, cell_size], name='state')
        l = tf.unstack(state_placeholder, axis=0)
        state = tuple(
            [tf.nn.rnn_cell.LSTMStateTuple(l[idx][0], l[idx][1])
             for idx in range(num_layers)]
        )
    """
    # LSTM-shaped recurrent state: [num_layers, 2 (c then h), batch, cell_size]
    state_placeholder = tf.placeholder(tf.float32, [num_layers, 2, None, cell_size], name='state')
    l = tf.unstack(state_placeholder, axis=0)
    state = tuple([tf.nn.rnn_cell.LSTMStateTuple(l[idx][0], l[idx][1])
                   for idx in range(num_layers)])

    outputs, final_state = tf.nn.dynamic_rnn(cell, emb_words, initial_state=state)
    final = tf.identity(final_state, name='final_state')
    output = tf.reshape(tf.concat(outputs, 1), [-1, cell_size])
    softmax_w = nlp.variable_with_weight_decay('softmax_w', [cell_size, vocab_size])
    softmax_b = nlp.variable_with_weight_decay('softmax_b', [vocab_size])
    logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    logits = tf.reshape(logits, [-1, num_steps, vocab_size], name='output_logits')
    if utils.get_dict_value(params, 'use_single_sm', False):
        smei = tf.placeholder(tf.int32, [None, None], name='smei')  # softmax evaluation index
        exp_logits = tf.exp(logits)
        # gather exp(logit) of the token indexed by smei at every
        # (batch, step) position, then normalize by the full partition sum
        numerator = tf.gather_nd(
            exp_logits,
            tf.stack([
                tf.tile(tf.expand_dims(tf.range(tf.shape(smei)[0]), 1),
                        [1, tf.shape(smei)[1]]),
                tf.transpose(
                    tf.tile(tf.expand_dims(tf.range(tf.shape(smei)[1]), 1),
                            [1, tf.shape(smei)[0]])),
                smei], 2))
        logits_smei = tf.divide(numerator, tf.reduce_sum(exp_logits, axis=-1),
                                name='output_single_sm')
    logits_sm = tf.nn.softmax(logits, name='output_logits_sm')
    return [logits]
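# Stepwise-decoding sketch (hypothetical; illustrative sizes, and it assumes
# an 'is_training:0' tensor already exists in the graph). The recurrent state
# is fed and fetched as a single [num_layers, 2 (c, h), batch, cell_size]
# array, matching the 'state' placeholder above:
#
#   import numpy as np
#   params = {'num_steps': 1, 'cell_size': 512, 'vocab_size': 10000,
#             'num_layers': 2, 'cell_type': 'LSTM'}
#   inference(params)
#   sess = tf.Session()
#   sess.run(tf.global_variables_initializer())
#   state = np.zeros((2, 2, 1, 512), dtype=np.float32)  # all-zero initial state
#   token = 0
#   for _ in range(32):
#       logits, state = sess.run(
#           ['output_logits:0', 'final_state:0'],
#           feed_dict={'x:0': [[token]], 'state:0': state})
#       token = int(np.argmax(logits[0, -1]))  # greedy next-token choice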