def data_pipeline_calls(features, params, mode, GPU):
    init = tf.initializers.truncated_normal(0.0, 0.01)

    # If you want to train the embeddings from scratch:
    # embedding_vectors = tf.get_variable(name='embedding_vectors',
    #                                     shape=(params['vocab_size'], 512),
    #                                     initializer=init)
    # embedded_input = tf.nn.embedding_lookup(embedding_vectors, features['encoder_inputs'])

    # If you don't want to train the embeddings:
    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])

    forget_bias = get_forget_bias(params, mode)

    with tf.device(GPU['sidekick']):
        # high_dim_embedding_vecs = tf.layers.dense(embedded_input, units=512, activation=tf.nn.relu)

        # Learned positional embeddings: one 50-dim vector per position, tiled across the batch.
        positional_embeddings = tf.get_variable('position_embedding',
                                                shape=(params['input_max_length'], 50))
        positions = tf.range(params['input_max_length'])
        positions = tf.reshape(tf.tile(positions, [params['batch_size']]),
                               (-1, params['input_max_length']))
        position_embeddings = tf.cast(tf.nn.embedding_lookup(positional_embeddings, positions),
                                      tf.float32)

        # Add the positional embeddings to the word embeddings, then apply dropout.
        transformer_input = tf.add(embedded_input, position_embeddings)
        transformer_input = tf.nn.dropout(transformer_input, keep_prob=0.5)

    return transformer_input
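# load_embeddings and get_forget_bias are called throughout this section but are not
# part of the listing. The two sketches below only illustrate the assumed behaviour
# (pre-trained, non-trainable vectors; a training-only bias value); the signatures and
# the .npy format are assumptions, not the project's actual code.
import numpy as np
import tensorflow as tf


def load_embeddings(embedding_path, vocab):
    # Hypothetical: load a pre-computed (vocab_size x emb_dim) matrix and expose it
    # as a non-trainable constant for tf.nn.embedding_lookup.
    vectors = np.load(embedding_path)
    lookup_table = {word: index for index, word in enumerate(vocab)}
    emb_vectors = tf.constant(vectors, dtype=tf.float32, name='embedding_vectors')
    return lookup_table, emb_vectors


def get_forget_bias(params, mode):
    # Hypothetical: use the tuned value while training, a neutral 1.0 otherwise.
    if mode == tf.estimator.ModeKeys.TRAIN:
        return params.get('forget_bias', 1.0)
    return 1.0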
def model_fn(features, labels, mode, params):
    # particular to this project
    word2index = params['word2index']
    # index2word = params['index2word']

    GPUs = get_available_gpus()
    GPU = {'titan': GPUs[1], 'sidekick': GPUs[0]}

    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_enc_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])
    forget_bias = get_forget_bias(params, mode)

    num_units = [2048, 2048]
    init = tf.initializers.truncated_normal(0.0, 0.01)

    # Encoder model
    with tf.device(GPU['titan']):
        encoder_cells = [tf.nn.rnn_cell.LSTMCell(num_units=num,
                                                 forget_bias=forget_bias,
                                                 initializer=init)
                         for num in num_units]
        encoder_stacked_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(encoder_cells)
        enc_outputs, enc_final_state = tf.nn.dynamic_rnn(
            encoder_stacked_rnn_cell,
            embedded_enc_input,
            sequence_length=features['encoder_input_lengths'],
            dtype=tf.float32)

    # Decoder model
    with tf.device(GPU['sidekick']):
        partial_embedding_helper = partial(embedding_helper, emb_vectors=emb_vectors)

        if mode == tf.estimator.ModeKeys.TRAIN:
            embed_dec_inputs = tf.nn.embedding_lookup(emb_vectors, features['decoder_inputs'])
            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=embed_dec_inputs,
                sequence_length=features['decoder_input_lengths'],
            )
        else:
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=partial_embedding_helper,
                start_tokens=tf.tile([word2index['<GO>']],
                                     [tf.shape(features['encoder_inputs'])[0]]),
                end_token=word2index['<EOS>'])

        dec_cell = tf.nn.rnn_cell.LSTMCell(
            num_units=num_units[-1],  # needs to match size of last layer of encoder
            forget_bias=forget_bias,
            initializer=init)

        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=dec_cell,
            helper=helper,
            initial_state=enc_final_state[-1],
            output_layer=Dense(params['vocab_size'], use_bias=False))

        dec_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=params['output_max_length'])

    logits = tf.identity(dec_outputs.rnn_output, 'logits')

    if mode == tf.estimator.ModeKeys.PREDICT:
        indices = predict_words(logits)
        predictions = {'sentence_tokens': indices}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    training_labels = labels['target_sequences']
    # Mask the padding tokens out of the loss.
    weights = tf.cast(tf.not_equal(training_labels, tf.constant(word2index['<PAD>'])), tf.float32)
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                     targets=training_labels,
                                                     weights=weights)
    tf.summary.scalar('sequence_loss', sequence_loss)

    if mode == tf.estimator.ModeKeys.EVAL:
        # eval_metric_ops expects (value, update_op) pairs, and 'loss' is reserved by Estimator.
        metrics = {'sequence_loss': tf.metrics.mean(sequence_loss)}
        return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, eval_metric_ops=metrics)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(sequence_loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, train_op=train_op)
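# The seq2seq model_fn above leans on a few more names that do not appear in the
# listing: partial and Dense (imports), get_available_gpus (device discovery),
# embedding_helper (wrapped with functools.partial so GreedyEmbeddingHelper can map
# sampled ids back to vectors), and predict_words (logits -> token ids). The imports
# are the standard TF 1.x ones; the three helpers are sketches of the assumed
# behaviour, not the project's own implementations.
from functools import partial

import tensorflow as tf
from tensorflow.python.client import device_lib
from tensorflow.python.layers.core import Dense


def get_available_gpus():
    # List the GPU device names visible to TensorFlow, e.g. ['/device:GPU:0', ...].
    return [d.name for d in device_lib.list_local_devices() if d.device_type == 'GPU']


def embedding_helper(ids, emb_vectors):
    # Look up the embedding row for each sampled token id during greedy decoding.
    return tf.nn.embedding_lookup(emb_vectors, ids)


def predict_words(logits):
    # Greedy decoding: take the highest-scoring vocabulary entry at each step.
    return tf.to_int32(tf.argmax(logits, axis=-1))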
def model_fn(features, labels, mode, params):
    # particular to this project
    word2index = params['word2index']

    GPUs = get_available_gpus()
    CPUs = get_available_cpus()
    GPU = {'titan': GPUs[0], 'sidekick': GPUs[1]}
    CPU = {'main_cpu': CPUs[0]}

    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])
    forget_bias = get_forget_bias(params, mode)

    # Learned positional embeddings, broadcast across the batch.
    positional_embeddings = tf.get_variable('position_embedding',
                                            shape=(params['input_max_length'], 50))
    positions = tf.range(params['input_max_length'])  # tf.range needs a scalar limit, not the per-example lengths
    position_embeddings = tf.cast(tf.nn.embedding_lookup(positional_embeddings, positions),
                                  tf.float32)

    num_units = [256]
    init = tf.initializers.truncated_normal(0.0, 0.01)
    embedded_enc_input = tf.add(embedded_input, position_embeddings)

    with tf.device(GPU['titan']):
        init = tf.initializers.truncated_normal(0.0, 0.01)

        # Treat the (length x embedding) matrix as a one-channel image and run it
        # through a stack of convolutions.
        three_channel = tf.expand_dims(embedded_enc_input, axis=3)
        conv = tf.layers.conv2d(tf.cast(three_channel, tf.float32), 126, (5, 5),
                                activation=tf.nn.relu, use_bias=True,
                                kernel_initializer=init, name='conv1')
        conv = tf.layers.conv2d(conv, 32, (3, 3), activation=tf.nn.relu, use_bias=True,
                                kernel_initializer=init, name='conv2')
        conv = tf.layers.conv2d(conv, 16, (3, 3), activation=tf.nn.relu, use_bias=True,
                                kernel_initializer=init, name='conv3')
        conv = tf.layers.conv2d(conv, 8, (3, 3), activation=tf.nn.relu, use_bias=True,
                                kernel_initializer=init, name='conv4')
        conv = tf.layers.conv2d(conv, 16, (3, 3), activation=tf.nn.relu, use_bias=True,
                                kernel_initializer=init, name='conv5')
        conv = tf.layers.conv2d(conv, 32, (3, 3), activation=tf.nn.relu, use_bias=True,
                                kernel_initializer=init, name='conv6')

        # start_flat = tf.reduce_mean(start_transformed, axis=1, name='encoded_text')
        start_flat = tf.layers.flatten(conv, name='start_flatten')  # the conv layers are chained through `conv`
        # end_flat = tf.layers.flatten(end_transformed, name='end_flatten')

        # start pred
        start_hidden = tf.layers.dense(start_flat, units=params['input_max_length'])
        start_predictions = tf.layers.dense(start_hidden, units=1,
                                            activation=tf.nn.sigmoid,
                                            kernel_initializer=init)
        start_predictions_transformed = transform_to_range(start_predictions,
                                                           min_value=0,
                                                           max_value=params['input_max_length'])

        # end pred
        # end_input = tf.concat((start_predictions, end_flat), 1, name='end_input')
        # end_hidden = tf.layers.dense(end_input, units=params['input_max_length'])
        # end_predictions = tf.layers.dense(end_hidden, activation=tf.nn.sigmoid, use_bias=True, units=1)
        # end_predictions_transformed = transform_to_range(end_predictions, min_value=0, max_value=params['input_max_length'])

    if mode == tf.estimator.ModeKeys.PREDICT:
        starts = tf.to_int32(start_predictions_transformed)
        # ends = tf.to_int32(end_predictions_transformed)
        predictions = {'question_starts': starts}  # , 'question_ends': ends}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # compute losses
    question_start_labels = tf.reshape(tf.to_float(labels['starts']), (-1, 1))
    # question_end_labels = tf.reshape(tf.to_float(labels['stops']), (-1, 1))
    start_loss = tf.losses.mean_squared_error(labels=question_start_labels,
                                              predictions=start_predictions_transformed)
    # end_loss = tf.losses.mean_squared_error(labels=question_end_labels, predictions=end_predictions_transformed)
    # order_penalty = tf.divide(tf.nn.relu(start_predictions_transformed - end_predictions_transformed),
    #                           tf.constant(10.0, tf.float32))
    # zero_spread_penalty = tf.reduce_sum(tf.abs(start_predictions_transformed - end_predictions_transformed))

    combined_loss = start_loss  # + end_loss + tf.reduce_mean(order_penalty) + zero_spread_penalty

    tf.summary.scalar('start_loss', start_loss)
    # tf.summary.scalar('end_loss', end_loss)
    # tf.summary.scalar('penalty_loss', tf.reduce_mean(order_penalty))
    # tf.summary.scalar('zero_spread_penalty', zero_spread_penalty)

    if mode == tf.estimator.ModeKeys.EVAL:
        # eval_metric_ops expects (value, update_op) pairs, so wrap the scalar loss.
        metrics = {'start_loss': tf.metrics.mean(combined_loss)}
        return tf.estimator.EstimatorSpec(mode, loss=combined_loss, eval_metric_ops=metrics)

    global_step = tf.train.get_global_step()
    # starter_learning_rate = 0.1
    # learning_rate = tf.train.exponential_decay(
    #     learning_rate=starter_learning_rate,
    #     global_step=global_step,
    #     decay_steps=100000,
    #     decay_rate=0.96,
    #     staircase=False,
    #     name='lr_decay_rate')
    learning_rate = 0.0001

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=0.9, beta2=0.98, epsilon=1e-09)
    # Clip gradients before applying them.
    gvs = optimizer.compute_gradients(combined_loss)
    capped_gvs = [(tf.clip_by_value(grad, -.5, .5), var) for grad, var in gvs]
    train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step)
    return tf.estimator.EstimatorSpec(mode, loss=combined_loss, train_op=train_op)
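# transform_to_range and get_available_cpus are also referenced without being defined
# here. Given the sigmoid activation feeding it and the min/max arguments it receives,
# transform_to_range is assumed to be a linear rescaling; both definitions below are
# sketches under that assumption, not the project's own code.
from tensorflow.python.client import device_lib


def transform_to_range(predictions, min_value, max_value):
    # Map a sigmoid output in [0, 1] linearly onto [min_value, max_value].
    return min_value + predictions * (max_value - min_value)


def get_available_cpus():
    # List the CPU device names visible to TensorFlow, e.g. ['/device:CPU:0'].
    return [d.name for d in device_lib.list_local_devices() if d.device_type == 'CPU']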
def model_fn(features, labels, mode, params):
    word2index = params['word2index']
    # index2word = params['index2word']

    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_enc_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])
    forget_bias = get_forget_bias(params, mode)
    init = tf.initializers.truncated_normal(0.0, 0.01)

    # Encoder
    num_units = [params['num_rnn_units_1'], params['num_rnn_units_2']]
    cells = [tf.nn.rnn_cell.LSTMCell(num_units=n, forget_bias=forget_bias, initializer=init)
             for n in num_units]
    stacked_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    enc_outputs, enc_final_state = tf.nn.dynamic_rnn(
        stacked_rnn_cell,
        embedded_enc_input,
        sequence_length=features['encoder_input_lengths'],
        dtype=tf.float32)

    # Classifier: predict the number of questions from the mean encoder output.
    init = tf.initializers.truncated_normal(0.0, .001)
    average_outputs = tf.reduce_mean(enc_outputs, axis=1)
    fc1 = tf.layers.dense(average_outputs, params['dense_1'], activation=tf.nn.relu,
                          kernel_initializer=init)
    fc2 = tf.layers.dense(fc1, params['dense_2'], activation=tf.nn.relu,
                          kernel_initializer=init)
    class_logits = tf.layers.dense(fc2, params['num_classes'], kernel_initializer=init)
    probabilities = tf.nn.softmax(class_logits)
    pred_num_q = tf.argmax(probabilities, axis=1)

    if mode != tf.estimator.ModeKeys.PREDICT:
        # labels is None at predict time, so only build the classification loss
        # and accuracy when labels are available.
        one_hot_labels = tf.one_hot(labels['num_questions_labels'], params['num_classes'])
        crossentropy_loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=one_hot_labels,
                                                       logits=class_logits))
        accuracy = tf.metrics.accuracy(labels=labels['num_questions_labels'],
                                       predictions=pred_num_q,
                                       name='accuracy_op')

    # Decoder model
    partial_embedding_helper = partial(embedding_helper, emb_vectors=emb_vectors)

    if mode == tf.estimator.ModeKeys.TRAIN:
        embed_dec_inputs = tf.nn.embedding_lookup(emb_vectors, features['decoder_inputs'])
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=embed_dec_inputs,
            sequence_length=features['decoder_input_lengths'],
        )
    else:
        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=partial_embedding_helper,
            start_tokens=tf.tile([word2index['<GO>']],
                                 [tf.shape(features['encoder_inputs'])[0]]),
            end_token=word2index['<EOS>'])

    dec_cell = tf.nn.rnn_cell.LSTMCell(
        num_units=params['num_units'],  # needs to match the encoder's final layer size
        forget_bias=forget_bias,
        initializer=init)

    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=dec_cell,
        helper=helper,
        initial_state=enc_final_state[1],
        output_layer=Dense(params['vocab_size'], use_bias=False))

    dec_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder=decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=params['output_max_length'])

    logits = tf.identity(dec_outputs.rnn_output, 'logits')

    if mode == tf.estimator.ModeKeys.PREDICT:
        indices = predict_words(logits)
        predictions = {'sentence_tokens': indices}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    training_labels = labels['target_sequences']
    # Mask the padding tokens out of the sequence loss.
    weights = tf.cast(tf.not_equal(training_labels, tf.constant(word2index['<PAD>'])), tf.float32)
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                     targets=training_labels,
                                                     weights=weights)

    tf.summary.scalar('sequence_loss', sequence_loss)
    tf.summary.scalar('crossentropy_loss', crossentropy_loss)
    tf.summary.scalar('accuracy', accuracy[1])  # accuracy is a (value, update_op) pair

    if mode == tf.estimator.ModeKeys.EVAL:
        # eval_metric_ops expects (value, update_op) pairs, so wrap the raw loss tensors.
        metrics = {
            'accuracy': accuracy,
            'sequence_loss': tf.metrics.mean(sequence_loss),
            'cross_entropy_loss': tf.metrics.mean(crossentropy_loss)
        }
        return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, eval_metric_ops=metrics)

    total_loss = sequence_loss + crossentropy_loss
    optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate'])
    train_op = optimizer.minimize(total_loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
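# The classifier+decoder model_fn above pulls all of its hyperparameters out of
# params. A sketch of the expected keys follows; the values are illustrative only,
# and the real word2index/vocab/embedding objects come from the project's
# preprocessing step.
example_params = {
    'word2index': {'<PAD>': 0, '<GO>': 1, '<EOS>': 2},  # plus the full vocabulary
    'vocab': ['<PAD>', '<GO>', '<EOS>'],
    'embedding_vectors': 'path/to/embeddings.npy',      # assumed path and format
    'vocab_size': 50000,
    'num_rnn_units_1': 512,
    'num_rnn_units_2': 512,
    'num_units': 512,        # decoder cell size; must match the encoder's final layer
    'dense_1': 256,
    'dense_2': 128,
    'num_classes': 5,
    'output_max_length': 60,
    'learning_rate': 0.001,
}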
def model_fn(features, labels, mode, params):
    # particular to this project
    word2index = params['word2index']
    # index2word = params['index2word']

    GPUs = get_available_gpus()
    GPU = {'titan': GPUs[1], 'sidekick': GPUs[0]}

    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_enc_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])
    forget_bias = get_forget_bias(params, mode)

    num_units = [1024, 512]
    init = tf.initializers.truncated_normal(0.0, 0.01, seed=42)

    def make_cell(size, scope):
        with tf.variable_scope(scope):
            cell = tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(size, initializer=init),
                input_keep_prob=forget_bias)
        return cell

    # Stacked bidirectional encoder: each layer consumes the concatenated
    # forward/backward outputs of the previous layer.
    with tf.device(GPU['titan']):
        output = embedded_enc_input
        for count, units in enumerate(num_units, 1):
            cell = make_cell(units, scope='BLSTM_' + str(count))
            state_fw = cell.zero_state(params['batch_size'], tf.float32)
            # Note: the same cell (and zero state) is reused for both directions,
            # so the forward and backward passes share weights.
            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_state, encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(
                cell, cell, output,
                initial_state_fw=state_fw,
                initial_state_bw=state_fw,
                scope='bRNN_' + str(count),
                dtype=tf.float32)
            output = tf.concat([encoder_fw_outputs, encoder_bw_outputs], axis=2)

        # Concatenate the final forward/backward states into a single LSTM state
        # for the decoder.
        encoder_state_c = tf.concat((encoder_fw_state.c, encoder_bw_state.c), 1,
                                    name='bidirectional_concat_c')
        encoder_state_h = tf.concat((encoder_fw_state.h, encoder_bw_state.h), 1,
                                    name='bidirectional_concat_h')
        encoder_state = tf.nn.rnn_cell.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)

    # Decoder model
    with tf.device(GPU['sidekick']):
        partial_embedding_helper = partial(embedding_helper, emb_vectors=emb_vectors)

        if mode == tf.estimator.ModeKeys.TRAIN:
            embed_dec_inputs = tf.nn.embedding_lookup(emb_vectors, features['decoder_inputs'])
            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=embed_dec_inputs,
                sequence_length=features['decoder_input_lengths'],
            )
        else:
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=partial_embedding_helper,
                start_tokens=tf.tile([word2index['<GO>']],
                                     [tf.shape(features['encoder_inputs'])[0]]),
                end_token=word2index['<EOS>'])

        dec_cell = tf.nn.rnn_cell.LSTMCell(
            num_units=num_units[-1] * 2,  # needs to match size of last layer of encoder
            forget_bias=forget_bias,
            initializer=init)

        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=dec_cell,
            helper=helper,
            initial_state=encoder_state,
            output_layer=Dense(params['vocab_size'], use_bias=False))

        dec_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=params['output_max_length'])

    logits = tf.identity(dec_outputs.rnn_output, 'logits')

    if mode == tf.estimator.ModeKeys.PREDICT:
        indices = predict_words(logits)
        predictions = {'sentence_tokens': indices}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    training_labels = labels['target_sequences']
    # Mask the padding tokens out of the loss.
    weights = tf.cast(tf.not_equal(training_labels, tf.constant(word2index['<PAD>'])), tf.float32)
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                     targets=training_labels,
                                                     weights=weights)
    tf.summary.scalar('sequence_loss', sequence_loss)

    if mode == tf.estimator.ModeKeys.EVAL:
        # eval_metric_ops expects (value, update_op) pairs, and 'loss' is reserved by Estimator.
        metrics = {'sequence_loss': tf.metrics.mean(sequence_loss)}
        return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, eval_metric_ops=metrics)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(sequence_loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, train_op=train_op)
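# Every model_fn variant in this section follows the tf.estimator contract, so they
# are all wired up the same way. A minimal sketch, assuming a train_input_fn plus
# model_dir/train_steps entries in params (those names are illustrative, not the
# project's actual configuration):
def train(model_fn, train_input_fn, params):
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=params.get('model_dir', './checkpoints'),
        params=params)
    estimator.train(input_fn=train_input_fn, steps=params.get('train_steps', 10000))
    return estimator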