def _get_multi_valid_ops():
    """ Defines multi-device OPs used to evaluate the model; used for the external validation step only.
    CURRENTLY UNUSED. """
    # TODO: DEPRECATED!
    # Track tower-wise outputs
    tower_batch_losses = list()
    tower_sentence_losses = list()
    tower_words = list()

    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE) as outer_scope:
        for gpu_id, gpu in enumerate(operators):
            name = 'tower_{}'.format(gpu_id)
            # Assign variables to the CPU and tensor OPs to GPUs
            with tf.device(assign_to_device(controller, gpu)), tf.name_scope(name):
                # Compute and store losses
                next_batch = iterator.get_next()
                _, _, _, batch_loss, sentence_loss, words_processed = model.train_model(next_batch)
                # Validation OPs
                tower_batch_losses.append(batch_loss)
                tower_words.append(words_processed)
                tower_sentence_losses.append(sentence_loss)
            # Reuse variables
            outer_scope.reuse_variables()

    # Merged validation OPs
    averaged_batch_loss = tf.reduce_mean(tower_batch_losses)
    joint_sentence_losses = tf.concat(tower_sentence_losses, axis=0)
    total_words_processed = tf.reduce_sum(tower_words)
    valid_ops = [averaged_batch_loss, joint_sentence_losses, total_words_processed]
    return valid_ops
def _get_valid_ops(next_batch):
    """ Defines single-device OPs used to evaluate the model; used for the external validation step only. """
    # TODO: DEPRECATED!
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        with tf.device(assign_to_device(controller, operators[0])):
            # Surface OPs
            _, _, _, batch_loss, sentence_loss, words_processed = model.train_model(next_batch)
            valid_ops = [batch_loss, sentence_loss, words_processed]
    return valid_ops
def _get_translation_ops(next_batch):
    """ Defines single-device OPs used to obtain translations from the model. """
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        with tf.device(assign_to_device(controller, operators[0])):
            # Surface OPs
            greedy_translations, _, _ = model.decode_greedy(next_batch)
            sampled_translations, _ = model.decode_with_sampling(next_batch)
            beam_translations, beam_scores = model.decode_with_beam_search(next_batch)
            translation_ops = [next_batch[0], next_batch[2], greedy_translations, sampled_translations,
                               beam_translations, beam_scores]
    return translation_ops
def _get_train_ops(next_batch):
    """ Defines single-device OPs used to train the model. """
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        with tf.device(assign_to_device(controller, operators[0])):
            # Surface OPs
            grads_and_vars, _, batch_loss, sentence_loss, words_processed, _ = model.train_model(next_batch)
            proto_train_ops = [grads_and_vars, batch_loss, sentence_loss, words_processed]
            # Create summaries
            if not no_summaries:
                summaries = model.get_summaries(batch_loss)
                proto_train_ops.append(summaries)
    return proto_train_ops
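# The functions above and below pin variables to the controller device and compute OPs to the towers via
# assign_to_device(), which is not defined in this section. The sketch below is a minimal, hedged
# reconstruction following the common TF multi-GPU recipe; the argument order (ps device first, worker
# device second) is inferred from the call sites here and is an assumption, as is the OP-type list.
_EXAMPLE_VAR_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable', 'VarHandleOp']


def _example_assign_to_device(ps_device, worker_device):
    """ Returns a placement function: variable OPs go to ps_device, all other OPs to worker_device. """
    def _assign(op):
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op in _EXAMPLE_VAR_OPS:
            return ps_device
        return worker_device
    return _assign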
def _get_multi_translation_ops():
    """ Defines multi-device OPs used to obtain translations from the model.
    CURRENTLY UNUSED. """
    # Track tower-wise outputs
    tower_greedy_translations = list()
    tower_sampled_translations = list()
    tower_beam_translations = list()
    tower_beam_scores = list()

    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE) as outer_scope:
        for gpu_id, gpu in enumerate(operators):
            name = 'tower_{}'.format(gpu_id)
            # Assign variables to the CPU and tensor OPs to GPUs
            with tf.device(assign_to_device(controller, gpu)), tf.name_scope(name):
                # Translation OPs (output has to be padded to the same size before concatenation)
                next_batch = iterator.get_next()
                greedy_translations, _, _ = model.decode_greedy(next_batch)
                sampled_translations, _ = model.decode_with_sampling(next_batch)
                beam_translations, beam_scores = model.decode_with_beam_search(next_batch)
                tower_greedy_translations.append(greedy_translations)
                tower_sampled_translations.append(sampled_translations)
                tower_beam_translations.append(beam_translations)
                tower_beam_scores.append(beam_scores)
            # Reuse variables
            outer_scope.reuse_variables()

    with tf.name_scope('translation'), tf.device('/cpu:0'):
        # Merged translation OPs
        greedy_mst = tf.reduce_max([tf.shape(batch)[-1] for batch in tower_greedy_translations])
        sampled_mst = tf.reduce_max([tf.shape(batch)[-1] for batch in tower_sampled_translations])
        beam_mst = tf.reduce_max([tf.shape(batch)[-1] for batch in tower_beam_translations])

        padded_greedy_translations = [_pad_to_max_step_len(batch, greedy_mst) for batch in tower_greedy_translations]
        padded_sampled_translations = [_pad_to_max_step_len(batch, sampled_mst) for batch in tower_sampled_translations]
        padded_beam_translations = [_pad_to_max_step_len(batch, beam_mst, True) for batch in tower_beam_translations]

        joint_greedy_translations = tf.concat(padded_greedy_translations, axis=0)
        joint_sampled_translations = tf.concat(padded_sampled_translations, axis=0)
        joint_beam_translations = tf.concat(padded_beam_translations, axis=0)
        joint_beam_scores = tf.concat(tower_beam_scores, axis=0)

        translation_ops = \
            [joint_greedy_translations, joint_sampled_translations, joint_beam_translations, joint_beam_scores]
    return translation_ops
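# _pad_to_max_step_len() is referenced above but not defined in this section. The sketch below is an
# assumption about what it does: right-pad each tower's decoded ID tensor along the final (time) axis so
# the towers can be concatenated. The exact signature, the role of the third flag (treated here as "has
# an extra beam dimension"), and the padding value (0) are guesses.
def _example_pad_to_max_step_len(batch, max_step_len, is_beam=False):
    """ Right-pads decoded token IDs along the time axis up to max_step_len. """
    pad_amount = max_step_len - tf.shape(batch)[-1]
    # Only the final (time) axis is padded; leading axes (batch, and beam if present) are untouched
    leading_dims = 2 if is_beam else 1
    paddings = [[0, 0]] * leading_dims + [[0, pad_amount]]
    return tf.pad(batch, paddings, constant_values=0)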
def _get_multi_train_ops():
    """ Defines multi-device OPs used to train the model. """
    # Track tower-wise outputs
    tower_grads_and_vars = list()
    tower_batch_losses = list()
    tower_sentence_losses = list()
    tower_words = list()
    tower_targets = list()

    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE) as outer_scope:
        for gpu_id, gpu in enumerate(operators):
            try:
                name = 'tower_{}'.format(gpu_id)
                # Assign variables to the CPU and tensor OPs to GPUs
                with tf.device(assign_to_device(controller, gpu)), tf.name_scope(name):
                    # Compute and store losses and gradients
                    next_batch = iterator.get_next()
                    grads_and_vars, _, batch_loss, sentence_loss, words_processed, words_evaluated = \
                        model.train_model(next_batch)
                    # Training OPs
                    tower_grads_and_vars.append(grads_and_vars)
                    tower_batch_losses.append(batch_loss)
                    tower_words.append(words_processed)
                    tower_sentence_losses.append(sentence_loss)
                    tower_targets.append(words_evaluated)
                # Reuse variables
                outer_scope.reuse_variables()
            # TODO: Not sure if this does anything, as this code runs at graph construction time
            except tf.errors.OutOfRangeError:
                break

    # TODO: Same here
    if len(tower_grads_and_vars) == 0:
        raise tf.errors.OutOfRangeError(None, None, 'No tower outputs were collected.')

    # Weigh batch gradients based on the number of words contained within the batch
    max_tokens = tf.cast(tf.reduce_max(tower_targets), dtype=tf.int32)
    tower_weights = [tf.to_float(token_count / max_tokens) for token_count in tower_words]

    # Average grads
    averaged_grads_and_vars = list()
    for grads_and_vars in zip(*tower_grads_and_vars):
        grads = [grad for grad, _ in grads_and_vars]
        var = grads_and_vars[0][1]
        if not isinstance(grads[0], tf.IndexedSlices):
            # Apply tower weights
            grads = [grads[tower_id] * tower_weights[tower_id] for tower_id in range(len(grads))]
            averaged_grad = tf.reduce_mean(grads, 0)
        else:
            # Concatenate IndexedSlices (equivalent to averaging of dense tensors)
            values = [grads[tower_id].values * tower_weights[tower_id] for tower_id in range(len(grads))]
            joint_values = tf.concat(values, axis=0)
            joint_indices = tf.concat([grad.indices for grad in grads], axis=0)
            averaged_grad = \
                tf.IndexedSlices(values=joint_values, indices=joint_indices, dense_shape=grads[0].dense_shape)
        averaged_grad_and_var = (averaged_grad, var)
        averaged_grads_and_vars.append(averaged_grad_and_var)

    # Average losses
    averaged_batch_loss = tf.reduce_mean(tower_batch_losses)
    joint_sentence_losses = tf.concat(tower_sentence_losses, axis=0)
    total_words_processed = tf.reduce_sum(tower_words)

    # Compile OPs and add summaries
    proto_train_ops = [averaged_grads_and_vars, averaged_batch_loss, joint_sentence_losses, total_words_processed]
    if not no_summaries:
        # Create summaries
        averaged_summaries = model.get_summaries(averaged_batch_loss)
        proto_train_ops.append(averaged_summaries)
    # Proto-OPs are forwarded to gradient accumulation or optimization
    return proto_train_ops
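# Hedged usage sketch: the proto train OPs returned above leave optimisation to the caller. Assuming a
# standard tf.train optimizer and a global step variable (both assumptions, as that code is not part of
# this section), the averaged gradients would typically be applied like this:
def _example_apply_proto_train_ops(proto_train_ops, optimizer, global_step):
    """ Turns the averaged (gradient, variable) pairs into a training step OP. """
    averaged_grads_and_vars = proto_train_ops[0]
    train_op = optimizer.apply_gradients(averaged_grads_and_vars, global_step=global_step)
    # The remaining entries (losses, word counts, optional summaries) are fetched alongside train_op
    return [train_op] + proto_train_ops[1:]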
def main():
    # Constants
    DATASET_PATH = os.path.join(".")
    LEARNING_RATE_1 = 0.0001
    EPOCHS = 2
    BATCH_SIZE = 32
    NUM_CLASSES = 48
    Z_SCORE = 1.96
    WEIGHT_DECAY_1 = 0.0005

    print("Current Setup:")
    print("Starting Learning Rate: {}, Epochs: {}, Batch Size: {}, Confidence Interval Z-Score {}, "
          "Number of classes: {}, Starting Weight Decay: {}".format(
              LEARNING_RATE_1, EPOCHS, BATCH_SIZE, Z_SCORE, NUM_CLASSES, WEIGHT_DECAY_1))

    # Get the number of GPUs
    NUM_GPUS = util.get_available_gpus()
    print("Number of GPUs available : {}".format(NUM_GPUS))

    with tf.device('/cpu:0'):
        tower_grads = []
        reuse_vars = False
        dataset_len = 1207350

        # Placeholders
        learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        weight_decay = tf.placeholder(tf.float32, shape=[], name="weight_decay")

        for i in range(NUM_GPUS):
            with tf.device(util.assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):
                # Need to split data between GPUs
                train_features, train_labels, train_filenames = util.train_input_fn(
                    DATASET_PATH, BATCH_SIZE, EPOCHS)
                print("At GPU {}, Train Features : {}".format(i, train_features))

                # Model
                _, train_op, tower_grads, train_cross_entropy, train_conf_matrix_op, train_accuracy, reuse_vars = \
                    initiate_vgg_model(train_features, train_labels, train_filenames, NUM_CLASSES,
                                       weight_decay, learning_rate, reuse=reuse_vars,
                                       tower_grads=tower_grads, gpu_num=i, handle="training")
                # tf.summary.scalar("training_confusion_matrix",
                #                   tf.reshape(tf.cast(conf_matrix_op, tf.float32), [1, NUM_CLASSES, NUM_CLASSES, 1]))

        tower_grads = util.average_gradients(tower_grads)
        train_op = train_op.apply_gradients(tower_grads)

    saver = tf.train.Saver()
    if not os.path.exists(os.path.join("./multi_dl_research_train/")):
        os.mkdir(os.path.join("./multi_dl_research_train/"))

    with tf.Session() as sess:
        with np.printoptions(threshold=np.inf):
            writer = tf.summary.FileWriter("./multi_tensorboard_logs/")
            writer.add_graph(sess.graph)
            merged_summary = tf.summary.merge_all()
            train_highest_acc = 0

            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

            for epoch in range(EPOCHS):
                if epoch == 18:
                    LEARNING_RATE_1 = 0.00005
                    print("Learning Rate changed to {} at epoch {}".format(LEARNING_RATE_1, epoch))
                elif epoch == 29:
                    LEARNING_RATE_1 = 0.00001
                    WEIGHT_DECAY_1 = 0.0
                    print("Learning Rate changed to {} at epoch {}".format(LEARNING_RATE_1, epoch))
                    print("Weight Decay changed to {} at epoch {}".format(WEIGHT_DECAY_1, epoch))
                elif epoch == 42:
                    LEARNING_RATE_1 = 0.000005
                    print("Learning Rate changed to {} at epoch {}".format(LEARNING_RATE_1, epoch))
                elif epoch == 51:
                    LEARNING_RATE_1 = 0.000001
                    print("Learning Rate changed to {} at epoch {}".format(LEARNING_RATE_1, epoch))

                print("Current Epoch: {}".format(epoch))
                for i in range(2):
                    print("Current Training Iteration : {}/{}".format(i, 10))
                    train_acc, _, _, train_ce, train_summary = util.training(
                        BATCH_SIZE, NUM_CLASSES, learning_rate, weight_decay, sess, train_op,
                        train_conf_matrix_op, LEARNING_RATE_1, WEIGHT_DECAY_1, train_cross_entropy,
                        merged_summary, train_accuracy)
                    train_value1, train_value2 = util.confidence_interval(train_acc, Z_SCORE, 32)
                    print("Training Accuracy : {}".format(train_acc))
                    print("Training Loss (Cross Entropy) : {}".format(train_ce))
                    print("Training Confidence Interval: [{} , {}]".format(train_value2, train_value1))

                    if train_highest_acc <= train_acc:
                        train_highest_acc = train_acc
                        print("Highest Training Accuracy Reached: {}".format(train_highest_acc))
                        # For every epoch, we will save the model
                        saver.save(sess, os.path.join("./multi_dl_research_train/", "model.ckpt"))
                        print("Latest model saved and TensorBoard logs updated")
                    writer.add_summary(train_summary, epoch * int((dataset_len * 0.8) / BATCH_SIZE) + i)