Example #1
    def _get_multi_valid_ops():
        """ Defines multi-device OPs used to evaluate the model; used for the external validation step only.
        CURRENTLY UNUSED. """
        # TODO: DEPRECATED!
        # Track tower-wise outputs
        tower_batch_losses = list()
        tower_sentence_losses = list()
        tower_words = list()

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE) as outer_scope:
            for gpu_id, gpu in enumerate(operators):
                name = 'tower_{}'.format(gpu_id)
                # Assign variables to the CPU and tensor OPs to GPUs
                with tf.device(assign_to_device(controller, gpu)), tf.name_scope(name):
                    # Compute and store losses and gradients
                    next_batch = iterator.get_next()
                    _, _, _, batch_loss, sentence_loss, words_processed = model.train_model(next_batch)

                    # Training/validation OPs
                    tower_batch_losses.append(batch_loss)
                    tower_words.append(words_processed)
                    tower_sentence_losses.append(sentence_loss)

                # Reuse variables
                outer_scope.reuse_variables()

            # Merged validation OPs
            averaged_batch_loss = tf.reduce_mean(tower_batch_losses)
            joint_sentence_losses = tf.concat(tower_sentence_losses, axis=0)
            total_words_processed = tf.reduce_sum(tower_words)
            valid_ops = [averaged_batch_loss, joint_sentence_losses, total_words_processed]
        return valid_ops
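The assign_to_device helper used here (and in the examples below) is not defined in any of the snippets. Below is a minimal sketch of the common TF1 device-setter idiom it presumably implements, with the argument order matching the calls above; this is an assumption, not taken from the source (note that Example #7's util.assign_to_device instead takes the worker device first and the parameter-server device as a keyword argument).

    # Hypothetical sketch: pin variable-creating OPs to the controller
    # (typically the CPU) and all other OPs to the worker device
    PS_OPS = ('Variable', 'VariableV2', 'AutoReloadVariable')

    def assign_to_device(controller, device):
        def _assign(op):
            node_def = op if isinstance(op, tf.NodeDef) else op.node_def
            return controller if node_def.op in PS_OPS else device
        return _assign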
Example #2
    def _get_valid_ops(next_batch):
        """ Defines single-device OPs used to evaluate the model; used for the external validation step only. """
        # TODO: DEPRECATED!
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            with tf.device(assign_to_device(controller, operators[0])):
                # Surface OPs
                _, _, _, batch_loss, sentence_loss, words_processed = model.train_model(next_batch)

        valid_ops = [batch_loss, sentence_loss, words_processed]
        return valid_ops
Example #3
    def _get_translation_ops(next_batch):
        """ Defines single-device OPs used to obtain translations from the model. """
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            with tf.device(assign_to_device(controller, operators[0])):
                # Surface OPs
                greedy_translations, _, _ = model.decode_greedy(next_batch)
                sampled_translations, _ = model.decode_with_sampling(next_batch)
                beam_translations, beam_scores = model.decode_with_beam_search(next_batch)

        translation_ops = [next_batch[0], next_batch[2], greedy_translations, sampled_translations,
                           beam_translations, beam_scores]
        return translation_ops
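A minimal sketch of how these translation OPs might be evaluated until the input iterator is exhausted (the session loop and the unpacked names are assumptions, not part of the source):

    # Hypothetical evaluation loop; OutOfRangeError is raised at session run
    # time once the dataset iterator is exhausted
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        try:
            while True:
                sources, targets, greedy, sampled, beams, scores = \
                    sess.run(translation_ops)
        except tf.errors.OutOfRangeError:
            pass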
Example #4
    def _get_train_ops(next_batch):
        """ Defines single-device OPs used to train the model. """
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            with tf.device(assign_to_device(controller, operators[0])):
                # Surface OPs
                grads_and_vars, _, batch_loss, sentence_loss, words_processed, _ = model.train_model(next_batch)
                proto_train_ops = [grads_and_vars, batch_loss, sentence_loss, words_processed]

        # Create summaries
        if not no_summaries:
            summaries = model.get_summaries(batch_loss)
            proto_train_ops.append(summaries)
        return proto_train_ops
Example #5
    def _get_multi_translation_ops():
        """ Defines multi-device OPs used to obtain translations from the model. CURRENTLY UNUSED. """
        # Track tower-wise outputs
        tower_greedy_translations = list()
        tower_sampled_translations = list()
        tower_beam_translations = list()
        tower_beam_scores = list()

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE) as outer_scope:
            for gpu_id, gpu in enumerate(operators):
                name = 'tower_{}'.format(gpu_id)
                # Assign variables to the CPU and tensor OPs to GPUs
                with tf.device(assign_to_device(controller, gpu)), tf.name_scope(name):
                    # Translation OPs (output has to be padded to same size before concatenation)
                    next_batch = iterator.get_next()
                    greedy_translations, _, _ = model.decode_greedy(next_batch)
                    sampled_translations, _ = model.decode_with_sampling(next_batch)
                    beam_translations, beam_scores = model.decode_with_beam_search(next_batch)

                    tower_greedy_translations.append(greedy_translations)
                    tower_sampled_translations.append(sampled_translations)
                    tower_beam_translations.append(beam_translations)
                    tower_beam_scores.append(beam_scores)
                # Reuse variables
                outer_scope.reuse_variables()

        with tf.name_scope('translation'), tf.device('/cpu:0'):
            # Merged translation OPs
            greedy_mst = tf.reduce_max([tf.shape(batch)[-1] for batch in tower_greedy_translations])
            sampled_mst = tf.reduce_max([tf.shape(batch)[-1] for batch in tower_sampled_translations])
            beam_mst = tf.reduce_max([tf.shape(batch)[-1] for batch in tower_beam_translations])

            padded_greedy_translations = \
                [_pad_to_max_step_len(batch, greedy_mst) for batch in tower_greedy_translations]
            padded_sampled_translations = \
                [_pad_to_max_step_len(batch, sampled_mst) for batch in tower_sampled_translations]
            padded_beam_translations = \
                [_pad_to_max_step_len(batch, beam_mst, True) for batch in tower_beam_translations]

            joint_greedy_translations = tf.concat(padded_greedy_translations, axis=0)
            joint_sampled_translations = tf.concat(padded_sampled_translations, axis=0)
            joint_beam_translations = tf.concat(padded_beam_translations, axis=0)
            joint_beam_scores = tf.concat(tower_beam_scores, axis=0)
            translation_ops = \
                [joint_greedy_translations, joint_sampled_translations, joint_beam_translations, joint_beam_scores]
        return translation_ops
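The _pad_to_max_step_len helper referenced above is likewise not shown. A plausible sketch, assuming greedy/sampled outputs are shaped [batch, steps] and the boolean flag marks beam outputs carrying an extra beam dimension:

    # Hypothetical sketch of the padding helper; assumes the last dimension
    # holds decoding steps and the flag marks an extra beam dimension
    def _pad_to_max_step_len(batch, max_step_len, has_beam_dim=False):
        pad_amount = max_step_len - tf.shape(batch)[-1]
        if has_beam_dim:
            paddings = [[0, 0], [0, 0], [0, pad_amount]]  # [batch, beam, steps]
        else:
            paddings = [[0, 0], [0, pad_amount]]  # [batch, steps]
        return tf.pad(batch, paddings, constant_values=0)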
Example #6
    def _get_train_ops():
        """ Defines multi-device OPs used to train the model. """
        # Track tower-wise outputs
        tower_grads_and_vars = list()
        tower_batch_losses = list()
        tower_sentence_losses = list()
        tower_words = list()
        tower_targets = list()

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE) as outer_scope:
            for gpu_id, gpu in enumerate(operators):
                try:
                    name = 'tower_{}'.format(gpu_id)
                    # Assign variables to the CPU and tensor OPs to GPUs
                    with tf.device(assign_to_device(controller, gpu)), tf.name_scope(name):
                        # Compute and store losses and gradients
                        next_batch = iterator.get_next()

                        grads_and_vars, _, batch_loss, sentence_loss, words_processed, words_evaluated = \
                            model.train_model(next_batch)
                        # Training OPs
                        tower_grads_and_vars.append(grads_and_vars)
                        tower_batch_losses.append(batch_loss)
                        tower_words.append(words_processed)
                        tower_sentence_losses.append(sentence_loss)
                        tower_targets.append(words_evaluated)

                    # Reuse variables
                    outer_scope.reuse_variables()

                # Note: this except clause has no effect, as this code runs at
                # graph construction time; OutOfRangeError is only raised when
                # the iterator is exhausted during a session run
                except tf.errors.OutOfRangeError:
                    break
        # Note: as above, this check runs at graph construction time and only
        # fires if no towers were built at all
        if not tower_grads_and_vars:
            raise tf.errors.OutOfRangeError(None, None, 'no model towers were constructed')

        # Weigh batch gradients based on the number of words contained within the batch
        max_tokens = tf.cast(tf.reduce_max(tower_targets), dtype=tf.int32)
        tower_weights = [tf.to_float(token_count / max_tokens) for token_count in tower_words]

        # Average grads
        averaged_grads_and_vars = list()
        for grads_and_vars in zip(*tower_grads_and_vars):
            grads = [grad for grad, _ in grads_and_vars]
            var = grads_and_vars[0][1]
            if not isinstance(grads[0], tf.IndexedSlices):
                # Apply tower weights
                grads = [grad * weight for grad, weight in zip(grads, tower_weights)]
                averaged_grad = tf.reduce_mean(grads, 0)
            else:
                # Concatenate IndexedSlices (equivalent to averaging of tensors)
                values = [grad.values * weight for grad, weight in zip(grads, tower_weights)]
                joint_values = tf.concat(values, axis=0)

                joint_indices = tf.concat([grad.indices for grad in grads], axis=0)
                averaged_grad = \
                    tf.IndexedSlices(values=joint_values, indices=joint_indices, dense_shape=grads[0].dense_shape)
            averaged_grad_and_var = (averaged_grad, var)
            averaged_grads_and_vars.append(averaged_grad_and_var)

        # Average losses
        averaged_batch_loss = tf.reduce_mean(tower_batch_losses)
        joint_sentence_losses = tf.concat(tower_sentence_losses, axis=0)
        total_words_processed = tf.reduce_sum(tower_words)
        # Compile OPs and add summaries
        proto_train_ops = [averaged_grads_and_vars, averaged_batch_loss, joint_sentence_losses, total_words_processed]
        if not no_summaries:
            # Create summaries
            averaged_summaries = model.get_summaries(averaged_batch_loss)
            proto_train_ops.append(averaged_summaries)
        # Proto-OPs are forwarded to gradient accumulation or optimization
        return proto_train_ops
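As the final comment notes, the proto-OPs are forwarded to gradient accumulation or optimization. A minimal sketch of how a caller might consume them (the optimizer choice and names here are assumptions, not from the source):

    # Hypothetical consumer of the proto train OPs
    averaged_grads_and_vars = proto_train_ops[0]
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    train_op = optimizer.apply_gradients(averaged_grads_and_vars)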
Example #7
def main():
    #Constants
    DATASET_PATH = os.path.join(".")
    LEARNING_RATE_1 = 0.0001
    EPOCHS = 2
    BATCH_SIZE = 32
    NUM_CLASSES = 48
    Z_SCORE = 1.96
    WEIGHT_DECAY_1 = 0.0005

    print("Current Setup:-")
    print(
        "Starting Learning Rate: {}, Epochs: {}, Batch Size: {}, Confidence Interval Z-Score: {}, Number of Classes: {}, Starting Weight Decay: {}"
        .format(LEARNING_RATE_1, EPOCHS, BATCH_SIZE, Z_SCORE, NUM_CLASSES,
                WEIGHT_DECAY_1))

    #Get the number of GPUs
    NUM_GPUS = util.get_available_gpus()

    print("Number of GPUs available : {}".format(NUM_GPUS))
    with tf.device('/cpu:0'):
        tower_grads = []
        reuse_vars = False
        dataset_len = 1207350

        #Placeholders
        learning_rate = tf.placeholder(tf.float32,
                                       shape=[],
                                       name='learning_rate')
        weight_decay = tf.placeholder(tf.float32,
                                      shape=[],
                                      name="weight_decay")

        for i in range(NUM_GPUS):
            with tf.device(
                    util.assign_to_device('/gpu:{}'.format(i),
                                          ps_device='/cpu:0')):

                #Need to split data between GPUs
                train_features, train_labels, train_filenames = util.train_input_fn(
                    DATASET_PATH, BATCH_SIZE, EPOCHS)
                print("At GPU {}, Train Features : {}".format(
                    i, train_features))

                #Model
                _, train_op, tower_grads, train_cross_entropy, train_conf_matrix_op, train_accuracy, reuse_vars = initiate_vgg_model(
                    train_features,
                    train_labels,
                    train_filenames,
                    NUM_CLASSES,
                    weight_decay,
                    learning_rate,
                    reuse=reuse_vars,
                    tower_grads=tower_grads,
                    gpu_num=i,
                    handle="training")
                #tf.summary.scalar("training_confusion_matrix", tf.reshape(tf.cast(conf_matrix_op, tf.float32),[1, NUM_CLASSES, NUM_CLASSES, 1]))

        tower_grads = util.average_gradients(tower_grads)
        # 'train_op' holds the optimizer instance returned by
        # initiate_vgg_model; applying the averaged gradients to it
        # yields the actual training OP
        train_op = train_op.apply_gradients(tower_grads)

        saver = tf.train.Saver()

        if not os.path.exists("./multi_dl_research_train/"):
            os.mkdir("./multi_dl_research_train/")

        with tf.Session() as sess:
            with np.printoptions(threshold=np.inf):
                writer = tf.summary.FileWriter("./multi_tensorboard_logs/")
                writer.add_graph(sess.graph)
                merged_summary = tf.summary.merge_all()
                train_highest_acc = 0
                sess.run([
                    tf.global_variables_initializer(),
                    tf.local_variables_initializer()
                ])

                for epoch in range(EPOCHS):
                    # NOTE: with EPOCHS = 2, the schedule below never
                    # triggers; it assumes a longer training run
                    if epoch == 18:
                        LEARNING_RATE_1 = 0.00005
                        print("Learning Rate changed to {} at epoch {}".format(
                            LEARNING_RATE_1, epoch))
                    elif epoch == 29:
                        LEARNING_RATE_1 = 0.00001
                        WEIGHT_DECAY_1 = 0.0
                        print("Learning Rate changed to {} at epoch {}".format(
                            LEARNING_RATE_1, epoch))
                        print("Weight Decay changed to {} at epoch {}".format(
                            WEIGHT_DECAY_1, epoch))
                    elif epoch == 42:
                        LEARNING_RATE_1 = 0.000005
                        print("Learning Rate changed to {} at epoch {}".format(
                            LEARNING_RATE_1, epoch))
                    elif epoch == 51:
                        LEARNING_RATE_1 = 0.000001
                        print("Learning Rate changed to {} at epoch {}".format(
                            LEARNING_RATE_1, epoch))

                    print("Current Epoch: {}".format(epoch))
                    for i in range(2):
                        print("Current Training Iteration : {}/{}".format(
                            i, 2))
                        train_acc, _, _, train_ce, train_summary = util.training(
                            BATCH_SIZE, NUM_CLASSES, learning_rate,
                            weight_decay, sess, train_op, train_conf_matrix_op,
                            LEARNING_RATE_1, WEIGHT_DECAY_1,
                            train_cross_entropy, merged_summary,
                            train_accuracy)
                        train_value1, train_value2 = util.confidence_interval(
                            train_acc, Z_SCORE, 32)
                        print("Training Accuracy : {}".format(train_acc))
                        print("Training Loss (Cross Entropy) : {}".format(
                            train_ce))
                        print("Training Confidence Interval: [{} , {}]".format(
                            train_value2, train_value1))
                        if train_highest_acc <= train_acc:
                            train_highest_acc = train_acc
                            print(
                                "Highest Training Accuracy Reached: {}".format(
                                    train_highest_acc))
                            # Save the model whenever a new highest training accuracy is reached
                            saver.save(
                                sess,
                                os.path.join("./multi_dl_research_train/",
                                             "model.ckpt"))
                            print(
                                "Latest model saved and TensorBoard logs updated"
                            )
                        writer.add_summary(
                            train_summary,
                            epoch * int((dataset_len * 0.8) / BATCH_SIZE) + i)
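Example #7's util.average_gradients is also not shown. A sketch of the classic multi-tower gradient-averaging idiom it presumably follows (hypothetical; assumes dense gradients):

    # Hypothetical sketch: average each variable's gradient across towers.
    # tower_grads is a list (one entry per tower) of (gradient, variable) lists
    def average_gradients(tower_grads):
        averaged = []
        for grads_and_vars in zip(*tower_grads):
            grads = [tf.expand_dims(g, 0) for g, _ in grads_and_vars]
            mean_grad = tf.reduce_mean(tf.concat(grads, axis=0), 0)
            averaged.append((mean_grad, grads_and_vars[0][1]))
        return averaged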