import sys

# Project-local helpers; the import paths below are assumed from the usual
# template layout and are not confirmed by the snippet.
from data_loader.data_generator import DataGenerator
from models.multi_label_conv_model import MultiLabelConvModel
from trainers.multi_label_conv_model_trainer import MultiLabelConvModelTrainer
from utils.config import process_config
from utils.dirs import create_dirs
from utils.args import get_args


def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except Exception:
        print("Missing or invalid arguments.")
        sys.exit(1)

    # create the experiments dirs
    create_dirs([config.summary_dir, config.checkpoint_dir])

    print('Create the data generator.')
    data_generator = DataGenerator(config)

    print('Create the model.')
    model = MultiLabelConvModel(config, data_generator.get_word_index())

    print('Create the trainer.')
    trainer = MultiLabelConvModelTrainer(model.model,
                                         data_generator.get_train_data(),
                                         config)

    print('Start training the model.')
    trainer.train()

    print('Visualize the losses.')
    trainer.visualize()
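
This example (and the next) relies on two helpers that are not shown: get_args, which reads a --config flag from the command line, and process_config, which loads the JSON file into an object with attribute access and derives the experiment directories. A minimal sketch of what they might look like, assuming an argparse flag named --config, a Bunch-style config object, and an experiments/<exp_name>/ output layout (none of which are confirmed by the snippets):

import argparse
import json
import os

from bunch import Bunch  # dict subclass with attribute access


def get_args():
    # Parse the single command-line argument: the config file path.
    parser = argparse.ArgumentParser(description="Run an experiment.")
    parser.add_argument('-c', '--config', required=True,
                        help='path to the JSON configuration file')
    return parser.parse_args()


def process_config(json_file):
    # Load the JSON config and derive per-experiment output directories
    # (directory layout assumed, not taken from the snippets).
    with open(json_file) as f:
        config = Bunch(json.load(f))
    config.summary_dir = os.path.join('experiments', config.exp_name, 'summary')
    config.checkpoint_dir = os.path.join('experiments', config.exp_name, 'checkpoint')
    return config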
Example #2
import sys

# Project-local helpers; the import paths below are assumed from the usual
# template layout and are not confirmed by the snippet.
from data_loader.data_generator import DataGenerator
from models.cnn_model import CNNModel
from trainers.model_trainer import ModelTrainer
from utils.config import process_config
from utils.dirs import create_dirs
from utils.args import get_args


def main():

    try:
        args = get_args()
        config = process_config(args.config)
    except Exception:
        print("Missing or invalid arguments.")
        sys.exit(1)

    # create the experiments dirs
    create_dirs([config.summary_dir, config.checkpoint_dir])

    print("Create the data generator")
    data_generator = DataGenerator(config)

    print("Create the model.")
    model = CNNModel(config, data_generator.get_word_index())

    print("Trainer initiatise")
    trainer = ModelTrainer(model.model, data_generator.get_train_data(),
                           config)

    print("Training Start")
    trainer.train()

    print("Visualization of loss and accuracy")
    trainer.visualize("FastText +CNN")
Example #3
import datetime

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import control_flow_ops

from utils.config import process_config
from data_loader.data_generator import DataGenerator
# Project-local modules; import paths assumed from the repository layout.
from deployment import model_deploy as deploy
from nets import u_net
from utils import tf_utils

slim = tf.contrib.slim

# Command-line flag consumed as FLAGS.config_path in main() below; the flag
# name is taken from that usage, the default from the literal used here.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('config_path', '../configs/example.json',
                           'Path to the JSON configuration file.')

global_step = tf.Variable(0, trainable=False)
config = '../configs/example.json'


config = process_config(config)
print(config)

dataset_train = DataGenerator(config.input)

x_train, y_train = dataset_train.get_train_data()


sess = tf.train.MonitoredTrainingSession(
        master='',
        is_chief=True,
        checkpoint_dir=None,
        scaffold=None,
        hooks=None,
        chief_only_hooks=None,
        save_checkpoint_secs=600,
        save_summaries_steps=100,
        save_summaries_secs=None,
        config=None,
        stop_grace_period_secs=120,
        log_step_count_steps=100)


def main(_):
    # capture the config path from the run arguments,
    # then process the JSON configuration file
    config = process_config(FLAGS.config_path)
    print(config)

    tf.logging.set_verbosity(tf.logging.DEBUG)

    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = deploy.DeploymentConfig(
            num_clones=config.deploy.num_clone)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = tf.Variable(0, trainable=False, name='global_step')

        # select model and build net
        net = u_net.Unet(config)

        # create batch dataset
        with tf.device(deploy_config.inputs_device()):
            data = DataGenerator(config.input)
            x_train, y_train = data.get_train_data()
            x_train = tf.expand_dims(x_train, -1)
            x_train.set_shape([
                None, config.input.img_out_shape[0],
                config.input.img_out_shape[1], config.input.img_out_shape[2]
            ])
            y_train.set_shape([
                None, config.input.mask_out_shape[0],
                config.input.mask_out_shape[1]
            ])
            y_train = tf.cast(y_train, tf.int32)
            y_train_hot = tf.one_hot(y_train,
                                     depth=config.network.num_classes,
                                     axis=-1)

            batch_queue = [x_train, y_train_hot]

        # =================================================================== #
        # Define the model running on every GPU.
        # =================================================================== #
        def clone_fn(batch_queue):
            # Build the network on one device and attach its loss.
            x_train, y_train_hot = batch_queue
            print(x_train)
            print(y_train_hot)
            f_score, end_points = net.net(x_train)
            # Add loss function.
            net.loss(f_score, y_train_hot, type=config.network.loss_type)

            return f_score, end_points, x_train, y_train_hot

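        # Start from the summaries registered so far; clone-scoped summaries
        # are merged back in after the clones are built.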
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

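        # Log the input image, the ground-truth mask and the prediction of
        # the first clone as image summaries (two samples each).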
        f_score, _, x_train, y_train_hot = clones[0].outputs
        f_score_img = tf.expand_dims(
            tf.cast(tf.argmax(f_score, axis=-1), tf.float32), -1)
        y_train_img = tf.argmax(y_train_hot, axis=-1)
        summaries.add(tf.summary.image("Images/Original_image", x_train, 2))
        summaries.add(
            tf.summary.image(
                "Images/Ground_truth",
                tf.expand_dims(tf.cast(y_train_img, tf.float32), -1), 2))
        summaries.add(tf.summary.image("Images/Predict_", f_score_img, 2))

        # Add per-class precision, recall and Dice summaries.
        f_score = tf.cast(tf.argmax(f_score, -1), tf.int32)
        f_score = tf.one_hot(f_score,
                             depth=config.network.num_classes,
                             axis=-1)
        # Per-class true positives, predicted positives and actual positives;
        # 1e-5 avoids division by zero for classes absent from the batch.
        pred = tf.reduce_sum(f_score * y_train_hot, axis=(0, 1, 2))
        all_pred = tf.reduce_sum(f_score, axis=(0, 1, 2)) + 1e-5
        all_true = tf.reduce_sum(y_train_hot, axis=(0, 1, 2)) + 1e-5
        prec = pred / all_pred
        recall = pred / all_true
        dice = pred * 2 / (all_true + all_pred)
        with tf.variable_scope('evaluation'):
            for i in range(config.network.num_classes):
                summaries.add(
                    tf.summary.scalar('{}th_class_precision'.format(i),
                                      prec[i]))
                summaries.add(
                    tf.summary.scalar('{}th_class_recall'.format(i),
                                      recall[i]))
                summaries.add(
                    tf.summary.scalar('{}th_class_dice'.format(i), dice[i]))

        #################################
        # Configure the moving averages #
        #################################
        if config.train.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                config.train.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(
                config, global_step)
            optimizer = tf_utils.configure_optimizer(config.train,
                                                     learning_rate)

        if config.train.moving_average_decay:
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = tf_utils.get_variables_to_train(config.finetune)

        # Compute the total loss and the gradients across all clones.
        total_loss, clones_gradients = deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)

        if config.train.clip_gradient_norm > 0:
            with ops.name_scope('clip_grads'):
                clones_gradients = slim.learning.clip_gradient_norms(
                    clones_gradients, config.train.clip_gradient_norm)
        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # train_tensor = slim.learning.create_train_op(total_loss, optimizer, gradient_multipliers=gradient_multipliers)
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))
        summaries.add(tf.summary.scalar('total_loss', total_loss))
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.deploy.gpu_memory_fraction)
        configproto = tf.ConfigProto(
            gpu_options=gpu_options,
            log_device_placement=False,
            allow_soft_placement=True,
        )

        saver = tf.train.Saver(max_to_keep=100)

        scaffold = tf.train.Scaffold(
            init_op=None,
            init_feed_dict=None,
            init_fn=tf_utils.get_init_fn(config),
            ready_op=None,
            ready_for_local_init_op=None,
            local_init_op=[data.get_iterator().initializer],
            summary_op=summary_op,
            saver=saver,
            copy_from_scaffold=None)

        ckpt_hook = tf.train.CheckpointSaverHook(
            checkpoint_dir=config.summary.train_dir,
            save_secs=config.summary.save_checkpoint_secs,
            save_steps=config.summary.save_checkpoint_steps,
            saver=None,
            checkpoint_basename='model.ckpt',
            scaffold=scaffold,
            listeners=None)
        sum_writer = tf.summary.FileWriter(logdir=config.summary.train_dir)
        sum_hook = tf.train.SummarySaverHook(
            save_steps=None,
            save_secs=config.summary.save_summaries_secs,
            output_dir=config.summary.train_dir,
            summary_writer=sum_writer,
            scaffold=None,
            summary_op=summary_op,
        )

        with tf.train.MonitoredTrainingSession(
                master='',
                is_chief=True,
                checkpoint_dir=config.summary.train_dir,
                scaffold=scaffold,
                hooks=[ckpt_hook, sum_hook],
                save_checkpoint_secs=None,
                save_summaries_steps=None,
                save_summaries_secs=None,
                config=configproto,
                log_step_count_steps=config.summary.log_every_n_steps) as sess:
            while not sess.should_stop():
                _, loss, g_step = sess.run(
                    [train_tensor, total_loss, global_step])
                print("step {}: loss = {}".format(g_step, loss))


if __name__ == '__main__':
    tf.app.run()