Example #1
0
def init_config():
    """Reconcile ``config.NUM_GPUS`` with the detected GPU count and print the config.

    The GPU count comes from ``hvd.size()`` when ``config.TRAINER`` is
    ``'horovod'`` and from ``get_num_gpu()`` otherwise.  When
    ``config.NUM_GPUS`` is ``None`` it is filled in with the detected count;
    otherwise it is checked against it.

    Raises:
        AssertionError: if the detected count is neither a multiple nor a
            divisor of 8, or if the configured ``NUM_GPUS`` is inconsistent
            with the detected count.
    """
    use_horovod = config.TRAINER == 'horovod'
    ngpu = hvd.size() if use_horovod else get_num_gpu()

    # Only counts that divide 8 or are multiples of 8 are accepted.
    # NOTE(review): ngpu == 0 also satisfies this check (0 % 8 == 0).
    assert ngpu % 8 == 0 or 8 % ngpu == 0, ngpu

    requested = config.NUM_GPUS
    if requested is None:
        config.NUM_GPUS = ngpu
    elif use_horovod:
        # Under horovod the configured value must match the detected size exactly.
        assert requested == ngpu
    else:
        # Otherwise it is enough that no more GPUs are requested than detected.
        assert requested <= ngpu

    print_config()
Example #2
0
def init_config():
    """Reconcile ``config.NUM_GPUS`` with the detected GPU count and print the config.

    The GPU count comes from ``hvd.size()`` when ``config.TRAINER`` is
    ``'horovod'`` and from ``get_num_gpu()`` otherwise.  When
    ``config.NUM_GPUS`` is ``None`` it is filled in with the detected count;
    otherwise it is checked against it.

    Raises:
        AssertionError: if the detected count is neither a multiple nor a
            divisor of 8, or if the configured ``NUM_GPUS`` is inconsistent
            with the detected count.
    """
    use_horovod = config.TRAINER == 'horovod'
    ngpu = hvd.size() if use_horovod else get_num_gpu()

    # Only counts that divide 8 or are multiples of 8 are accepted.
    # NOTE(review): ngpu == 0 also satisfies this check (0 % 8 == 0).
    assert ngpu % 8 == 0 or 8 % ngpu == 0, ngpu

    requested = config.NUM_GPUS
    if requested is None:
        config.NUM_GPUS = ngpu
    elif use_horovod:
        # Under horovod the configured value must match the detected size exactly.
        assert requested == ngpu
    else:
        # Otherwise it is enough that no more GPUs are requested than detected.
        assert requested <= ngpu

    print_config()
Example #3
0
    parser.add_argument('--visualize', action='store_true')
    parser.add_argument('--evaluate', help='path to the output json eval file')
    parser.add_argument('--predict', help='path to the input image file')
    args = parser.parse_args()
    if args.datadir:
        config.BASEDIR = args.datadir

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.visualize or args.evaluate or args.predict:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'

        assert args.load
        print_config()
        if args.visualize:
            visualize(args.load)
        else:
            pred = OfflinePredictor(
                PredictConfig(model=Model(),
                              session_init=get_model_loader(args.load),
                              input_names=['image'],
                              output_names=get_model_output_names()))
            if args.evaluate:
                assert args.evaluate.endswith('.json')
                offline_evaluate(pred, args.evaluate)
            elif args.predict:
                COCODetection(
                    config.BASEDIR,
                    'train2014')  # to load the class names into caches
Example #4
0
    parser.add_argument('--visualize', action='store_true')
    parser.add_argument('--evaluate', help='path to the output json eval file')
    parser.add_argument('--predict', help='path to the input image file')
    args = parser.parse_args()
    if args.datadir:
        config.BASEDIR = args.datadir

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.visualize or args.evaluate or args.predict:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'

        assert args.load
        print_config()

        if args.predict or args.visualize:
            config.RESULT_SCORE_THRESH = config.RESULT_SCORE_THRESH_VIS

        if args.visualize:
            visualize(args.load)
        else:
            pred = OfflinePredictor(PredictConfig(
                model=Model(),
                session_init=get_model_loader(args.load),
                input_names=['image'],
                output_names=get_model_output_names()))
            if args.evaluate:
                assert args.evaluate.endswith('.json')
                offline_evaluate(pred, args.evaluate)
Example #5
0
            common.msg('Status', '{0} updated'.format(app))
            apps[app] = proxy
            config_changed = True

        if args.remove == app or args.remove == None:
            common.msg('Status', '{0} proxy is off'.format(app))
            apps[app]['use_proxy'] = False
            config_changed = True

    if True == config_changed:
        common.msg('Status', 'Saving')
        configuration.save_config()

    if True == args.config:
        for app in apps:
            common.print_config(app)

    if False == is_docker:
        # cant configure a docker inside a docker
        if True == args.all or True == args.docker:
            app_docker.configure()

    if True == args.all or True == args.git:
        app_git.configure()
    if True == args.all or True == args.npm:
        app_npm.configure()
    if True == args.all or True == args.yarn:
        app_yarn.configure()
    #if True == args.all or True == args.shell:
    #    configure_shell()
Example #6
0
def main(_):
    """Run a single evaluation pass of the U-Net model on the 'guidewire' dataset.

    Builds the input pipeline and the network in a fresh graph, registers
    image summaries for predictions, inputs and labels, defines a streaming
    mean-absolute-error metric between predicted and ground-truth labels, and
    evaluates the checkpoint resolved from the module-level ``ckpt_dir`` with
    ``slim.evaluation.evaluate_once``.

    Args:
        _: Ignored positional argument (presumably the argv list passed by
            ``tf.app.run`` -- confirm against the caller).

    Raises:
        ValueError: if ``ckpt_dir`` is a directory containing no checkpoint.
    """
    print_config(config)
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        deploy_config = model_deploy.DeploymentConfig()
        # Create the eval step on the variables device. The returned tensor is
        # not used here; the call is kept for the variable it adds to the graph.
        with tf.device(deploy_config.variables_device()):
            tf.contrib.training.get_or_create_eval_step()
        dataset = get_dataset(dataset_name='guidewire', dataset_dir=dataset_dir)
        with tf.device(deploy_config.inputs_device()):
            # Read examples in a fixed order (shuffle=False).
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                shuffle=False,
                common_queue_capacity=20 * batch_size,
                common_queue_min=10 * batch_size)
            [image, label] = provider.get(['image', 'labels_class'])
            image, label = preprocess_fn(image, label, eval_image_size, is_training=False)
            images, labels = tf.train.batch(
                [image, label],
                batch_size=batch_size,
                num_threads=4,
                capacity=5 * batch_size)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * 1)
            images, labels = batch_queue.dequeue()
            network_fn = unet
            logits, endpoints = network_fn(images, config)
            variables_to_restore = slim.get_variables_to_restore()

            # Per-pixel class prediction: argmax over the last axis of the
            # logits, with a trailing axis added back for the image summary.
            pred = tf.argmax(logits, axis=-1)
            pred = tf.expand_dims(pred, -1)
            # Scale class ids by 255 for visualisation (assumes binary masks
            # so that class 1 renders as white -- TODO confirm).
            label_visualize = tf.cast(pred, tf.uint8) * 255
            tf.summary.image('pred', label_visualize)
            tf.summary.image('images', images)
            tf.summary.image('labels', tf.cast(labels, tf.uint8)*255)

        # NOTE(review): the metric key 'veal/...' looks like a typo for
        # 'eval/...'; left unchanged because it is the name the metric is
        # registered under.
        _, names_to_updates = slim.metrics.aggregate_metric_map({
            'veal/mean_absolute': slim.metrics.streaming_mean_absolute_error(tf.cast(pred, tf.int64),
                                                                             tf.cast(labels,tf.int64))
        })

        # ckpt_dir may be either a directory of checkpoints (use the latest
        # one) or a path to one specific checkpoint.
        if tf.gfile.IsDirectory(ckpt_dir):
            checkpoint_path = tf.train.latest_checkpoint(ckpt_dir)
            if checkpoint_path is None:
                raise ValueError('No checkpoint found in %s' % ckpt_dir)
        else:
            checkpoint_path = ckpt_dir

        tf.logging.info('Evaluating %s' % checkpoint_path)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0, allow_growth=True)
        sess_config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)

        # Evaluate the checkpoint that was resolved and logged above, rather
        # than a hard-coded absolute path. For continuous evaluation,
        # slim.evaluation.evaluation_loop can be used with
        # checkpoint_dir=ckpt_dir instead.
        slim.evaluation.evaluate_once(
            master='',
            checkpoint_path=checkpoint_path,
            logdir=eval_dir,
            variables_to_restore=variables_to_restore,
            eval_op=list(names_to_updates.values()),
            summary_op=tf.summary.merge_all(),
            session_config=sess_config
        )
Example #7
0
def main():
    """Build the U-Net training graph for the 'guidewire' dataset and train it.

    Steps, in graph-construction order:
      1. Create the global step and the queue-based input pipeline.
      2. Build the model clone(s) via ``model_deploy.create_clones``; each
         clone dequeues a batch, runs the network and registers the
         segmentation loss.
      3. Register scalar/image summaries, optional moving averages, the
         learning rate and the optimizer.
      4. Group all update ops behind ``train_tensor`` and hand it to
         ``slim.learning.train``.

    All hyper-parameters are read from the module-level ``config`` object.
    """
    global config
    print_config(config)
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        deploy_config = model_deploy.DeploymentConfig()
        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()
        # dataset
        dataset = get_dataset(dataset_name='guidewire',
                              dataset_dir=config.Dataset.dataset_dir)
        network_fn = unet
        preprocess_fn = preprocess_image_and_label_Simple
        # Input pipeline: read -> preprocess -> batch -> prefetch.
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=4,
                common_queue_capacity=20 * config.Train.batch_size,
                common_queue_min=10 * config.Train.batch_size)
            [image, label] = provider.get(['image', 'labels_class'])
            image, label = preprocess_fn(
                image,
                label,
                config.Train.image_size,
                is_training=True,
                num_classes=config.Dataset.seg_num_class)
            images, labels = tf.train.batch([image, label],
                                            batch_size=config.Train.batch_size,
                                            num_threads=4,
                                            capacity=5 *
                                            config.Train.batch_size)
            batch_queue = slim.prefetch_queue.prefetch_queue([images, labels],
                                                             capacity=2 * 1)

        # Loss function is looked up from the configured name.
        seg_loss_func = get_seg_loss_func(config.Train.seg_loss_func)

        def clone_fn(batch_queue):
            """Build one model clone: dequeue a batch, run the net, add the loss.

            Returns the network's endpoints dict, extended with the input
            'images' and 'labels' tensors so they can be summarised later.
            """
            images, labels = batch_queue.dequeue()

            logits, endpoints = network_fn(images, config)
            # loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits, name='loss'))
            seg_loss = seg_loss_func(logits, labels)
            # loss = dice_loss(logits, labels)
            slim.losses.add_loss(seg_loss)
            endpoints['images'] = images
            endpoints['labels'] = labels
            return endpoints

        # Gather initial summaries.
        # (Snapshot taken before the clones are created.)
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Only the first clone's update ops are collected here.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Image summaries from the first clone's outputs.
        # NOTE(review): relies on the network's endpoints containing a
        # 'logits' entry -- confirm against `unet`.
        end_points = clones[0].outputs
        pred = tf.argmax(end_points['logits'], axis=-1)
        pred = tf.expand_dims(pred, -1)
        # Class ids are scaled by 255 for display (assumes binary masks so
        # that class 1 renders as white -- TODO confirm).
        pred_visualize = tf.cast(pred, tf.uint8) * 255
        tf.summary.image('pred', pred_visualize)
        tf.summary.image('images', end_points['images'])
        label_visualize = tf.cast(end_points['labels'], tf.uint8) * 255
        tf.summary.image('labels', label_visualize)

        #################################
        # Configure the moving averages #
        #################################
        # NOTE(review): this test is `is not None`, while the later test that
        # actually applies the averages uses plain truthiness; a decay of 0
        # would build the averager here but never apply it.
        if config.Train.moving_average_decay is not None:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                config.Train.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step, config)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if config.Train.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()
        # Compute the total loss and the per-variable gradients across clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))
        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # train_tensor yields total_loss, but is gated on every update op, so
        # evaluating it also runs the gradient step and the other updates.
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0,
                                    allow_growth=True)
        sess_config = tf.ConfigProto(gpu_options=gpu_options,
                                     log_device_placement=False)
        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=config.Train.train_log_dir,
            master='',
            is_chief=True,
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=config.Train.max_number_of_steps,
            log_every_n_steps=config.Train.log_every_n_steps,
            save_summaries_secs=config.Train.save_summaries_secs,
            save_interval_secs=config.Train.save_interval_secs,
            session_config=sess_config,
            sync_optimizer=None)