def init_config():
    """Reconcile ``config.NUM_GPUS`` with the detected GPU count and print the config.

    The detected count is ``hvd.size()`` when ``config.TRAINER`` is
    ``'horovod'`` and ``get_num_gpu()`` for every other trainer.  The count
    must either be a multiple of 8 or divide 8 evenly.

    If ``config.NUM_GPUS`` is ``None`` it is set to the detected count.
    Otherwise the preset value must equal the detected count under horovod,
    and must not exceed it for any other trainer.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    uses_horovod = config.TRAINER == 'horovod'
    ngpu = hvd.size() if uses_horovod else get_num_gpu()
    # The offending count is attached to the assertion as its message.
    assert ngpu % 8 == 0 or 8 % ngpu == 0, ngpu

    if config.NUM_GPUS is None:
        config.NUM_GPUS = ngpu
    elif uses_horovod:
        assert config.NUM_GPUS == ngpu
    else:
        assert config.NUM_GPUS <= ngpu

    print_config()
# Command-line fragment: registers the --visualize, --evaluate and --predict
# options on `parser`, parses the arguments, copies `args.datadir` into
# config.BASEDIR and `args.gpu` into CUDA_VISIBLE_DEVICES when they are set.
# When any of the three inference options is given it sets
# TF_CUDNN_USE_AUTOTUNE to '0', asserts that `args.load` is set, prints the
# config, and then either calls visualize(args.load) or builds an
# OfflinePredictor used by offline_evaluate() / the prediction branch.
# NOTE(review): `parser` is created before this fragment and the
# `elif args.predict:` branch appears to continue past it -- confirm.
# NOTE(review): the line breaks and indentation of this fragment were lost, so
# everything after the first `#` below is currently parsed as a comment; the
# intended nesting must be restored from the original file.
parser.add_argument('--visualize', action='store_true') parser.add_argument('--evaluate', help='path to the output json eval file') parser.add_argument('--predict', help='path to the input image file') args = parser.parse_args() if args.datadir: config.BASEDIR = args.datadir if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu if args.visualize or args.evaluate or args.predict: # autotune is too slow for inference os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0' assert args.load print_config() if args.visualize: visualize(args.load) else: pred = OfflinePredictor( PredictConfig(model=Model(), session_init=get_model_loader(args.load), input_names=['image'], output_names=get_model_output_names())) if args.evaluate: assert args.evaluate.endswith('.json') offline_evaluate(pred, args.evaluate) elif args.predict: COCODetection( config.BASEDIR, 'train2014') # to load the class names into caches
# Command-line fragment: registers the --visualize, --evaluate and --predict
# options on `parser`, parses the arguments, copies `args.datadir` into
# config.BASEDIR and `args.gpu` into CUDA_VISIBLE_DEVICES when they are set.
# When any of the three inference options is given it sets
# TF_CUDNN_USE_AUTOTUNE to '0', asserts that `args.load` is set and prints the
# config.  For --predict / --visualize it replaces config.RESULT_SCORE_THRESH
# with config.RESULT_SCORE_THRESH_VIS.  It then either calls
# visualize(args.load) or builds an OfflinePredictor; with --evaluate the
# output path must end in '.json' and is passed to offline_evaluate().
# NOTE(review): `parser` is created before this fragment, and no --predict
# branch is visible after the --evaluate one -- it may continue past this
# fragment; confirm.
# NOTE(review): the line breaks and indentation of this fragment were lost, so
# everything after the first `#` below is currently parsed as a comment; the
# intended nesting must be restored from the original file.
parser.add_argument('--visualize', action='store_true') parser.add_argument('--evaluate', help='path to the output json eval file') parser.add_argument('--predict', help='path to the input image file') args = parser.parse_args() if args.datadir: config.BASEDIR = args.datadir if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu if args.visualize or args.evaluate or args.predict: # autotune is too slow for inference os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0' assert args.load print_config() if args.predict or args.visualize: config.RESULT_SCORE_THRESH = config.RESULT_SCORE_THRESH_VIS if args.visualize: visualize(args.load) else: pred = OfflinePredictor(PredictConfig( model=Model(), session_init=get_model_loader(args.load), input_names=['image'], output_names=get_model_output_names())) if args.evaluate: assert args.evaluate.endswith('.json') offline_evaluate(pred, args.evaluate)
# Fragment of a proxy-configuration command.  Visible steps: report an app as
# updated and store `proxy` under apps[app]; switch an app's 'use_proxy' flag
# off when `args.remove` names it or is None; save the configuration when
# `config_changed` is set; print every app's config when `args.config` is set;
# and, when not running inside Docker, call configure() on the docker / git /
# npm / yarn helpers selected by `args.all` or the per-tool flag.
# NOTE(review): this starts mid-body -- `app`, `proxy`, `apps`,
# `config_changed`, `args` and `is_docker` are bound earlier, presumably inside
# a loop over apps; confirm against the full file.
# NOTE(review): the line breaks and indentation were lost, so everything after
# the first `#` below is currently parsed as a comment and the nesting of the
# statements cannot be read from this line alone.
# NOTE(review): `args.remove == None` and the `True == x` / `False == x`
# comparisons would conventionally be `is None` and plain truthiness tests;
# left untouched here because the surrounding structure is not visible.
common.msg('Status', '{0} updated'.format(app)) apps[app] = proxy config_changed = True if args.remove == app or args.remove == None: common.msg('Status', '{0} proxy is off'.format(app)) apps[app]['use_proxy'] = False config_changed = True if True == config_changed: common.msg('Status', 'Saving') configuration.save_config() if True == args.config: for app in apps: common.print_config(app) if False == is_docker: # cant configure a docker inside a docker if True == args.all or True == args.docker: app_docker.configure() if True == args.all or True == args.git: app_git.configure() if True == args.all or True == args.npm: app_npm.configure() if True == args.all or True == args.yarn: app_yarn.configure() #if True == args.all or True == args.shell: # configure_shell()
def main(_):
    """Run a single evaluation pass of the ``unet`` model on the 'guidewire' dataset.

    Builds the input pipeline (unshuffled provider -> ``preprocess_fn`` ->
    batching -> prefetch queue), runs ``unet`` on each batch, registers image
    summaries for predictions, inputs and labels, and tracks a streaming mean
    absolute error between predictions and labels.  The graph is then handed
    to ``slim.evaluation.evaluate_once``.

    The checkpoint to evaluate comes from the module-level ``ckpt_dir``: if it
    is a directory, its latest checkpoint is used; otherwise ``ckpt_dir``
    itself is treated as the checkpoint path.  (Previously the resolved path
    was only logged and a hard-coded absolute path was evaluated instead.)

    Args:
        _: Unused positional argument (presumably the argv list supplied by
            ``tf.app.run`` -- confirm against the caller).

    Raises:
        ValueError: if ``ckpt_dir`` is a directory that contains no checkpoint.
    """
    print_config(config)
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        deploy_config = model_deploy.DeploymentConfig()

        # Create the eval step on the variables device.  Only the side effect
        # of creating it is needed; the returned tensor is not used here.
        with tf.device(deploy_config.variables_device()):
            tf.contrib.training.get_or_create_eval_step()

        dataset = get_dataset(dataset_name='guidewire', dataset_dir=dataset_dir)

        # NOTE(review): the original indentation of this function was lost;
        # the extent of this device scope follows the usual TF-Slim script
        # layout (provider through prefetch queue) -- confirm.
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                shuffle=False,
                common_queue_capacity=20 * batch_size,
                common_queue_min=10 * batch_size)
            [image, label] = provider.get(['image', 'labels_class'])
            image, label = preprocess_fn(
                image, label, eval_image_size, is_training=False)
            images, labels = tf.train.batch(
                [image, label],
                batch_size=batch_size,
                num_threads=4,
                capacity=5 * batch_size)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * 1)
        images, labels = batch_queue.dequeue()

        logits, _ = unet(images, config)
        variables_to_restore = slim.get_variables_to_restore()

        # Per-pixel class index with a trailing axis added for the image
        # summary.  The * 255 scaling is for display (presumably the masks
        # hold 0/1 values -- confirm).
        pred = tf.argmax(logits, axis=-1)
        pred = tf.expand_dims(pred, -1)
        pred_visualize = tf.cast(pred, tf.uint8) * 255
        tf.summary.image('pred', pred_visualize)
        tf.summary.image('images', images)
        tf.summary.image('labels', tf.cast(labels, tf.uint8) * 255)

        # NOTE(review): 'veal' looks like a typo for 'eval'; the key is kept
        # byte-for-byte because it is the metric's runtime name.
        _, names_to_updates = slim.metrics.aggregate_metric_map({
            'veal/mean_absolute': slim.metrics.streaming_mean_absolute_error(
                tf.cast(pred, tf.int64), tf.cast(labels, tf.int64)),
        })

        if tf.gfile.IsDirectory(ckpt_dir):
            checkpoint_path = tf.train.latest_checkpoint(ckpt_dir)
            if checkpoint_path is None:
                # latest_checkpoint() returns None when nothing is found;
                # fail here with a clear message rather than inside restore.
                raise ValueError(
                    'No checkpoint found in directory: %s' % ckpt_dir)
        else:
            checkpoint_path = ckpt_dir
        tf.logging.info('Evaluating %s', checkpoint_path)

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=1.0, allow_growth=True)
        sess_config = tf.ConfigProto(
            gpu_options=gpu_options, log_device_placement=False)

        slim.evaluation.evaluate_once(
            master='',
            checkpoint_path=checkpoint_path,
            logdir=eval_dir,
            variables_to_restore=variables_to_restore,
            eval_op=list(names_to_updates.values()),
            summary_op=tf.summary.merge_all(),
            session_config=sess_config)
def main():
    """Build the training graph for ``unet`` on the 'guidewire' dataset and train it.

    Steps, in graph-construction order:

    1. Create the global step on the variables device.
    2. Build the input pipeline on the inputs device: dataset provider ->
       ``preprocess_image_and_label_Simple`` -> batching -> prefetch queue.
    3. Create the model clone(s) via ``model_deploy.create_clones``; each
       clone dequeues a batch, runs ``unet`` and registers the segmentation
       loss selected by ``config.Train.seg_loss_func``.
    4. Register scalar summaries (per-loss, learning rate, total loss) and
       image summaries (prediction, input, label).
    5. Optionally set up an exponential moving average of model variables.
    6. Build the optimizer, gradient updates and the ``train_op`` tensor.
    7. Hand everything to ``slim.learning.train``.

    All hyper-parameters are read from the module-level ``config``.
    """
    print_config(config)
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        deploy_config = model_deploy.DeploymentConfig()

        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        dataset = get_dataset(dataset_name='guidewire',
                              dataset_dir=config.Dataset.dataset_dir)

        # ---- Input pipeline ------------------------------------------------
        with tf.device(deploy_config.inputs_device()):
            batch_size = config.Train.batch_size
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=4,
                common_queue_capacity=20 * batch_size,
                common_queue_min=10 * batch_size)
            raw_image, raw_label = provider.get(['image', 'labels_class'])
            image, label = preprocess_image_and_label_Simple(
                raw_image,
                raw_label,
                config.Train.image_size,
                is_training=True,
                num_classes=config.Dataset.seg_num_class)
            images, labels = tf.train.batch(
                [image, label],
                batch_size=batch_size,
                num_threads=4,
                capacity=5 * batch_size)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * 1)

        seg_loss_func = get_seg_loss_func(config.Train.seg_loss_func)

        # ---- Model ---------------------------------------------------------
        def clone_fn(batch_queue):
            """Build one model replica and return its endpoints.

            The dequeued images and labels are stored in the endpoints under
            'images' and 'labels' so they can be summarised later.
            """
            images, labels = batch_queue.dequeue()
            logits, endpoints = unet(images, config)
            slim.losses.add_loss(seg_loss_func(logits, labels))
            endpoints['images'] = images
            endpoints['labels'] = labels
            return endpoints

        # Summaries that already exist before any clone is created.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(
            deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # One scalar summary per loss registered by the first clone.
        summaries.update(
            tf.summary.scalar('losses/%s' % loss.op.name, loss)
            for loss in tf.get_collection(
                tf.GraphKeys.LOSSES, first_clone_scope))

        # Image summaries for predictions, inputs and labels of the first
        # clone; predictions and labels are cast to uint8 and scaled by 255.
        end_points = clones[0].outputs
        pred = tf.expand_dims(tf.argmax(end_points['logits'], axis=-1), -1)
        tf.summary.image('pred', tf.cast(pred, tf.uint8) * 255)
        tf.summary.image('images', end_points['images'])
        tf.summary.image(
            'labels', tf.cast(end_points['labels'], tf.uint8) * 255)

        # ---- Moving averages -----------------------------------------------
        if config.Train.moving_average_decay is None:
            moving_average_variables, variable_averages = None, None
        else:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                config.Train.moving_average_decay, global_step)

        # ---- Optimisation --------------------------------------------------
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(
                dataset.num_samples, global_step, config)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        # NOTE(review): this is a truthiness test, whereas the setup above
        # tests `is not None`; a decay of 0 creates the averager but never
        # applies it.  Kept exactly as in the original.
        if config.Train.moving_average_decay:
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        variables_to_train = _get_variables_to_train()
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        grad_updates = optimizer.apply_gradients(
            clones_gradients, global_step=global_step)
        update_ops.append(grad_updates)

        # Evaluating `train_tensor` runs every update op first.
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Pick up the summaries created inside the first clone's scope, then
        # merge everything into a single op.
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        sess_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=1.0, allow_growth=True),
            log_device_placement=False)

        # ---- Training loop -------------------------------------------------
        slim.learning.train(
            train_tensor,
            logdir=config.Train.train_log_dir,
            master='',
            is_chief=True,
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=config.Train.max_number_of_steps,
            log_every_n_steps=config.Train.log_every_n_steps,
            save_summaries_secs=config.Train.save_summaries_secs,
            save_interval_secs=config.Train.save_interval_secs,
            session_config=sess_config,
            sync_optimizer=None)