def visualize(cfg, vis_file_list=None, use_gpu=False, vis_dir="visual", also_save_raw_results=False, ckpt_dir=None, log_writer=None, local_test=False, **kwargs): if vis_file_list is None: vis_file_list = cfg.DATASET.TEST_FILE_LIST dataset = SegDataset(file_list=vis_file_list, mode=ModelPhase.VISUAL, data_dir=cfg.DATASET.DATA_DIR) startup_prog = fluid.Program() test_prog = fluid.Program() pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL) # Clone forward graph test_prog = test_prog.clone(for_test=True) # Generator full colormap for maximum 256 classes color_map = get_color_map(256) # Get device environment place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_prog) ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) save_dir = os.path.join(vis_dir, 'visual_results') makedirs(save_dir) if also_save_raw_results: raw_save_dir = os.path.join(vis_dir, 'raw_results') makedirs(raw_save_dir) fetch_list = [pred.name] test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True) img_cnt = 0 for imgs, grts, img_names, valid_shapes, org_shapes in test_reader: pred_shape = (imgs.shape[2], imgs.shape[3]) pred, = exe.run(program=test_prog, feed={'image': imgs}, fetch_list=fetch_list, return_numpy=True) num_imgs = pred.shape[0] # TODO: use multi-thread to write images for i in range(num_imgs): # Add more comments res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8) img_name = img_names[i] grt = grts[i] res_shape = (res_map.shape[0], res_map.shape[1]) if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]: res_map = cv2.resize(res_map, pred_shape, interpolation=cv2.INTER_NEAREST) valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1]) res_map = res_map[0:valid_shape[0], 0:valid_shape[1]] org_shape = (org_shapes[i, 0], org_shapes[i, 1]) res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]), interpolation=cv2.INTER_NEAREST) if grt is not None: grt = grt[0:valid_shape[0], 0:valid_shape[1]] grt = cv2.resize(grt, (org_shape[1], org_shape[0]), interpolation=cv2.INTER_NEAREST) png_fn = to_png_fn(img_names[i]) if also_save_raw_results: raw_fn = os.path.join(raw_save_dir, png_fn) dirname = os.path.dirname(raw_save_dir) makedirs(dirname) cv2.imwrite(raw_fn, res_map) # colorful segment result visualization vis_fn = os.path.join(save_dir, png_fn) dirname = os.path.dirname(vis_fn) makedirs(dirname) pred_mask = colorize(res_map, org_shapes[i], color_map) if grt is not None: grt = colorize(grt, org_shapes[i], color_map) cv2.imwrite(vis_fn, pred_mask) img_cnt += 1 print("#{} visualize image path: {}".format(img_cnt, vis_fn)) # Use Tensorboard to visualize image if log_writer is not None: # Calulate epoch from ckpt_dir folder name epoch = int(os.path.split(ckpt_dir)[-1]) print("Tensorboard visualization epoch", epoch) log_writer.add_image("Predict/{}".format(img_names[i]), pred_mask[..., ::-1], epoch, dataformats='HWC') # Original image # BGR->RGB img = cv2.imread( os.path.join(cfg.DATASET.DATA_DIR, img_names[i]))[..., ::-1] log_writer.add_image("Images/{}".format(img_names[i]), img, epoch, dataformats='HWC') #add ground truth (label) images if grt is not None: log_writer.add_image("Label/{}".format(img_names[i]), grt[..., ::-1], epoch, dataformats='HWC') # If in local_test mode, only visualize 5 images just for testing # procedure if local_test and img_cnt >= 5: break
def train(cfg): startup_prog = fluid.Program() train_prog = fluid.Program() drop_last = True dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST, mode=ModelPhase.TRAIN, shuffle=True, data_dir=cfg.DATASET.DATA_DIR) def data_generator(): if args.use_mpio: data_gen = dataset.multiprocess_generator( num_processes=cfg.DATALOADER.NUM_WORKERS, max_queue_size=cfg.DATALOADER.BUF_SIZE) else: data_gen = dataset.generator() batch_data = [] for b in data_gen: batch_data.append(b) if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS): for item in batch_data: yield item[0], item[1], item[2] batch_data = [] # If use sync batch norm strategy, drop last batch if number of samples # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues if not cfg.TRAIN.SYNC_BATCH_NORM: for item in batch_data: yield item[0], item[1], item[2] # Get device environment # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() # place = places[0] gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() # Get number of GPU dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places) print_info("#Device count: {}".format(dev_count)) # Make sure BATCH_SIZE can divided by GPU cards assert cfg.BATCH_SIZE % dev_count == 0, ( 'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format( cfg.BATCH_SIZE, dev_count)) # If use multi-gpu training mode, batch data will allocated to each GPU evenly batch_size_per_dev = cfg.BATCH_SIZE // dev_count print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) py_reader, avg_loss, lr, pred, grts, masks = build_model( train_prog, startup_prog, phase=ModelPhase.TRAIN) py_reader.decorate_sample_generator(data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) exe = fluid.Executor(place) exe.run(startup_prog) exec_strategy = fluid.ExecutionStrategy() # Clear temporary variables every 100 iteration if args.use_gpu: exec_strategy.num_threads = fluid.core.get_cuda_device_count() exec_strategy.num_iteration_per_drop_scope = 100 build_strategy = fluid.BuildStrategy() if cfg.NUM_TRAINERS > 1 and args.use_gpu: dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog) exec_strategy.num_threads = 1 if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu: if dev_count > 1: # Apply sync batch norm strategy print_info("Sync BatchNorm strategy is effective.") build_strategy.sync_batch_norm = True else: print_info( "Sync BatchNorm strategy will not be effective if GPU device" " count <= 1") compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel( loss_name=avg_loss.name, exec_strategy=exec_strategy, build_strategy=build_strategy) # Resume training begin_epoch = cfg.SOLVER.BEGIN_EPOCH if cfg.TRAIN.RESUME_MODEL_DIR: begin_epoch = load_checkpoint(exe, train_prog) # Load pretrained model elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) load_vars = [] load_fail_vars = [] def var_shape_matched(var, shape): """ Check whehter persitable variable shape is match with current network """ var_exist = os.path.exists( os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name)) if var_exist: var_shape = parse_shape_from_file( os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name)) return var_shape == shape return False for x in train_prog.list_vars(): if isinstance(x, fluid.framework.Parameter): shape = tuple(fluid.global_scope().find_var( x.name).get_tensor().shape()) if var_shape_matched(x, shape): load_vars.append(x) else: load_fail_vars.append(x) fluid.io.load_vars(exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars) for var in load_vars: print_info("Parameter[{}] loaded sucessfully!".format(var.name)) for var in load_fail_vars: print_info( "Parameter[{}] don't exist or shape does not match current network, skip" " to load it.".format(var.name)) print_info("{}/{} pretrained parameters loaded successfully!".format( len(load_vars), len(load_vars) + len(load_fail_vars))) else: print_info( 'Pretrained model dir {} not exists, training from scratch...'. format(cfg.TRAIN.PRETRAINED_MODEL_DIR)) fetch_list = [avg_loss.name, lr.name] if args.debug: # Fetch more variable info and use streaming confusion matrix to # calculate IoU results if in debug mode np.set_printoptions(precision=4, suppress=True, linewidth=160, floatmode="fixed") fetch_list.extend([pred.name, grts.name, masks.name]) cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True) if args.use_tb: if not args.tb_log_dir: print_info("Please specify the log directory by --tb_log_dir.") exit(1) from tb_paddle import SummaryWriter log_writer = SummaryWriter(args.tb_log_dir) # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0)) # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) global_step = 0 all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True: all_step += 1 all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1) avg_loss = 0.0 timer = Timer() timer.start() if begin_epoch > cfg.SOLVER.NUM_EPOCHS: raise ValueError(( "begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format( begin_epoch, cfg.SOLVER.NUM_EPOCHS)) if args.use_mpio: print_info("Use multiprocess reader") else: print_info("Use multi-thread reader") for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): py_reader.start() while True: try: if args.debug: # Print category IoU and accuracy to check whether the # traning process is corresponed to expectation loss, lr, pred, grts, masks = exe.run( program=compiled_train_prog, fetch_list=fetch_list, return_numpy=True) cm.calculate(pred, grts, masks) avg_loss += np.mean(np.array(loss)) global_step += 1 if global_step % args.log_steps == 0: speed = args.log_steps / timer.elapsed_time() avg_loss /= args.log_steps category_acc, mean_acc = cm.accuracy() category_iou, mean_iou = cm.mean_iou() print_info(( "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}" ).format(epoch, global_step, lr[0], avg_loss, mean_acc, mean_iou, speed, calculate_eta(all_step - global_step, speed))) print_info("Category IoU: ", category_iou) print_info("Category Acc: ", category_acc) if args.use_tb: log_writer.add_scalar('Train/mean_iou', mean_iou, global_step) log_writer.add_scalar('Train/mean_acc', mean_acc, global_step) log_writer.add_scalar('Train/loss', avg_loss, global_step) log_writer.add_scalar('Train/lr', lr[0], global_step) log_writer.add_scalar('Train/step/sec', speed, global_step) sys.stdout.flush() avg_loss = 0.0 cm.zero_matrix() timer.restart() else: # If not in debug mode, avoid unnessary log and calculate loss, lr = exe.run(program=compiled_train_prog, fetch_list=fetch_list, return_numpy=True) avg_loss += np.mean(np.array(loss)) global_step += 1 if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0: avg_loss /= args.log_steps speed = args.log_steps / timer.elapsed_time() print(( "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}" ).format(epoch, global_step, lr[0], avg_loss, speed, calculate_eta(all_step - global_step, speed))) if args.use_tb: log_writer.add_scalar('Train/loss', avg_loss, global_step) log_writer.add_scalar('Train/lr', lr[0], global_step) log_writer.add_scalar('Train/speed', speed, global_step) sys.stdout.flush() avg_loss = 0.0 timer.restart() except fluid.core.EOFException: py_reader.reset() break except Exception as e: print(e) if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0: ckpt_dir = save_checkpoint(exe, train_prog, epoch) if args.do_eval: print("Evaluation start") _, mean_iou, _, mean_acc = evaluate(cfg=cfg, ckpt_dir=ckpt_dir, use_gpu=args.use_gpu, use_mpio=args.use_mpio) if args.use_tb: log_writer.add_scalar('Evaluate/mean_iou', mean_iou, global_step) log_writer.add_scalar('Evaluate/mean_acc', mean_acc, global_step) # Use Tensorboard to visualize results if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None: visualize(cfg=cfg, use_gpu=args.use_gpu, vis_file_list=cfg.DATASET.VIS_FILE_LIST, vis_dir="visual", ckpt_dir=ckpt_dir, log_writer=log_writer) # save final model if cfg.TRAINER_ID == 0: save_checkpoint(exe, train_prog, 'final')
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): np.set_printoptions(precision=5, suppress=True) startup_prog = fluid.Program() test_prog = fluid.Program() dataset = SegDataset( file_list=cfg.DATASET.VAL_FILE_LIST, mode=ModelPhase.EVAL, data_dir=cfg.DATASET.DATA_DIR) def data_generator(): #TODO: check is batch reader compatitable with Windows if use_mpio: data_gen = dataset.multiprocess_generator( num_processes=cfg.DATALOADER.NUM_WORKERS, max_queue_size=cfg.DATALOADER.BUF_SIZE) else: data_gen = dataset.generator() for b in data_gen: yield b[0], b[1], b[2] data_loader, avg_loss, pred, grts, masks = build_model( test_prog, startup_prog, phase=ModelPhase.EVAL, arch=kwargs['arch']) data_loader.set_sample_generator( data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE) # Get device environment places = fluid.cuda_places() if use_gpu else fluid.cpu_places() place = places[0] dev_count = len(places) print("#Device count: {}".format(dev_count)) exe = fluid.Executor(place) exe.run(startup_prog) test_prog = test_prog.clone(for_test=True) ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir if not os.path.exists(ckpt_dir): raise ValueError('The TEST.TEST_MODEL {} is not found'.format(ckpt_dir)) if ckpt_dir is not None: print('load test model:', ckpt_dir) try: fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) except: fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) # Use streaming confusion matrix to calculate mean_iou np.set_printoptions( precision=4, suppress=True, linewidth=160, floatmode="fixed") conf_mat = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True) fetch_list = [avg_loss.name, pred.name, grts.name, masks.name] num_images = 0 step = 0 all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1 timer = Timer() timer.start() data_loader.start() while True: try: step += 1 loss, pred, grts, masks = exe.run( test_prog, fetch_list=fetch_list, return_numpy=True) loss = np.mean(np.array(loss)) num_images += pred.shape[0] conf_mat.calculate(pred, grts, masks) _, iou = conf_mat.mean_iou() _, acc = conf_mat.accuracy() speed = 1.0 / timer.elapsed_time() print( "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} step/sec={:.2f} | ETA {}" .format(step, loss, acc, iou, speed, calculate_eta(all_step - step, speed))) timer.restart() sys.stdout.flush() except fluid.core.EOFException: break category_iou, avg_iou = conf_mat.mean_iou() category_acc, avg_acc = conf_mat.accuracy() print("[EVAL]#image={} acc={:.4f} IoU={:.4f}".format( num_images, avg_acc, avg_iou)) print("[EVAL]Category IoU:", category_iou) print("[EVAL]Category Acc:", category_acc) print("[EVAL]Kappa:{:.4f}".format(conf_mat.kappa())) return category_iou, avg_iou, category_acc, avg_acc
def train(cfg): startup_prog = fluid.Program() train_prog = fluid.Program() test_prog = fluid.Program() if args.enable_ce: startup_prog.random_seed = 1000 train_prog.random_seed = 1000 drop_last = True dataset = SegDataset( file_list=cfg.DATASET.TRAIN_FILE_LIST, mode=ModelPhase.TRAIN, shuffle=True, data_dir=cfg.DATASET.DATA_DIR) def data_generator(): if args.use_mpio: data_gen = dataset.multiprocess_generator( num_processes=cfg.DATALOADER.NUM_WORKERS, max_queue_size=cfg.DATALOADER.BUF_SIZE) else: data_gen = dataset.generator() batch_data = [] for b in data_gen: batch_data.append(b) if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS): for item in batch_data: yield item[0], item[1], item[2] batch_data = [] # If use sync batch norm strategy, drop last batch if number of samples # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues if not cfg.TRAIN.SYNC_BATCH_NORM: for item in batch_data: yield item[0], item[1], item[2] # Get device environment gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() # Get number of GPU dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places) print_info("#Device count: {}".format(dev_count)) # Make sure BATCH_SIZE can divided by GPU cards assert cfg.BATCH_SIZE % dev_count == 0, ( 'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format( cfg.BATCH_SIZE, dev_count)) # If use multi-gpu training mode, batch data will allocated to each GPU evenly batch_size_per_dev = cfg.BATCH_SIZE // dev_count print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) data_loader, avg_loss, lr, pred, grts, masks = build_model( train_prog, startup_prog, phase=ModelPhase.TRAIN) build_model(test_prog, fluid.Program(), phase=ModelPhase.EVAL) data_loader.set_sample_generator( data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) exe = fluid.Executor(place) exe.run(startup_prog) exec_strategy = fluid.ExecutionStrategy() # Clear temporary variables every 100 iteration if args.use_gpu: exec_strategy.num_threads = fluid.core.get_cuda_device_count() exec_strategy.num_iteration_per_drop_scope = 100 build_strategy = fluid.BuildStrategy() if cfg.NUM_TRAINERS > 1 and args.use_gpu: dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog) exec_strategy.num_threads = 1 if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu: if dev_count > 1: # Apply sync batch norm strategy print_info("Sync BatchNorm strategy is effective.") build_strategy.sync_batch_norm = True else: print_info( "Sync BatchNorm strategy will not be effective if GPU device" " count <= 1") compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel( loss_name=avg_loss.name, exec_strategy=exec_strategy, build_strategy=build_strategy) # Resume training begin_epoch = cfg.SOLVER.BEGIN_EPOCH if cfg.TRAIN.RESUME_MODEL_DIR: begin_epoch = load_checkpoint(exe, train_prog) # Load pretrained model elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR) else: print_info( 'Pretrained model dir {} not exists, training from scratch...'. format(cfg.TRAIN.PRETRAINED_MODEL_DIR)) fetch_list = [avg_loss.name, lr.name] if args.debug: # Fetch more variable info and use streaming confusion matrix to # calculate IoU results if in debug mode np.set_printoptions( precision=4, suppress=True, linewidth=160, floatmode="fixed") fetch_list.extend([pred.name, grts.name, masks.name]) cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True) if args.use_vdl: if not args.vdl_log_dir: print_info("Please specify the log directory by --vdl_log_dir.") exit(1) from visualdl import LogWriter log_writer = LogWriter(args.vdl_log_dir) # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0)) # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) step = 0 all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True: all_step += 1 all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1) avg_loss = 0.0 best_mIoU = 0.0 timer = Timer() timer.start() if begin_epoch > cfg.SOLVER.NUM_EPOCHS: raise ValueError( ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format( begin_epoch, cfg.SOLVER.NUM_EPOCHS)) if args.use_mpio: print_info("Use multiprocess reader") else: print_info("Use multi-thread reader") for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): data_loader.start() while True: try: if args.debug: # Print category IoU and accuracy to check whether the # traning process is corresponed to expectation loss, lr, pred, grts, masks = exe.run( program=compiled_train_prog, fetch_list=fetch_list, return_numpy=True) cm.calculate(pred, grts, masks) avg_loss += np.mean(np.array(loss)) step += 1 if step % args.log_steps == 0: speed = args.log_steps / timer.elapsed_time() avg_loss /= args.log_steps category_acc, mean_acc = cm.accuracy() category_iou, mean_iou = cm.mean_iou() print_info(( "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}" ).format(epoch, step, lr[0], avg_loss, mean_acc, mean_iou, speed, calculate_eta(all_step - step, speed))) print_info("Category IoU: ", category_iou) print_info("Category Acc: ", category_acc) if args.use_vdl: log_writer.add_scalar('Train/mean_iou', mean_iou, step) log_writer.add_scalar('Train/mean_acc', mean_acc, step) log_writer.add_scalar('Train/loss', avg_loss, step) log_writer.add_scalar('Train/lr', lr[0], step) log_writer.add_scalar('Train/step/sec', speed, step) sys.stdout.flush() avg_loss = 0.0 cm.zero_matrix() timer.restart() else: # If not in debug mode, avoid unnessary log and calculate loss, lr = exe.run( program=compiled_train_prog, fetch_list=fetch_list, return_numpy=True) avg_loss += np.mean(np.array(loss)) step += 1 if step % args.log_steps == 0 and cfg.TRAINER_ID == 0: avg_loss /= args.log_steps speed = args.log_steps / timer.elapsed_time() print(( "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}" ).format(epoch, step, lr[0], avg_loss, speed, calculate_eta(all_step - step, speed))) if args.use_vdl: log_writer.add_scalar('Train/loss', avg_loss, step) log_writer.add_scalar('Train/lr', lr[0], step) log_writer.add_scalar('Train/speed', speed, step) sys.stdout.flush() avg_loss = 0.0 timer.restart() # NOTE : used for benchmark, profiler tools if args.is_profiler and epoch == 1 and step == args.log_steps: profiler.start_profiler("All") elif args.is_profiler and epoch == 1 and step == args.log_steps + 5: profiler.stop_profiler("total", args.profiler_path) return except fluid.core.EOFException: data_loader.reset() break except Exception as e: print(e) if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0: ckpt_dir = save_checkpoint(train_prog, epoch) save_infer_program(test_prog, ckpt_dir) if args.do_eval: print("Evaluation start") _, mean_iou, _, mean_acc = evaluate( cfg=cfg, ckpt_dir=ckpt_dir, use_gpu=args.use_gpu, use_mpio=args.use_mpio) if args.use_vdl: log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step) log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step) if mean_iou > best_mIoU: best_mIoU = mean_iou update_best_model(ckpt_dir) print_info("Save best model {} to {}, mIoU = {:.4f}".format( ckpt_dir, os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model'), mean_iou)) # Use VisualDL to visualize results if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None: visualize( cfg=cfg, use_gpu=args.use_gpu, vis_file_list=cfg.DATASET.VIS_FILE_LIST, vis_dir="visual", ckpt_dir=ckpt_dir, log_writer=log_writer) # save final model if cfg.TRAINER_ID == 0: ckpt_dir = save_checkpoint(train_prog, 'final') save_infer_program(test_prog, ckpt_dir)
def visualize(cfg, vis_file_list=None, use_gpu=False, vis_dir="visual", ckpt_dir=None, log_writer=None, local_test=False, **kwargs): if vis_file_list is None: vis_file_list = cfg.DATASET.VIS_FILE_LIST dataset = SegDataset(file_list=vis_file_list, mode=ModelPhase.VISUAL, data_dir=cfg.DATASET.DATA_DIR) startup_prog = fluid.Program() test_prog = fluid.Program() pred, out = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL) # Clone forward graph test_prog = test_prog.clone(for_test=True) # Generator full colormap for maximum 256 classes color_map = get_color_map_list(256) # Get device environment place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_prog) ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir if ckpt_dir is not None: print('load test model:', ckpt_dir) try: fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) except: fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) save_dir = vis_dir makedirs(save_dir) fetch_list = [pred.name, out.name] test_reader = dataset.batch(dataset.generator, batch_size=cfg.TEST.BATCH_SIZE, is_test=True) img_cnt = 0 total_logit = None test_reader = tqdm(test_reader, total=math.ceil(cfg.DATASET.VIS_TOTAL_IMAGES // cfg.TEST.BATCH_SIZE), ncols=120) for imgs, grts, img_names, valid_shapes, org_shapes in test_reader: pred_shape = (imgs.shape[2], imgs.shape[3]) pred, out = exe.run(program=test_prog, feed={'image': imgs}, fetch_list=fetch_list, return_numpy=True) # if total_logit is None: # total_logit = out # else: # total_logit = np.concatenate([total_logit, out]) num_imgs = pred.shape[0] # TODO: use multi-thread to write images for i in range(num_imgs): # Add more comments res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8) img_name = img_names[i] res_shape = (res_map.shape[0], res_map.shape[1]) if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]: res_map = cv2.resize(res_map, pred_shape, interpolation=cv2.INTER_NEAREST) valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1]) res_map = res_map[0:valid_shape[0], 0:valid_shape[1]] org_shape = (org_shapes[i, 0], org_shapes[i, 1]) res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]), interpolation=cv2.INTER_NEAREST) png_fn = to_png_fn(img_name) # colorful segment result visualization vis_fn = os.path.join(save_dir, png_fn) dirname = os.path.dirname(vis_fn) makedirs(dirname) pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L') #pred_mask.putpalette(color_map) pred_mask.save(vis_fn) img_cnt += 1 # print("#{} visualize image path: {}".format(img_cnt, vis_fn)) # Use VisualDL to visualize image if log_writer is not None: # Calulate epoch from ckpt_dir folder name epoch = int(os.path.split(ckpt_dir)[-1]) print("VisualDL visualization epoch", epoch) pred_mask_np = np.array(pred_mask.convert("RGB")) log_writer.add_image("Predict/{}".format(img_name), pred_mask_np, epoch) # Original image # BGR->RGB img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR, img_name))[..., ::-1] log_writer.add_image("Images/{}".format(img_name), img, epoch) # add ground truth (label) images grt = grts[i] if grt is not None: grt = grt[0:valid_shape[0], 0:valid_shape[1]] grt_pil = PILImage.fromarray(grt.astype(np.uint8), mode='P') grt_pil.putpalette(color_map) grt_pil = grt_pil.resize((org_shape[1], org_shape[0])) grt = np.array(grt_pil.convert("RGB")) log_writer.add_image("Label/{}".format(img_name), grt, epoch) # If in local_test mode, only visualize 5 images just for testing # procedure if local_test and img_cnt >= 5: break
def train(cfg): # startup_prog = fluid.Program() # train_prog = fluid.Program() drop_last = True dataset = SegDataset( file_list=cfg.DATASET.TRAIN_FILE_LIST, mode=ModelPhase.TRAIN, shuffle=True, data_dir=cfg.DATASET.DATA_DIR) def data_generator(): if args.use_mpio: data_gen = dataset.multiprocess_generator( num_processes=cfg.DATALOADER.NUM_WORKERS, max_queue_size=cfg.DATALOADER.BUF_SIZE) else: data_gen = dataset.generator() batch_data = [] for b in data_gen: batch_data.append(b) if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS): for item in batch_data: yield item[0], item[1], item[2] batch_data = [] # If use sync batch norm strategy, drop last batch if number of samples # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues if not cfg.TRAIN.SYNC_BATCH_NORM: for item in batch_data: yield item[0], item[1], item[2] # Get device environment # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() # place = places[0] gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() # Get number of GPU dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places) print_info("#Device count: {}".format(dev_count)) # Make sure BATCH_SIZE can divided by GPU cards assert cfg.BATCH_SIZE % dev_count == 0, ( 'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format( cfg.BATCH_SIZE, dev_count)) # If use multi-gpu training mode, batch data will allocated to each GPU evenly batch_size_per_dev = cfg.BATCH_SIZE // dev_count print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) data_loader, loss, lr, pred, grts, masks, image = build_model( phase=ModelPhase.TRAIN) data_loader.set_sample_generator( data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) exe = fluid.Executor(place) cfg.update_from_file(args.teacher_cfg_file) # teacher_arch = teacher_cfg.architecture teacher_program = fluid.Program() teacher_startup_program = fluid.Program() with fluid.program_guard(teacher_program, teacher_startup_program): with fluid.unique_name.guard(): _, teacher_loss, _, _, _, _, _ = build_model( teacher_program, teacher_startup_program, phase=ModelPhase.TRAIN, image=image, label=grts, mask=masks) exe.run(teacher_startup_program) teacher_program = teacher_program.clone(for_test=True) ckpt_dir = cfg.SLIM.KNOWLEDGE_DISTILL_TEACHER_MODEL_DIR assert ckpt_dir is not None print('load teacher model:', ckpt_dir) if os.path.exists(ckpt_dir): try: fluid.load(teacher_program, os.path.join(ckpt_dir, 'model'), exe) except: fluid.io.load_params(exe, ckpt_dir, main_program=teacher_program) # cfg = load_config(FLAGS.config) cfg.update_from_file(args.cfg_file) data_name_map = { 'image': 'image', 'label': 'label', 'mask': 'mask', } merge(teacher_program, fluid.default_main_program(), data_name_map, place) distill_pairs = [[ 'teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_0.tmp_0' ]] def distill(pairs, weight): """ Add 3 pairs of distillation losses, each pair of feature maps is the input of teacher and student's yolov3_loss respectively """ loss = l2_loss(pairs[0][0], pairs[0][1]) weighted_loss = loss * weight return weighted_loss distill_loss = distill(distill_pairs, 0.1) cfg.update_from_file(args.cfg_file) optimizer = solver.Solver(None, None) all_loss = loss + distill_loss lr = optimizer.optimise(all_loss) exe.run(fluid.default_startup_program()) exec_strategy = fluid.ExecutionStrategy() # Clear temporary variables every 100 iteration if args.use_gpu: exec_strategy.num_threads = fluid.core.get_cuda_device_count() exec_strategy.num_iteration_per_drop_scope = 100 build_strategy = fluid.BuildStrategy() build_strategy.fuse_all_reduce_ops = False build_strategy.fuse_all_optimizer_ops = False build_strategy.fuse_elewise_add_act_ops = True if cfg.NUM_TRAINERS > 1 and args.use_gpu: dist_utils.prepare_for_multi_process(exe, build_strategy, fluid.default_main_program()) exec_strategy.num_threads = 1 if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu: if dev_count > 1: # Apply sync batch norm strategy print_info("Sync BatchNorm strategy is effective.") build_strategy.sync_batch_norm = True else: print_info( "Sync BatchNorm strategy will not be effective if GPU device" " count <= 1") compiled_train_prog = fluid.CompiledProgram( fluid.default_main_program()).with_data_parallel( loss_name=all_loss.name, exec_strategy=exec_strategy, build_strategy=build_strategy) # Resume training begin_epoch = cfg.SOLVER.BEGIN_EPOCH if cfg.TRAIN.RESUME_MODEL_DIR: begin_epoch = load_checkpoint(exe, fluid.default_main_program()) # Load pretrained model elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): load_pretrained_weights(exe, fluid.default_main_program(), cfg.TRAIN.PRETRAINED_MODEL_DIR) else: print_info( 'Pretrained model dir {} not exists, training from scratch...'. format(cfg.TRAIN.PRETRAINED_MODEL_DIR)) #fetch_list = [avg_loss.name, lr.name] fetch_list = [ loss.name, 'teacher_' + teacher_loss.name, distill_loss.name, lr.name ] if args.debug: # Fetch more variable info and use streaming confusion matrix to # calculate IoU results if in debug mode np.set_printoptions( precision=4, suppress=True, linewidth=160, floatmode="fixed") fetch_list.extend([pred.name, grts.name, masks.name]) cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True) if args.use_vdl: if not args.vdl_log_dir: print_info("Please specify the log directory by --vdl_log_dir.") exit(1) from visualdl import LogWriter log_writer = LogWriter(args.vdl_log_dir) # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0)) # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) step = 0 all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True: all_step += 1 all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1) avg_loss = 0.0 avg_t_loss = 0.0 avg_d_loss = 0.0 best_mIoU = 0.0 timer = Timer() timer.start() if begin_epoch > cfg.SOLVER.NUM_EPOCHS: raise ValueError( ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format( begin_epoch, cfg.SOLVER.NUM_EPOCHS)) if args.use_mpio: print_info("Use multiprocess reader") else: print_info("Use multi-thread reader") for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): data_loader.start() while True: try: if args.debug: # Print category IoU and accuracy to check whether the # traning process is corresponed to expectation loss, lr, pred, grts, masks = exe.run( program=compiled_train_prog, fetch_list=fetch_list, return_numpy=True) cm.calculate(pred, grts, masks) avg_loss += np.mean(np.array(loss)) step += 1 if step % args.log_steps == 0: speed = args.log_steps / timer.elapsed_time() avg_loss /= args.log_steps category_acc, mean_acc = cm.accuracy() category_iou, mean_iou = cm.mean_iou() print_info(( "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}" ).format(epoch, step, lr[0], avg_loss, mean_acc, mean_iou, speed, calculate_eta(all_step - step, speed))) print_info("Category IoU: ", category_iou) print_info("Category Acc: ", category_acc) if args.use_vdl: log_writer.add_scalar('Train/mean_iou', mean_iou, step) log_writer.add_scalar('Train/mean_acc', mean_acc, step) log_writer.add_scalar('Train/loss', avg_loss, step) log_writer.add_scalar('Train/lr', lr[0], step) log_writer.add_scalar('Train/step/sec', speed, step) sys.stdout.flush() avg_loss = 0.0 cm.zero_matrix() timer.restart() else: # If not in debug mode, avoid unnessary log and calculate loss, t_loss, d_loss, lr = exe.run( program=compiled_train_prog, fetch_list=fetch_list, return_numpy=True) avg_loss += np.mean(np.array(loss)) avg_t_loss += np.mean(np.array(t_loss)) avg_d_loss += np.mean(np.array(d_loss)) step += 1 if step % args.log_steps == 0 and cfg.TRAINER_ID == 0: avg_loss /= args.log_steps avg_t_loss /= args.log_steps avg_d_loss /= args.log_steps speed = args.log_steps / timer.elapsed_time() print(( "epoch={} step={} lr={:.5f} loss={:.4f} teacher loss={:.4f} distill loss={:.4f} step/sec={:.3f} | ETA {}" ).format(epoch, step, lr[0], avg_loss, avg_t_loss, avg_d_loss, speed, calculate_eta(all_step - step, speed))) if args.use_vdl: log_writer.add_scalar('Train/loss', avg_loss, step) log_writer.add_scalar('Train/lr', lr[0], step) log_writer.add_scalar('Train/speed', speed, step) sys.stdout.flush() avg_loss = 0.0 avg_t_loss = 0.0 avg_d_loss = 0.0 timer.restart() except fluid.core.EOFException: data_loader.reset() break except Exception as e: print(e) if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0: ckpt_dir = save_checkpoint(fluid.default_main_program(), epoch) if args.do_eval: print("Evaluation start") _, mean_iou, _, mean_acc = evaluate( cfg=cfg, ckpt_dir=ckpt_dir, use_gpu=args.use_gpu, use_mpio=args.use_mpio) if args.use_vdl: log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step) log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step) if mean_iou > best_mIoU: best_mIoU = mean_iou update_best_model(ckpt_dir) print_info("Save best model {} to {}, mIoU = {:.4f}".format( ckpt_dir, os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model'), mean_iou)) # Use VisualDL to visualize results if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None: visualize( cfg=cfg, use_gpu=args.use_gpu, vis_file_list=cfg.DATASET.VIS_FILE_LIST, vis_dir="visual", ckpt_dir=ckpt_dir, log_writer=log_writer) if cfg.TRAINER_ID == 0: ckpt_dir = save_checkpoint(fluid.default_main_program(), epoch) # save final model if cfg.TRAINER_ID == 0: save_checkpoint(fluid.default_main_program(), 'final')
def train(cfg): startup_prog = fluid.Program() train_prog = fluid.Program() if args.enable_ce: startup_prog.random_seed = 1000 train_prog.random_seed = 1000 drop_last = True dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST, mode=ModelPhase.TRAIN, shuffle=True, data_dir=cfg.DATASET.DATA_DIR) def data_generator(): if args.use_mpio: data_gen = dataset.multiprocess_generator( num_processes=cfg.DATALOADER.NUM_WORKERS, max_queue_size=cfg.DATALOADER.BUF_SIZE) else: data_gen = dataset.generator() batch_data = [] for b in data_gen: batch_data.append(b) if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS): for item in batch_data: yield item[0], item[1], item[2] batch_data = [] # If use sync batch norm strategy, drop last batch if number of samples # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues if not cfg.TRAIN.SYNC_BATCH_NORM: for item in batch_data: yield item[0], item[1], item[2] # Get device environment # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() # place = places[0] gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() # Get number of GPU dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places) print_info("#Device count: {}".format(dev_count)) # Make sure BATCH_SIZE can divided by GPU cards assert cfg.BATCH_SIZE % dev_count == 0, ( 'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format( cfg.BATCH_SIZE, dev_count)) # If use multi-gpu training mode, batch data will allocated to each GPU evenly batch_size_per_dev = cfg.BATCH_SIZE // dev_count print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) config_info = {'input_size': 769, 'output_size': 1, 'block_num': 7} config = ([(cfg.SLIM.NAS_SPACE_NAME, config_info)]) factory = SearchSpaceFactory() space = factory.get_search_space(config) port = cfg.SLIM.NAS_PORT server_address = (cfg.SLIM.NAS_ADDRESS, port) sa_nas = SANAS(config, server_addr=server_address, search_steps=cfg.SLIM.NAS_SEARCH_STEPS, is_server=cfg.SLIM.NAS_IS_SERVER) for step in range(cfg.SLIM.NAS_SEARCH_STEPS): arch = sa_nas.next_archs()[0] start_prog = fluid.Program() train_prog = fluid.Program() data_loader, avg_loss, lr, pred, grts, masks = build_model( train_prog, start_prog, arch=arch, phase=ModelPhase.TRAIN) cur_flops = flops(train_prog) print('current step:', step, 'flops:', cur_flops) data_loader.set_sample_generator(data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) exe = fluid.Executor(place) exe.run(start_prog) exec_strategy = fluid.ExecutionStrategy() # Clear temporary variables every 100 iteration if args.use_gpu: exec_strategy.num_threads = fluid.core.get_cuda_device_count() exec_strategy.num_iteration_per_drop_scope = 100 build_strategy = fluid.BuildStrategy() if cfg.NUM_TRAINERS > 1 and args.use_gpu: dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog) exec_strategy.num_threads = 1 if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu: if dev_count > 1: # Apply sync batch norm strategy print_info("Sync BatchNorm strategy is effective.") build_strategy.sync_batch_norm = True else: print_info( "Sync BatchNorm strategy will not be effective if GPU device" " count <= 1") compiled_train_prog = fluid.CompiledProgram( train_prog).with_data_parallel(loss_name=avg_loss.name, exec_strategy=exec_strategy, build_strategy=build_strategy) # Resume training begin_epoch = cfg.SOLVER.BEGIN_EPOCH if cfg.TRAIN.RESUME_MODEL_DIR: begin_epoch = load_checkpoint(exe, train_prog) # Load pretrained model elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) load_vars = [] load_fail_vars = [] def var_shape_matched(var, shape): """ Check whehter persitable variable shape is match with current network """ var_exist = os.path.exists( os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name)) if var_exist: var_shape = parse_shape_from_file( os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name)) return var_shape == shape return False for x in train_prog.list_vars(): if isinstance(x, fluid.framework.Parameter): shape = tuple(fluid.global_scope().find_var( x.name).get_tensor().shape()) if var_shape_matched(x, shape): load_vars.append(x) else: load_fail_vars.append(x) fluid.io.load_vars(exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars) for var in load_vars: print_info("Parameter[{}] loaded sucessfully!".format( var.name)) for var in load_fail_vars: print_info( "Parameter[{}] don't exist or shape does not match current network, skip" " to load it.".format(var.name)) print_info( "{}/{} pretrained parameters loaded successfully!".format( len(load_vars), len(load_vars) + len(load_fail_vars))) else: print_info( 'Pretrained model dir {} not exists, training from scratch...'. format(cfg.TRAIN.PRETRAINED_MODEL_DIR)) fetch_list = [avg_loss.name, lr.name] global_step = 0 all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True: all_step += 1 all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1) avg_loss = 0.0 timer = Timer() timer.start() if begin_epoch > cfg.SOLVER.NUM_EPOCHS: raise ValueError( ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]" ).format(begin_epoch, cfg.SOLVER.NUM_EPOCHS)) if args.use_mpio: print_info("Use multiprocess reader") else: print_info("Use multi-thread reader") best_miou = 0.0 for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): data_loader.start() while True: try: loss, lr = exe.run(program=compiled_train_prog, fetch_list=fetch_list, return_numpy=True) avg_loss += np.mean(np.array(loss)) global_step += 1 if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0: avg_loss /= args.log_steps speed = args.log_steps / timer.elapsed_time() print(( "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}" ).format(epoch, global_step, lr[0], avg_loss, speed, calculate_eta(all_step - global_step, speed))) sys.stdout.flush() avg_loss = 0.0 timer.restart() except fluid.core.EOFException: data_loader.reset() break except Exception as e: print(e) if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH: ckpt_dir = save_checkpoint(exe, train_prog, '{}_tmp'.format(port)) _, mean_iou, _, mean_acc = evaluate(cfg=cfg, arch=arch, ckpt_dir=ckpt_dir, use_gpu=args.use_gpu, use_mpio=args.use_mpio) if best_miou < mean_iou: print('search step {}, epoch {} best iou {}'.format( step, epoch, mean_iou)) best_miou = mean_iou sa_nas.reward(float(best_miou))
def evaluate(cfg, ckpt_dir=None, use_gpu=False, vis=False, vis_dir='vis_out/test_public', use_mpio=False, **kwargs): np.set_printoptions(precision=5, suppress=True) startup_prog = fluid.Program() test_prog = fluid.Program() dataset = SegDataset( file_list=cfg.DATASET.VAL_FILE_LIST, mode=ModelPhase.EVAL, data_dir=cfg.DATASET.DATA_DIR) fls = [] with open(cfg.DATASET.VAL_FILE_LIST) as fr: for line in fr.readlines(): fls.append(line.strip().split(' ')[0]) if vis: assert cfg.VIS.VISINEVAL is True if not os.path.exists(vis_dir): os.makedirs(vis_dir) def data_generator(): #TODO: check is batch reader compatitable with Windows if use_mpio: data_gen = dataset.multiprocess_generator( num_processes=cfg.DATALOADER.NUM_WORKERS, max_queue_size=cfg.DATALOADER.BUF_SIZE) else: data_gen = dataset.generator() for b in data_gen: if cfg.DATASET.INPUT_IMAGE_NUM == 1: yield b[0], b[1], b[2] else: yield b[0], b[1], b[2], b[3] data_loader, avg_loss, pred, grts, masks = build_model( test_prog, startup_prog, phase=ModelPhase.EVAL) data_loader.set_sample_generator( data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE) # Get device environment places = fluid.cuda_places() if use_gpu else fluid.cpu_places() place = places[0] dev_count = len(places) print("#Device count: {}".format(dev_count)) exe = fluid.Executor(place) exe.run(startup_prog) test_prog = test_prog.clone(for_test=True) ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir if not os.path.exists(ckpt_dir): raise ValueError('The TEST.TEST_MODEL {} is not found'.format(ckpt_dir)) if ckpt_dir is not None: print('load test model:', ckpt_dir) try: fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) except: fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) # Use streaming confusion matrix to calculate mean_iou np.set_printoptions( precision=4, suppress=True, linewidth=160, floatmode="fixed") class_num = cfg.DATASET.NUM_CLASSES conf_mat = ConfusionMatrix(class_num, streaming=True) fetch_list = [avg_loss.name, pred.name, grts.name, masks.name] num_images = 0 step = 0 all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1 timer = Timer() timer.start() data_loader.start() cnt = 0 while True: try: step += 1 loss, pred, grts, masks = exe.run( test_prog, fetch_list=fetch_list, return_numpy=True) if vis: preds = np.array(pred, dtype=np.float32) for j in range(preds.shape[0]): if cnt > len(fls): continue name = fls[cnt].split('/')[-1].split('.')[0] p = np.squeeze(preds[j]) np.save(os.path.join(vis_dir, name + '.npy'), p) cnt += 1 print('vis %d npy... (%d tif sample)' % (cnt, cnt//36)) continue loss = np.mean(np.array(loss)) num_images += pred.shape[0] conf_mat.calculate(pred, grts, masks) _, iou = conf_mat.mean_iou() _, acc = conf_mat.accuracy() fwiou = conf_mat.frequency_weighted_iou() speed = 1.0 / timer.elapsed_time() print( "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} FWIoU={:.4f} step/sec={:.2f} | ETA {}" .format(step, loss, acc, iou, fwiou, speed, calculate_eta(all_step - step, speed))) timer.restart() sys.stdout.flush() except fluid.core.EOFException: break if vis: return category_iou, avg_iou = conf_mat.mean_iou() category_acc, avg_acc = conf_mat.accuracy() fwiou = conf_mat.frequency_weighted_iou() print("[EVAL]#image={} acc={:.4f} IoU={:.4f} FWIoU={:.4f}".format( num_images, avg_acc, avg_iou, fwiou)) print("[EVAL]Category Acc:", category_acc) print("[EVAL]Category IoU:", category_iou) print("[EVAL]Kappa: {:.4f}".format(conf_mat.kappa())) return category_iou, avg_iou, category_acc, avg_acc
def visualize(cfg, vis_file_list=None, use_gpu=False, vis_dir="show", ckpt_dir=None, log_writer=None, local_test=False, **kwargs): if vis_file_list is None: vis_file_list = cfg.DATASET.VIS_FILE_LIST dataset = SegDataset(file_list=vis_file_list, mode=ModelPhase.VISUAL, data_dir=cfg.DATASET.DATA_DIR) startup_prog = fluid.Program() test_prog = fluid.Program() pred, logit, out = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL) # Clone forward graph test_prog = test_prog.clone(for_test=True) # Generator full colormap for maximum 256 classes color_map = get_color_map_list(256) # Get device environment place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_prog) ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir if ckpt_dir is not None: print('load test model:', ckpt_dir) try: fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) except: fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) save_dir = "show" makedirs(save_dir) fetch_list = [pred.name, logit.name] test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True) img_cnt = 0 for imgs, grts, img_names, valid_shapes, org_shapes in test_reader: pred_shape = (imgs.shape[2], imgs.shape[3]) pred, logit = exe.run(program=test_prog, feed={'image': imgs}, fetch_list=fetch_list, return_numpy=True) num_imgs = pred.shape[0] # TODO: use multi-thread to write images for i in range(num_imgs): # Add more comments res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8) img_name = img_names[i] res_shape = (res_map.shape[0], res_map.shape[1]) if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]: res_map = cv2.resize(res_map, pred_shape, interpolation=cv2.INTER_NEAREST) valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1]) res_map = res_map[0:valid_shape[0], 0:valid_shape[1]] org_shape = (org_shapes[i, 0], org_shapes[i, 1]) res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]), interpolation=cv2.INTER_NEAREST) png_fn = to_png_fn(img_name) # colorful segment result visualization vis_fn = os.path.join(save_dir, png_fn) dirname = os.path.dirname(vis_fn) makedirs(dirname) pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L') pred_mask.putpalette(color_map) # pred_mask.save(vis_fn) pred_mask_np = np.array(pred_mask.convert("RGB")) im_pred = PILImage.fromarray(pred_mask_np) # Original image # BGR->RGB img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR, img_name))[..., ::-1] im_ori = PILImage.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) # log_writer.add_image("Images/{}".format(img_name), img, epoch) # add ground truth (label) images im_pred_cat = PILImage.blend(im_ori, im_pred, 0.5) im_ori = join(im_ori, im_ori, flag="vertical") im_pred_cat = join(im_pred_cat, im_pred, flag="vertical") new_img = join(im_ori, im_pred_cat) new_img.save(vis_fn) img_cnt += 1 print("#{} show image path: {}".format(img_cnt, vis_fn))
def visualize(cfg, vis_file_list=None, use_gpu=False, vis_dir="visual", ckpt_dir=None, log_writer=None, local_test=False, **kwargs): if vis_file_list is None: vis_file_list = cfg.DATASET.TEST_FILE_LIST dataset = SegDataset(file_list=vis_file_list, mode=ModelPhase.VISUAL, data_dir=cfg.DATASET.DATA_DIR) startup_prog = fluid.Program() test_prog = fluid.Program() pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL) # Clone forward graph test_prog = test_prog.clone(for_test=True) # Generator full colormap for maximum 256 classes color_map = get_color_map_list(cfg.DATASET.NUM_CLASSES**2 if cfg.DATASET.NUM_CLASSES**2 < 256 else 256) # Get device environment place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_prog) ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir if ckpt_dir is not None: print('load test model:', ckpt_dir) try: fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) except: fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) save_dir = vis_dir makedirs(save_dir) fetch_list = [pred.name] test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True) img_cnt = 0 def exe_run(): if cfg.DATASET.INPUT_IMAGE_NUM == 1: for imgs, grts, img_names, valid_shapes, org_shapes in test_reader: pred_shape = (imgs.shape[2], imgs.shape[3]) pred, = exe.run(program=test_prog, feed={'image1': imgs}, fetch_list=fetch_list, return_numpy=True) yield pred, pred_shape, grts, img_names, valid_shapes, org_shapes else: for img1s, img2s, grts, img1_names, img2_names, valid_shapes, org_shapes in test_reader: pred_shape = (img1s.shape[2], img1s.shape[3]) pred, = exe.run(program=test_prog, feed={ 'image1': img1s, 'image2': img2s }, fetch_list=fetch_list, return_numpy=True) yield pred, pred_shape, grts, img1_names, valid_shapes, org_shapes for pred, pred_shape, grts, img_names, valid_shapes, org_shapes in exe_run( ): idx = pred.shape[0] if cfg.DATASET.INPUT_IMAGE_NUM == 2 and cfg.VIS.SEG_FOR_CD: idx = pred.shape[0] // cfg.DATASET.INPUT_IMAGE_NUM pred1, pred2 = pred[:idx], pred[ idx:] # fluid.layers.split(pred, 2, dim=0) num_imgs = pred1.shape[0] # TODO: use multi-thread to write images for i in range(num_imgs): # Add more comments res_map_list = [] for pred in [pred1, pred2]: if pred.shape[0] == 0: continue #res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8) res_map = np.squeeze(pred[i, :, :, :]).astype(np.float32) res_shape = (res_map.shape[0], res_map.shape[1]) if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[ 1]: res_map = cv2.resize(res_map, pred_shape, interpolation=cv2.INTER_NEAREST) valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1]) res_map = res_map[0:valid_shape[0], 0:valid_shape[1]] org_shape = (org_shapes[i, 0], org_shapes[i, 1]) res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]), interpolation=cv2.INTER_NEAREST) res_map_list.append(res_map) img_name = img_names[i] png_fn = to_png_fn(img_name) # colorful segment result visualization vis_fn = os.path.join(save_dir, png_fn) dirname = os.path.dirname(vis_fn) makedirs(dirname) if cfg.DATASET.INPUT_IMAGE_NUM == 1 or \ (cfg.DATASET.INPUT_IMAGE_NUM == 2 and not cfg.VIS.SEG_FOR_CD): res_map = res_map_list[0] if cfg.VIS.RAW_PRED: #pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L') #pred_mask.save(vis_fn) np.save(vis_fn.replace(".png", ".npy"), res_map.astype(np.float32)) else: if cfg.VIS.ADD_LABEL: grt_im = cv2.resize(grts[i], pred_shape, interpolation=cv2.INTER_NEAREST) res_map = np.hstack((res_map, grt_im)) pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='P') pred_mask.putpalette(color_map) pred_mask.save(vis_fn) else: res_map1, res_map2 = res_map_list diff = res_map1 * cfg.DATASET.NUM_CLASSES + res_map2 unchange_idx = np.where((res_map1 - res_map2) == 0) diff[unchange_idx] = 0 res_map = np.hstack((res_map1, res_map2, diff)) pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='P') pred_mask.putpalette(color_map) pred_mask.save(vis_fn) img_cnt += 1 print("#{} visualize image path: {}".format(img_cnt, vis_fn)) # Use VisualDL to visualize image if log_writer is not None: # Calulate epoch from ckpt_dir folder name epoch = int(os.path.split(ckpt_dir)[-1]) print("VisualDL visualization epoch", epoch) pred_mask_np = np.array(pred_mask.convert("RGB")) log_writer.add_image("Predict/{}".format(img_name), pred_mask_np, epoch) # Original image # BGR->RGB img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR, img_name))[..., ::-1] log_writer.add_image("Images/{}".format(img_name), img, epoch) # add ground truth (label) images grt = grts[i] if grt is not None: grt = grt[0:valid_shape[0], 0:valid_shape[1]] grt_pil = PILImage.fromarray(grt.astype(np.uint8), mode='P') grt_pil.putpalette(color_map) grt_pil = grt_pil.resize((org_shape[1], org_shape[0])) grt = np.array(grt_pil.convert("RGB")) log_writer.add_image("Label/{}".format(img_name), grt, epoch) # If in local_test mode, only visualize 5 images just for testing # procedure if local_test and img_cnt >= 5: break