def wrapper(self, *args, **kwargs):
    timer = Timer()
    # before the method call
    timer.start()
    # the actual method call
    result = method(self, *args, **kwargs)
    # after the method call
    timer.stop(method.__name__)
    return result
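# `wrapper` above is the inner function of a method-timing decorator: it closes
# over `method`, so it is not runnable on its own. Below is a minimal sketch of
# the enclosing decorator, assuming a Timer with start()/stop(); the name
# `time_method` is hypothetical and not part of the original code.
import functools


def time_method(method):
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        timer = Timer()
        timer.start()
        result = method(self, *args, **kwargs)
        timer.stop(method.__name__)
        return result

    return wrapper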
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    drop_last = True

    dataset = build_dataset(
        cfg.DATASET.DATASET_NAME,
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR,
        base_size=cfg.DATAAUG.BASE_SIZE,
        crop_size=cfg.DATAAUG.CROP_SIZE,
        rand_scale=True)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.TRAIN_BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If the sync batch norm strategy is used, drop the last batch when the
        # number of samples in batch_data is less than cfg.BATCH_SIZE to avoid
        # NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPUs
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#device count: {}".format(dev_count))
    cfg.TRAIN_BATCH_SIZE = dev_count * int(cfg.TRAIN_BATCH_SIZE_PER_GPU)
    print_info("#train_batch_size: {}".format(cfg.TRAIN_BATCH_SIZE))
    print_info("#batch_size_per_dev: {}".format(cfg.TRAIN_BATCH_SIZE_PER_GPU))

    py_reader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    py_reader.decorate_sample_generator(
        data_generator,
        batch_size=cfg.TRAIN_BATCH_SIZE_PER_GPU,
        drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iterations
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100

    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply the sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whether the shape of a persistable variable matches the
            current network.
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False

        for x in train_prog.list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)
        fluid.io.load_vars(
            exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
        for var in load_vars:
            print_info("Parameter[{}] loaded successfully!".format(var.name))
        for var in load_fail_vars:
            print_info(
                "Parameter[{}] doesn't exist or its shape does not match the"
                " current network, skipping it.".format(var.name))
        print_info("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print_info(
            'Pretrained model dir {} does not exist, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use a streaming confusion matrix to
        # calculate IoU results in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            exit(1)

        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.TRAIN_BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.TRAIN_BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        py_reader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process matches expectations
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={}/{} step={}/{} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, cfg.SOLVER.NUM_EPOCHS, step, all_step,
                                 lr[0], avg_loss, mean_acc, mean_iou, speed,
                                 calculate_eta(all_step - step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_vdl:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  step)
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/step/sec', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and
                    # metric calculation
                    loss, lr = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={}/{} step={}/{} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, cfg.SOLVER.NUM_EPOCHS, step, all_step,
                                 lr[0], avg_loss, speed,
                                 calculate_eta(all_step - step, speed)))
                        if args.use_vdl:
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/speed', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                py_reader.reset()
                break
            except Exception as e:
                print(e)

        if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, train_prog, epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)

            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(exe, train_prog, 'final')

    if args.use_vdl:
        log_writer.close()
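# The training loop above relies on a Timer utility (start()/elapsed_time()/
# restart(), plus stop() in the decorator wrapper). The project's own Timer is
# not shown here; the following is only a minimal sketch, assuming a simple
# wall-clock implementation with that interface.
import time


class Timer(object):
    """Minimal wall-clock timer sketch matching the calls used above."""

    def __init__(self):
        self._start = None

    def start(self):
        # record the current wall-clock time
        self._start = time.time()

    def restart(self):
        # reset the reference point, used after each logging interval
        self.start()

    def elapsed_time(self):
        # seconds since the last start()/restart()
        return time.time() - self._start

    def stop(self, tag=""):
        # report the elapsed time for a named phase (used by the wrapper above)
        print("[{}] took {:.4f}s".format(tag, self.elapsed_time()))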
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False,
             multi_scales=False, flip=False, **kwargs):
    np.set_printoptions(precision=5, suppress=True)

    num_classes = cfg.DATASET.NUM_CLASSES
    base_size = cfg.TEST.BASE_SIZE
    crop_size = cfg.TEST.CROP_SIZE

    startup_prog = fluid.Program()
    test_prog = fluid.Program()

    dataset = build_dataset(
        cfg.DATASET.DATASET_NAME,
        file_list=cfg.DATASET.VAL_FILE_LIST,
        mode=ModelPhase.EVAL,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # TODO: check whether the batch reader is compatible with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            yield b[0], b[1], b[2]

    py_reader, avg_loss, out, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    py_reader.decorate_sample_generator(
        data_generator,
        drop_last=False,
        batch_size=cfg.EVAL_BATCH_SIZE,
        places=fluid.cuda_places())

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        filename = '{}_{}_{}_epoch_{}.pdparams'.format(
            str(cfg.MODEL.MODEL_NAME), str(cfg.MODEL.BACKBONE),
            str(cfg.DATASET.DATASET_NAME), cfg.SOLVER.NUM_EPOCHS)
        print("loading testing model file: {}/{}".format(ckpt_dir, filename))
        fluid.io.load_params(
            exe, ckpt_dir, main_program=test_prog, filename=filename)

    # Use a streaming confusion matrix to calculate mean_iou
    np.set_printoptions(
        precision=4, suppress=True, linewidth=160, floatmode="fixed")
    conf_mat = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    # fetch_list: outputs returned by the model
    fetch_list = [avg_loss.name, out.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.VAL_TOTAL_IMAGES // cfg.EVAL_BATCH_SIZE
    timer = Timer()
    timer.start()

    for data in py_reader():
        mask = np.array(data[0]['mask'])
        label = np.array(data[0]['label'])
        image_org = np.array(data[0]['image'])
        image = np.transpose(image_org, (0, 2, 3, 1))  # BCHW -> BHWC
        image = np.squeeze(image)

        if cfg.TEST.SLIDE_WINDOW:
            if not multi_scales:
                scales = [1.0]
            else:
                scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25] \
                    if cfg.DATASET.DATASET_NAME == 'cityscapes' \
                    else [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
                # scales = [0.75, 1.0, 1.25]  # fast multi-scale testing

            # sliding-window stride
            stride = int(crop_size * 1.0 / 3)  # 1/3 > 2/3 > 1/2 for input_size: 769 x 769
            h, w = image.shape[0:2]
            scores = np.zeros(shape=[num_classes, h, w], dtype='float32')

            for scale in scales:
                long_size = int(math.ceil(base_size * scale))
                if h > w:
                    height = long_size
                    width = int(1.0 * w * long_size / h + 0.5)
                    short_size = width
                else:
                    width = long_size
                    height = int(1.0 * h * long_size / w + 0.5)
                    short_size = height
                # print('org_img_size: {}x{}, rescale_img_size: {}x{}'.format(h, w, height, width))
                cur_img = image_resize(image, height, width)

                # padding
                if long_size <= crop_size:
                    pad_img = pad_single_image(cur_img, crop_size)
                    label_feed, mask_feed = get_feed(pad_img)
                    pad_img = mapper_image(pad_img)
                    loss, pred1 = exe.run(
                        test_prog,
                        feed={
                            'image': pad_img,
                            'label': label_feed,
                            'mask': mask_feed
                        },
                        fetch_list=fetch_list,
                        return_numpy=True)
                    pred1 = np.array(pred1)
                    outputs = pred1[:, :, :height, :width]
                    if flip:
                        pad_img_flip = flip_left_right_image(cur_img)
                        pad_img_flip = pad_single_image(pad_img_flip, crop_size)
                        label_feed, mask_feed = get_feed(pad_img_flip)
                        pad_img_flip = mapper_image(pad_img_flip)
                        loss, pred1 = exe.run(
                            test_prog,
                            feed={
                                'image': pad_img_flip,
                                'label': label_feed,
                                'mask': mask_feed
                            },
                            fetch_list=fetch_list,
                            return_numpy=True)
                        pred1 = np.flip(pred1, 3)
                        outputs += pred1[:, :, :height, :width]
                else:
                    if short_size < crop_size:
                        pad_img = pad_single_image(cur_img, crop_size)
                    else:
                        pad_img = cur_img
                    ph, pw = pad_img.shape[0:2]

                    # sliding window
                    h_grids = int(math.ceil(1.0 * (ph - crop_size) / stride)) + 1
                    w_grids = int(math.ceil(1.0 * (pw - crop_size) / stride)) + 1
                    outputs = np.zeros(
                        shape=[1, num_classes, ph, pw], dtype='float32')
                    count_norm = np.zeros(shape=[1, 1, ph, pw], dtype='int32')
                    for idh in range(h_grids):
                        for idw in range(w_grids):
                            h0 = idh * stride
                            w0 = idw * stride
                            h1 = min(h0 + crop_size, ph)
                            w1 = min(w0 + crop_size, pw)
                            # print('(h0,w0,h1,w1):({},{},{},{})'.format(h0, w0, h1, w1))
                            crop_img = crop_image(pad_img, h0, w0, h1, w1)
                            pad_crop_img = pad_single_image(crop_img, crop_size)
                            label_feed, mask_feed = get_feed(pad_crop_img)
                            pad_crop_img = mapper_image(pad_crop_img)
                            loss, pred1 = exe.run(
                                test_prog,
                                feed={
                                    'image': pad_crop_img,
                                    'label': label_feed,
                                    'mask': mask_feed
                                },
                                fetch_list=fetch_list,
                                return_numpy=True)
                            pred1 = np.array(pred1)
                            outputs[:, :, h0:h1, w0:w1] += pred1[:, :, 0:h1 - h0, 0:w1 - w0]
                            count_norm[:, :, h0:h1, w0:w1] += 1
                            if flip:
                                pad_img_flip = flip_left_right_image(crop_img)
                                pad_img_flip = pad_single_image(
                                    pad_img_flip, crop_size)
                                label_feed, mask_feed = get_feed(pad_img_flip)
                                pad_img_flip = mapper_image(pad_img_flip)
                                loss, pred1 = exe.run(
                                    test_prog,
                                    feed={
                                        'image': pad_img_flip,
                                        'label': label_feed,
                                        'mask': mask_feed
                                    },
                                    fetch_list=fetch_list,
                                    return_numpy=True)
                                pred1 = np.flip(pred1, 3)
                                outputs[:, :, h0:h1, w0:w1] += pred1[:, :, 0:h1 - h0, 0:w1 - w0]
                                count_norm[:, :, h0:h1, w0:w1] += 1
                    outputs = 1.0 * outputs / count_norm
                    outputs = outputs[:, :, :height, :width]

                with fluid.dygraph.guard():
                    outputs = fluid.dygraph.to_variable(outputs)
                    outputs = fluid.layers.resize_bilinear(
                        outputs, out_shape=[h, w])
                    score = outputs.numpy()[0]
                scores += score
        else:
            # take the original image as the model input
            loss, pred = exe.run(
                test_prog,
                feed={'image': image_org, 'label': label, 'mask': mask},
                fetch_list=fetch_list,
                return_numpy=True)
            scores = pred[0]

        # compute IoU on the fused multi-scale result
        pred = np.argmax(scores, axis=0).astype('int64')
        pred = pred[np.newaxis, :, :, np.newaxis]
        step += 1
        num_images += pred.shape[0]
        conf_mat.calculate(pred, label, mask)
        _, iou = conf_mat.mean_iou()
        _, acc = conf_mat.accuracy()

        print("[EVAL] step={}/{} acc={:.4f} IoU={:.4f}".format(
            step, all_step, acc, iou))

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    print("[EVAL] #image={} acc={:.4f} IoU={:.4f}".format(
        num_images, avg_acc, avg_iou))
    print("[EVAL] Category IoU:", category_iou)
    print("[EVAL] Category Acc:", category_acc)
    print("[EVAL] Kappa: {:.4f}".format(conf_mat.kappa()))
    print("flip = ", flip)
    print("scales = ", scales)

    return category_iou, avg_iou, category_acc, avg_acc
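# A hedged usage sketch for evaluate(): multi-scale, flipped sliding-window
# evaluation from a parsed config. `cfg` and the checkpoint directory are
# assumed to be prepared the same way as in train(); the argument values below
# are illustrative only, not part of the original script.
if __name__ == '__main__':
    category_iou, miou, category_acc, macc = evaluate(
        cfg,
        ckpt_dir=cfg.TEST.TEST_MODEL,
        use_gpu=True,
        use_mpio=False,
        multi_scales=True,
        flip=True)
    print("mIoU={:.4f} mAcc={:.4f}".format(miou, macc))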