def predict(self, dynamic):
    fluid.enable_dygraph(self.device) if dynamic else None
    model = LeNet()
    model.prepare(inputs=self.inputs)
    model.load(self.weight_path)

    output = model.predict(
        self.test_dataset, batch_size=64, stack_outputs=True)
    np.testing.assert_equal(output[0].shape[0], len(self.test_dataset))

    acc = compute_acc(output[0], self.val_dataset.labels)
    np.testing.assert_allclose(acc, self.acc1)

    sampler = DistributedBatchSampler(
        self.test_dataset, batch_size=64, shuffle=False)

    test_loader = fluid.io.DataLoader(
        self.test_dataset,
        batch_sampler=sampler,
        places=self.device,
        return_list=True)

    model.evaluate(test_loader)

    fluid.disable_dygraph() if dynamic else None

def fit(self, dynamic):
    fluid.enable_dygraph(self.device) if dynamic else None
    seed = 333
    fluid.default_startup_program().random_seed = seed
    fluid.default_main_program().random_seed = seed

    model = LeNet()
    optim_new = fluid.optimizer.Adam(
        learning_rate=0.001, parameter_list=model.parameters())
    model.prepare(
        optim_new,
        loss_function=CrossEntropy(average=False),
        metrics=Accuracy(),
        inputs=self.inputs,
        labels=self.labels)
    model.fit(self.train_dataset, batch_size=64, shuffle=False)

    result = model.evaluate(self.val_dataset, batch_size=64)
    np.testing.assert_allclose(result['acc'], self.acc1)

    train_sampler = DistributedBatchSampler(
        self.train_dataset, batch_size=64, shuffle=False)
    val_sampler = DistributedBatchSampler(
        self.val_dataset, batch_size=64, shuffle=False)

    train_loader = fluid.io.DataLoader(
        self.train_dataset,
        batch_sampler=train_sampler,
        places=self.device,
        return_list=True)

    val_loader = fluid.io.DataLoader(
        self.val_dataset,
        batch_sampler=val_sampler,
        places=self.device,
        return_list=True)

    model.fit(train_loader, val_loader)

    fluid.disable_dygraph() if dynamic else None

def __init__(self, dataset, batch_size, is_train, num_workers=4):
    self.dataset = DictDataset(dataset)

    place = paddle.fluid.CUDAPlace(ParallelEnv().dev_id) \
        if ParallelEnv().nranks > 1 else paddle.fluid.CUDAPlace(0)

    sampler = DistributedBatchSampler(
        self.dataset,
        batch_size=batch_size,
        shuffle=True if is_train else False,
        drop_last=True if is_train else False)

    self.dataloader = paddle.io.DataLoader(
        self.dataset,
        batch_sampler=sampler,
        places=place,
        num_workers=num_workers)

    self.batch_size = batch_size

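# A minimal, self-contained sketch of the sampler/loader pattern used by the
# wrapper above, assuming the paddle.io API (Dataset, DistributedBatchSampler,
# DataLoader). The ToyDataset class, its sizes, and the batch size are
# illustrative placeholders only, not taken from this file.
import numpy as np
from paddle.io import Dataset, DistributedBatchSampler, DataLoader


class ToyDataset(Dataset):
    def __len__(self):
        return 16

    def __getitem__(self, idx):
        # one fake image and label per index
        return np.ones([3, 8, 8], dtype='float32') * idx, np.array([idx])


def demo_loader():
    dataset = ToyDataset()
    # DistributedBatchSampler shards batch indices across ranks; in a
    # single-process run it behaves like a plain BatchSampler.
    sampler = DistributedBatchSampler(
        dataset, batch_size=4, shuffle=True, drop_last=True)
    loader = DataLoader(
        dataset, batch_sampler=sampler, num_workers=0, return_list=True)
    for images, labels in loader:
        print(images.shape, labels.shape)
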
def __init__(self,
             args,
             place,
             phase="train",
             shuffle=False,
             num_workers=0,
             drop_last=False):
    assert phase in [
        "train", "test", "predict"
    ], "phase should be in [train, test, predict], but got %s" % phase

    if phase == "train":
        file_name = args.train_file
    elif phase == "test":
        file_name = args.test_file
    elif phase == "predict":
        file_name = args.predict_file

    self.dataset = LacDataset(args)
    self.dataset.file_reader(file_name, phase=phase)

    if phase == "train":
        self.sampler = DistributedBatchSampler(
            dataset=self.dataset,
            batch_size=args.batch_size,
            shuffle=shuffle,
            drop_last=drop_last)
    else:
        self.sampler = BatchSampler(
            dataset=self.dataset,
            batch_size=args.batch_size,
            shuffle=shuffle,
            drop_last=drop_last)

    self.dataloader = DataLoader(
        dataset=self.dataset,
        batch_sampler=self.sampler,
        places=place,
        collate_fn=partial(create_lexnet_data_generator, args, phase=phase),
        num_workers=num_workers,
        return_list=True)

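# A hedged sketch of the collate_fn idiom used above: functools.partial binds
# extra arguments up front so the DataLoader can invoke the function with just
# the list of samples in a batch. create_lexnet_data_generator itself is not
# reproduced here; pad_batch and its pad_id argument are hypothetical
# stand-ins that only illustrate the mechanism.
from functools import partial

import numpy as np


def pad_batch(batch, pad_id=0):
    # batch is a list of (token_ids, label) samples; pad token ids to the
    # longest sequence in the batch.
    max_len = max(len(tokens) for tokens, _ in batch)
    padded = np.full([len(batch), max_len], pad_id, dtype='int64')
    labels = np.zeros([len(batch)], dtype='int64')
    for i, (tokens, label) in enumerate(batch):
        padded[i, :len(tokens)] = tokens
        labels[i] = label
    return padded, labels


# loader = DataLoader(dataset=dataset, batch_sampler=sampler,
#                     collate_fn=partial(pad_batch, pad_id=0),
#                     return_list=True)
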
def main():
    BATCH_SIZE = 1
    place = fluid.CPUPlace()
    dataset_obj = SRFolderDataset(
        lq_folder=lq_folder,
        gt_folder=gt_folder,
        pipeline=train_pipeline,
        scale=4)

    with fluid.dygraph.guard(place):
        train_sampler = DistributedBatchSampler(
            dataset_obj, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
        train_loader = DataLoader(
            dataset_obj,
            batch_sampler=train_sampler,
            places=place,
            num_workers=4,
            return_list=True)

        for batch_id, data in enumerate(train_loader):
            imwrite(var2img(data[1]), "./test/niu.png")
            break

def evaluate(self, dynamic):
    fluid.enable_dygraph(self.device) if dynamic else None
    model = LeNet()
    model.prepare(metrics=Accuracy(), inputs=self.inputs, labels=self.labels)
    model.load(self.weight_path)

    result = model.evaluate(self.val_dataset, batch_size=64)
    np.testing.assert_allclose(result['acc'], self.acc1)

    sampler = DistributedBatchSampler(
        self.val_dataset, batch_size=64, shuffle=False)

    val_loader = fluid.io.DataLoader(
        self.val_dataset,
        batch_sampler=sampler,
        places=self.device,
        return_list=True)

    model.evaluate(val_loader)

    fluid.disable_dygraph() if dynamic else None

def train(model,
          train_dataset,
          places=None,
          eval_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval_iters=1000,
          log_iters=10,
          num_classes=None,
          num_workers=8,
          use_vdl=False):
    ignore_index = model.ignore_index
    nranks = ParallelEnv().nranks

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        strategy = fluid.dygraph.prepare_context()
        ddp_model = fluid.dygraph.DataParallel(model, strategy)

    batch_sampler = DistributedBatchSampler(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    loader = DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        places=places,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    timer = Timer()
    avg_loss = 0.0
    iters_per_epoch = len(batch_sampler)
    best_mean_iou = -1.0
    best_model_iter = -1
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    timer.start()

    iter = 0
    while iter < iters:
        for data in loader:
            iter += 1
            if iter > iters:
                break
            train_reader_cost += timer.elapsed_time()
            images = data[0]
            labels = data[1].astype('int64')
            if nranks > 1:
                loss = ddp_model(images, labels)
                # apply_collective_grads sums gradients over multiple GPUs.
                loss = ddp_model.scale_loss(loss)
                loss.backward()
                ddp_model.apply_collective_grads()
            else:
                loss = model(images, labels)
                loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            lr = optimizer.current_step_lr()
            train_batch_cost += timer.elapsed_time()

            if iter % log_iters == 0 and ParallelEnv().local_rank == 0:
                avg_loss /= log_iters
                avg_train_reader_cost = train_reader_cost / log_iters
                avg_train_batch_cost = train_batch_cost / log_iters
                train_reader_cost = 0.0
                train_batch_cost = 0.0
                remain_iters = iters - iter
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
                            avg_loss * nranks, lr, avg_train_batch_cost,
                            avg_train_reader_cost, eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss * nranks,
                                          iter)
                    log_writer.add_scalar('Train/lr', lr, iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, iter)
                avg_loss = 0.0

            if (iter % save_interval_iters == 0
                    or iter == iters) and ParallelEnv().local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                fluid.save_dygraph(model.state_dict(),
                                   os.path.join(current_save_dir, 'model'))
                fluid.save_dygraph(optimizer.state_dict(),
                                   os.path.join(current_save_dir, 'model'))

                if eval_dataset is not None:
                    mean_iou, avg_acc = evaluate(
                        model,
                        eval_dataset,
                        model_dir=current_save_dir,
                        num_classes=num_classes,
                        ignore_index=ignore_index,
                        iter_id=iter)
                    if mean_iou > best_mean_iou:
                        best_mean_iou = mean_iou
                        best_model_iter = iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        fluid.save_dygraph(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model'))
                    logger.info(
                        'Current evaluated best model in eval_dataset is iter_{}, miou={:.4f}'
                        .format(best_model_iter, best_mean_iou))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter)
                        log_writer.add_scalar('Evaluate/aAcc', avg_acc, iter)
                    model.train()
            timer.restart()

    if use_vdl:
        log_writer.close()

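# A hedged usage sketch for the iteration-based train() above. The
# build_model/build_dataset helpers are hypothetical placeholders for whatever
# the surrounding project provides (they are not defined in this file); only
# the call matches the signature of train() as defined above.
def run_training():
    place = fluid.CUDAPlace(ParallelEnv().dev_id)
    with fluid.dygraph.guard(place):
        model = build_model()                    # placeholder segmentation model
        train_dataset = build_dataset('train')   # placeholder dataset
        val_dataset = build_dataset('val')       # placeholder dataset
        optimizer = fluid.optimizer.Adam(
            learning_rate=0.001, parameter_list=model.parameters())
        train(
            model,
            train_dataset,
            places=place,
            eval_dataset=val_dataset,
            optimizer=optimizer,
            save_dir='output',
            iters=10000,
            batch_size=2,
            save_interval_iters=1000,
            log_iters=10,
            num_workers=8,
            use_vdl=False)
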
def main():
    device = set_device(FLAGS.device)
    fluid.enable_dygraph(device) if FLAGS.dynamic else None

    inputs = [
        Input([None, 1], 'int64', name='img_id'),
        Input([None, 2], 'int32', name='img_shape'),
        Input([None, 3, None, None], 'float32', name='image'),
    ]
    labels = [
        Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
        Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
        Input([None, NUM_MAX_BOXES], 'float32', name='gt_score'),
    ]

    if not FLAGS.eval_only:  # training mode
        train_transform = Compose([
            ColorDistort(), RandomExpand(), RandomCrop(), RandomFlip(),
            NormalizeBox(), PadBox(), BboxXYXY2XYWH()
        ])
        train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
        dataset = COCODataset(
            dataset_dir=FLAGS.data,
            anno_path='annotations/instances_train2017.json',
            image_dir='train2017',
            with_background=False,
            mixup=True,
            transform=train_transform)
        batch_sampler = DistributedBatchSampler(
            dataset,
            batch_size=FLAGS.batch_size,
            shuffle=True,
            drop_last=True)
        loader = DataLoader(
            dataset,
            batch_sampler=batch_sampler,
            places=device,
            num_workers=FLAGS.num_workers,
            return_list=True,
            collate_fn=train_collate_fn)
    else:  # evaluation mode
        eval_transform = Compose([
            ResizeImage(target_size=608), NormalizeBox(), PadBox(),
            BboxXYXY2XYWH()
        ])
        eval_collate_fn = BatchCompose([NormalizeImage()])
        dataset = COCODataset(
            dataset_dir=FLAGS.data,
            anno_path='annotations/instances_val2017.json',
            image_dir='val2017',
            with_background=False,
            transform=eval_transform)
        # batch_size can only be 1 in evaluation for YOLOv3,
        # since the prediction bbox is a LoDTensor
        batch_sampler = DistributedBatchSampler(
            dataset, batch_size=1, shuffle=False, drop_last=False)
        loader = DataLoader(
            dataset,
            batch_sampler=batch_sampler,
            places=device,
            num_workers=FLAGS.num_workers,
            return_list=True,
            collate_fn=eval_collate_fn)

    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = yolov3_darknet53(
        num_classes=dataset.num_classes,
        model_mode='eval' if FLAGS.eval_only else 'train',
        pretrained=pretrained)

    if FLAGS.pretrain_weights and not FLAGS.eval_only:
        model.load(
            FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)

    optim = make_optimizer(
        len(batch_sampler), parameter_list=model.parameters())

    model.prepare(
        optim,
        YoloLoss(num_classes=dataset.num_classes),
        inputs=inputs,
        labels=labels,
        device=FLAGS.device)

    # NOTE: we implement the COCO metric of the YOLOv3 model here, separately
    # from the 'prepare' and 'fit' framework, for the following reasons:
    # 1. The YOLOv3 network structure differs between 'train' and 'eval'
    #    mode; in 'eval' mode the output is the predicted bbox rather than
    #    the feature maps used for calculating YoloLoss.
    # 2. The COCO metric also behaves differently from a standard Metric:
    #    it should not accumulate on each iteration, but only once at the
    #    end of an epoch.
    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        preds = model.predict(loader, stack_outputs=False)
        _, _, _, img_ids, bboxes = preds

        anno_path = os.path.join(FLAGS.data,
                                 'annotations/instances_val2017.json')
        coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
        for img_id, bbox in zip(img_ids, bboxes):
            coco_metric.update(img_id, bbox)
        coco_metric.accumulate()
        coco_metric.reset()
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    save_dir = FLAGS.save_dir or 'yolo_checkpoint'

    model.fit(train_data=loader,
              epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
              save_dir=os.path.join(save_dir, "mixup"),
              save_freq=10)

    # do not use the image mixup transform in the last FLAGS.no_mixup_epoch epochs
    dataset.mixup = False
    model.fit(train_data=loader,
              epochs=FLAGS.no_mixup_epoch,
              save_dir=os.path.join(save_dir, "no_mixup"),
              save_freq=5)

def train(model,
          train_dataset,
          places=None,
          eval_dataset=None,
          optimizer=None,
          save_dir='output',
          num_epochs=100,
          batch_size=2,
          pretrained_model=None,
          resume_model=None,
          save_interval_epochs=1,
          log_steps=10,
          num_classes=None,
          num_workers=8,
          use_vdl=False):
    ignore_index = model.ignore_index
    nranks = ParallelEnv().nranks

    start_epoch = 0
    if resume_model is not None:
        start_epoch = resume(model, optimizer, resume_model)
    elif pretrained_model is not None:
        load_pretrained_model(model, pretrained_model)

    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        strategy = fluid.dygraph.prepare_context()
        ddp_model = fluid.dygraph.DataParallel(model, strategy)

    batch_sampler = DistributedBatchSampler(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    loader = DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        places=places,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    timer = Timer()
    avg_loss = 0.0
    steps_per_epoch = len(batch_sampler)
    total_steps = steps_per_epoch * (num_epochs - start_epoch)
    num_steps = 0
    best_mean_iou = -1.0
    best_model_epoch = -1
    train_reader_cost = 0.0
    train_batch_cost = 0.0

    for epoch in range(start_epoch, num_epochs):
        timer.start()
        for step, data in enumerate(loader):
            train_reader_cost += timer.elapsed_time()
            images = data[0]
            labels = data[1].astype('int64')
            if nranks > 1:
                loss = ddp_model(images, labels)
                # apply_collective_grads sums gradients over multiple GPUs.
                loss = ddp_model.scale_loss(loss)
                loss.backward()
                ddp_model.apply_collective_grads()
            else:
                loss = model(images, labels)
                loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            lr = optimizer.current_step_lr()
            num_steps += 1
            train_batch_cost += timer.elapsed_time()

            if num_steps % log_steps == 0 and ParallelEnv().local_rank == 0:
                avg_loss /= log_steps
                avg_train_reader_cost = train_reader_cost / log_steps
                avg_train_batch_cost = train_batch_cost / log_steps
                train_reader_cost = 0.0
                train_batch_cost = 0.0
                remain_steps = total_steps - num_steps
                eta = calculate_eta(remain_steps, avg_train_batch_cost)
                logging.info(
                    "[TRAIN] Epoch={}/{}, Step={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
                    .format(epoch + 1, num_epochs, step + 1, steps_per_epoch,
                            avg_loss * nranks, lr, avg_train_batch_cost,
                            avg_train_reader_cost, eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss * nranks,
                                          num_steps)
                    log_writer.add_scalar('Train/lr', lr, num_steps)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, num_steps)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, num_steps)
                avg_loss = 0.0
            timer.restart()

        if ((epoch + 1) % save_interval_epochs == 0
                or epoch + 1 == num_epochs) and ParallelEnv().local_rank == 0:
            current_save_dir = os.path.join(save_dir,
                                            "epoch_{}".format(epoch + 1))
            if not os.path.isdir(current_save_dir):
                os.makedirs(current_save_dir)
            fluid.save_dygraph(model.state_dict(),
                               os.path.join(current_save_dir, 'model'))
            fluid.save_dygraph(optimizer.state_dict(),
                               os.path.join(current_save_dir, 'model'))

            if eval_dataset is not None:
                mean_iou, avg_acc = evaluate(
                    model,
                    eval_dataset,
                    model_dir=current_save_dir,
                    num_classes=num_classes,
                    ignore_index=ignore_index,
                    epoch_id=epoch + 1)
                if mean_iou > best_mean_iou:
                    best_mean_iou = mean_iou
                    best_model_epoch = epoch + 1
                    best_model_dir = os.path.join(save_dir, "best_model")
                    fluid.save_dygraph(
                        model.state_dict(),
                        os.path.join(best_model_dir, 'model'))
                logging.info(
                    'Current evaluated best model in eval_dataset is epoch_{}, miou={:.4f}'
                    .format(best_model_epoch, best_mean_iou))

                if use_vdl:
                    log_writer.add_scalar('Evaluate/mIoU', mean_iou, epoch + 1)
                    log_writer.add_scalar('Evaluate/aAcc', avg_acc, epoch + 1)
                model.train()

    if use_vdl:
        log_writer.close()