def main():
    args = parse_args()
    if args.amp:
        import os
        os.environ['AMP'] = "1"
    cfg = get_config(args.config, overrides=args.override)

    _, world_size = get_dist_info()
    parallel = world_size != 1
    if parallel:
        paddle.distributed.init_parallel_env()

    if args.test:
        test_model(cfg, weights=args.weights, parallel=parallel)
    elif args.train_dali:
        train_dali(cfg, weights=args.weights, parallel=parallel)
    elif args.multigrid:
        train_model_multigrid(cfg, world_size, validate=args.validate)
    else:
        train_model(cfg,
                    weights=args.weights,
                    parallel=parallel,
                    validate=args.validate,
                    fleet=args.fleet,
                    amp=args.amp)
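# NOTE: a minimal sketch of the CLI parser that main() above assumes.
# Flag names are inferred from the `args.*` attributes used there; the real
# parse_args() may use different names, defaults, or help text.
import argparse


def parse_args():
    parser = argparse.ArgumentParser("training script")
    parser.add_argument('-c', '--config', type=str, help='config file path')
    parser.add_argument('-o', '--override', action='append', default=[],
                        help='config options to override, e.g. -o DATASET.batch_size=16')
    parser.add_argument('-w', '--weights', type=str, default=None,
                        help='weights for finetuning or testing')
    parser.add_argument('--test', action='store_true', help='run evaluation only')
    parser.add_argument('--train_dali', action='store_true', help='train with the DALI loader')
    parser.add_argument('--multigrid', action='store_true', help='train with the multigrid schedule')
    parser.add_argument('--validate', action='store_true', help='evaluate while training')
    parser.add_argument('--fleet', action='store_true', help='use fleet for distributed training')
    parser.add_argument('--amp', action='store_true', help='enable automatic mixed precision')
    return parser.parse_args()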
def __init__(self, data_size, batch_size, world_size, log_interval=1, **kwargs):
    self.data_size = data_size
    self.batch_size = batch_size
    # The world_size argument is not used here; the value is always refreshed
    # from the distributed environment instead.
    _, self.world_size = get_dist_info()
    self.log_interval = log_interval
def get_acc(self, scores, labels, valid_mode):
    top1 = paddle.metric.accuracy(input=scores, label=labels, k=1)
    top5 = paddle.metric.accuracy(input=scores, label=labels, k=5)
    _, world_size = get_dist_info()

    # NOTE(shipping): handle multi-card validation
    if world_size > 1 and valid_mode:
        # sum the metrics across cards, then average
        top1 = paddle.distributed.all_reduce(
            top1, op=paddle.distributed.ReduceOp.SUM) / world_size
        top5 = paddle.distributed.all_reduce(
            top5, op=paddle.distributed.ReduceOp.SUM) / world_size

    return top1, top5
def main():
    args = parse_args()
    cfg = get_config(args.config, overrides=args.override)
    dataset = build_dataset((cfg.DATASET.test, cfg.PIPELINE.test))

    _, world_size = get_dist_info()
    parallel = world_size != 1
    if parallel:
        paddle.distributed.init_parallel_env()

    model = build_model(cfg.MODEL)
    test_model(model, dataset, cfg, args.weights, world_size)
def loss(self, scores, labels, reduce_sum=False, **kwargs):
    """Calculate the loss according to the model output ``scores``
    and the target ``labels``.

    Args:
        scores (paddle.Tensor): The output of the model.
        labels (paddle.Tensor): The target output of the model.

    Returns:
        losses (dict): A dict containing the field 'loss' (mandatory)
            and 'top1', 'top5' (optional).
    """
    if len(labels) == 1:
        labels = labels[0]
    elif len(labels) == 3:
        labels_a, labels_b, lam = labels
        return self.mixup_loss(scores, labels_a, labels_b, lam)
    else:
        raise NotImplementedError

    if self.ls_eps != 0.:
        labels = F.one_hot(labels, self.num_classes)
        labels = F.label_smooth(labels, epsilon=self.ls_eps)
        # reshape [bs, 1, num_classes] to [bs, num_classes]
        # NOTE: a squeeze may read more clearly here.
        labels = paddle.reshape(labels, shape=[-1, self.num_classes])
        # labels.stop_gradient = True  # XXX(shipping): check whether this is necessary

    losses = dict()
    # NOTE(shipping): F.cross_entropy already includes the softmax (log_softmax + nll_loss).
    # NOTE(shipping): check the performance of F.cross_entropy.
    loss = self.loss_func(scores, labels, **kwargs)
    avg_loss = paddle.mean(loss)

    top1 = paddle.metric.accuracy(input=scores, label=labels, k=1)
    top5 = paddle.metric.accuracy(input=scores, label=labels, k=5)

    _, world_size = get_dist_info()
    # NOTE(shipping): handle multi-card validation
    if world_size > 1 and reduce_sum:
        # sum the metrics across cards, then average
        top1 = paddle.distributed.all_reduce(
            top1, op=paddle.distributed.ReduceOp.SUM) / world_size
        top5 = paddle.distributed.all_reduce(
            top5, op=paddle.distributed.ReduceOp.SUM) / world_size

    losses['top1'] = top1
    losses['top5'] = top5
    losses['loss'] = avg_loss
    return losses
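# NOTE: mixup_loss() is called above but not shown here. A minimal sketch under
# the standard mixup formulation, assuming `self.loss_func` is a per-sample
# cross-entropy-style callable; the real method may differ in name and detail.
def mixup_loss(self, scores, labels_a, labels_b, lam, **kwargs):
    losses = dict()
    loss_a = self.loss_func(scores, labels_a, **kwargs)
    loss_b = self.loss_func(scores, labels_b, **kwargs)
    # Convex combination of the two losses, weighted by the mixup lambda.
    losses['loss'] = lam * paddle.mean(loss_a) + (1 - lam) * paddle.mean(loss_b)
    return losses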
def main():
    args = parse_args()
    cfg = get_config(args.config, overrides=args.override)

    _, world_size = get_dist_info()
    parallel = world_size != 1
    if parallel:
        paddle.distributed.init_parallel_env()

    if args.test:
        test_model(cfg, weights=args.weights, parallel=parallel)
    else:
        train_model(cfg,
                    weights=args.weights,
                    parallel=parallel,
                    validate=args.validate)