Example #1
def main():
    args = parse_args()
    if args.amp:
        import os
        os.environ['AMP'] = "1"
    cfg = get_config(args.config, overrides=args.override)

    _, world_size = get_dist_info()
    parallel = world_size != 1
    if parallel:
        paddle.distributed.init_parallel_env()

    if args.test:
        test_model(cfg, weights=args.weights, parallel=parallel)
    elif args.train_dali:
        train_dali(cfg, weights=args.weights, parallel=parallel)
    elif args.multigrid:
        train_model_multigrid(cfg, world_size, validate=args.validate)
    else:
        train_model(cfg,
                    weights=args.weights,
                    parallel=parallel,
                    validate=args.validate,
                    fleet=args.fleet,
                    amp=args.amp)
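Every example on this page unpacks the return value of get_dist_info() as (rank, world_size). For context, here is a minimal sketch of what such a helper presumably does, built on Paddle's public distributed API (the actual implementation in the source project may differ):

import paddle.distributed as dist

def get_dist_info():
    # Rank of this process and total number of processes; Paddle
    # falls back to rank 0 / world size 1 when no parallel
    # environment has been initialized.
    rank = dist.get_rank()
    world_size = dist.get_world_size()
    return rank, world_size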
Example #2
    def __init__(self,
                 data_size,
                 batch_size,
                 world_size,
                 log_interval=1,
                 **kwargs):
        self.data_size = data_size
        self.batch_size = batch_size
        # NOTE: the world_size argument is ignored; the value is
        # taken from get_dist_info() instead.
        _, self.world_size = get_dist_info()
        self.log_interval = log_interval
Example #3
    def get_acc(self, scores, labels, valid_mode):
        top1 = paddle.metric.accuracy(input=scores, label=labels, k=1)
        top5 = paddle.metric.accuracy(input=scores, label=labels, k=5)
        _, world_size = get_dist_info()
        # NOTE(shipping): deal with multi-card validation
        if world_size > 1 and valid_mode:  # all-reduce then average across cards when validating
            top1 = paddle.distributed.all_reduce(
                top1, op=paddle.distributed.ReduceOp.SUM) / world_size
            top5 = paddle.distributed.all_reduce(
                top5, op=paddle.distributed.ReduceOp.SUM) / world_size

        return top1, top5
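The reduce-then-average idiom above also appears in Example #5. In isolation the pattern looks like this; all_reduce_mean is a hypothetical helper name, and the sketch assumes paddle.distributed.init_parallel_env() has already been called:

import paddle.distributed as dist

def all_reduce_mean(tensor):
    # Sum the per-card values, then divide by the number of cards
    # to obtain the average over all cards.
    dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
    return tensor / dist.get_world_size()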
Example #4
def main():
    args = parse_args()
    cfg = get_config(args.config, overrides=args.override)

    dataset = build_dataset((cfg.DATASET.test, cfg.PIPELINE.test))
    _, world_size = get_dist_info()
    parallel = world_size != 1
    if parallel:
        paddle.distributed.init_parallel_env()

    model = build_model(cfg.MODEL)

    test_model(model, dataset, cfg, args.weights, world_size)
Example #5
    def loss(self, scores, labels, reduce_sum=False, **kwargs):
        """Calculate the loss accroding to the model output ```scores```,
           and the target ```labels```.

        Args:
            scores (paddle.Tensor): The output of the model.
            labels (paddle.Tensor): The target output of the model.

        Returns:
            losses (dict): A dict containing field 'loss'(mandatory) and 'top1_acc', 'top5_acc'(optional).

        """
        if len(labels) == 1:
            labels = labels[0]
        elif len(labels) == 3:
            labels_a, labels_b, lam = labels
            return self.mixup_loss(scores, labels_a, labels_b, lam)
        else:
            raise NotImplementedError

        if self.ls_eps != 0.:
            labels = F.one_hot(labels, self.num_classes)
            labels = F.label_smooth(labels, epsilon=self.ls_eps)
            # reshape [bs, 1, num_classes] to [bs, num_classes]
            # NOTE: paddle.squeeze would arguably be clearer here.
            labels = paddle.reshape(labels, shape=[-1, self.num_classes])
        # labels.stop_gradient = True  # XXX(shipping): check whether this is necessary
        losses = dict()
        # NOTE(shipping): F.cross_entropy combines log_softmax and nll_loss!
        # NOTE(shipping): check the performance of F.cross_entropy
        loss = self.loss_func(scores, labels, **kwargs)
        avg_loss = paddle.mean(loss)
        top1 = paddle.metric.accuracy(input=scores, label=labels, k=1)
        top5 = paddle.metric.accuracy(input=scores, label=labels, k=5)

        _, world_size = get_dist_info()

        # NOTE(shipping): deal with multi-card validation
        if world_size > 1 and reduce_sum:
            top1 = paddle.distributed.all_reduce(
                top1, op=paddle.distributed.ReduceOp.SUM) / world_size
            top5 = paddle.distributed.all_reduce(
                top5, op=paddle.distributed.ReduceOp.SUM) / world_size

        losses['top1'] = top1
        losses['top5'] = top5
        losses['loss'] = avg_loss

        return losses
Example #6
def main():
    args = parse_args()
    cfg = get_config(args.config, overrides=args.override)

    _, world_size = get_dist_info()
    parallel = world_size != 1
    if parallel:
        paddle.distributed.init_parallel_env()

    if args.test:
        test_model(cfg, weights=args.weights, parallel=parallel)
    else:
        train_model(cfg,
                    weights=args.weights,
                    parallel=parallel,
                    validate=args.validate)
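Note that world_size > 1 (and hence the parallel branch) in these entry points only holds when the script is started through Paddle's distributed launcher, for example (the GPU ids, script name, and trailing flags are placeholders):

python -m paddle.distributed.launch --gpus="0,1,2,3" main.py ...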