Example #1
def main():
    if FLAGS.static:
        paddle.enable_static()
    device = paddle.set_device(FLAGS.device)

    model_list = [x for x in models.__dict__["__all__"]]
    assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
        model_list, FLAGS.arch)
    net = models.__dict__[FLAGS.arch](
        pretrained=FLAGS.eval_only and not FLAGS.resume)

    inputs = [Input([None, 3, 224, 224], 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    model = paddle.Model(net, inputs, labels)

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    train_dataset = ImageNetDataset(os.path.join(FLAGS.data, 'train'),
                                    mode='train',
                                    image_size=FLAGS.image_size,
                                    resize_short_size=FLAGS.resize_short_size)

    val_dataset = ImageNetDataset(os.path.join(FLAGS.data, 'val'),
                                  mode='val',
                                  image_size=FLAGS.image_size,
                                  resize_short_size=FLAGS.resize_short_size)

    optim = make_optimizer(np.ceil(
        len(train_dataset) * 1. / FLAGS.batch_size / ParallelEnv().nranks),
                           parameter_list=model.parameters())

    model.prepare(optim, paddle.nn.CrossEntropyLoss(),
                  paddle.metric.Accuracy(topk=(1, 5)))

    if FLAGS.eval_only:
        model.evaluate(val_dataset,
                       batch_size=FLAGS.batch_size,
                       num_workers=FLAGS.num_workers)
        return

    output_dir = os.path.join(
        FLAGS.output_dir, FLAGS.arch,
        time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
    if ParallelEnv().local_rank == 0 and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=output_dir,
              num_workers=FLAGS.num_workers)
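
FLAGS, ImageNetDataset, and make_optimizer come from the surrounding project and are not shown here. As a rough illustration only, make_optimizer could be a momentum optimizer with a piecewise learning-rate schedule; the sketch below is an assumption (the hyperparameters, boundaries, and FLAGS.lr are all assumed), not the original implementation:

import paddle

def make_optimizer(step_per_epoch, parameter_list=None):
    # Hypothetical sketch: decay an assumed base LR (FLAGS.lr) by 10x at a few
    # epoch boundaries, with boundaries counted in optimizer steps.
    boundaries = [int(step_per_epoch) * e for e in (30, 60, 90)]
    values = [FLAGS.lr * (0.1 ** i) for i in range(len(boundaries) + 1)]
    lr = paddle.optimizer.lr.PiecewiseDecay(boundaries=boundaries, values=values)
    return paddle.optimizer.Momentum(
        learning_rate=lr,
        momentum=0.9,
        weight_decay=paddle.regularizer.L2Decay(1e-4),
        parameters=parameter_list)
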
Example #2
def main():
    device = set_device(FLAGS.device)
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    model = models.__dict__[FLAGS.arch](pretrained=FLAGS.eval_only and
                                        not FLAGS.resume)

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    inputs = [Input([None, 3, 224, 224], 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    train_dataset = ImageNetDataset(
        os.path.join(FLAGS.data, 'train'), mode='train')
    val_dataset = ImageNetDataset(os.path.join(FLAGS.data, 'val'), mode='val')

    optim = make_optimizer(
        np.ceil(
            len(train_dataset) * 1. / FLAGS.batch_size / ParallelEnv().nranks),
        parameter_list=model.parameters())

    model.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 5)), inputs, labels)

    if FLAGS.eval_only:
        model.evaluate(
            val_dataset,
            batch_size=FLAGS.batch_size,
            num_workers=FLAGS.num_workers)
        return

    output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch,
                              time.strftime('%Y-%m-%d-%H-%M',
                                            time.localtime()))
    if ParallelEnv().local_rank == 0 and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=output_dir,
              num_workers=FLAGS.num_workers)
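
Both examples assume a module-level FLAGS object. A plausible argparse setup covering the flags they reference might look like the sketch below (the flag names match the code above; every default value is an assumption):

import argparse

parser = argparse.ArgumentParser("ImageNet classification example (assumed flag definitions)")
parser.add_argument("--arch", type=str, default="resnet50")
parser.add_argument("--device", type=str, default="gpu")
parser.add_argument("--static", action="store_true")    # used by Example #1
parser.add_argument("--dynamic", action="store_true")   # used by Example #2
parser.add_argument("--data", type=str, default="")
parser.add_argument("--image_size", type=int, default=224)
parser.add_argument("--resize_short_size", type=int, default=256)
parser.add_argument("--batch_size", type=int, default=256)
parser.add_argument("--epoch", type=int, default=90)
parser.add_argument("--lr", type=float, default=0.1)
parser.add_argument("--num_workers", type=int, default=4)
parser.add_argument("--resume", type=str, default=None)
parser.add_argument("--eval_only", action="store_true")
parser.add_argument("--output_dir", type=str, default="output")
FLAGS = parser.parse_args()
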
Example #3
def main():
    # 1 load model
    model_list = [x for x in models.__dict__["__all__"]]
    assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
        model_list, FLAGS.arch)
    fp32_model = models.__dict__[FLAGS.arch](pretrained=True)
    fp32_model.eval()
    for name, layer in fp32_model.named_sublayers():
        print(name, layer)
    count = 0
    fuse_list = []
    for name, layer in fp32_model.named_sublayers():
        if isinstance(layer, nn.Conv2D):
            fuse_list.append([name])
        if isinstance(layer, nn.BatchNorm2D):
            fuse_list[count].append(name)
            count += 1
    if FLAGS.arch == 'resnet50':
        fuse_list = None
    val_dataset = ImageNetDataset(FLAGS.data, mode='val')

    # 2 quantize model
    ptq = PTQ()
    quant_model = ptq.quantize(fp32_model,
                               fuse=FLAGS.fuse,
                               fuse_list=fuse_list)

    print("Calibrate")
    calibrate(quant_model, val_dataset, FLAGS.quant_batch_num,
              FLAGS.quant_batch_size)

    # 3 save
    quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch, "int8_infer",
                                    "model")
    input_spec = paddle.static.InputSpec(shape=[None, 3, 224, 224],
                                         dtype='float32')
    ptq.save_quantized_model(quant_model, quant_output_dir, [input_spec])

    fp32_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch, "fp32_infer",
                                   "model")
    paddle.jit.save(fp32_model, fp32_output_dir, [input_spec])
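
The calibrate helper used above is not shown. A minimal sketch consistent with the call site (batch handling and num_workers are assumptions) simply feeds a few validation batches through the quantized model so the observers inserted by ptq.quantize() can collect activation statistics:

def calibrate(model, dataset, batch_num, batch_size):
    # Assumed sketch: forward `batch_num` batches; the PTQ hooks record activation ranges.
    data_loader = paddle.io.DataLoader(dataset,
                                       batch_size=batch_size,
                                       num_workers=2)
    for idx, data in enumerate(data_loader()):
        image = data[0]
        model(image)
        if idx + 1 >= batch_num:
            break
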
Example #4
def train_mobilenet():
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    with fluid.dygraph.guard(place):
        # 1. init net and optimizer
        place_num = (paddle.fluid.core.get_cuda_device_count()
                     if args.use_gpu else int(os.environ.get('CPU_NUM', 1)))
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        if args.model == "MobileNetV1":
            net = MobileNetV1(class_dim=args.class_dim, scale=1.0)
            model_path_pre = 'mobilenet_v1'
        elif args.model == "MobileNetV2":
            net = MobileNetV2(class_dim=args.class_dim, scale=1.0)
            model_path_pre = 'mobilenet_v2'
        else:
            print("Wrong model name: expected MobileNetV1 or MobileNetV2, "
                  "but received {}".format(args.model))
            sys.exit(1)

        optimizer = create_optimizer(args=args,
                                     parameter_list=net.parameters())
        if args.use_data_parallel:
            net = fluid.dygraph.parallel.DataParallel(net, strategy)

        # 2. load checkpoint
        if args.checkpoint:
            assert os.path.exists(args.checkpoint + ".pdparams"), \
                "Given checkpoint {}.pdparams does not exist.".format(args.checkpoint)
            assert os.path.exists(args.checkpoint + ".pdopt"), \
                "Given checkpoint {}.pdopt does not exist.".format(args.checkpoint)
            para_dict, opti_dict = fluid.dygraph.load_dygraph(args.checkpoint)
            net.set_dict(para_dict)
            optimizer.set_dict(opti_dict)

        # 3. reader
        num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
        imagenet_reader = reader.ImageNetReader(seed=0, place_num=place_num)

        train_dataset = ImageNetDataset(os.path.join(args.data_dir, "train"),
                                        mode='train')

        train_data_loader = DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       places=place,
                                       shuffle=True,
                                       drop_last=True,
                                       num_workers=10)

        test_dataset = ImageNetDataset(os.path.join(args.data_dir, "val"),
                                       mode='val')

        test_data_loader = DataLoader(test_dataset,
                                      batch_size=args.batch_size,
                                      places=place,
                                      shuffle=True,
                                      drop_last=True,
                                      num_workers=1)

        # 4. train loop
        total_batch_num = 0  # this is for benchmark
        for eop in range(args.num_epochs):
            epoch_start = time.time()

            if num_trainers > 1:
                imagenet_reader.set_shuffle_seed(
                    eop + (args.random_seed if args.random_seed else 0))

            net.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            batch_id = 0
            t_last = 0

            # 4.1 for each batch, call net() , backward(), and minimize()
            batch_cost_avg = TimeCostAverage()
            batch_reader_avg = TimeCostAverage()
            batch_net_avg = TimeCostAverage()
            batch_backward_avg = TimeCostAverage()
            batch_start = time.time()
            for img, label in train_data_loader():
                if args.max_iter and total_batch_num == args.max_iter:
                    return
                batch_reader_end = time.time()

                # 4.1.1 call net()
                out = net(img)
                softmax_out = fluid.layers.softmax(out, use_cudnn=False)
                loss = fluid.layers.cross_entropy(input=softmax_out,
                                                  label=label)
                avg_loss = fluid.layers.mean(x=loss)
                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
                batch_net_end = time.time()

                # 4.1.2 call backward()
                if args.use_data_parallel:
                    avg_loss = net.scale_loss(avg_loss)
                    avg_loss.backward()
                    net.apply_collective_grads()
                else:
                    avg_loss.backward()
                batch_backward_end = time.time()

                # 4.1.3 call minimize()
                optimizer.minimize(avg_loss)

                net.clear_gradients()
                t2 = time.time()

                avg_loss_value = avg_loss.numpy()
                acc_top1_value = acc_top1.numpy()
                acc_top5_value = acc_top5.numpy()

                total_loss += avg_loss_value
                total_acc1 += acc_top1_value
                total_acc5 += acc_top5_value

                total_sample += 1
                batch_id += 1

                # NOTE: used for benchmark
                train_batch_cost = time.time() - batch_start
                batch_cost_avg.record(train_batch_cost)
                batch_reader_avg.record(batch_reader_end - batch_start)
                batch_net_avg.record(batch_net_end - batch_reader_end)
                batch_backward_avg.record(batch_backward_end - batch_net_end)

                total_batch_num = total_batch_num + 1
                if batch_id % args.print_step == 0:
                    ips = float(args.batch_size) / batch_cost_avg.get_average()
                    print(
                        "[Epoch %d, batch %d], avg_loss %.5f, acc_top1 %.5f, acc_top5 %.5f, batch_cost: %.5f sec, net_cost: %.5f sec, backward_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f images/sec"
                        % (eop, batch_id, avg_loss_value, acc_top1_value,
                           acc_top5_value, batch_cost_avg.get_average(),
                           batch_net_avg.get_average(),
                           batch_backward_avg.get_average(),
                           batch_reader_avg.get_average(), ips))
                    sys.stdout.flush()
                    batch_cost_avg.reset()
                    batch_net_avg.reset()
                    batch_backward_avg.reset()
                    batch_reader_avg.reset()
                batch_start = time.time()

            if args.ce:
                print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
                print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
                print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))

            train_epoch_cost = time.time() - epoch_start
            print(
                "[Epoch %d], loss %.5f, acc1 %.5f, acc5 %.5f, epoch_cost: %.5f s"
                % (eop, total_loss / total_sample, total_acc1 / total_sample,
                   total_acc5 / total_sample, train_epoch_cost))

            # 4.2 save checkpoint
            save_parameters = (not args.use_data_parallel) or (
                args.use_data_parallel
                and fluid.dygraph.parallel.Env().local_rank == 0)
            if save_parameters:
                if not os.path.isdir(args.model_save_dir):
                    os.makedirs(args.model_save_dir)
                model_path = os.path.join(
                    args.model_save_dir,
                    "_" + model_path_pre + "_epoch{}".format(eop))
                fluid.dygraph.save_dygraph(net.state_dict(), model_path)
                fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)

            # 4.3 validation
            net.eval()
            eval(net, test_data_loader, eop)

        # 5. save final results
        save_parameters = (not args.use_data_parallel) or (
            args.use_data_parallel
            and fluid.dygraph.parallel.Env().local_rank == 0)
        if save_parameters:
            model_path = os.path.join(args.model_save_dir,
                                      "_" + model_path_pre + "_final")
            fluid.dygraph.save_dygraph(net.state_dict(), model_path)
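
create_optimizer, reader.ImageNetReader, eval, and TimeCostAverage are helpers from the surrounding project; only their usage is visible here. For reference, a minimal TimeCostAverage consistent with the record/get_average/reset calls above could be (assumed implementation):

class TimeCostAverage(object):
    # Assumed helper: accumulates per-batch time costs and reports their mean.
    def __init__(self):
        self.reset()

    def reset(self):
        self.cnt = 0
        self.total_time = 0.0

    def record(self, usetime):
        self.cnt += 1
        self.total_time += usetime

    def get_average(self):
        if self.cnt == 0:
            return 0.0
        return self.total_time / self.cnt
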
Example #5
def main():
    # create model
    model_list = [x for x in models.__dict__["__all__"]]
    assert FLAGS.arch in model_list, \
        "Expected FLAGS.arch in {}, but received {}".format(
            model_list, FLAGS.arch)
    model = models.__dict__[FLAGS.arch](pretrained=not FLAGS.resume)

    # quantize model
    if FLAGS.enable_quant:
        if not FLAGS.use_naive_api:
            print("use slim api")
            quant_config = {
                'weight_quantize_type': FLAGS.weight_quantize_type,
            }
            dygraph_qat = QAT(quant_config)
        else:
            print("use navie api")
            dygraph_qat = ImperativeQuantAware(
                weight_quantize_type=FLAGS.weight_quantize_type, )
        dygraph_qat.quantize(model)

    # prepare
    model = paddle.Model(model)
    if FLAGS.resume is not None:
        print("Resume from " + FLAGS.resume)
        model.load(FLAGS.resume)

    train_dataset = ImageNetDataset(FLAGS.data, mode='train')
    val_dataset = ImageNetDataset(FLAGS.data, mode='val')

    optim = make_optimizer(
        np.ceil(
            float(len(train_dataset)) / FLAGS.batch_size /
            ParallelEnv().nranks),
        parameter_list=model.parameters())

    model.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy(topk=(1, 5)))

    # test
    if FLAGS.eval_only:
        model.evaluate(
            val_dataset,
            batch_size=FLAGS.batch_size,
            num_workers=FLAGS.num_workers)
        return

    # train
    output_dir = os.path.join(FLAGS.output_dir, "checkpoint",
                              FLAGS.arch + "_checkpoint",
                              time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=output_dir,
              num_workers=FLAGS.num_workers)

    # save
    if FLAGS.enable_quant:
        quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch, "model")
        input_spec = paddle.static.InputSpec(
            shape=[None, 3, 224, 224], dtype='float32')
        dygraph_qat.save_quantized_model(model.network, quant_output_dir,
                                         [input_spec])
        print("save all checkpoints in " + output_dir)
        print("save quantized inference model in " + quant_output_dir)