Example #1
def eval(args):
    model_list = [m for m in dir(models) if "__" not in m]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    assert os.path.isdir(
        args.pretrained_model
    ), "{} doesn't exist, please load right pretrained model path for eval".format(
        args.pretrained_model)

    assert args.image_shape[
        1] <= args.resize_short_size, "Please check args.image_shape and args.resize_short_size: the cropped size (image_shape[1]) must be smaller than or equal to the resized short side (resize_short_size)"

    # check gpu: when using GPUs, the number of visible cards must divide the batch size
    if args.use_gpu:
        assert args.batch_size % fluid.core.get_cuda_device_count() == 0, (
            "batch_size ({}) must be divisible by the number of available cards ({}); "
            "you can change the visible cards with: export CUDA_VISIBLE_DEVICES=".format(
                args.batch_size, fluid.core.get_cuda_device_count()))
    image = fluid.data(name='image',
                       shape=[None] + args.image_shape,
                       dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')

    # model definition
    if args.model.startswith('EfficientNet'):
        model = models.__dict__[args.model](is_test=True,
                                            padding_type=args.padding_type,
                                            use_se=args.use_se)
    elif "ACNet" in args.model:
        model = models.__dict__[args.model](deploy=args.deploy)
    else:
        model = models.__dict__[args.model]()

    if args.model == "GoogLeNet":
        out0, out1, out2 = model.net(input=image, class_dim=args.class_dim)
        cost0 = fluid.layers.cross_entropy(input=out0, label=label)
        cost1 = fluid.layers.cross_entropy(input=out1, label=label)
        cost2 = fluid.layers.cross_entropy(input=out2, label=label)
        avg_cost0 = fluid.layers.mean(x=cost0)
        avg_cost1 = fluid.layers.mean(x=cost1)
        avg_cost2 = fluid.layers.mean(x=cost2)

        avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2
        acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
    else:
        out = model.net(input=image, class_dim=args.class_dim)

        cost, pred = fluid.layers.softmax_with_cross_entropy(
            out, label, return_softmax=True)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=pred, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=pred, label=label, k=5)

    test_program = fluid.default_main_program().clone(for_test=True)

    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))

    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())
    if args.use_gpu:
        places = fluid.framework.cuda_places()
    else:
        places = fluid.framework.cpu_places()
    compiled_program = fluid.compiler.CompiledProgram(
        test_program).with_data_parallel(places=places)

    fluid.io.load_persistables(exe, args.pretrained_model)
    imagenet_reader = reader.ImageNetReader()
    val_reader = imagenet_reader.val(settings=args)

    # set places to run on the multi-card
    feeder = fluid.DataFeeder(place=places, feed_list=[image, label])

    test_info = [[], [], []]
    cnt = 0
    parallel_data = []
    parallel_id = []
    place_num = paddle.fluid.core.get_cuda_device_count(
    ) if args.use_gpu else int(os.environ.get('CPU_NUM', 1))
    real_iter = 0
    info_dict = {}

    for batch_id, data in enumerate(val_reader()):
        # image data and label
        image_data = [items[0:2] for items in data]
        image_id = [items[2] for items in data]
        parallel_id.append(image_id)
        parallel_data.append(image_data)
        if place_num == len(parallel_data):
            t1 = time.time()
            loss_set, acc1_set, acc5_set = exe.run(
                compiled_program,
                fetch_list=fetch_list,
                feed=list(feeder.feed_parallel(parallel_data, place_num)))
            t2 = time.time()
            period = t2 - t1
            loss = np.mean(loss_set)
            acc1 = np.mean(acc1_set)
            acc5 = np.mean(acc5_set)
            test_info[0].append(loss * len(data))
            test_info[1].append(acc1 * len(data))
            test_info[2].append(acc5 * len(data))
            cnt += len(data)
            if batch_id % args.print_step == 0:
                info = "Testbatch {0},loss {1}, acc1 {2},acc5 {3},time {4}".format(real_iter, \
                  "%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5, \
                  "%2.2f sec" % period)
                logger.info(info)
                sys.stdout.flush()

            parallel_id = []
            parallel_data = []
            real_iter += 1

    test_loss = np.sum(test_info[0]) / cnt
    test_acc1 = np.sum(test_info[1]) / cnt
    test_acc5 = np.sum(test_info[2]) / cnt

    info = "Test_loss {0}, test_acc1 {1}, test_acc5 {2}".format(
        "%.5f" % test_loss, "%.5f" % test_acc1, "%.5f" % test_acc5)
    if args.save_json_path:
        info_dict = {
            "Test_loss": test_loss,
            "test_acc1": test_acc1,
            "test_acc5": test_acc5
        }
        save_json(info_dict, args.save_json_path)
    logger.info(info)
    sys.stdout.flush()
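
The function above leans on helpers defined elsewhere in the repository (models, reader, logger, save_json). As a minimal sketch, assuming save_json simply serializes the metrics dict to disk, a stand-in could look like:

import json

def save_json(info_dict, save_json_path):
    # hedged stand-in for the repo's save_json helper (assumed behavior):
    # dump the final metrics dict to the given path as JSON
    with open(save_json_path, "w") as f:
        json.dump(info_dict, f, indent=2)
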
Example #2
def train(args):
    """Train model
    
    Args:
        args: all arguments.    
    """
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    train_out = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    train_data_loader = train_out[-1]
    if args.use_ema:
        train_fetch_vars = train_out[:-2]
        ema = train_out[-2]
    else:
        train_fetch_vars = train_out[:-1]

    train_fetch_list = [var.name for var in train_fetch_vars]

    if args.validate:
        test_prog = fluid.Program()
        test_out = build_program(
            is_train=False,
            main_prog=test_prog,
            startup_prog=startup_prog,
            args=args)
        test_data_loader = test_out[-1]
        test_fetch_vars = test_out[:-1]

        test_fetch_list = [var.name for var in test_fetch_vars]

        # Clone the program and set each layer's is_test attribute to True
        test_prog = test_prog.clone(for_test=True)

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))

    #init model from checkpoint or pretrained model.
    init_model(exe, args, train_prog)
    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    if args.use_dali:
        import dali
        train_iter = dali.train(settings=args)
        if trainer_id == 0:
            test_iter = dali.val(settings=args)
    else:
        imagenet_reader = reader.ImageNetReader(0 if num_trainers > 1 else None)
        train_reader = imagenet_reader.train(settings=args)
        if args.use_gpu:
            if num_trainers <= 1:
                places = fluid.framework.cuda_places()
            else:
                places = place
        else:
            if num_trainers <= 1:
                places = fluid.framework.cpu_places()
            else:
                places = place

        train_data_loader.set_sample_list_generator(train_reader, places)

        if args.validate:
            test_reader = imagenet_reader.val(settings=args)
            test_data_loader.set_sample_list_generator(test_reader, places)

    compiled_train_prog = best_strategy_compiled(args, train_prog,
                                                 train_fetch_vars[0], exe)
    #NOTE: this is for benchmark
    total_batch_num = 0
    for pass_id in range(args.num_epochs):
        if num_trainers > 1 and not args.use_dali:
            imagenet_reader.set_shuffle_seed(pass_id + (
                args.random_seed if args.random_seed else 0))
        train_batch_id = 0
        train_batch_time_record = []
        train_batch_metrics_record = []

        if not args.use_dali:
            train_iter = train_data_loader()
            if args.validate:
                test_iter = test_data_loader()

        t1 = time.time()
        for batch in train_iter:
            #NOTE: this is for benchmark
            if args.max_iter and total_batch_num == args.max_iter:
                return
            train_batch_metrics = exe.run(compiled_train_prog,
                                          feed=batch,
                                          fetch_list=train_fetch_list)
            t2 = time.time()
            train_batch_elapse = t2 - t1
            train_batch_time_record.append(train_batch_elapse)

            train_batch_metrics_avg = np.mean(
                np.array(train_batch_metrics), axis=1)
            train_batch_metrics_record.append(train_batch_metrics_avg)
            if trainer_id == 0:
                print_info("batch", train_batch_metrics_avg, train_batch_elapse,
                           pass_id, train_batch_id, args.print_step)
                sys.stdout.flush()
            train_batch_id += 1
            t1 = time.time()
            #NOTE: this is for the benchmark profiler
            total_batch_num = total_batch_num + 1
            if args.is_profiler and pass_id == 0 and train_batch_id == args.print_step:
                profiler.start_profiler("All")
            elif args.is_profiler and pass_id == 0 and train_batch_id == args.print_step + 5:
                profiler.stop_profiler("total", args.profiler_path)
                return

        if args.use_dali:
            train_iter.reset()

        if trainer_id == 0 and args.validate:
            if args.use_ema:
                logger.info('ExponentialMovingAverage validate start...')
                with ema.apply(exe):
                    validate(args, test_iter, exe, test_prog, test_fetch_list,
                             pass_id, train_batch_metrics_record,
                             train_batch_time_record, compiled_train_prog)
                logger.info('ExponentialMovingAverage validate over!')

            validate(args, test_iter, exe, test_prog, test_fetch_list, pass_id,
                     train_batch_metrics_record, train_batch_time_record,
                     compiled_train_prog)

            if args.use_dali:
                test_iter.reset()

        if pass_id % args.save_step == 0:
            save_model(args, exe, train_prog, pass_id)
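
print_info is another repository helper; from the call site above it receives a mode string, the averaged fetch values (loss, acc1, acc5), the batch time, the pass and batch ids, and the print interval. A hedged sketch matching that call signature, not the repo's exact implementation:

def print_info(mode, metrics, elapse, pass_id, batch_id, print_step):
    # hedged sketch inferred from the call site: log averaged
    # loss/acc1/acc5 every print_step batches
    if batch_id % print_step == 0:
        loss, acc1, acc5 = metrics[:3]
        print("[Pass %d, %s %d] loss %.5f, acc1 %.5f, acc5 %.5f, time %.2f sec"
              % (pass_id, mode, batch_id, loss, acc1, acc5, elapse))
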
Example #3
def train_resnet():
    epoch = args.epoch

    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        resnet = ResNet(class_dim=args.class_dim)
        optimizer = optimizer_setting(parameter_list=resnet.parameters())

        if args.use_data_parallel:
            resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)

        if args.use_imagenet_data:
            imagenet_reader = reader.ImageNetReader(0)
            train_reader = imagenet_reader.train(settings=args)
        else:
            train_reader = paddle.batch(reader_decorator(
                paddle.dataset.flowers.train(use_xmap=True)),
                                        batch_size=batch_size,
                                        drop_last=True)

        if args.use_imagenet_data:
            test_reader = imagenet_reader.val(settings=args)
        else:
            test_reader = paddle.batch(reader_decorator(
                paddle.dataset.flowers.test(use_xmap=True)),
                                       batch_size=batch_size,
                                       drop_last=True)

        train_loader = fluid.io.DataLoader.from_generator(
            capacity=32,
            use_double_buffer=True,
            iterable=True,
            return_list=True,
            use_multiprocess=True)
        train_loader.set_sample_list_generator(train_reader, places=place)

        test_loader = fluid.io.DataLoader.from_generator(
            capacity=64,
            use_double_buffer=True,
            iterable=True,
            return_list=True,
            use_multiprocess=True)
        test_loader.set_sample_list_generator(test_reader, places=place)

        #NOTE: used in benchmark
        total_batch_num = 0

        for eop in range(epoch):
            epoch_start = time.time()

            resnet.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0

            train_batch_cost_avg = TimeCostAverage()
            train_reader_cost_avg = TimeCostAverage()
            batch_start = time.time()
            for batch_id, data in enumerate(train_loader()):
                #NOTE: used in benchmark
                if args.max_iter and total_batch_num == args.max_iter:
                    return

                train_reader_cost = time.time() - batch_start

                img, label = data
                label.stop_gradient = True

                out = resnet(img)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                avg_loss = fluid.layers.mean(x=loss)

                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

                dy_out = avg_loss.numpy()

                if args.use_data_parallel:
                    avg_loss = resnet.scale_loss(avg_loss)
                    avg_loss.backward()
                    resnet.apply_collective_grads()
                else:
                    avg_loss.backward()

                optimizer.minimize(avg_loss)
                resnet.clear_gradients()

                total_loss += dy_out
                total_acc1 += acc_top1.numpy()
                total_acc5 += acc_top5.numpy()
                total_sample += 1

                train_batch_cost = time.time() - batch_start
                train_batch_cost_avg.record(train_batch_cost)
                train_reader_cost_avg.record(train_reader_cost)

                total_batch_num = total_batch_num + 1  #this is for benchmark
                if batch_id % 10 == 0:
                    ips = float(
                        args.batch_size) / train_batch_cost_avg.get_average()
                    print(
                        "[Epoch %d, batch %d] loss: %.5f, acc1: %.5f, acc5: %.5f, batch_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f images/sec"
                        % (eop, batch_id, total_loss / total_sample,
                           total_acc1 / total_sample, total_acc5 /
                           total_sample, train_batch_cost_avg.get_average(),
                           train_reader_cost_avg.get_average(), ips))
                    train_batch_cost_avg.reset()
                    train_reader_cost_avg.reset()
                batch_start = time.time()

            if args.ce:
                print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
                print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
                print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))

            train_epoch_cost = time.time() - epoch_start
            print(
                "[Epoch %d], loss %.5f, acc1 %.5f, acc5 %.5f, epoch_cost: %.5f s"
                % (eop, total_loss / total_sample, total_acc1 / total_sample,
                   total_acc5 / total_sample, train_epoch_cost))

            resnet.eval()
            eval(resnet, test_loader)

            save_parameters = (not args.use_data_parallel) or (
                args.use_data_parallel
                and fluid.dygraph.parallel.Env().local_rank == 0)
            if save_parameters:
                fluid.save_dygraph(resnet.state_dict(), 'resnet_params')
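
TimeCostAverage is used here only through record(), get_average(), and reset(); a minimal sketch that satisfies this interface (an assumption, not the repository's exact class):

class TimeCostAverage(object):
    # running average of recorded time costs, inferred from usage above
    def __init__(self):
        self.reset()

    def reset(self):
        self._total_cost = 0.0
        self._count = 0

    def record(self, cost):
        self._total_cost += cost
        self._count += 1

    def get_average(self):
        if self._count == 0:
            return 0.0
        return self._total_cost / self._count
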
Example #4
File: infer.py  Project: yxl-0713/models
def infer(args):
    model_list = [m for m in dir(models) if "__" not in m]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    assert os.path.isdir(args.pretrained_model), \
        "{} doesn't exist, please pass a valid pretrained model path for infer".format(
            args.pretrained_model)

    assert args.image_shape[
        1] <= args.resize_short_size, "Please check args.image_shape and args.resize_short_size: the cropped size (image_shape[1]) must be smaller than or equal to the resized short side (resize_short_size)"

    if args.image_path:
        assert os.path.isfile(
            args.image_path
        ), "Please check the args:image_path, it should be a path to single image."
        if args.use_gpu:
            assert fluid.core.get_cuda_device_count() == 1, \
                "please make exactly one GPU visible via: export CUDA_VISIBLE_DEVICES="
        else:
            assert int(os.environ.get('CPU_NUM', 1)) == 1, "please set CPU_NUM to 1"

    image = fluid.data(name='image',
                       shape=[None] + args.image_shape,
                       dtype='float32')

    if args.model.startswith('EfficientNet'):
        model = models.__dict__[args.model](is_test=True,
                                            padding_type=args.padding_type,
                                            use_se=args.use_se)
    else:
        model = models.__dict__[args.model]()

    if args.model == "GoogLeNet":
        out, _, _ = model.net(input=image, class_dim=args.class_dim)
    else:
        out = model.net(input=image, class_dim=args.class_dim)
        out = fluid.layers.softmax(out)

    test_program = fluid.default_main_program().clone(for_test=True)

    fetch_list = [out.name]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if args.use_gpu:
        places = fluid.framework.cuda_places()
    else:
        places = fluid.framework.cpu_places()
    compiled_program = fluid.compiler.CompiledProgram(
        test_program).with_data_parallel(places=places)

    fluid.io.load_persistables(exe, args.pretrained_model)
    if args.save_inference:
        fluid.io.save_inference_model(dirname=args.model,
                                      feeded_var_names=['image'],
                                      main_program=test_program,
                                      target_vars=out,
                                      executor=exe,
                                      model_filename='model',
                                      params_filename='params')
        logger.info("model: {0} is already saved".format(args.model))
        exit(0)

    imagenet_reader = reader.ImageNetReader()
    test_reader = imagenet_reader.test(settings=args)
    feeder = fluid.DataFeeder(place=places, feed_list=[image])

    TOPK = args.topk
    if os.path.exists(args.class_map_path):
        logger.info(
            "Found the mapping between readable labels and numerical labels!")
        with open(args.class_map_path) as f:
            label_dict = {}
            strinfo = re.compile(r"\d+ ")
            for item in f.readlines():
                key = item.split(" ")[0]
                value = [
                    strinfo.sub("", l).replace("\n", "")
                    for l in item.split(", ")
                ]
                label_dict[key] = value

    info = {}
    parallel_data = []
    parallel_id = []
    place_num = paddle.fluid.core.get_cuda_device_count(
    ) if args.use_gpu else int(os.environ.get('CPU_NUM', 1))
    if os.path.exists(args.save_json_path):
        logger.warning("path: {} Already exists! will recover it\n".format(
            args.save_json_path))
    with open(args.save_json_path, "w") as fout:
        for batch_id, data in enumerate(test_reader()):
            image_data = [[items[0]] for items in data]
            image_id = [items[1] for items in data]

            parallel_id.append(image_id)
            parallel_data.append(image_data)

            if place_num == len(parallel_data):
                result = exe.run(
                    compiled_program,
                    fetch_list=fetch_list,
                    feed=list(feeder.feed_parallel(parallel_data, place_num)))
                for i, res in enumerate(result[0]):
                    pred_label = np.argsort(res)[::-1][:TOPK]
                    real_id = str(np.array(parallel_id).flatten()[i])
                    _, real_id = os.path.split(real_id)

                    if os.path.exists(args.class_map_path):
                        readable_pred_label = []
                        for label in pred_label:
                            readable_pred_label.append(label_dict[str(label)])

                        info[real_id] = {}
                        info[real_id]['score'], info[real_id]['class'], info[
                            real_id]['class_name'] = str(res[pred_label]), str(
                                pred_label), readable_pred_label
                    else:
                        info[real_id] = {}
                        info[real_id]['score'], info[real_id]['class'] = str(
                            res[pred_label]), str(pred_label)

                    logger.info("{}, {}".format(real_id, info[real_id]))
                    sys.stdout.flush()
                    fout.write(real_id + "\t" + json.dumps(info[real_id]) +
                               "\n")

                parallel_data = []
                parallel_id = []

    os.remove(".tmp.txt")
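
The class-map parsing above expects each line of class_map_path to start with a numerical label followed by comma-separated readable names. A small self-contained illustration with a hypothetical line:

import re

strinfo = re.compile(r"\d+ ")
item = "0 tench, Tinca tinca\n"  # hypothetical line from class_map_path
key = item.split(" ")[0]         # -> "0"
value = [strinfo.sub("", l).replace("\n", "") for l in item.split(", ")]
# value -> ["tench", "Tinca tinca"], stored as label_dict["0"]
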
Example #5
def train_mobilenet():
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    with fluid.dygraph.guard(place):
        # 1. init net and optimizer
        place_num = paddle.fluid.core.get_cuda_device_count(
        ) if args.use_gpu else int(os.environ.get('CPU_NUM', 1))
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        if args.model == "MobileNetV1":
            net = MobileNetV1(class_dim=args.class_dim, scale=1.0)
            model_path_pre = 'mobilenet_v1'
        elif args.model == "MobileNetV2":
            net = MobileNetV2(class_dim=args.class_dim, scale=1.0)
            model_path_pre = 'mobilenet_v2'
        else:
            print("wrong model name: please use MobileNetV1 or MobileNetV2")
            exit()

        optimizer = create_optimizer(args=args,
                                     parameter_list=net.parameters())
        if args.use_data_parallel:
            net = fluid.dygraph.parallel.DataParallel(net, strategy)

        # 2. load checkpoint
        if args.checkpoint:
            assert os.path.exists(args.checkpoint + ".pdparams"), \
                "Given checkpoint {}.pdparams does not exist.".format(args.checkpoint)
            assert os.path.exists(args.checkpoint + ".pdopt"), \
                "Given checkpoint {}.pdopt does not exist.".format(args.checkpoint)
            para_dict, opti_dict = fluid.dygraph.load_dygraph(args.checkpoint)
            net.set_dict(para_dict)
            optimizer.set_dict(opti_dict)

        # 3. reader
        num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
        imagenet_reader = reader.ImageNetReader(seed=0, place_num=place_num)

        train_dataset = ImageNetDataset(os.path.join(args.data_dir, "train"),
                                        mode='train')

        train_data_loader = DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       places=place,
                                       shuffle=True,
                                       drop_last=True,
                                       num_workers=10)

        test_dataset = ImageNetDataset(os.path.join(args.data_dir, "val"),
                                       mode='val')

        test_data_loader = DataLoader(test_dataset,
                                      batch_size=args.batch_size,
                                      places=place,
                                      shuffle=True,
                                      drop_last=True,
                                      num_workers=1)

        # 4. train loop
        total_batch_num = 0  #this is for benchmark
        for eop in range(args.num_epochs):
            epoch_start = time.time()

            if num_trainers > 1:
                imagenet_reader.set_shuffle_seed(
                    eop + (args.random_seed if args.random_seed else 0))

            net.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            batch_id = 0

            # 4.1 for each batch, call net() , backward(), and minimize()
            batch_cost_avg = TimeCostAverage()
            batch_reader_avg = TimeCostAverage()
            batch_net_avg = TimeCostAverage()
            batch_backward_avg = TimeCostAverage()
            batch_start = time.time()
            for img, label in train_data_loader():
                if args.max_iter and total_batch_num == args.max_iter:
                    return
                batch_reader_end = time.time()

                # 4.1.1 call net()
                out = net(img)
                softmax_out = fluid.layers.softmax(out, use_cudnn=False)
                loss = fluid.layers.cross_entropy(input=softmax_out,
                                                  label=label)
                avg_loss = fluid.layers.mean(x=loss)
                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
                batch_net_end = time.time()

                # 4.1.2 call backward()
                if args.use_data_parallel:
                    avg_loss = net.scale_loss(avg_loss)
                    avg_loss.backward()
                    net.apply_collective_grads()
                else:
                    avg_loss.backward()
                batch_backward_end = time.time()

                # 4.1.3 call minimize()
                optimizer.minimize(avg_loss)

                net.clear_gradients()

                avg_loss_value = avg_loss.numpy()
                acc_top1_value = acc_top1.numpy()
                acc_top5_value = acc_top5.numpy()

                total_loss += avg_loss_value
                total_acc1 += acc_top1_value
                total_acc5 += acc_top5_value

                total_sample += 1
                batch_id += 1

                # NOTE: used for benchmark
                train_batch_cost = time.time() - batch_start
                batch_cost_avg.record(train_batch_cost)
                batch_reader_avg.record(batch_reader_end - batch_start)
                batch_net_avg.record(batch_net_end - batch_reader_end)
                batch_backward_avg.record(batch_backward_end - batch_net_end)

                total_batch_num = total_batch_num + 1
                if batch_id % args.print_step == 0:
                    ips = float(args.batch_size) / batch_cost_avg.get_average()
                    print(
                        "[Epoch %d, batch %d], avg_loss %.5f, acc_top1 %.5f, acc_top5 %.5f, batch_cost: %.5f sec, net_cost: %.5f sec, backward_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f images/sec"
                        % (eop, batch_id, avg_loss_value, acc_top1_value,
                           acc_top5_value, batch_cost_avg.get_average(),
                           batch_net_avg.get_average(),
                           batch_backward_avg.get_average(),
                           batch_reader_avg.get_average(), ips))
                    sys.stdout.flush()
                    batch_cost_avg.reset()
                    batch_net_avg.reset()
                    batch_backward_avg.reset()
                    batch_reader_avg.reset()
                batch_start = time.time()

            if args.ce:
                print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
                print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
                print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))

            train_epoch_cost = time.time() - epoch_start
            print(
                "[Epoch %d], loss %.5f, acc1 %.5f, acc5 %.5f, epoch_cost: %.5f s"
                % (eop, total_loss / total_sample, total_acc1 / total_sample,
                   total_acc5 / total_sample, train_epoch_cost))

            # 4.2 save checkpoint
            save_parameters = (not args.use_data_parallel) or (
                args.use_data_parallel
                and fluid.dygraph.parallel.Env().local_rank == 0)
            if save_parameters:
                if not os.path.isdir(args.model_save_dir):
                    os.makedirs(args.model_save_dir)
                model_path = os.path.join(
                    args.model_save_dir,
                    "_" + model_path_pre + "_epoch{}".format(eop))
                fluid.dygraph.save_dygraph(net.state_dict(), model_path)
                fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)

            # 4.3 validation
            net.eval()
            eval(net, test_data_loader, eop)

        # 5. save final results
        save_parameters = (not args.use_data_parallel) or (
            args.use_data_parallel
            and fluid.dygraph.parallel.Env().local_rank == 0)
        if save_parameters:
            model_path = os.path.join(args.model_save_dir,
                                      "_" + model_path_pre + "_final")
            fluid.dygraph.save_dygraph(net.state_dict(), model_path)
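
train_mobilenet reads its configuration from a module-level args object. A hypothetical driver, using SimpleNamespace to stand in for the argparse result; every field below mirrors an attribute referenced directly in the function body (create_optimizer may read additional fields, such as learning-rate settings):

from types import SimpleNamespace

args = SimpleNamespace(
    model='MobileNetV1', use_gpu=True, use_data_parallel=False, ce=False,
    class_dim=1000, checkpoint=None, data_dir='./data/ILSVRC2012',
    batch_size=256, num_epochs=120, random_seed=None, max_iter=0,
    print_step=10, model_save_dir='./output')
train_mobilenet()
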
Example #6
def train_mobilenet():
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    with fluid.dygraph.guard(place):
        # 1. init net and optimizer
        place_num = paddle.fluid.core.get_cuda_device_count(
        ) if args.use_gpu else int(os.environ.get('CPU_NUM', 1))
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        if args.model == "MobileNetV1":
            net = MobileNetV1(class_dim=args.class_dim, scale=1.0)
            model_path_pre = 'mobilenet_v1'
        elif args.model == "MobileNetV2":
            net = MobileNetV2(class_dim=args.class_dim, scale=1.0)
            model_path_pre = 'mobilenet_v2'
        else:
            print("wrong model name: please use MobileNetV1 or MobileNetV2")
            exit()

        optimizer = create_optimizer(args=args,
                                     parameter_list=net.parameters())
        if args.use_data_parallel:
            net = fluid.dygraph.parallel.DataParallel(net, strategy)

        # 2. load checkpoint
        if args.checkpoint:
            assert os.path.exists(args.checkpoint + ".pdparams"), \
                "Given checkpoint {}.pdparams does not exist.".format(args.checkpoint)
            assert os.path.exists(args.checkpoint + ".pdopt"), \
                "Given checkpoint {}.pdopt does not exist.".format(args.checkpoint)
            para_dict, opti_dict = fluid.dygraph.load_dygraph(args.checkpoint)
            net.set_dict(para_dict)
            optimizer.set_dict(opti_dict)

        # 3. reader
        train_data_loader, train_data = utility.create_data_loader(
            is_train=True, args=args)
        test_data_loader, test_data = utility.create_data_loader(
            is_train=False, args=args)
        num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
        imagenet_reader = reader.ImageNetReader(seed=0, place_num=place_num)
        train_reader = imagenet_reader.train(settings=args)
        test_reader = imagenet_reader.val(settings=args)
        train_data_loader.set_sample_list_generator(train_reader, place)
        test_data_loader.set_sample_list_generator(test_reader, place)

        # 4. train loop
        for eop in range(args.num_epochs):
            if num_trainers > 1:
                imagenet_reader.set_shuffle_seed(
                    eop + (args.random_seed if args.random_seed else 0))
            net.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            batch_id = 0
            t_last = 0
            # 4.1 for each batch, call net() , backward(), and minimize()
            for img, label in train_data_loader():
                t1 = time.time()
                label = to_variable(label.numpy().astype('int64').reshape(
                    int(args.batch_size // place_num), 1))
                t_start = time.time()

                # 4.1.1 call net()
                out = net(img)

                t_end = time.time()
                softmax_out = fluid.layers.softmax(out, use_cudnn=False)
                loss = fluid.layers.cross_entropy(input=softmax_out,
                                                  label=label)
                avg_loss = fluid.layers.mean(x=loss)
                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
                t_start_back = time.time()

                # 4.1.2 call backward()
                if args.use_data_parallel:
                    avg_loss = net.scale_loss(avg_loss)
                    avg_loss.backward()
                    net.apply_collective_grads()
                else:
                    avg_loss.backward()

                t_end_back = time.time()

                # 4.1.3 call minimize()
                optimizer.minimize(avg_loss)

                net.clear_gradients()
                t2 = time.time()
                train_batch_elapse = t2 - t1
                if batch_id % args.print_step == 0:
                    print( "epoch id: %d, batch step: %d,  avg_loss %0.5f acc_top1 %0.5f acc_top5 %0.5f %2.4f sec net_t:%2.4f back_t:%2.4f read_t:%2.4f" % \
                            (eop, batch_id, avg_loss.numpy(), acc_top1.numpy(), acc_top5.numpy(), train_batch_elapse,
                              t_end - t_start, t_end_back - t_start_back,  t1 - t_last))
                    sys.stdout.flush()
                total_loss += avg_loss.numpy()
                total_acc1 += acc_top1.numpy()
                total_acc5 += acc_top5.numpy()
                total_sample += 1
                batch_id += 1
                t_last = time.time()
            if args.ce:
                print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
                print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
                print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))
            print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f %2.4f sec" % \
                  (eop, batch_id, total_loss / total_sample, \
                   total_acc1 / total_sample, total_acc5 / total_sample, train_batch_elapse))

            # 4.2 save checkpoint
            save_parameters = (not args.use_data_parallel) or (
                args.use_data_parallel
                and fluid.dygraph.parallel.Env().local_rank == 0)
            if save_parameters:
                if not os.path.isdir(args.model_save_dir):
                    os.makedirs(args.model_save_dir)
                model_path = os.path.join(
                    args.model_save_dir,
                    "_" + model_path_pre + "_epoch{}".format(eop))
                fluid.dygraph.save_dygraph(net.state_dict(), model_path)
                fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)

            # 4.3 validation
            net.eval()
            eval(net, test_data_loader, eop)

        # 5. save final results
        save_parameters = (not args.use_data_parallel) or (
            args.use_data_parallel
            and fluid.dygraph.parallel.Env().local_rank == 0)
        if save_parameters:
            model_path = os.path.join(args.model_save_dir,
                                      "_" + model_path_pre + "_final")
            fluid.dygraph.save_dygraph(net.state_dict(), model_path)
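
The label reshape inside the loop shards the global batch across devices: each device sees batch_size // place_num samples, and cross_entropy/accuracy expect labels of shape (N, 1). A tiny numpy illustration with hypothetical sizes:

import numpy as np

batch_size, place_num = 8, 2                  # hypothetical global batch and device count
per_device = batch_size // place_num          # 4 samples per device
label = np.arange(per_device, dtype='int64')  # one device's label shard, shape (4,)
label = label.reshape(per_device, 1)          # shape (4, 1), as the ops above expect
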