# Common imports for the examples below, reconstructed from usage. Each
# example additionally assumes script-level helpers defined elsewhere:
# model builders (SE_ResNeXt, mobile_net, resnet_cifar10, cosine_decay),
# a custom SSD `reader` module, `eval_test`, `logger`, and the *_kpi trackers.
import cProfile
import os
import shutil
import sys
import time

import numpy as np
import paddle
import paddle.dataset.flowers as flowers
import paddle.fluid as fluid
import paddle.fluid.core as core

DTYPE = 'float32'  # assumption: the original script sets DTYPE at module level


def run_benchmark(model, args):
    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()
    start_time = time.time()
    # Input data
    images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    predict = model(images)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(input=predict,
                                      label=label,
                                      total=batch_size_tensor)

    # inference program
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    opt = fluid.optimizer.AdamOptimizer(learning_rate=0.001,
                                        beta1=0.9,
                                        beta2=0.999)
    opt.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())
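    # memory_optimize transforms the main program in place so that variables
    # with non-overlapping lifetimes share buffers, reducing peak memory use.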

    # Initialize executor
    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # Reader
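    # paddle.batch wraps the per-sample MNIST reader into a batched reader
    # that yields lists of (image, label) tuples of length batch_size.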
    train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                batch_size=args.batch_size)

    accuracy = fluid.average.WeightedAverage()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        pass_start = time.time()
        every_pass_loss = []
        for batch_id, data in enumerate(train_reader()):
            img_data = np.array([x[0].reshape([1, 28, 28])
                                 for x in data]).astype(DTYPE)
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape([len(y_data), 1])

            start = time.time()
            loss, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={
                    "pixel": img_data,
                    "label": y_data
                },
                fetch_list=[avg_cost, batch_acc, batch_size_tensor]
            )  # The accuracy is accumulated over batches, not just the current batch.
            end = time.time()
            accuracy.add(value=acc, weight=weight)
            every_pass_loss.append(loss)
            print("Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                  (pass_id, batch_id, loss, acc))

        pass_end = time.time()

        train_avg_acc = accuracy.eval()
        train_avg_loss = np.mean(every_pass_loss)
        test_avg_acc = eval_test(exe, batch_acc, batch_size_tensor,
                                 inference_program)

        print("pass=%d, train_avg_acc=%f, train_avg_loss=%f, test_avg_acc=%f, elapse=%f"
              % (pass_id, train_avg_acc, train_avg_loss, test_avg_acc,
                 pass_end - pass_start))

        train_acc_kpi.add_record(np.array(train_avg_acc, dtype='float32'))
        train_cost_kpi.add_record(np.array(train_avg_loss, dtype='float32'))
        test_acc_kpi.add_record(np.array(test_avg_acc, dtype='float32'))
        train_duration_kpi.add_record(pass_end - pass_start)
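
A minimal driver sketch (hypothetical: the original script builds `args` with
argparse and passes a model-builder callable; the flag names below simply
mirror the attributes run_benchmark reads):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--pass_num', type=int, default=5)
parser.add_argument('--device', type=str, default='CPU', choices=['CPU', 'GPU'])
parser.add_argument('--use_cprof', action='store_true')
args = parser.parse_args()

run_benchmark(mnist_model, args)  # mnist_model: hypothetical builder, images -> predictions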
Example #2
def train_parallel_exe(args,
                       learning_rate,
                       batch_size,
                       num_passes,
                       init_model=None,
                       pretrained_model=None,
                       model_save_dir='model',
                       parallel=True,
                       use_nccl=True,
                       lr_strategy=None,
                       layers=50):
    class_dim = 1000
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    if args.model == 'se_resnext':
        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
    else:
        out = mobile_net(img=image, class_dim=class_dim)

    cost = fluid.layers.cross_entropy(input=out, label=label)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    avg_cost = fluid.layers.mean(x=cost)

    test_program = fluid.default_main_program().clone(for_test=True)
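    # clone(for_test=True) makes an inference-mode copy of the program, so ops
    # like batch norm and dropout switch to their evaluation behavior.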

    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                       values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    elif "cosine_decay" in lr_strategy:
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=cosine_decay(learning_rate=learning_rate,
                                       step_each_epoch=step_each_epoch,
                                       epochs=epochs),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))

    opts = optimizer.minimize(avg_cost)

    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    fluid.default_startup_program().random_seed = 1000
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(flowers.train(), batch_size=batch_size)
    test_reader = paddle.batch(flowers.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
    test_exe = fluid.ParallelExecutor(use_cuda=True,
                                      main_program=test_program,
                                      share_vars_from=train_exe)
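    # share_vars_from lets test_exe reuse the per-device parameter copies that
    # train_exe already holds instead of materializing a second set.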

    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
    train_speed = []
    for pass_id in range(num_passes):
        train_info = [[], [], []]
        test_info = [[], [], []]
        pass_time = 0
        pass_num = 0
        pass_speed = 0.0
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = train_exe.run(fetch_list,
                                             feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            pass_time += period
            pass_num += len(data)
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            train_info[0].append(loss)
            train_info[1].append(acc1)
            train_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, acc1 {3}, acc5 {4}, "
                      "time {5}".format(pass_id, batch_id, loss, acc1, acc5,
                                        "%2.2f sec" % period))
                sys.stdout.flush()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        pass_speed = pass_num / pass_time
        train_speed.append(pass_speed)
        if pass_id == num_passes - 1:
            train_acc_top1_kpi.add_record(train_acc1)
            train_acc_top5_kpi.add_record(train_acc5)
            train_cost_kpi.add_record(train_loss)
            mean_pass_speed = np.array(train_speed).mean()  # average over passes
            train_speed_kpi.add_record(mean_pass_speed)
        for batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = test_exe.run(fetch_list, feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            test_info[0].append(loss)
            test_info[1].append(acc1)
            test_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0}, testbatch {1}, loss {2}, acc1 {3}, acc5 {4}, "
                      "time {5}".format(pass_id, batch_id, loss, acc1, acc5,
                                        "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
               test_loss {4}, test_acc1 {5}, test_acc5 {6}, pass_time {7}, train_speed {8}"
                                                           .format(pass_id, \
              train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
              test_acc5, pass_time, pass_num / pass_time))
        sys.stdout.flush()
    train_acc_top1_kpi.persist()
    train_acc_top5_kpi.persist()
    train_cost_kpi.persist()
    train_speed_kpi.persist()
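
The lr_strategy argument is consumed as a nested dict; a sketch of the two
recognized layouts (numbers illustrative, not from the source):

# piecewise_decay: "lr" needs one more entry than "bd" (one rate per interval)
lr_strategy = {"piecewise_decay": {"bd": [30000, 60000],
                                   "lr": [0.1, 0.01, 0.001]}}
# cosine_decay: steps per epoch plus the total epoch budget
lr_strategy = {"cosine_decay": {"step_each_epoch": 1000, "epochs": 120}}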
Example #3
def parallel_exe(args,
                 train_file_list,
                 val_file_list,
                 data_args,
                 learning_rate,
                 batch_size,
                 num_passes,
                 model_save_dir='model',
                 pretrained_model=None):
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    if data_args.dataset == 'coco':
        num_classes = 81
    elif data_args.dataset == 'pascalvoc':
        num_classes = 21
    else:
        raise ValueError("unsupported dataset: %s" % data_args.dataset)

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(name='gt_box',
                               shape=[4],
                               dtype='float32',
                               lod_level=1)
    gt_label = fluid.layers.data(name='gt_label',
                                 shape=[1],
                                 dtype='int32',
                                 lod_level=1)
    difficult = fluid.layers.data(name='gt_difficult',
                                  shape=[1],
                                  dtype='int32',
                                  lod_level=1)

    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
    nmsed_out = fluid.layers.detection_output(locs,
                                              confs,
                                              box,
                                              box_var,
                                              nms_threshold=0.45)
    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
    loss = fluid.layers.reduce_sum(loss)

    test_program = fluid.default_main_program().clone(for_test=True)
    with fluid.program_guard(test_program):
        map_eval = fluid.evaluator.DetectionMAP(nmsed_out,
                                                gt_label,
                                                gt_box,
                                                difficult,
                                                num_classes,
                                                overlap_threshold=0.5,
                                                evaluate_difficult=False,
                                                ap_version=args.ap_version)
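        # DetectionMAP accumulates detection statistics across test batches;
        # its accumulated mAP variable is fetched via get_map_var() in test().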

    if data_args.dataset == 'coco':
        # decay the learning rate after pass 12 and pass 19, respectively
        if '2014' in train_file_list:
            epocs = 82783 // batch_size
            boundaries = [epocs * 12, epocs * 19]
        elif '2017' in train_file_list:
            epocs = 118287 // batch_size
            boundaries = [epocs * 12, epocs * 19]
    elif data_args.dataset == 'pascalvoc':
        epocs = 19200 // batch_size
        boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
    values = [
        learning_rate, learning_rate * 0.5, learning_rate * 0.25,
        learning_rate * 0.1, learning_rate * 0.01
    ]
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005),
    )

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    fluid.default_startup_program().random_seed = 1000
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    if args.parallel:
        train_exe = fluid.ParallelExecutor(use_cuda=args.use_gpu,
                                           loss_name=loss.name)

    train_reader = paddle.batch(reader.train(data_args, train_file_list),
                                batch_size=batch_size)
    test_reader = paddle.batch(reader.test(data_args, val_file_list),
                               batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place,
                              feed_list=[image, gt_box, gt_label, difficult])

    def save_model(postfix):
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print('save models to %s' % model_path)
        fluid.io.save_persistables(exe, model_path)

    best_map = 0.

    def test(pass_id, best_map):
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        test_map = None
        for data in test_reader():
            test_map = exe.run(test_program,
                               feed=feeder.feed(data),
                               fetch_list=[accum_map])
        if test_map[0] > best_map:
            best_map = test_map[0]
            save_model('best_model')
        print("Test {0}, map {1}".format(pass_id, test_map[0]))
        return best_map

    train_num = 0
    total_train_time = 0.0
    total_iters = 0
    for pass_id in range(num_passes):
        every_pass_loss = []
        iter = 0
        pass_duration = 0.0
        for batch_id, data in enumerate(train_reader()):
            batch_start = time.time()
            if iter == args.iterations:
                break
            if len(data) < devices_num: continue
            if args.parallel:
                loss_v, = train_exe.run(fetch_list=[loss.name],
                                        feed=feeder.feed(data))
            else:
                loss_v, = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[loss])
            loss_v = np.mean(np.array(loss_v))
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v,
                    time.time() - batch_start))
            if iter >= args.skip_batch_num or pass_id != 0:
                batch_duration = time.time() - batch_start
                pass_duration += batch_duration
                train_num += len(data)
            every_pass_loss.append(loss_v)
            iter += 1
            total_iters += 1
        # best_map = test(pass_id, best_map)
        total_train_time += pass_duration
        print("Pass:%d, Loss:%f, Handle Images Duration: %f\n" %
              (pass_id, np.mean(every_pass_loss), pass_duration))
        if pass_id == num_passes - 1:
            examples_per_sec = train_num / total_train_time
            train_cost_kpi.add_record(np.mean(every_pass_loss))
            train_speed_kpi.add_record(
                np.array(examples_per_sec, dtype='float'))
            four_card_speed_kpi.add_record(
                np.array(examples_per_sec, dtype='float'))
    if args.gpu_card_num == 1:
        train_cost_kpi.persist()
        train_speed_kpi.persist()
    else:
        four_card_speed_kpi.persist()
    print("Best test map {0}".format(best_map))
Example #4
def train(batch_size, device, pass_num, iterations):
    print('iterations', iterations)
    class_dim = 10
    # NCHW
    dshape = [3, 32, 32]
    input = fluid.layers.data(name='data', shape=dshape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    predict = resnet_cifar10(input, class_dim)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
    opts = optimizer.minimize(avg_cost)
    # accuracy = fluid.evaluator.Evaluator(input=predict, label=label)

    # inference program
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        # test_target = accuracy.metrics + accuracy.states
        test_target = [predict, avg_cost]
        inference_program = fluid.io.get_inference_program(test_target)

    fluid.memory_optimize(fluid.default_main_program())

    train_reader = paddle.batch(
        paddle.dataset.cifar.train10(),
        batch_size=batch_size)

    test_reader = paddle.batch(
        paddle.dataset.cifar.test10(), batch_size=batch_size)

    def test(exe):
        # accuracy.reset(exe)
        for batch_id, data in enumerate(test_reader()):
            img_data = np.array([x[0].reshape(dshape)
                                 for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape([-1, 1])

            # print('image_data', img_data)
            # print('y_data', y_data)

            predict_, avg_cost_ = exe.run(
                inference_program,
                feed={
                    "data": img_data,
                    "label": y_data
                },
                fetch_list=[predict, avg_cost])
            return avg_cost_

        # return accuracy.eval(exe)

    place = core.CPUPlace() if device == 'CPU' else core.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    for pass_id in range(pass_num):
        logger.warning('Pass {}'.format(pass_id))
        # accuracy.reset(exe)
        iter = 0
        for batch_id, data in enumerate(train_reader()):
            logger.warning('Batch {}'.format(batch_id))
            batch_start = time.time()
            if iter == iterations:
                break
            img_data = np.array([x[0].reshape(dshape)
                                 for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data]).astype('int64')
            y_data = y_data.reshape([-1, 1])
            avg_cost_, = exe.run(
                fluid.default_main_program(),
                feed={
                    'data': img_data,
                    'label': y_data
                },
                fetch_list=[avg_cost])
            batch_end = time.time()
            print('avg_cost', np.array(avg_cost_, dtype='float32'))
            train_cost_kpi.add_record(np.array(avg_cost_, dtype='float32'))
            train_duration_kpi.add_record(batch_end - batch_start)

            iter += 1
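
A minimal invocation sketch for this benchmark entry point (values illustrative):

train(batch_size=32, device='GPU', pass_num=1, iterations=100)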