Code Example #1
    def net_profiler(self, state, profile_path='/tmp/profile'):
        enable_if_gpu = state == 'GPU' or state == "All"
        if enable_if_gpu and not core.is_compiled_with_cuda():
            return
        startup_program = fluid.Program()
        main_program = fluid.Program()

        with fluid.program_guard(main_program, startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
            i = layers.zeros(shape=[1], dtype='int64')
            counter = fluid.layers.zeros(shape=[1],
                                         dtype='int64',
                                         force_cpu=True)
            until = layers.fill_constant([1], dtype='int64', value=10)
            data_arr = layers.array_write(hidden1, i)
            cond = fluid.layers.less_than(x=counter, y=until)
            while_op = fluid.layers.While(cond=cond)
            with while_op.block():
                hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
                layers.array_write(hidden_n, i, data_arr)
                fluid.layers.increment(x=counter, value=1, in_place=True)
                layers.less_than(x=counter, y=until, cond=cond)

            hidden_n = layers.array_read(data_arr, i)
            hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)
            batch_size = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(input=predict,
                                              label=label,
                                              total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

        place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_program)

        pass_acc_calculator = fluid.average.WeightedAverage()
        with profiler.profiler(state, 'total', profile_path) as prof:
            for iter in range(10):
                if iter == 2:
                    profiler.reset_profiler()
                x = np.random.random((32, 784)).astype("float32")
                y = np.random.randint(0, 10, (32, 1)).astype("int64")

                outs = exe.run(main_program,
                               feed={
                                   'x': x,
                                   'y': y
                               },
                               fetch_list=[avg_cost, batch_acc, batch_size])
                acc = np.array(outs[1])
                b_size = np.array(outs[2])
                pass_acc_calculator.add(value=acc, weight=b_size)
                pass_acc = pass_acc_calculator.eval()
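
Every example on this page follows the same warm-up idiom: run a handful of iterations inside the profiler, then call profiler.reset_profiler() so that one-off start-up costs (program build, allocator warm-up, cuDNN autotuning) are excluded from the summary. The stripped-down sketch below isolates just that idiom; the single-fc network, the iteration counts, and the /tmp/profile path are illustrative placeholders, and it assumes an older Paddle release that still ships paddle.fluid and paddle.fluid.profiler.

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler

def minimal_profile(state='CPU', profile_path='/tmp/profile', warmup_iters=2):
    # build a tiny network: one fc layer over random 784-d inputs
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        image = fluid.layers.data(name='x', shape=[784], dtype='float32')
        predict = fluid.layers.fc(input=image, size=10, act='softmax')

    place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)

    # profile 10 iterations, discarding the first `warmup_iters` of them
    with profiler.profiler(state, 'total', profile_path):
        for it in range(10):
            if it == warmup_iters:
                profiler.reset_profiler()  # drop events recorded so far
            x = np.random.random((32, 784)).astype('float32')
            exe.run(main_program, feed={'x': x}, fetch_list=[predict])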
Code Example #2
File: test_profiler.py Project: absorbguo/Paddle
    def net_profiler(self, state, profile_path='/tmp/profile'):
        enable_if_gpu = state == 'GPU' or state == "All"
        if enable_if_gpu and not core.is_compiled_with_cuda():
            return
        startup_program = fluid.Program()
        main_program = fluid.Program()

        with fluid.program_guard(main_program, startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
            i = layers.zeros(shape=[1], dtype='int64')
            counter = fluid.layers.zeros(
                shape=[1], dtype='int64', force_cpu=True)
            until = layers.fill_constant([1], dtype='int64', value=10)
            data_arr = layers.array_write(hidden1, i)
            cond = fluid.layers.less_than(x=counter, y=until)
            while_op = fluid.layers.While(cond=cond)
            with while_op.block():
                hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
                layers.array_write(hidden_n, i, data_arr)
                fluid.layers.increment(x=counter, value=1, in_place=True)
                layers.less_than(x=counter, y=until, cond=cond)

            hidden_n = layers.array_read(data_arr, i)
            hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)
            batch_size = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(
                input=predict, label=label, total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

        place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_program)

        pass_acc_calculator = fluid.average.WeightedAverage()
        with profiler.profiler(state, 'total', profile_path) as prof:
            for iter in range(10):
                if iter == 2:
                    profiler.reset_profiler()
                x = np.random.random((32, 784)).astype("float32")
                y = np.random.randint(0, 10, (32, 1)).astype("int64")

                outs = exe.run(main_program,
                               feed={'x': x,
                                     'y': y},
                               fetch_list=[avg_cost, batch_acc, batch_size])
                acc = np.array(outs[1])
                b_size = np.array(outs[2])
                pass_acc_calculator.add(value=acc, weight=b_size)
                pass_acc = pass_acc_calculator.eval()
Code Example #3
    def train_an_epoch_py_reader(epoch_id, batch_times):
        # get train epoch size
        log_interval = get_log_interval(len(train_data), batch_size)

        init_hidden, init_cell = get_init_data()
        first_data_feeds = {}
        first_data_feeds["init_hidden"] = init_hidden
        first_data_feeds["init_cell"] = init_cell

        total_loss = 0
        iters = 0

        py_reader.start()
        batch_id = 0
        try:
            while True:
                if batch_id == 0:
                    data_feeds = first_data_feeds
                    batch_time = 0
                    batch_start_time = time.time()
                else:
                    data_feeds = None
                    batch_time = time.time() - batch_start_time
                    batch_times.append(batch_time)
                    batch_start_time = time.time()

                fetch_outs = exe.run(train_program,
                                     feed=data_feeds,
                                     fetch_list=[loss.name, "learning_rate"],
                                     use_program_cache=True)

                cost_train = np.array(fetch_outs[0])
                lr = np.array(fetch_outs[1])

                total_loss += cost_train
                iters += num_steps
                if batch_id > 0 and (log_interval == 0
                                     or batch_id % log_interval == 0):
                    ppl = np.exp(total_loss / iters)
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                        % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

                if args.profile:
                    if batch_id == 1:
                        profiler.reset_profiler()
                    elif batch_id >= 11:
                        break
                batch_id += 1
        except fluid.core.EOFException:
            py_reader.reset()

        batch_times.append(time.time() - batch_start_time)
        ppl = np.exp(total_loss / iters)
        return ppl
Code Example #4
    def train_an_epoch(epoch_id, batch_times):
        # get train epoch size
        log_interval = get_log_interval(len(train_data), batch_size)
        train_data_iter = reader.get_data_iter(train_data, batch_size,
                                               num_steps)

        total_loss = 0
        iters = 0
        for batch_id, batch in enumerate(train_data_iter):
            if batch_id == 0:
                init_hidden, init_cell = get_init_data()
            else:
                init_hidden = None
                init_cell = None
            input_data_feed = prepare_input(batch,
                                            init_hidden=init_hidden,
                                            init_cell=init_cell,
                                            epoch_id=epoch_id,
                                            device_count=device_count)

            batch_start_time = time.time()
            fetch_outs = exe.run(train_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, "learning_rate"],
                                 use_program_cache=True)
            batch_time = time.time() - batch_start_time
            batch_times.append(batch_time)

            cost_train = np.array(fetch_outs[0])
            lr = np.array(fetch_outs[1])

            total_loss += cost_train
            iters += num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print(
                    "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                    % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

            if args.profile:
                if batch_id == 1:
                    profiler.reset_profiler()
                elif batch_id >= 11:
                    break

        ppl = np.exp(total_loss / iters)
        return ppl
Code Example #5
File: train.py Project: zhousanfu/paddle-demo
    def train_an_epoch(epoch_id, batch_times):
        # get train epoch size
        log_interval = get_log_interval(len(train_data))
        train_data_iter = reader.get_data_iter(
            train_data, config.batch_size * device_count, config.num_steps)

        total_loss = 0
        iters = 0

        init_hidden, init_cell = generate_init_data()
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden=init_hidden,
                                            init_cell=init_cell,
                                            epoch_id=epoch_id,
                                            with_lr=True,
                                            device_count=device_count)
            batch_start_time = time.time()
            fetch_outs = exe.run(train_program,
                                 feed=input_data_feed,
                                 fetch_list=[
                                     loss.name, "learning_rate",
                                     last_hidden.name, last_cell.name
                                 ],
                                 use_program_cache=True)
            batch_time = time.time() - batch_start_time
            batch_times.append(batch_time)

            cost_train = np.array(fetch_outs[0])
            lr = np.array(fetch_outs[1])
            init_hidden = np.array(fetch_outs[2])
            init_cell = np.array(fetch_outs[3])
            total_loss += cost_train
            iters += config.num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print(
                    "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                    % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

            # profiler tools for benchmark
            if args.profile and batch_id == log_interval:
                profiler.reset_profiler()
            elif args.profile and batch_id == (log_interval + 5):
                break
        ppl = np.exp(total_loss / iters)
        return ppl
Code Example #6
    def net_profiler(self,
                     exe,
                     state,
                     tracer_option,
                     batch_range=None,
                     use_parallel_executor=False,
                     use_new_api=False):
        main_program, startup_program, avg_cost, batch_size, batch_acc = self.build_program(
            compile_program=use_parallel_executor)
        exe.run(startup_program)

        profile_path = self.get_profile_path()
        if not use_new_api:
            with profiler.profiler(state, 'total', profile_path, tracer_option):
                pass_acc_calculator = fluid.average.WeightedAverage()
                for iter in range(10):
                    if iter == 2:
                        profiler.reset_profiler()
                    self.run_iter(exe, main_program,
                                  [avg_cost, batch_acc, batch_size],
                                  pass_acc_calculator)
        else:
            options = utils.ProfilerOptions(options={
                'state': state,
                'sorted_key': 'total',
                'tracer_level': tracer_option,
                'batch_range': [0, 10] if batch_range is None else batch_range,
                'profile_path': profile_path
            })
            with utils.Profiler(enabled=True, options=options) as prof:
                pass_acc_calculator = fluid.average.WeightedAverage()
                for iter in range(10):
                    self.run_iter(exe, main_program,
                                  [avg_cost, batch_acc, batch_size],
                                  pass_acc_calculator)
                    utils.get_profiler().record_step()
                    if batch_range is None and iter == 2:
                        utils.get_profiler().reset()

        self.check_profile_result(profile_path)
Code Example #7
def train(args):

    max_images_num = data_reader.max_images_num()
    shuffle = True
    if args.run_ce:
        np.random.seed(10)
        fluid.default_startup_program().random_seed = 90
        max_images_num = 1
        shuffle = False
    # data_shape = [-1] + data_reader.image_shape()
    data_shape = data_reader.image_shape()

    input_A = fluid.layers.data(name='input_A',
                                shape=data_shape,
                                append_batch_size=True,
                                dtype='float32')
    input_B = fluid.layers.data(name='input_B',
                                shape=data_shape,
                                append_batch_size=True,
                                dtype='float32')
    fake_pool_A = fluid.layers.data(name='fake_pool_A',
                                    shape=data_shape,
                                    append_batch_size=True,
                                    dtype='float32')
    fake_pool_B = fluid.layers.data(name='fake_pool_B',
                                    shape=data_shape,
                                    append_batch_size=True,
                                    dtype='float32')

    g_A_trainer = GATrainer(input_A, input_B)
    g_B_trainer = GBTrainer(input_A, input_B)
    d_A_trainer = DATrainer(input_A, fake_pool_A)
    d_B_trainer = DBTrainer(input_B, fake_pool_B)

    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    A_pool = ImagePool()
    B_pool = ImagePool()

    A_reader = paddle.batch(data_reader.a_reader(shuffle=shuffle),
                            args.batch_size)()
    B_reader = paddle.batch(data_reader.b_reader(shuffle=shuffle),
                            args.batch_size)()
    if not args.run_ce:
        A_test_reader = data_reader.a_test_reader()
        B_test_reader = data_reader.b_test_reader()

    def test(epoch):
        out_path = args.output + "/test"
        if not os.path.exists(out_path):
            os.makedirs(out_path)
        i = 0
        for data_A, data_B in zip(A_test_reader(), B_test_reader()):
            A_name = data_A[1]
            B_name = data_B[1]
            tensor_A = fluid.LoDTensor()
            tensor_B = fluid.LoDTensor()
            tensor_A.set(data_A[0], place)
            tensor_B.set(data_B[0], place)
            fake_A_temp, fake_B_temp, cyc_A_temp, cyc_B_temp = exe.run(
                g_A_trainer.infer_program,
                fetch_list=[
                    g_A_trainer.fake_A, g_A_trainer.fake_B, g_A_trainer.cyc_A,
                    g_A_trainer.cyc_B
                ],
                feed={
                    "input_A": tensor_A,
                    "input_B": tensor_B
                })
            fake_A_temp = np.squeeze(fake_A_temp[0]).transpose([1, 2, 0])
            fake_B_temp = np.squeeze(fake_B_temp[0]).transpose([1, 2, 0])
            cyc_A_temp = np.squeeze(cyc_A_temp[0]).transpose([1, 2, 0])
            cyc_B_temp = np.squeeze(cyc_B_temp[0]).transpose([1, 2, 0])
            input_A_temp = np.squeeze(data_A[0]).transpose([1, 2, 0])
            input_B_temp = np.squeeze(data_B[0]).transpose([1, 2, 0])

            imsave(out_path + "/fakeB_" + str(epoch) + "_" + A_name,
                   ((fake_B_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/fakeA_" + str(epoch) + "_" + B_name,
                   ((fake_A_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/cycA_" + str(epoch) + "_" + A_name,
                   ((cyc_A_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/cycB_" + str(epoch) + "_" + B_name,
                   ((cyc_B_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/inputA_" + str(epoch) + "_" + A_name,
                   ((input_A_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/inputB_" + str(epoch) + "_" + B_name,
                   ((input_B_temp + 1) * 127.5).astype(np.uint8))
            i += 1

    def checkpoints(epoch):
        out_path = args.output + "/checkpoints/" + str(epoch)
        if not os.path.exists(out_path):
            os.makedirs(out_path)
        fluid.io.save_persistables(exe,
                                   out_path + "/g_a",
                                   main_program=g_A_trainer.program)
        fluid.io.save_persistables(exe,
                                   out_path + "/g_b",
                                   main_program=g_B_trainer.program)
        fluid.io.save_persistables(exe,
                                   out_path + "/d_a",
                                   main_program=d_A_trainer.program)
        fluid.io.save_persistables(exe,
                                   out_path + "/d_b",
                                   main_program=d_B_trainer.program)
        print("saved checkpoint to {}".format(out_path))
        sys.stdout.flush()

    def init_model():
        assert os.path.exists(
            args.init_model), "[%s] cannot be found." % args.init_model
        fluid.io.load_persistables(exe,
                                   args.init_model + "/g_a",
                                   main_program=g_A_trainer.program)
        fluid.io.load_persistables(exe,
                                   args.init_model + "/g_b",
                                   main_program=g_B_trainer.program)
        fluid.io.load_persistables(exe,
                                   args.init_model + "/d_a",
                                   main_program=d_A_trainer.program)
        fluid.io.load_persistables(exe,
                                   args.init_model + "/d_b",
                                   main_program=d_B_trainer.program)
        print("Load model from {}".format(args.init_model))

    if args.init_model:
        init_model()
    losses = [[], []]
    t_time = 0
    build_strategy = fluid.BuildStrategy()

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1
    exec_strategy.use_experimental_executor = True

    g_A_trainer_program = fluid.CompiledProgram(
        g_A_trainer.program).with_data_parallel(
            loss_name=g_A_trainer.g_loss_A.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    g_B_trainer_program = fluid.CompiledProgram(
        g_B_trainer.program).with_data_parallel(
            loss_name=g_B_trainer.g_loss_B.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    d_B_trainer_program = fluid.CompiledProgram(
        d_B_trainer.program).with_data_parallel(
            loss_name=d_B_trainer.d_loss_B.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    d_A_trainer_program = fluid.CompiledProgram(
        d_A_trainer.program).with_data_parallel(
            loss_name=d_A_trainer.d_loss_A.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

    total_batch_num = 0  # this is for benchmark

    for epoch in range(args.epoch):
        batch_id = 0
        for i in range(max_images_num):
            if args.max_iter and total_batch_num == args.max_iter:  # this is for benchmark
                return
            data_A = next(A_reader)
            data_B = next(B_reader)
            tensor_A = fluid.LoDTensor()
            tensor_B = fluid.LoDTensor()
            tensor_A.set(data_A, place)
            tensor_B.set(data_B, place)
            s_time = time.time()
            # optimize the g_A network
            g_A_loss, fake_B_tmp = exe.run(
                g_A_trainer_program,
                fetch_list=[g_A_trainer.g_loss_A, g_A_trainer.fake_B],
                feed={
                    "input_A": tensor_A,
                    "input_B": tensor_B
                })

            fake_pool_B = B_pool.pool_image(fake_B_tmp)

            # optimize the d_B network
            d_B_loss = exe.run(d_B_trainer_program,
                               fetch_list=[d_B_trainer.d_loss_B],
                               feed={
                                   "input_B": tensor_B,
                                   "fake_pool_B": fake_pool_B
                               })[0]

            # optimize the g_B network
            g_B_loss, fake_A_tmp = exe.run(
                g_B_trainer_program,
                fetch_list=[g_B_trainer.g_loss_B, g_B_trainer.fake_A],
                feed={
                    "input_A": tensor_A,
                    "input_B": tensor_B
                })

            fake_pool_A = A_pool.pool_image(fake_A_tmp)

            # optimize the d_A network
            d_A_loss = exe.run(d_A_trainer_program,
                               fetch_list=[d_A_trainer.d_loss_A],
                               feed={
                                   "input_A": tensor_A,
                                   "fake_pool_A": fake_pool_A
                               })[0]
            batch_time = time.time() - s_time
            t_time += batch_time
            print(
                "epoch{}; batch{}; g_A_loss: {}; d_B_loss: {}; g_B_loss: {}; d_A_loss: {}; "
                "Batch_time_cost: {}".format(epoch, batch_id, g_A_loss[0],
                                             d_B_loss[0], g_B_loss[0],
                                             d_A_loss[0], batch_time))
            losses[0].append(g_A_loss[0])
            losses[1].append(d_A_loss[0])
            sys.stdout.flush()
            batch_id += 1
            total_batch_num = total_batch_num + 1  # this is for benchmark
            # profiler tools for benchmark
            if args.profile and epoch == 0 and batch_id == 10:
                profiler.reset_profiler()
            elif args.profile and epoch == 0 and batch_id == 15:
                return

        if args.run_test and not args.run_ce:
            test(epoch)
        if args.save_checkpoints and not args.run_ce:
            checkpoints(epoch)
    if args.run_ce:
        print("kpis,g_train_cost,{}".format(np.mean(losses[0])))
        print("kpis,d_train_cost,{}".format(np.mean(losses[1])))
        print("kpis,duration,{}".format(t_time / args.epoch))
Code Example #8
File: Pix2pix.py Project: zhousanfu/paddle-demo
    def build_model(self):
        data_shape = [None, 3, self.cfg.crop_size, self.cfg.crop_size]

        input_A = fluid.data(name='input_A', shape=data_shape, dtype='float32')
        input_B = fluid.data(name='input_B', shape=data_shape, dtype='float32')
        input_fake = fluid.data(
            name='input_fake', shape=data_shape, dtype='float32')
        # used for continuous evaluation        
        if self.cfg.enable_ce:
            fluid.default_startup_program().random_seed = 90

        loader = fluid.io.DataLoader.from_generator(
            feed_list=[input_A, input_B],
            capacity=4,
            iterable=True,
            use_double_buffer=True)

        gen_trainer = GTrainer(input_A, input_B, self.cfg, self.batch_num)
        dis_trainer = DTrainer(input_A, input_B, input_fake, self.cfg,
                               self.batch_num)

        # prepare environment
        place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace()
        loader.set_batch_generator(
            self.train_reader,
            places=fluid.cuda_places()
            if self.cfg.use_gpu else fluid.cpu_places())
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        if self.cfg.init_model:
            utility.init_checkpoints(self.cfg, gen_trainer, "net_G")
            utility.init_checkpoints(self.cfg, dis_trainer, "net_D")

        ### memory optim
        build_strategy = fluid.BuildStrategy()

        gen_trainer_program = fluid.CompiledProgram(
            gen_trainer.program).with_data_parallel(
                loss_name=gen_trainer.g_loss.name,
                build_strategy=build_strategy)
        dis_trainer_program = fluid.CompiledProgram(
            dis_trainer.program).with_data_parallel(
                loss_name=dis_trainer.d_loss.name,
                build_strategy=build_strategy)

        t_time = 0

        total_train_batch = 0  # used for benchmark

        for epoch_id in range(self.cfg.epoch):
            batch_id = 0
            for tensor in loader():
                if self.cfg.max_iter and total_train_batch == self.cfg.max_iter:  # used for benchmark
                    return
                s_time = time.time()

                # optimize the generator network
                g_loss_gan, g_loss_l1, fake_B_tmp = exe.run(
                    gen_trainer_program,
                    fetch_list=[
                        gen_trainer.g_loss_gan, gen_trainer.g_loss_L1,
                        gen_trainer.fake_B
                    ],
                    feed=tensor)

                devices_num = utility.get_device_num(self.cfg)
                fake_per_device = int(len(fake_B_tmp) / devices_num)
                for dev in range(devices_num):
                    tensor[dev]['input_fake'] = fake_B_tmp[dev * fake_per_device : (dev+1) * fake_per_device]

                # optimize the discriminator network
                d_loss_real, d_loss_fake = exe.run(dis_trainer_program,
                                                   fetch_list=[
                                                       dis_trainer.d_loss_real,
                                                       dis_trainer.d_loss_fake
                                                   ],
                                                   feed=tensor)

                batch_time = time.time() - s_time
                t_time += batch_time
                if batch_id % self.cfg.print_freq == 0:
                    print("epoch{}: batch{}: \n\
                         g_loss_gan: {}; g_loss_l1: {}; \n\
                         d_loss_real: {}; d_loss_fake: {}; \n\
                         Batch_time_cost: {}"
                          .format(epoch_id, batch_id, g_loss_gan[0], g_loss_l1[
                              0], d_loss_real[0], d_loss_fake[0], batch_time))

                sys.stdout.flush()
                batch_id += 1
                total_train_batch += 1  # used for benchmark
                # profiler tools
                if self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq:
                    profiler.reset_profiler()
                elif self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq + 5:
                    return

            if self.cfg.run_test:
                image_name = fluid.data(
                    name='image_name',
                    shape=[None, self.cfg.batch_size],
                    dtype="int32")
                test_loader = fluid.io.DataLoader.from_generator(
                    feed_list=[input_A, input_B, image_name],
                    capacity=4,
                    iterable=True,
                    use_double_buffer=True)
                test_loader.set_batch_generator(
                    self.test_reader,
                    places=fluid.cuda_places()
                    if self.cfg.use_gpu else fluid.cpu_places())
                test_program = gen_trainer.infer_program
                utility.save_test_image(
                    epoch_id,
                    self.cfg,
                    exe,
                    place,
                    test_program,
                    gen_trainer,
                    test_loader,
                    A_id2name=self.id2name)

            if self.cfg.save_checkpoints:
                utility.checkpoints(epoch_id, self.cfg, gen_trainer,
                                    "net_G")
                utility.checkpoints(epoch_id, self.cfg, dis_trainer,
                                    "net_D")
        if self.cfg.enable_ce:
            device_num = fluid.core.get_cuda_device_count(
            ) if self.cfg.use_gpu else 1
            print("kpis\tpix2pix_g_loss_gan_card{}\t{}".format(device_num,
                                                               g_loss_gan[0]))
            print("kpis\tpix2pix_g_loss_l1_card{}\t{}".format(device_num,
                                                              g_loss_l1[0]))
            print("kpis\tpix2pix_d_loss_real_card{}\t{}".format(device_num,
                                                                d_loss_real[0]))
            print("kpis\tpix2pix_d_loss_fake_card{}\t{}".format(device_num,
                                                                d_loss_fake[0]))
            print("kpis\tpix2pix_Batch_time_cost_card{}\t{}".format(device_num,
                                                                    batch_time))
Code Example #9
File: infer.py Project: sunqiang25/models-develop
def inference(args):
    """OCR inference"""
    if args.model == "crnn_ctc":
        infer = ctc_infer
        get_feeder_data = get_ctc_feeder_for_infer
    else:
        infer = attention_infer
        get_feeder_data = get_attention_feeder_for_infer
    eos = 1
    sos = 0
    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
    # define network
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    ids = infer(images, num_classes, use_cudnn=True if args.use_gpu else False)
    # data reader
    infer_reader = data_reader.inference(
        batch_size=args.batch_size,
        infer_images_dir=args.input_images_dir,
        infer_list_file=args.input_images_list,
        cycle=True if args.iterations > 0 else False,
        model=args.model)
    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load dictionary
    dict_map = None
    if args.dict is not None and os.path.isfile(args.dict):
        dict_map = {}
        with open(args.dict) as dict_file:
            for i, word in enumerate(dict_file):
                dict_map[i] = word.strip()
        print("Loaded dict from %s" % args.dict)

    # load init model
    model_dir = args.model_path
    model_file_name = None
    if not os.path.isdir(args.model_path):
        model_dir = os.path.dirname(args.model_path)
        model_file_name = os.path.basename(args.model_path)
    fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
    print("Init model from: %s." % args.model_path)

    batch_times = []
    iters = 0
    for data in infer_reader():
        feed_dict = get_feeder_data(data, place)
        if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
            break
        if iters < args.skip_batch_num:
            print("Warm-up itaration")
        if iters == args.skip_batch_num:
            profiler.reset_profiler()

        start = time.time()
        result = exe.run(fluid.default_main_program(),
                         feed=feed_dict,
                         fetch_list=[ids],
                         return_numpy=False)
        indexes = prune(np.array(result[0]).flatten(), 0, 1)
        batch_time = time.time() - start
        fps = args.batch_size / batch_time
        batch_times.append(batch_time)
        if dict_map is not None:
            print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
                iters,
                batch_time,
                fps,
                [dict_map[index] for index in indexes], ))
        else:
            print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
                iters,
                batch_time,
                fps,
                indexes, ))

        iters += 1

    latencies = batch_times[args.skip_batch_num:]
    latency_avg = np.average(latencies)
    latency_pc99 = np.percentile(latencies, 99)
    fpses = np.divide(args.batch_size, latencies)
    fps_avg = np.average(fpses)
    fps_pc99 = np.percentile(fpses, 1)

    # Benchmark output
    print('\nTotal examples (incl. warm-up): %d' % (iters * args.batch_size))
    print('average latency: %.5f s, 99pc latency: %.5f s' % (latency_avg,
                                                             latency_pc99))
    print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99))
Code Example #10
File: train.py Project: zhousanfu/paddle-demo
    def train_an_epoch_dataloader(epoch_id, batch_times):
        # get train epoch size
        log_interval = get_log_interval(len(train_data))

        init_hidden, init_cell = generate_init_data()

        total_loss = 0
        iters = 0

        dataloader.start()
        batch_id = 0
        try:
            while True:
                data_feeds = {}
                if batch_id == 0:
                    batch_time = 0
                    batch_start_time = time.time()
                else:
                    batch_time = time.time() - batch_start_time
                    batch_times.append(batch_time)
                    batch_start_time = time.time()

                new_lr = generate_new_lr(epoch_id, device_count)
                data_feeds['learning_rate'] = new_lr
                data_feeds["init_hidden"] = init_hidden
                data_feeds["init_cell"] = init_cell

                fetch_outs = exe.run(train_program,
                                     feed=data_feeds,
                                     fetch_list=[
                                         loss.name, "learning_rate",
                                         last_hidden.name, last_cell.name
                                     ],
                                     use_program_cache=True)

                cost_train = np.array(fetch_outs[0])
                lr = np.array(fetch_outs[1])
                init_hidden = np.array(fetch_outs[2])
                init_cell = np.array(fetch_outs[3])

                total_loss += cost_train
                iters += config.num_steps
                if batch_id > 0 and (log_interval == 0
                                     or batch_id % log_interval == 0):
                    ppl = np.exp(total_loss / iters)
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                        % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

                batch_id += 1
                # profiler tools for benchmark
                if args.profile and batch_id == log_interval:
                    profiler.reset_profiler()
                elif args.profile and batch_id == (log_interval + 5):
                    break
        except fluid.core.EOFException:
            dataloader.reset()

        batch_times.append(time.time() - batch_start_time)
        ppl = np.exp(total_loss / iters)
        return ppl
Code Example #11
    def net_profiler(self, state, use_parallel_executor=False):
        profile_path = os.path.join(tempfile.gettempdir(), "profile")
        open(profile_path, "w").write("")
        startup_program = fluid.Program()
        main_program = fluid.Program()

        with fluid.program_guard(main_program, startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
            i = layers.zeros(shape=[1], dtype='int64')
            counter = fluid.layers.zeros(
                shape=[1], dtype='int64', force_cpu=True)
            until = layers.fill_constant([1], dtype='int64', value=10)
            data_arr = layers.array_write(hidden1, i)
            cond = fluid.layers.less_than(x=counter, y=until)
            while_op = fluid.layers.While(cond=cond)
            with while_op.block():
                hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
                layers.array_write(hidden_n, i, data_arr)
                fluid.layers.increment(x=counter, value=1, in_place=True)
                layers.less_than(x=counter, y=until, cond=cond)

            hidden_n = layers.array_read(data_arr, i)
            hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)
            batch_size = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(
                input=predict, label=label, total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

        place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_program)
        if use_parallel_executor:
            pe = fluid.ParallelExecutor(
                state != 'CPU',
                loss_name=avg_cost.name,
                main_program=main_program)

        pass_acc_calculator = fluid.average.WeightedAverage()
        with profiler.profiler(state, 'total', profile_path) as prof:
            for iter in range(10):
                if iter == 2:
                    profiler.reset_profiler()
                x = np.random.random((32, 784)).astype("float32")
                y = np.random.randint(0, 10, (32, 1)).astype("int64")

                if use_parallel_executor:
                    pe.run(feed={'x': x, 'y': y}, fetch_list=[avg_cost.name])
                    continue
                outs = exe.run(main_program,
                               feed={'x': x,
                                     'y': y},
                               fetch_list=[avg_cost, batch_acc, batch_size])
                acc = np.array(outs[1])
                b_size = np.array(outs[2])
                pass_acc_calculator.add(value=acc, weight=b_size)
                pass_acc = pass_acc_calculator.eval()
        data = open(profile_path, 'rb').read()
        self.assertGreater(len(data), 0)
        profile_pb = profiler_pb2.Profile()
        profile_pb.ParseFromString(data)
        self.assertGreater(len(profile_pb.events), 0)
        for event in profile_pb.events:
            if event.type == profiler_pb2.Event.GPUKernel:
                if not event.detail_info and not event.name.startswith("MEM"):
                    raise Exception(
                        "Kernel %s missing event. Has this kernel been recorded by RecordEvent?"
                        % event.name)
            elif event.type == profiler_pb2.Event.CPU and (
                    event.name.startswith("Driver API") or
                    event.name.startswith("Runtime API")):
                print("Warning: unregister", event.name)
Code Example #12
File: train.py Project: sneaxiy/benchmark
    def train_an_epoch(epoch_id, batch_times):
        # get train epoch size
        num_batchs = len(train_data) // batch_size
        epoch_size = (num_batchs - 1) // num_steps
        if args.profile:
            log_interval = 1
        else:
            log_interval = max(1, epoch_size // 10)

        data_iter_size = batch_size
        if device_count > 1 and args.parallel:
            data_iter_size = batch_size * device_count
        train_data_iter = reader.get_data_iter(train_data, data_iter_size,
                                               num_steps)

        total_loss = 0
        iters = 0
        if device_count > 1 and args.parallel:
            init_hidden = np.zeros(
                (num_layers * device_count, batch_size, hidden_size),
                dtype='float32')
            init_cell = np.zeros(
                (num_layers * device_count, batch_size, hidden_size),
                dtype='float32')
        else:
            init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                                   dtype='float32')
            init_cell = np.zeros((num_layers, batch_size, hidden_size),
                                 dtype='float32')
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id=epoch_id,
                                            device_count=device_count)

            batch_start_time = time.time()
            fetch_outs = exe.run(train_program,
                                 feed=input_data_feed,
                                 fetch_list=[
                                     loss.name, last_hidden.name,
                                     last_cell.name, "learning_rate"
                                 ],
                                 use_program_cache=True)
            batch_time = time.time() - batch_start_time
            batch_times.append(batch_time)

            cost_train = np.array(fetch_outs[0])
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            lr = np.array(fetch_outs[3])

            total_loss += cost_train
            iters += num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print(
                    "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                    % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

            if args.profile:
                if batch_id == 1:
                    profiler.reset_profiler()
                elif batch_id >= 11:
                    break

        ppl = np.exp(total_loss / iters)
        return ppl
Code Example #13
File: STGAN.py Project: zhousanfu/paddle-demo
    def build_model(self):
        data_shape = [None, 3, self.cfg.image_size, self.cfg.image_size]

        image_real = fluid.data(name='image_real',
                                shape=data_shape,
                                dtype='float32')
        label_org = fluid.data(name='label_org',
                               shape=[None, self.cfg.c_dim],
                               dtype='float32')
        label_trg = fluid.data(name='label_trg',
                               shape=[None, self.cfg.c_dim],
                               dtype='float32')
        label_org_ = fluid.data(name='label_org_',
                                shape=[None, self.cfg.c_dim],
                                dtype='float32')
        label_trg_ = fluid.data(name='label_trg_',
                                shape=[None, self.cfg.c_dim],
                                dtype='float32')
        # used for continuous evaluation
        if self.cfg.enable_ce:
            fluid.default_startup_program().random_seed = 90

        test_gen_trainer = GTrainer(image_real, label_org, label_org_,
                                    label_trg, label_trg_, self.cfg,
                                    self.batch_num)

        loader = fluid.io.DataLoader.from_generator(
            feed_list=[image_real, label_org, label_trg],
            capacity=64,
            iterable=True,
            use_double_buffer=True)
        label_org_ = (label_org * 2.0 - 1.0) * self.cfg.thres_int
        label_trg_ = (label_trg * 2.0 - 1.0) * self.cfg.thres_int

        gen_trainer = GTrainer(image_real, label_org, label_org_, label_trg,
                               label_trg_, self.cfg, self.batch_num)
        dis_trainer = DTrainer(image_real, label_org, label_org_, label_trg,
                               label_trg_, self.cfg, self.batch_num)

        # prepare environment
        place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace()
        loader.set_batch_generator(self.train_reader,
                                   places=fluid.cuda_places()
                                   if self.cfg.use_gpu else fluid.cpu_places())

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        if self.cfg.init_model:
            utility.init_checkpoints(self.cfg, gen_trainer, "net_G")
            utility.init_checkpoints(self.cfg, dis_trainer, "net_D")

        ### memory optim
        build_strategy = fluid.BuildStrategy()

        gen_trainer_program = fluid.CompiledProgram(
            gen_trainer.program).with_data_parallel(
                loss_name=gen_trainer.g_loss.name,
                build_strategy=build_strategy)
        dis_trainer_program = fluid.CompiledProgram(
            dis_trainer.program).with_data_parallel(
                loss_name=dis_trainer.d_loss.name,
                build_strategy=build_strategy)
        # used for continuous evaluation
        if self.cfg.enable_ce:
            gen_trainer_program.random_seed = 90
            dis_trainer_program.random_seed = 90

        t_time = 0

        total_train_batch = 0  # used for benchmark

        for epoch_id in range(self.cfg.epoch):
            batch_id = 0
            for data in loader():
                if self.cfg.max_iter and total_train_batch == self.cfg.max_iter:  # used for benchmark
                    return
                s_time = time.time()
                # optimize the discriminator network
                fetches = [
                    dis_trainer.d_loss.name,
                    dis_trainer.d_loss_real.name,
                    dis_trainer.d_loss_fake.name,
                    dis_trainer.d_loss_cls.name,
                    dis_trainer.d_loss_gp.name,
                ]
                d_loss, d_loss_real, d_loss_fake, d_loss_cls, d_loss_gp, = exe.run(
                    dis_trainer_program, fetch_list=fetches, feed=data)
                if (batch_id + 1) % self.cfg.num_discriminator_time == 0:
                    # optimize the generator network
                    d_fetches = [
                        gen_trainer.g_loss_fake.name,
                        gen_trainer.g_loss_rec.name,
                        gen_trainer.g_loss_cls.name
                    ]
                    g_loss_fake, g_loss_rec, g_loss_cls = exe.run(
                        gen_trainer_program, fetch_list=d_fetches, feed=data)
                    print("epoch{}: batch{}: \n\
                         g_loss_fake: {}; g_loss_rec: {}; g_loss_cls: {}".
                          format(epoch_id, batch_id, g_loss_fake[0],
                                 g_loss_rec[0], g_loss_cls[0]))
                batch_time = time.time() - s_time
                t_time += batch_time
                if (batch_id + 1) % self.cfg.print_freq == 0:
                    print("epoch{}: batch{}:  \n\
                         d_loss: {}; d_loss_real: {}; d_loss_fake: {}; d_loss_cls: {}; d_loss_gp: {} \n\
                         Batch_time_cost: {}".format(epoch_id, batch_id,
                                                     d_loss[0], d_loss_real[0],
                                                     d_loss_fake[0],
                                                     d_loss_cls[0],
                                                     d_loss_gp[0], batch_time))
                sys.stdout.flush()
                batch_id += 1
                if self.cfg.enable_ce and batch_id == 100:
                    break

                total_train_batch += 1  # used for benchmark
                # profiler tools
                if self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq:
                    profiler.reset_profiler()
                elif self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq + 5:
                    return

            if self.cfg.run_test:
                image_name = fluid.data(name='image_name',
                                        shape=[None, self.cfg.n_samples],
                                        dtype='int32')
                test_loader = fluid.io.DataLoader.from_generator(
                    feed_list=[image_real, label_org, label_trg, image_name],
                    capacity=32,
                    iterable=True,
                    use_double_buffer=True)
                test_loader.set_batch_generator(
                    self.test_reader,
                    places=fluid.cuda_places()
                    if self.cfg.use_gpu else fluid.cpu_places())
                test_program = test_gen_trainer.infer_program
                utility.save_test_image(epoch_id, self.cfg, exe, place,
                                        test_program, test_gen_trainer,
                                        test_loader)

            if self.cfg.save_checkpoints:
                utility.checkpoints(epoch_id, self.cfg, gen_trainer, "net_G")
                utility.checkpoints(epoch_id, self.cfg, dis_trainer, "net_D")
            # used for continuous evaluation
            if self.cfg.enable_ce:
                device_num = fluid.core.get_cuda_device_count(
                ) if self.cfg.use_gpu else 1
                print("kpis\tstgan_g_loss_fake_card{}\t{}".format(
                    device_num, g_loss_fake[0]))
                print("kpis\tstgan_g_loss_rec_card{}\t{}".format(
                    device_num, g_loss_rec[0]))
                print("kpis\tstgan_g_loss_cls_card{}\t{}".format(
                    device_num, g_loss_cls[0]))
                print("kpis\tstgan_d_loss_card{}\t{}".format(
                    device_num, d_loss[0]))
                print("kpis\tstgan_d_loss_real_card{}\t{}".format(
                    device_num, d_loss_real[0]))
                print("kpis\tstgan_d_loss_fake_card{}\t{}".format(
                    device_num, d_loss_fake[0]))
                print("kpis\tstgan_d_loss_cls_card{}\t{}".format(
                    device_num, d_loss_cls[0]))
                print("kpis\tstgan_d_loss_gp_card{}\t{}".format(
                    device_num, d_loss_gp[0]))
                print("kpis\tstgan_Batch_time_cost_card{}\t{}".format(
                    device_num, batch_time))
Code Example #14
File: profile.py Project: zhong110020/models
def profile(args):
    """profile the training process.
    """

    if not args.first_batches_to_skip < args.max_batch_num:
        raise ValueError("arg 'first_batches_to_skip' must be smaller than "
                         "'max_batch_num'.")
    if not args.first_batches_to_skip >= 0:
        raise ValueError(
            "arg 'first_batches_to_skip' must not be smaller than 0.")

    _, avg_cost, accuracy = stacked_lstmp_model(frame_dim=args.frame_dim,
                                                hidden_dim=args.hidden_dim,
                                                proj_dim=args.proj_dim,
                                                stacked_num=args.stacked_num,
                                                class_num=args.class_num,
                                                parallel=args.parallel)

    optimizer = fluid.optimizer.Adam(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=args.learning_rate,
            decay_steps=1879,
            decay_rate=1 / 1.2,
            staircase=True))
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice(5, 5),
        trans_delay.TransDelay(5)
    ]

    data_reader = reader.AsyncDataReader(args.feature_lst,
                                         args.label_lst,
                                         -1,
                                         split_sentence_threshold=1024)
    data_reader.set_transformers(ltrans)

    feature_t = fluid.LoDTensor()
    label_t = fluid.LoDTensor()

    sorted_key = None if args.sorted_key == 'None' else args.sorted_key
    with profiler.profiler(args.device, sorted_key) as prof:
        frames_seen, start_time = 0, 0.0
        for batch_id, batch_data in enumerate(
                data_reader.batch_iterator(args.batch_size,
                                           args.minimum_batch_size)):
            if batch_id >= args.max_batch_num:
                break
            if args.first_batches_to_skip == batch_id:
                profiler.reset_profiler()
                start_time = time.time()
                frames_seen = 0
            # load_data
            (features, labels, lod, _) = batch_data
            features = np.reshape(features, (-1, 11, 3, args.frame_dim))
            features = np.transpose(features, (0, 2, 1, 3))
            feature_t.set(features, place)
            feature_t.set_lod([lod])
            label_t.set(labels, place)
            label_t.set_lod([lod])

            frames_seen += lod[-1]

            outs = exe.run(fluid.default_main_program(),
                           feed={
                               "feature": feature_t,
                               "label": label_t
                           },
                           fetch_list=[avg_cost, accuracy]
                           if args.print_train_acc else [],
                           return_numpy=False)

            if args.print_train_acc:
                print("Batch %d acc: %f" %
                      (batch_id, lodtensor_to_ndarray(outs[1])[0]))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        time_consumed = time.time() - start_time
        frames_per_sec = frames_seen / time_consumed
        print("\nTime consumed: %f s, performance: %f frames/s." %
              (time_consumed, frames_per_sec))
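
Example #14 forwards sorted_key from the command line and maps the literal string 'None' to Python None, which leaves events in recording order. The minimal sketch below shows how the sort key is passed to the legacy fluid profiler; the list of accepted values is taken from the fluid.profiler documentation and should be treated as an assumption to verify against your Paddle version.

import paddle.fluid.profiler as profiler

# Sort keys for the legacy profiler summary (assumed list): None keeps
# recording order; the others sort by call count or by total / max / min /
# average time per operator.
SORTED_KEY_CHOICES = [None, 'calls', 'total', 'max', 'min', 'ave']

with profiler.profiler('CPU', sorted_key='total', profile_path='/tmp/profile'):
    pass  # run the iterations to be profiled here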
Code Example #15
def train_parallel(train_args, test_args, args, train_prog, test_prog,
                   startup_prog, nccl_id_var, num_trainers, trainer_id):
    over_all_start = time.time()
    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    feeder = None
    if not args.use_reader_op:
        feed_var_list = [
            var for var in train_prog.global_block().vars.values()
            if var.is_data
        ]
        feeder = fluid.DataFeeder(feed_var_list, place)
    # generate fake:
    if args.use_fake_data:
        for var in feed_var_list:
            v = startup_prog.global_block()._clone_variable(var)
            var.persistable = True
            v.persistable = True

            real_shape = list(var.shape)
            real_shape[0] = args.batch_size // args.gpus
            startup_prog.global_block().append_op(outputs={"Out": v},
                                                  type="fill_constant",
                                                  attrs={
                                                      "shape": real_shape,
                                                      "value": 1.0,
                                                      "dtype": var.dtype
                                                  })

    if nccl_id_var and trainer_id == 0:
        #FIXME(wuyi): wait other trainer to start listening
        time.sleep(30)

    startup_exe = fluid.Executor(place)
    startup_exe.run(startup_prog)
    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = args.cpus
    strategy.allow_op_delay = False
    build_strategy = fluid.BuildStrategy()
    if args.reduce_strategy == "reduce":
        build_strategy.reduce_strategy = fluid.BuildStrategy(
        ).ReduceStrategy.Reduce
    else:
        build_strategy.reduce_strategy = fluid.BuildStrategy(
        ).ReduceStrategy.AllReduce
    build_strategy.fuse_broadcast_op = args.fuse_broadcast_op

    avg_loss = train_args[0]

    if args.update_method == "pserver":
        # In parameter-server mode, gradients are merged on the local
        # pserver, so do not initialize the ParallelExecutor in
        # multi-trainer all-reduce mode.
        num_trainers = 1
        trainer_id = 0

    exe = fluid.ParallelExecutor(True,
                                 avg_loss.name,
                                 main_program=train_prog,
                                 exec_strategy=strategy,
                                 build_strategy=build_strategy,
                                 num_trainers=num_trainers,
                                 trainer_id=trainer_id)

    if not args.no_test:
        if args.update_method == "pserver":
            test_scope = None
        else:
            # NOTE: use an empty scope to avoid test exe using NCCLID
            test_scope = fluid.Scope()
        test_exe = fluid.ParallelExecutor(True,
                                          main_program=test_prog,
                                          share_vars_from=exe)

    for pass_id in range(args.pass_num):
        num_samples = 0
        iters = 0
        start_time = time.time()
        if not args.use_reader_op:
            reader_generator = train_args[3]()  #train_reader
        batch_id = 0
        data = None
        if args.use_reader_op:
            train_args[4].start()
        while True:
            if not args.use_reader_op:
                data = next(reader_generator, None)
                if data is None:
                    break
            if args.profile and batch_id == 5:
                profiler.start_profiler("All")
                profiler.reset_profiler()
            elif args.profile and batch_id == 10:
                print("profiling total time: ", time.time() - start_time)
                profiler.stop_profiler(
                    "total", "/tmp/profile_%d_pass%d" % (trainer_id, pass_id))
            if iters == args.iterations:
                reader_generator.close()
                break

            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            fetch_list = [avg_loss.name]
            acc_name_list = [v.name for v in train_args[2]]
            fetch_list.extend(acc_name_list)

            if args.use_fake_data or args.use_reader_op:
                try:
                    fetch_ret = exe.run(fetch_list)
                except fluid.core.EOFException as eof:
                    break
                except fluid.core.EnforceNotMet as ex:
                    traceback.print_exc()
                    break
            else:
                fetch_ret = exe.run(fetch_list, feed=feeder.feed(data))
            if args.use_reader_op:
                num_samples += args.batch_size * args.gpus
            else:
                num_samples += len(data)

            iters += 1
            if batch_id % 1 == 0:
                fetched_data = [np.mean(np.array(d)) for d in fetch_ret]
                print("Pass %d, batch %d, loss %s, accucacys: %s" %
                      (pass_id, batch_id, fetched_data[0], fetched_data[1:]))
            batch_id += 1

        print_train_time(start_time, time.time(), num_samples)
        if args.use_reader_op:
            train_args[4].reset()  # reset reader handle
        else:
            del reader_generator

        if not args.no_test and test_args[2]:
            test_feeder = None
            if not args.use_reader_op:
                test_feed_var_list = [
                    var for var in test_prog.global_block().vars.values()
                    if var.is_data
                ]
                test_feeder = fluid.DataFeeder(test_feed_var_list, place)
            test_ret = test_parallel(test_exe, test_args, args, test_prog,
                                     test_feeder)
            print("Pass: %d, Test Accuracy: %s\n" %
                  (pass_id, [np.mean(np.array(v)) for v in test_ret]))

    print("total train time: ", time.time() - over_all_start)
Code example #16
    def train():
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            time_interval = 0.0
            batch_start_time = time.time()
            epoch_word_count = 0.0
            total_reader_cost = 0.0
            batch_read_start = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                input_data_feed, word_num = prepare_input(batch,
                                                          epoch_id=epoch_id)
                word_count += word_num
                total_reader_cost += time.time() - batch_read_start
                fetch_outs = exe.run(program=CompiledProgram,
                                     feed=input_data_feed,
                                     fetch_list=[loss.name],
                                     use_program_cache=True)

                cost_train = np.mean(fetch_outs[0])
                # print(cost_train)
                total_loss += cost_train * batch_size
                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)
                time_interval += batch_time
                epoch_word_count += word_num

                if batch_id > 0 and batch_id % 100 == 0:
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f; reader cost: %0.5f s; ips: %0.5f tokens/sec"
                        % (epoch_id, batch_id, batch_time,
                           np.exp(total_loss / word_count), total_reader_cost /
                           100, word_count / time_interval))
                    ce_ppl.append(np.exp(total_loss / word_count))
                    total_loss = 0.0
                    word_count = 0.0
                    time_interval = 0.0
                    total_reader_cost = 0.0

                # profiler tools
                if args.profile and epoch_id == 0 and batch_id == 100:
                    profiler.reset_profiler()
                elif args.profile and epoch_id == 0 and batch_id == 105:
                    return
                batch_start_time = time.time()
                batch_read_start = time.time()

            end_time = time.time()
            epoch_time = end_time - start_time
            ce_time.append(epoch_time)
            print(
                "\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step; ips: %0.5f tokens/sec\n"
                % (epoch_id, epoch_time, sum(batch_times) / len(batch_times),
                   epoch_word_count / sum(batch_times)))

            if not args.profile:
                save_path = os.path.join(args.model_path,
                                         "epoch_" + str(epoch_id),
                                         "checkpoint")
                print("begin to save", save_path)
                fluid.save(train_program, save_path)
                print("save finished")
                dev_ppl = eval(valid_data)
                print("dev ppl", dev_ppl)
                test_ppl = eval(test_data)
                print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except IndexError:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
Code example #17
    def build_model(self):
        data_shape = [None, 3, self.cfg.crop_size, self.cfg.crop_size]

        input_A = fluid.data(name='input_A', shape=data_shape, dtype='float32')
        input_B = fluid.data(name='input_B', shape=data_shape, dtype='float32')
        fake_pool_A = fluid.data(name='fake_pool_A',
                                 shape=data_shape,
                                 dtype='float32')
        fake_pool_B = fluid.data(name='fake_pool_B',
                                 shape=data_shape,
                                 dtype='float32')
        # used for continuous evaluation
        if self.cfg.enable_ce:
            fluid.default_startup_program().random_seed = 90

        A_loader = fluid.io.DataLoader.from_generator(feed_list=[input_A],
                                                      capacity=4,
                                                      iterable=True,
                                                      use_double_buffer=True)

        B_loader = fluid.io.DataLoader.from_generator(feed_list=[input_B],
                                                      capacity=4,
                                                      iterable=True,
                                                      use_double_buffer=True)

        gen_trainer = GTrainer(input_A, input_B, self.cfg, self.batch_num)
        d_A_trainer = DATrainer(input_B, fake_pool_B, self.cfg, self.batch_num)
        d_B_trainer = DBTrainer(input_A, fake_pool_A, self.cfg, self.batch_num)

        # prepare environment
        place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace()

        A_loader.set_batch_generator(self.A_reader,
                                     places=fluid.cuda_places() if
                                     self.cfg.use_gpu else fluid.cpu_places())
        B_loader.set_batch_generator(self.B_reader,
                                     places=fluid.cuda_places() if
                                     self.cfg.use_gpu else fluid.cpu_places())

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        A_pool = utility.ImagePool()
        B_pool = utility.ImagePool()

        if self.cfg.init_model:
            utility.init_checkpoints(self.cfg, gen_trainer, "net_G")
            utility.init_checkpoints(self.cfg, d_A_trainer, "net_DA")
            utility.init_checkpoints(self.cfg, d_B_trainer, "net_DB")

        ### memory optim
        build_strategy = fluid.BuildStrategy()
        build_strategy.enable_inplace = True

        gen_trainer_program = fluid.CompiledProgram(
            gen_trainer.program).with_data_parallel(
                loss_name=gen_trainer.g_loss.name,
                build_strategy=build_strategy)
        d_A_trainer_program = fluid.CompiledProgram(
            d_A_trainer.program).with_data_parallel(
                loss_name=d_A_trainer.d_loss_A.name,
                build_strategy=build_strategy)
        d_B_trainer_program = fluid.CompiledProgram(
            d_B_trainer.program).with_data_parallel(
                loss_name=d_B_trainer.d_loss_B.name,
                build_strategy=build_strategy)

        total_train_batch = 0  # NOTE :used for benchmark
        reader_cost_averager = timer.TimeAverager()
        batch_cost_averager = timer.TimeAverager()
        for epoch_id in range(self.cfg.epoch):
            batch_id = 0
            batch_start = time.time()
            for data_A, data_B in zip(A_loader(), B_loader()):
                if self.cfg.max_iter and total_train_batch == self.cfg.max_iter:  # used for benchmark
                    return
                reader_cost_averager.record(time.time() - batch_start)

                tensor_A, tensor_B = data_A[0]['input_A'], data_B[0]['input_B']
                ## optimize the g_A network
                g_A_loss, g_A_cyc_loss, g_A_idt_loss, g_B_loss, g_B_cyc_loss,\
                g_B_idt_loss, fake_A_tmp, fake_B_tmp = exe.run(
                    gen_trainer_program,
                    fetch_list=[
                        gen_trainer.G_A, gen_trainer.cyc_A_loss,
                        gen_trainer.idt_loss_A, gen_trainer.G_B,
                        gen_trainer.cyc_B_loss, gen_trainer.idt_loss_B,
                        gen_trainer.fake_A, gen_trainer.fake_B
                    ],
                    feed={"input_A": tensor_A,
                          "input_B": tensor_B})

                fake_pool_B = B_pool.pool_image(fake_B_tmp)
                fake_pool_A = A_pool.pool_image(fake_A_tmp)

                if self.cfg.enable_ce:
                    fake_pool_B = fake_B_tmp
                    fake_pool_A = fake_A_tmp

                # optimize the d_A network
                d_A_loss = exe.run(d_A_trainer_program,
                                   fetch_list=[d_A_trainer.d_loss_A],
                                   feed={
                                       "input_B": tensor_B,
                                       "fake_pool_B": fake_pool_B
                                   })[0]

                # optimize the d_B network
                d_B_loss = exe.run(d_B_trainer_program,
                                   fetch_list=[d_B_trainer.d_loss_B],
                                   feed={
                                       "input_A": tensor_A,
                                       "fake_pool_A": fake_pool_A
                                   })[0]

                batch_cost_averager.record(time.time() - batch_start,
                                           num_samples=self.cfg.batch_size)
                if batch_id % self.cfg.print_freq == 0:
                    print("epoch{}: batch{}: \n\
                         d_A_loss: {:.5f}; g_A_loss: {:.5f}; g_A_cyc_loss: {:.5f}; g_A_idt_loss: {:.5f}; \n\
                         d_B_loss: {:.5f}; g_B_loss: {:.5f}; g_B_cyc_loss: {:.5f}; g_B_idt_loss: {:.5f}; \n\
                         batch_cost: {:.5f} sec, reader_cost: {:.5f} sec, ips: {:.5f} images/sec"
                          .format(epoch_id, batch_id, d_A_loss[0], g_A_loss[0],
                                  g_A_cyc_loss[0], g_A_idt_loss[0],
                                  d_B_loss[0], g_B_loss[0], g_B_cyc_loss[0],
                                  g_B_idt_loss[0],
                                  batch_cost_averager.get_average(),
                                  reader_cost_averager.get_average(),
                                  batch_cost_averager.get_ips_average()))
                    reader_cost_averager.reset()
                    batch_cost_averager.reset()

                sys.stdout.flush()
                batch_id += 1
                total_train_batch += 1  # used for benchmark
                batch_start = time.time()

                # profiler tools
                if self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq:
                    profiler.reset_profiler()
                elif self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq + 5:
                    return

                # used for continuous evaluation
                if self.cfg.enable_ce and batch_id == 10:
                    break

            if self.cfg.run_test:
                A_image_name = fluid.data(name='A_image_name',
                                          shape=[None, 1],
                                          dtype='int32')
                B_image_name = fluid.data(name='B_image_name',
                                          shape=[None, 1],
                                          dtype='int32')
                A_test_loader = fluid.io.DataLoader.from_generator(
                    feed_list=[input_A, A_image_name],
                    capacity=4,
                    iterable=True,
                    use_double_buffer=True)

                B_test_loader = fluid.io.DataLoader.from_generator(
                    feed_list=[input_B, B_image_name],
                    capacity=4,
                    iterable=True,
                    use_double_buffer=True)

                A_test_loader.set_batch_generator(
                    self.A_test_reader,
                    places=fluid.cuda_places()
                    if self.cfg.use_gpu else fluid.cpu_places())
                B_test_loader.set_batch_generator(
                    self.B_test_reader,
                    places=fluid.cuda_places()
                    if self.cfg.use_gpu else fluid.cpu_places())
                test_program = gen_trainer.infer_program
                utility.save_test_image(epoch_id,
                                        self.cfg,
                                        exe,
                                        place,
                                        test_program,
                                        gen_trainer,
                                        A_test_loader,
                                        B_test_loader,
                                        A_id2name=self.A_id2name,
                                        B_id2name=self.B_id2name)

            if self.cfg.save_checkpoints:
                utility.checkpoints(epoch_id, self.cfg, gen_trainer, "net_G")
                utility.checkpoints(epoch_id, self.cfg, d_A_trainer, "net_DA")
                utility.checkpoints(epoch_id, self.cfg, d_B_trainer, "net_DB")

        # used for continuous evaluation
        if self.cfg.enable_ce:
            device_num = fluid.core.get_cuda_device_count(
            ) if self.cfg.use_gpu else 1
            print("kpis\tcyclegan_g_A_loss_card{}\t{}".format(
                device_num, g_A_loss[0]))
            print("kpis\tcyclegan_g_A_cyc_loss_card{}\t{}".format(
                device_num, g_A_cyc_loss[0]))
            print("kpis\tcyclegan_g_A_idt_loss_card{}\t{}".format(
                device_num, g_A_idt_loss[0]))
            print("kpis\tcyclegan_d_A_loss_card{}\t{}".format(
                device_num, d_A_loss[0]))
            print("kpis\tcyclegan_g_B_loss_card{}\t{}".format(
                device_num, g_B_loss[0]))
            print("kpis\tcyclegan_g_B_cyc_loss_card{}\t{}".format(
                device_num, g_B_cyc_loss[0]))
            print("kpis\tcyclegan_g_B_idt_loss_card{}\t{}".format(
                device_num, g_B_idt_loss[0]))
            print("kpis\tcyclegan_d_B_loss_card{}\t{}".format(
                device_num, d_B_loss[0]))
            print("kpis\tcyclegan_Batch_time_cost_card{}\t{}".format(
                device_num, batch_time))
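
Code example #17 above relies on a repo-local timer.TimeAverager helper for reader-cost, batch-cost and ips bookkeeping; it is not part of paddle.fluid. A minimal sketch of what such a helper could look like, with the method names record, reset, get_average and get_ips_average taken from the calls above and the implementation itself an assumption:

class TimeAverager(object):
    """Accumulate per-batch costs and derive average cost and throughput."""

    def __init__(self):
        self.reset()

    def reset(self):
        self._total_time = 0.0
        self._total_samples = 0
        self._count = 0

    def record(self, elapsed, num_samples=None):
        # `elapsed` is the wall-clock cost of one batch (or one read);
        # `num_samples` is optional and only needed for throughput.
        self._total_time += elapsed
        self._count += 1
        if num_samples:
            self._total_samples += num_samples

    def get_average(self):
        return self._total_time / self._count if self._count else 0.0

    def get_ips_average(self):
        # Samples per second over the recorded interval.
        return self._total_samples / self._total_time if self._total_time else 0.0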
Code example #18
File: train.py Project: zhong110020/models
def train(args):
    """OCR training"""

    if args.model == "crnn_ctc":
        train_net = ctc_train_net
        get_feeder_data = get_ctc_feeder_data
    else:
        train_net = attention_train_net
        get_feeder_data = get_attention_feeder_data

    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
    # define network
    sum_cost, error_evaluator, inference_program, model_average = train_net(
        args, data_shape, num_classes)

    # data reader
    train_reader = data_reader.train(args.batch_size,
                                     train_images_dir=args.train_images,
                                     train_list_file=args.train_list,
                                     cycle=args.total_step > 0,
                                     model=args.model)
    test_reader = data_reader.test(test_images_dir=args.test_images,
                                   test_list_file=args.test_list,
                                   model=args.model)

    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    if 'ce_mode' in os.environ:
        fluid.default_startup_program().random_seed = 90

    exe.run(fluid.default_startup_program())

    # load init model
    if args.init_model is not None:
        model_dir = args.init_model
        fluid.load(fluid.default_main_program(),
                   model_dir,
                   var_list=fluid.io.get_program_parameter(
                       fluid.default_main_program()))
        print("Init model from: %s." % args.init_model)

    train_exe = exe
    error_evaluator.reset(exe)
    if args.parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=True if args.use_gpu else False, loss_name=sum_cost.name)

    fetch_vars = [sum_cost] + error_evaluator.metrics

    def train_one_batch(data):
        var_names = [var.name for var in fetch_vars]
        if args.parallel:
            results = train_exe.run(var_names,
                                    feed=get_feeder_data(data, place))
            results = [np.array(result).sum() for result in results]
        else:
            results = train_exe.run(feed=get_feeder_data(data, place),
                                    fetch_list=fetch_vars)
            results = [result[0] for result in results]
        return results

    def test(iter_num):
        error_evaluator.reset(exe)
        for data in test_reader():
            exe.run(inference_program, feed=get_feeder_data(data, place))
        _, test_seq_error = error_evaluator.eval(exe)
        print("\n[%s] - Iter[%d]; Test seq error: %s.\n" % (time.asctime(
            time.localtime(time.time())), iter_num, str(test_seq_error[0])))

        # Note: the following logs are used for CE (continuous evaluation)
        # monitoring and can be ignored otherwise.
        if 'ce_mode' in os.environ:
            print("kpis	test_acc	%f" % (1 - test_seq_error[0]))

    def save_model(args, exe, iter_num):
        filename = "model_%05d" % iter_num
        fluid.save(fluid.default_main_program(),
                   os.path.join(args.save_model_dir, filename))
        print("Saved model to: %s/%s." % (args.save_model_dir, filename))

    iter_num = 0
    stop = False
    start_time = time.time()
    while not stop:
        total_loss = 0.0
        total_seq_error = 0.0
        batch_times = []
        # train a pass
        for data in train_reader():
            if args.total_step > 0 and iter_num == args.total_step + args.skip_batch_num:
                stop = True
                break
            if iter_num < args.skip_batch_num:
                print("Warm-up iteration")
            if iter_num == args.skip_batch_num:
                profiler.reset_profiler()
            start = time.time()
            results = train_one_batch(data)
            batch_time = time.time() - start
            fps = args.batch_size / batch_time
            batch_times.append(batch_time)
            total_loss += results[0]
            total_seq_error += results[2]

            iter_num += 1
            # training log
            if iter_num % args.log_period == 0:
                print("\n[%s] - Iter[%d]; Avg loss: %.3f; Avg seq err: %.3f" %
                      (time.asctime(time.localtime(
                          time.time())), iter_num, total_loss /
                       (args.log_period * args.batch_size), total_seq_error /
                       (args.log_period * args.batch_size)))
                if 'ce_mode' in os.environ:
                    print("kpis	train_cost	%f" %
                          (total_loss / (args.log_period * args.batch_size)))
                    print("kpis	train_acc	%f" %
                          (1 - total_seq_error /
                           (args.log_period * args.batch_size)))
                total_loss = 0.0
                total_seq_error = 0.0

            # evaluate
            if not args.skip_test and iter_num % args.eval_period == 0:
                if model_average:
                    with model_average.apply(exe):
                        test(iter_num)
                else:
                    test(iter_num)

            # save model
            if iter_num % args.save_model_period == 0:
                if model_average:
                    with model_average.apply(exe):
                        save_model(args, exe, iter_num)
                else:
                    save_model(args, exe, iter_num)
        end_time = time.time()
        if 'ce_mode' in os.environ:
            print("kpis	train_duration	%f" % (end_time - start_time))
        # Postprocess benchmark data
        latencies = batch_times[args.skip_batch_num:]
        latency_avg = np.average(latencies)
        latency_pc99 = np.percentile(latencies, 99)
        fpses = np.divide(args.batch_size, latencies)
        fps_avg = np.average(fpses)
        fps_pc99 = np.percentile(fpses, 1)

        # Benchmark output
        print('\nTotal examples (incl. warm-up): %d' %
              (iter_num * args.batch_size))
        print('average latency: %.5f s, 99pc latency: %.5f s' %
              (latency_avg, latency_pc99))
        print('average fps: %.5f, fps for 99pc latency: %.5f' %
              (fps_avg, fps_pc99))
Code example #19
def train(args):
    """OCR training"""

    if args.model == "crnn_ctc":
        train_net = ctc_train_net
        get_feeder_data = get_ctc_feeder_data

    train_images = args.train_images
    train_list = args.train_list
    test_images = args.test_images
    test_list = args.test_list
    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
    # define network
    sum_cost, error_evaluator, inference_program, model_average = train_net(
        args, data_shape, num_classes)

    logger = LogWriter('./log', sync_cycle=10)
    with logger.mode("train") as train_logger:
        train_acc = train_logger.scalar("train_acc")
        train_loss = train_logger.scalar("train_loss")
        val_loss = train_logger.scalar("val_loss")
        val_acc = train_logger.scalar("val_acc")
    # data reader

    train_reader = data_reader.train(
        args.batch_size,
        train_images_dir=train_images,
        train_list_file=train_list,
        cycle=args.total_step > 0,
        model=args.model)
    test_reader = data_reader.test(
        test_images_dir=test_images, test_list_file=test_list, model=args.model)


    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    if 'ce_mode' in os.environ:
        fluid.default_startup_program().random_seed = 90

    exe.run(fluid.default_startup_program())
    # init_list=[]
    #for param in fluid.default_main_program().global_block().all_parameters():
    # if "batch_norm" in param.name or "conv2d" in param.name:
    #     init_list.append(param.name)
    # print ("%s=%s=%s" % (param.name, param.name, param.shape))
    # load init model
    print("Initializing model...")
    if args.init_model is not None:
        model_dir = args.init_model
        model_file_name = None
        if not os.path.isdir(args.init_model):
            model_dir = os.path.dirname(args.init_model)
            model_file_name = os.path.basename(args.init_model)
        fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)

        print("Init model from: %s." % args.init_model)

    train_exe = exe
    error_evaluator.reset(exe)
    if args.parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=True if args.use_gpu else False, loss_name=sum_cost.name)

    fetch_vars = [sum_cost] + error_evaluator.metrics

    def train_one_batch(data):
        var_names = [var.name for var in fetch_vars]
        if args.parallel:
            results = train_exe.run(var_names,
                                    feed=get_feeder_data(data, place))
            results = [np.array(result).sum() for result in results]
        else:
            results = train_exe.run(feed=get_feeder_data(data, place),
                                    fetch_list=fetch_vars)
            results = [result[0] for result in results]
        return results

    def test(iter_num):
        error_evaluator.reset(exe)
        res = 0
        i = 0
        for data in test_reader():
            cost =  exe.run(inference_program, feed=get_feeder_data(data, place), fetch_list=[sum_cost])
            # if i == 0:
            #     print(cost[0])
            res += cost[0][0]
            i += 1
        val_loss.add_record(iter_num, res / i)
        _, test_seq_error = error_evaluator.eval(exe)
        print("\nTime: %s; Iter[%d]; Test seq error: %s.\n" % (
            time.time(), iter_num, str(test_seq_error[0])))
        val_acc.add_record(iter_num, 1 - test_seq_error[0])
        # Note: the following logs are used for CE (continuous evaluation)
        # monitoring and can be ignored otherwise.
        print("kpis test_acc    %f" % (1 - test_seq_error[0]))

    def save_model(args, exe, iter_num):
        filename = "model_%05d" % iter_num
        fluid.io.save_params(
            exe, dirname=args.save_model_dir, filename=filename)
        print("Saved model to: %s/%s." % (args.save_model_dir, filename))

    iter_num = 0
    stop = False
    start_time = time.time()
    while not stop:
        total_loss = 0.0
        total_seq_error = 0.0
        batch_times = []
        # train a pass
        for data in train_reader():
            if args.total_step > 0 and iter_num == args.total_step + args.skip_batch_num:
                stop = True
                break
            if iter_num < args.skip_batch_num:
                print("Warm-up iteration")
            if iter_num == args.skip_batch_num:
                profiler.reset_profiler()
            start = time.time()
            results = train_one_batch(data)
            batch_time = time.time() - start
            fps = args.batch_size / batch_time
            batch_times.append(batch_time)
            total_loss += results[0]
            total_seq_error += results[2]

            iter_num += 1
            # training log
            if iter_num % args.log_period == 0:
                avg_loss = total_loss / (args.log_period)
                avg_err = total_seq_error / (args.log_period * args.batch_size)
                print("\nTime: %s; Iter[%d]; Avg loss: %.3f; Avg seq err: %.3f" % (
                    time.time(), iter_num,
                    avg_loss, avg_err))
                print("kpis train_cost  %f" % (avg_loss))
                print("kpis train_acc   %f" % (1 - avg_err))
                train_loss.add_record(iter_num, avg_loss)
                train_acc.add_record(iter_num, 1 - avg_err )
                total_loss = 0.0
                total_seq_error = 0.0

            # evaluate
            if not args.skip_test and iter_num % args.eval_period == 0:
                if model_average:
                    with model_average.apply(exe):
                        test(iter_num)
                else:
                    test(iter_num)

            # save model
            if iter_num % args.save_model_period == 0:
                if model_average:
                    with model_average.apply(exe):
                        save_model(args, exe, iter_num)
                else:
                    save_model(args, exe, iter_num)
        end_time = time.time()
        print("kpis train_duration  %f" % (end_time - start_time))
        # Postprocess benchmark data
        latencies = batch_times[args.skip_batch_num:]
        latency_avg = np.average(latencies)
        latency_pc99 = np.percentile(latencies, 99)
        fpses = np.divide(args.batch_size, latencies)
        fps_avg = np.average(fpses)
        fps_pc99 = np.percentile(fpses, 1)

        # Benchmark output
        print('\nTotal examples (incl. warm-up): %d' %
              (iter_num * args.batch_size))
        print('average latency: %.5f s, 99pc latency: %.5f s' % (latency_avg,
                                                                 latency_pc99))
        print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg,
                                                                 fps_pc99))
Code example #20
File: infer.py Project: sunqiang25/models-develop
def infer(args):
    word = fluid.layers.data(name='word',
                             shape=[1],
                             dtype='int64',
                             lod_level=1)
    mention = fluid.layers.data(name='mention',
                                shape=[1],
                                dtype='int64',
                                lod_level=1)
    target = fluid.layers.data(name='target',
                               shape=[1],
                               dtype='int64',
                               lod_level=1)

    label_reverse_dict = load_reverse_dict(args.test_label_file)

    test_data = paddle.batch(reader.file_reader(args.test_data_dir),
                             batch_size=args.batch_size)
    place = fluid.CUDAPlace(0) if args.device == 'GPU' else fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[word, mention, target], place=place)
    exe = fluid.Executor(place)

    inference_scope = fluid.Scope()
    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(args.model_path, exe)
        total_passes = args.num_passes + args.skip_pass_num
        batch_times = [0] * total_passes
        word_counts = [0] * total_passes
        wpses = [0] * total_passes
        all_iters = 0
        for pass_id in range(total_passes):
            if pass_id < args.skip_pass_num:
                print("Warm-up pass")
            if pass_id == args.skip_pass_num:
                profiler.reset_profiler()
            iters = 0
            for data in test_data():
                word = to_lodtensor(list(map(lambda x: x[0], data)), place)
                mention = to_lodtensor(list(map(lambda x: x[1], data)), place)

                start = time.time()
                crf_decode = exe.run(inference_program,
                                     feed={
                                         "word": word,
                                         "mention": mention
                                     },
                                     fetch_list=fetch_targets,
                                     return_numpy=False)
                batch_time = time.time() - start
                lod_info = (crf_decode[0].lod())[0]
                np_data = np.array(crf_decode[0])
                word_count = 0
                assert len(data) == len(lod_info) - 1
                for sen_index in range(len(data)):
                    assert len(
                        data[sen_index]
                        [0]) == lod_info[sen_index + 1] - lod_info[sen_index]
                    word_index = 0
                    for tag_index in range(lod_info[sen_index],
                                           lod_info[sen_index + 1]):
                        word = str(data[sen_index][0][word_index])
                        gold_tag = label_reverse_dict[data[sen_index][2]
                                                      [word_index]]
                        tag = label_reverse_dict[np_data[tag_index][0]]
                        word_index += 1
                    word_count += word_index
                batch_times[pass_id] += batch_time
                word_counts[pass_id] += word_count
                iters += 1
                all_iters += 1
            batch_times[pass_id] /= iters
            word_counts[pass_id] /= iters
            wps = word_counts[pass_id] / batch_times[pass_id]
            wpses[pass_id] = wps

            print(
                "Pass: %d, iterations (total): %d (%d), latency: %.5f s, words: %d, wps: %f"
                % (pass_id, iters, all_iters, batch_times[pass_id],
                   word_counts[pass_id], wps))

    # Postprocess benchmark data
    latencies = batch_times[args.skip_pass_num:]
    latency_avg = np.average(latencies)
    latency_std = np.std(latencies)
    latency_pc99 = np.percentile(latencies, 99)
    wps_avg = np.average(wpses)
    wps_std = np.std(wpses)
    wps_pc01 = np.percentile(wpses, 1)

    # Benchmark output
    print('\nTotal passes (incl. warm-up): %d' % (total_passes))
    print('Total iterations (incl. warm-up): %d' % (all_iters))
    print('Total examples (incl. warm-up): %d' % (all_iters * args.batch_size))
    print('avg latency: %.5f, std latency: %.5f, 99pc latency: %.5f' %
          (latency_avg, latency_std, latency_pc99))
    print('avg wps: %.5f, std wps: %.5f, wps for 99pc latency: %.5f' %
          (wps_avg, wps_std, wps_pc01))
Code example #21
def inference(args):
    """OCR inference"""
    if args.model == "crnn_ctc":
        infer = ctc_infer
        get_feeder_data = get_ctc_feeder_for_infer

    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
    # define network
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    ids = infer(images, num_classes, use_cudnn=True if args.use_gpu else False)
    # data reader
    infer_reader = data_reader.inference(
        batch_size=args.batch_size,
        infer_images_dir=args.input_images_dir,
        infer_list_file=args.input_images_list,
        cycle=True if args.iterations > 0 else False,
        model=args.model)

    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load dictionary
    dict_map = data_reader.DICT
    dict_map = {id: ch for ch, id in dict_map.items()}

    # load init model
    model_dir = args.model_path
    model_file_name = None
    if not os.path.isdir(args.model_path):
        model_dir = os.path.dirname(args.model_path)
        model_file_name = os.path.basename(args.model_path)
    fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
    print("Init model from: %s." % args.model_path)

    batch_times = []
    iters = 0
    fp = open('result15000.txt', 'w+')
    for data in infer_reader():
        feed_dict = get_feeder_data(data, place)
        if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
            break
        if iters < args.skip_batch_num:
            print("Warm-up itaration")
        if iters == args.skip_batch_num:
            profiler.reset_profiler()

        start = time.time()
        result = exe.run(fluid.default_main_program(),
                         feed=feed_dict,
                         fetch_list=[ids],
                         return_numpy=False)
        indexes = np.array(result[0]).flatten()
        indexes = [id for id in indexes if id != num_classes]
        batch_time = time.time() - start
        fps = args.batch_size / batch_time
        batch_times.append(batch_time)
        if dict_map is not None:
            line = ""
            wrong_predict_flag = False
            for index in indexes:
                if index >= 0:
                    line += dict_map[index]
                else:
                    wrong_predict_flag = True
                    print("exceed dict")
                    # break
            fp.write(line + '\n')
        else:
            print("no dict")
            print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
                iters,
                batch_time,
                fps,
                indexes,
            ))

        iters += 1
    fp.close()
    latencies = batch_times[args.skip_batch_num:]
    latency_avg = np.average(latencies)
    latency_pc99 = np.percentile(latencies, 99)
    fpses = np.divide(args.batch_size, latencies)
    fps_avg = np.average(fpses)
    fps_pc99 = np.percentile(fpses, 1)

    # Benchmark output
    print('\nTotal examples (incl. warm-up): %d' % (iters * args.batch_size))
    print('average latency: %.5f s, 99pc latency: %.5f s' %
          (latency_avg, latency_pc99))
    print('average fps: %.5f, fps for 99pc latency: %.5f' %
          (fps_avg, fps_pc99))
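
Several of the examples above postprocess per-batch timings the same way: drop the warm-up measurements, then report the average and 99th-percentile latency together with the corresponding throughput. A small standalone helper capturing that convention (the function name and the returned dictionary layout are assumptions):

import numpy as np


def summarize_latencies(batch_times, batch_size, skip_batch_num=0):
    # Discard warm-up batches, then report latency and throughput statistics.
    latencies = np.asarray(batch_times[skip_batch_num:], dtype=np.float64)
    fpses = batch_size / latencies
    return {
        'latency_avg': float(np.average(latencies)),
        'latency_pc99': float(np.percentile(latencies, 99)),
        'fps_avg': float(np.average(fpses)),
        # fps corresponding to the 99th-percentile latency, i.e. the 1st
        # percentile of the throughput distribution.
        'fps_pc99': float(np.percentile(fpses, 1)),
    }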