Example #1
    def check(self, place, use_cuda):
        paddle.manual_seed(1)
        paddle.framework.random._manual_program_seed(1)
        main_program = fluid.Program()
        startup_program = fluid.Program()
        x, y, loss = self.build_program(main_program, startup_program,
                                        use_cuda)
        exe = fluid.Executor(place)
        iters = 10
        batch_size = 16
        feeder = fluid.DataFeeder(feed_list=[x, y], place=place)

        # disable fuse_bn_act_ops
        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_bn_act_ops = False
        binary = fluid.CompiledProgram(main_program).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=batch_size)
        loss_vals = []
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup_program)
            for _ in range(iters):
                data = next(train_reader())
                loss_v = exe.run(binary,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                loss_vals.append(loss_v[0][0])

        # enable fuse_bn_act_ops
        build_strategy_fused = fluid.BuildStrategy()
        build_strategy_fused.fuse_bn_act_ops = True
        binary_fused = fluid.CompiledProgram(main_program).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy_fused)
        train_reader_fused = paddle.batch(paddle.dataset.mnist.train(),
                                          batch_size=batch_size)
        loss_vals_fused = []
        scope_fused = fluid.Scope()
        with fluid.scope_guard(scope_fused):
            exe.run(startup_program)
            for _ in range(iters):
                data = next(train_reader_fused())
                loss_v = exe.run(binary_fused,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                loss_vals_fused.append(loss_v[0][0])

        # check loss
        for i in range(iters):
            self.assertAlmostEqual(loss_vals[i],
                                   loss_vals_fused[i],
                                   delta=1e-5)
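Both runs above differ only in a single BuildStrategy flag set before compilation. A minimal sketch of the toggle, assuming main_program and loss are built as in the example:

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_bn_act_ops = True  # False disables the fusion pass
    binary = fluid.CompiledProgram(main_program).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)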
Example #2
    def check(self, place, use_cuda):
        paddle.seed(1)
        paddle.framework.random._manual_program_seed(1)
        iters = 5
        batch_size = 16

        # build_fused_program: turn on fuse_bn_add_act_ops
        main_program = fluid.Program()
        startup_program = fluid.Program()
        loss = self.build_origin_program(main_program, startup_program,
                                         use_cuda)
        build_strategy_fused = fluid.BuildStrategy()
        build_strategy_fused.fuse_bn_add_act_ops = True
        binary_fused = fluid.CompiledProgram(main_program).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy_fused)
        exe = fluid.Executor(place)
        loss_vals_fused = []
        x_data = []
        y_data = []
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup_program)
            for _ in range(iters):
                x = np.random.random((batch_size, 1, 28, 28)).astype("float32")
                y = np.random.random((batch_size, 1)).astype("int64")
                x_data.append(x)
                y_data.append(y)
                loss_v = exe.run(binary_fused,
                                 feed={"x": x,
                                       "y": y},
                                 fetch_list=[loss])
                loss_vals_fused.append(loss_v[0][0])

        # build_origin_program: turn off fuse_bn_add_act_ops
        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_bn_add_act_ops = False
        binary = fluid.CompiledProgram(main_program).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        loss_vals = []
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup_program)
            for i in range(iters):
                loss_v = exe.run(binary,
                                 feed={"x": x_data[i],
                                       "y": y_data[i]},
                                 fetch_list=[loss])
                loss_vals.append(loss_v[0][0])

        # check loss
        for i in range(iters):
            self.assertAlmostEqual(loss_vals[i], loss_vals_fused[i], delta=1e-5)
Example #3
    def _compile_and_initialize(self, prog, mode):
        compiled_prog = self._compiled_progs.get(mode, None)
        if compiled_prog is not None:
            return compiled_prog

        assert self.model._place is not None, \
            "device is not set, please call `model.prepare()` first"

        place = self.model._place

        # XXX *ALL WEIGHTS* should be initialized upon model construction
        # even if `forward()` may run different code path for different mode
        # therefore startup program only needs to run once
        if self._executor is None:
            self._executor = fluid.Executor(place)
            # XXX incremental initialization
            uninitialized = []
            for var_py in self._startup_prog.list_vars():
                var = fluid.global_scope().find_var(var_py.name)
                if not var_py.name.startswith('nccl_id') and var and \
                        var.get_tensor()._is_initialized():
                    continue

                uninitialized.append(var_py)
            if uninitialized:
                startup_prog = self._startup_prog._prune(uninitialized)
                self._executor.run(startup_prog)

        if self._nranks < 2:
            compiled_prog = fluid.CompiledProgram(prog)
        else:
            compiled_prog = prog

        self._compiled_progs[mode] = compiled_prog
        return compiled_prog
Example #4
def best_strategy_compiled(args, program, loss, exe):
    """make a program which wrapped by a compiled program
    """

    if os.getenv('FLAGS_use_ngraph'):
        return program
    else:
        build_strategy = fluid.compiler.BuildStrategy()

        exec_strategy = fluid.ExecutionStrategy()

        if args.use_gpu:
            exec_strategy.num_threads = fluid.core.get_cuda_device_count()

        exec_strategy.num_iteration_per_drop_scope = 10

        num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
        if num_trainers > 1 and args.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy, program)
            # NOTE: the process is fast when num_threads is 1
            # for multi-process training.
            exec_strategy.num_threads = 1

        compiled_program = fluid.CompiledProgram(program).with_data_parallel(
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

        return compiled_program
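A plausible call site for best_strategy_compiled; args, train_prog, loss, exe, and feed_dict are assumed to come from the surrounding training script:

    compiled = best_strategy_compiled(args, train_prog, loss, exe)
    loss_val, = exe.run(compiled, feed=feed_dict, fetch_list=[loss.name])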
Example #5
    def init_model(self):
        """
        Initialize the prediction model from the saved parameter path: compile
        the prediction program, assign model parameters, and set the parallel strategy.
        :param vocab_size: vocabulary size
        :return:
        """
        model_path = self.args["load_model_path"]
        self.logger.info("Initializing predict model...")
        self.exe = fluid.Executor(
            TrainEngine.get_executor_run_places(self.args))
        with fluid.program_guard(self.predict_program, self.predict_startup):
            # Define the network from gzl's model; outputs are placeholders
            loader, probs, qas_id = classifier.create_model_for_cls_merge(
                args=self.args_model_build, is_prediction=True)
            self.logger.info("Prediction neural network created.")

        self.logger.info("Prediction neural network parameter initialized.")

        # Run the startup program to initialize parameters
        self.exe.run(self.predict_startup)

        # Load model parameters into the network
        load_model_params(self.exe, model_path, self.predict_program)

        # If running in parallel, compile the program for data parallelism
        if self.args["use_parallel"]:
            build_strategy = fluid.BuildStrategy()
            # The parallel strategy is hard-coded for now
            build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
            self.predict_program = fluid.CompiledProgram(self.predict_program). \
                with_data_parallel(places=TrainEngine.get_data_run_places(self.args),
                                   build_strategy=build_strategy)

        self.logger.info("Finish initializing predict model!")
        return loader, probs, qas_id
Example #6
        def run_program(enable_addto):
            np.random.seed(10)
            paddle.seed(10)
            paddle.framework.random._manual_program_seed(10)
            if fluid.core.is_compiled_with_cuda():
                fluid.set_flags({"FLAGS_cudnn_deterministic": True})
            fluid.set_flags({"FLAGS_max_inplace_grad_add": 2})
            loss, main, startup, w = create_program(data_format=data_format)
            place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
            ) else fluid.CPUPlace()
            exe = fluid.Executor(place)

            strategy = fluid.BuildStrategy()
            strategy.enable_addto = enable_addto
            compiled = fluid.CompiledProgram(main).with_data_parallel(
                loss_name=loss.name, build_strategy=strategy)

            exe.run(startup)
            img = np.random.uniform(-128, 128,
                                    [8, 3, 224, 224]).astype(np.float32)
            for i in range(10):
                res = exe.run(compiled,
                              feed={'img': img},
                              fetch_list=[loss.name, w.name])
            return res
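A sketch of how run_program is presumably driven: with fixed seeds and FLAGS_cudnn_deterministic, the run with the addto optimization enabled should reproduce the baseline:

    res_baseline = run_program(enable_addto=False)
    res_addto = run_program(enable_addto=True)
    for base, addto in zip(res_baseline, res_addto):
        np.testing.assert_allclose(base, addto, atol=1e-5)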
Example #7
    def _init_pred(self, instance, infer_model_path):
        inst = instance
        if 'pred_output_path' not in inst.config:
            inst.config['pred_output_path'] = os.path.join(
                inst.config.get('save_path', '.'), inst.name)

        if not os.path.exists(inst.config['pred_output_path']):
            os.makedirs(inst.config['pred_output_path'])

        pred_backbone = self.Backbone(self.bb_conf, phase='pred')
        pred_parad = inst.Paradigm(inst.config,
                                   phase='pred',
                                   backbone_config=self.bb_conf)
        inst.task_layer['pred'] = pred_parad
        pred_joint_input_names, pred_joint_shape_and_dtypes, name_to_position = merge_input_attrs(
            pred_backbone.inputs_attr,
            inst.task_layer['pred'].inputs_attrs['reader'],
            insert_taskid=False,
            insert_batchsize=False,
            insert_seqlen=False,
            insert_batchsize_x_seqlen=False)

        pred_prog = inst.load(infer_model_path)
        pred_prog = fluid.CompiledProgram(pred_prog).with_data_parallel()
        if inst.reader['pred'] is None:
            pred_reader = inst.Reader(inst.config, phase='pred')
            inst.reader['pred'] = pred_reader
        return pred_prog
Example #8
def train(use_cuda):
    # define program
    train_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            # For training:
            # inputs = [src, src_sequence_length, trg, trg_sequence_length, label]
            inputs, loader = data_func(is_train=True)
            logits = model_func(inputs, is_train=True)
            loss = loss_func(logits, inputs[-1], inputs[-2])
            optimizer = optimizer_func()
            optimizer.minimize(loss)

    # define data source
    places = fluid.cuda_places() if use_cuda else fluid.cpu_places()
    loader.set_batch_generator(inputs_generator(batch_size,
                                                eos_id,
                                                is_train=True),
                               places=places)

    exe = fluid.Executor(places[0])
    exe.run(startup_prog)
    prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name)

    EPOCH_NUM = 20
    for pass_id in six.moves.xrange(EPOCH_NUM):
        batch_id = 0
        for data in loader():
            loss_val = exe.run(prog, feed=data, fetch_list=[loss])[0]
            print('pass_id: %d, batch_id: %d, loss: %f' %
                  (pass_id, batch_id, loss_val))
            batch_id += 1
        fluid.io.save_params(exe, model_save_dir, main_program=train_prog)
Example #9
    def main_impl(self, place):
        image = fluid.layers.data(name='image',
                                  shape=self.image_shape,
                                  dtype='float32')
        relu_image = fluid.layers.relu(image)
        loss = fluid.layers.reduce_mean(relu_image)

        build_strategy = fluid.BuildStrategy()
        build_strategy.enable_inplace = True
        build_strategy.memory_optimize = True

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        compiled_prog = fluid.CompiledProgram(
            fluid.default_main_program()).with_data_parallel(
                loss_name=loss.name, build_strategy=build_strategy)

        image_tensor = fluid.LoDTensor()
        np_image = np.random.uniform(low=-10, high=10,
                                     size=self.image_shape).astype('float32')
        image_tensor.set(np_image, place)

        feed_dict = [{image.name: image_tensor}]

        for _ in range(self.iteration):
            exe.run(compiled_prog, feed=feed_dict, fetch_list=[loss.name])
            self.assertTrue(np.array_equal(np.array(image_tensor), np_image))
Example #10
    def run_main_with_place(self, places, use_compiled_program=True):
        with fluid.scope_guard(fluid.Scope()):
            with fluid.program_guard(fluid.Program(), fluid.Program()):
                input_data, loss, loader = self.build_network(places)
                fetch_list = [input_data]

                exe = fluid.Executor(places[0])
                exe.run(fluid.default_startup_program())

                dev_cnt = len(places)
                if dev_cnt > 1:
                    self.assertTrue(use_compiled_program)

                main_program = fluid.default_main_program()
                if use_compiled_program:
                    main_program = fluid.CompiledProgram(
                        main_program).with_data_parallel(loss_name=loss.name,
                                                         places=places)

                max_batch_num = min(self.break_num,
                                    int(self.batch_num / dev_cnt))

                if loader.iterable:
                    early_break = False
                    for epoch_id in six.moves.range(self.epoch_num):
                        early_break = False
                        batch_id = 0
                        for data in loader():
                            if batch_id >= self.break_num:
                                early_break = True
                                break
                            self.assertInputData(batch_id, data, dev_cnt)
                            fetch_val, = exe.run(program=main_program,
                                                 feed=data,
                                                 fetch_list=fetch_list)
                            self.assertInputData(batch_id, fetch_val, dev_cnt)
                            batch_id += 1

                        self.assertEqual(batch_id, max_batch_num)

                    if early_break:
                        loader._reset()
                else:
                    for epoch_id in six.moves.range(self.epoch_num):
                        batch_id = 0
                        loader.start()
                        try:
                            while True:
                                if batch_id >= self.break_num:
                                    loader.reset()
                                    break
                                fetch_val, = exe.run(program=main_program,
                                                     fetch_list=fetch_list)
                                self.assertInputData(batch_id, fetch_val,
                                                     dev_cnt)
                                batch_id += 1
                        except fluid.core.EOFException:
                            loader.reset()

                        self.assertEqual(batch_id, max_batch_num)
Example #11
    def abs_max_run(self, reader, exe, step=None, loss_name=None):
        fetch_list = []
        with fluid.program_guard(self.program):
            for act_name in self.real_names:
                act = self.program.global_block().var(act_name)
                act = fluid.layers.reduce_max(
                    fluid.layers.abs(act), name=act_name + "_reduced")
                fetch_list.append(act_name + "_reduced.tmp_0")

        if not hasattr(self.program, '_program'):
            # Compile the native program to speed up
            program = fluid.CompiledProgram(self.program).with_data_parallel(
                loss_name=loss_name)
        else:
            program = self.program
        for idx, data in enumerate(reader):
            vars_np = exe.run(program=program, feed=data, fetch_list=fetch_list)
            vars_np = [np.max(var) for var in vars_np]
            mapped_vars_np = dict(zip(self.real_names, vars_np))
            values = self.update(mapped_vars_np)

            if idx % 10 == 0:
                _logger.info("Collecting..., Step: {}".format(idx))

            if step is not None and idx + 1 >= step:
                break
        return values
Example #12
    def _freeze(self):
        """
        Call before entering the train loop.
        Converts the program to a compiled program;
        does nothing if loss is None, i.e. not in train mode.
        """
        if self._loss is None:
            log.debug('will not freeze a program without loss')
            return
        if isinstance(self._program.train_program, F.compiler.CompiledProgram):
            log.debug('program has already been built')
            return
        exec_strategy = F.ExecutionStrategy()
        exec_strategy.num_threads = 4  # 2 for fp32, 4 for fp16
        exec_strategy.use_experimental_executor = True
        exec_strategy.num_iteration_per_drop_scope = 10  # important for performance

        build_strategy = F.BuildStrategy()
        build_strategy.remove_unnecessary_lock = False
        #build_strategy.fuse_broadcast_ops = True
        build_strategy.num_trainers = distribution.status.num_replica
        build_strategy.trainer_id = distribution.status.replica_id
        build_strategy.memory_optimize = True

        log.info('replica id %d of %d' % (distribution.status.replica_id, distribution.status.num_replica))

        program = F.CompiledProgram(self._program.train_program).with_data_parallel(
            loss_name=self._loss.name, build_strategy=build_strategy, exec_strategy=exec_strategy)
        self._program = ProgramPair(train_program=program, startup_program=self._program.startup_program)
Example #13
    def test(program):

        compiled_eval_prog = fluid.CompiledProgram(program)

        results = eval_run(
            exe,
            compiled_eval_prog,
            eval_loader,
            eval_keys,
            eval_values,
            eval_cls,
            cfg=cfg)
        resolution = None
        if 'mask' in results[0]:
            resolution = model.mask_head.resolution
        dataset = cfg['EvalReader']['dataset']
        box_ap_stats = eval_results(
            results,
            cfg.metric,
            cfg.num_classes,
            resolution,
            is_bbox_normalized,
            FLAGS.output_eval,
            map_type,
            dataset=dataset)
        return box_ap_stats[0]
Example #14
def main():
    seg_num = 8
    target_size = 224

    video_files = [FLAGS.data + '/' + f for f in os.listdir(FLAGS.data)]
    pipeline = VideoPipe(video_files, seg_num, target_size, FLAGS.stride)

    video_loader = DALIGenericIterator(pipeline, ['image'],
                                       len(video_files),
                                       dynamic_shape=True)

    exe = fluid.Executor(fluid.CUDAPlace(0))
    startup_prog = fluid.Program()
    eval_prog = fluid.Program()

    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            fetch_list = build(seg_num, target_size)

    exe.run(startup_prog)
    compiled_eval_prog = fluid.CompiledProgram(eval_prog)

    load_weights(exe, eval_prog, PRETRAIN_WEIGHTS)

    labels = json.load(open("kinetics_labels.json"))

    for idx, batch in enumerate(video_loader):
        fetches = exe.run(compiled_eval_prog,
                          feed=batch,
                          fetch_list=fetch_list)
        pred = fetches[0][0]
        topk_indices = pred.argsort()[-FLAGS.topk:]
        topk_labels = [labels[i] for i in topk_indices]
        filename = video_files[idx]
        print("prediction for {} is: {}".format(filename, topk_labels))
Example #15
def compile(config, program, loss_name=None):
    """
    Compile the program

    Args:
        config(dict): config
        program(fluid.Program): the program to be wrapped
        loss_name(str): loss name

    Returns:
        compiled_program(fluid.CompiledProgram): a compiled program
    """
    build_strategy = fluid.compiler.BuildStrategy()
    #build_strategy.fuse_bn_act_ops = config.get("fuse_bn_act_ops")
    #build_strategy.fuse_elewise_add_act_ops = config.get("fuse_elewise_add_act_ops")
    exec_strategy = fluid.ExecutionStrategy()

    exec_strategy.num_threads = 1
    exec_strategy.num_iteration_per_drop_scope = 10

    compiled_program = fluid.CompiledProgram(program).with_data_parallel(
        loss_name=loss_name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    return compiled_program
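A hedged usage sketch for compile; config, train_program, loss, exe, and feed_dict are assumed to exist in the caller:

    compiled_train = compile(config, train_program, loss_name=loss.name)
    loss_val, = exe.run(compiled_train, feed=feed_dict, fetch_list=[loss.name])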
Example #16
    def test_main(self):
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(main_prog, startup_prog):
            pred = fluid.data(name='pred', shape=[None, self.class_num], dtype='float32')
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            acc = Accuracy(topk=self.topk, name=self.name)
            state = acc.add_metric_op(pred, label)

        exe = fluid.Executor(fluid.CPUPlace())
        compiled_main_prog = fluid.CompiledProgram(main_prog)

        for i in range(10):
            label, pred = self.random_pred_label()
            state_ret = exe.run(compiled_main_prog,
                                feed={'pred': pred, 'label': label},
                                fetch_list=[s.name for s in to_list(state)],
                                return_numpy=True)
            acc.update(*state_ret)
            res_m = acc.accumulate()
            res_f = accuracy(pred, label, self.topk)
            assert np.all(np.isclose(np.array(res_m), np.array(res_f), rtol=1e-3)), \
                    "Accuracy precision error: {} != {}".format(res_m, res_f)
            acc.reset()
            assert np.sum(acc.total) == 0
            assert np.sum(acc.count) == 0
Example #17
def compile(config, program, loss_name=None, share_prog=None):
    """
    Compile the program

    Args:
        config(dict): config
        program(fluid.Program): the program to be wrapped
        loss_name(str): loss name
        share_prog(fluid.CompiledProgram): the shared program, used for evaluation during training

    Returns:
        compiled_program(fluid.CompiledProgram): a compiled program
    """
    build_strategy = fluid.compiler.BuildStrategy()
    exec_strategy = fluid.ExecutionStrategy()

    exec_strategy.num_threads = 1
    exec_strategy.num_iteration_per_drop_scope = 10

    compiled_program = fluid.CompiledProgram(program).with_data_parallel(
        share_vars_from=share_prog,
        loss_name=loss_name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    return compiled_program
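share_vars_from lets an evaluation program reuse the variables of an already-compiled training program instead of holding its own copy. A sketch under the assumption that train_prog and eval_prog were built beforehand:

    compiled_train = compile(config, train_prog, loss_name=loss.name)
    compiled_eval = compile(config, eval_prog, share_prog=compiled_train)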
Example #18
    def test_program_feed_scalar(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        with fluid.program_guard(main_program, startup_program):
            with fluid.scope_guard(scope):
                lr, cost = self.net()
                cpu = fluid.CPUPlace()
                exe = fluid.Executor(cpu)
                exe.run(startup_program)
                print(scope.find_var("fc_0.w_0").get_tensor())
                compiled_prog = fluid.CompiledProgram(
                    main_program).with_data_parallel(loss_name=cost.name)
                train_data = numpy.array([[1.0], [2.0], [3.0],
                                          [4.0]]).astype('float32')
                y_true = numpy.array([[2.0], [4.0], [6.0],
                                      [8.0]]).astype('float32')
            self.assertRaises(AssertionError,
                              exe.run,
                              compiled_prog,
                              feed={
                                  'x': train_data,
                                  'y': y_true,
                                  'lr': 0.01
                              },
                              fetch_list=[lr, cost])
Example #19
    def test_compiled_program_feed_scalar(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        with fluid.program_guard(main_program, startup_program):
            with fluid.scope_guard(scope):
                lr, cost = self.net()
                cpu = fluid.CPUPlace()
                exe = fluid.Executor(cpu)
                exe.run(startup_program)
                compiled_prog = fluid.CompiledProgram(
                    main_program).with_data_parallel(loss_name=cost.name)
                train_data = numpy.array(
                    [[1.0], [2.0], [3.0], [4.0]]).astype('float32')
                y_true = numpy.array(
                    [[2.0], [4.0], [6.0], [8.0]]).astype('float32')
                a = 0.01
                _lr, _ = exe.run(compiled_prog,
                                 feed={'x': train_data,
                                       'y': y_true,
                                       'lr': a},
                                 fetch_list=[lr, cost],
                                 return_numpy=False)
                self.assertEqual(_lr._dtype(), lr.dtype)
                self.assertEqual(_lr._dtype(), fluid.core.VarDesc.VarType.FP32)
                self.assertEqual(type(a), float)
Example #20
    def _get_gradient(self,
                      input_to_check,
                      place,
                      output_names,
                      no_grad_set,
                      parallel=False):
        prog = Program()
        block = prog.global_block()
        self._append_ops(block)
        loss = append_loss_ops(block, output_names)
        param_grad_list = append_backward(loss=loss,
                                          parameter_list=input_to_check,
                                          no_grad_set=no_grad_set)

        inputs = self._get_inputs(block)
        feed_dict = self.feed_var(inputs, place)

        fetch_list = [g for p, g in param_grad_list]
        if parallel:
            use_cuda = False
            if isinstance(place, fluid.CUDAPlace):
                use_cuda = True
            compiled_prog = fluid.CompiledProgram(prog).with_data_parallel(
                loss_name=loss.name, places=place)
            prog = compiled_prog
        executor = fluid.Executor(place)
        return list(
            map(np.array,
                executor.run(prog, feed_dict, fetch_list, return_numpy=False)))
Example #21
    def train(self, print_steps=5):
        """
        start training.

        Args:
            print_steps: int. Logging frequency of training message, e.g., current step, loss and speed.
        """
        
        iterator = self._train_iterator
        self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=self._loss_var.name)

        time_begin = time.time()
        for feed in iterator:
            rt_outputs = self.train_one_step(feed)

            task_rt_outputs = {k[len(self.name+'.'):]: v for k,v in rt_outputs.items() if k.startswith(self.name+'.')}
            self._task_head.batch_postprocess(task_rt_outputs)


            if print_steps > 0 and self._cur_train_step % print_steps == 0:
                loss = rt_outputs[self.name+'.loss']
                loss = np.mean(np.squeeze(loss)).tolist()

                time_end = time.time()
                time_cost = time_end - time_begin

                print("step {}/{} (epoch {}), loss: {:.3f}, speed: {:.2f} steps/s".format(
                       (self._cur_train_step-1) % self._steps_pur_epoch + 1, self._steps_pur_epoch, self._cur_train_epoch,
                       loss, print_steps / time_cost))
                sys.stdout.flush()
                time_begin = time.time() 

            if self._num_epochs is None and not self._multi_task and self._cur_train_step == self._steps_pur_epoch:
                break
Example #22
    def test_prune_compiled_program(self):
        program = framework.Program()
        startup_program = framework.Program()
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(program, startup_program):
                (x, y, label, loss1, loss2, w_param_attrs) = self.net1()
                sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.5)
                sgd_optimizer.minimize(loss1)
                exe = fluid.Executor(fluid.CPUPlace())
                exe.run(startup_program)
                compiled_prog = fluid.CompiledProgram(
                    program).with_data_parallel(loss_name=loss1.name,
                                                places=fluid.CPUPlace())
                weight_init = np.array(
                    scope.find_var(w_param_attrs.name).get_tensor())
                x_np = np.random.random(size=(10, 2)).astype('float32')
                label_np = np.random.randint(1, size=(10, 1)).astype('int64')
                res = exe.run(compiled_prog,
                              feed={
                                  'x': x_np,
                                  'label': label_np
                              },
                              fetch_list=[loss1.name],
                              use_prune=True)
                self.assertIsNotNone(scope.find_var(loss1.name))
                self.assertIsNone(scope.find_var(loss2.name))
                weight = np.array(
                    scope.find_var(w_param_attrs.name).get_tensor())
                self.assertFalse(np.array_equal(weight_init,
                                                weight))  # weight changed
Example #23
    def _build_env(self):
        """
        building the program and strategy for specific running phase.
        """
        if self.env.is_inititalized:
            return

        self._build_env_start_event()
        self.env.is_inititalized = True
        self.env.main_program = clone_program(self._base_main_program,
                                              for_test=False)

        self.env.startup_program = fluid.Program()
        with fluid.program_guard(self.env.main_program,
                                 self._base_startup_program):
            with fluid.unique_name.guard(self.env.UNG):
                self.env.outputs = self._build_net()
                if self.is_train_phase or self.is_test_phase:
                    self.env.labels = self._add_label()
                    self.env.loss = self._add_loss()
                    self.env.metrics = self._add_metrics()

        if self.is_predict_phase or self.is_test_phase:
            self.env.main_program = clone_program(self.env.main_program,
                                                  for_test=True)
            hub.common.paddle_helper.set_op_attr(self.env.main_program,
                                                 is_test=True)

        if self.config.enable_memory_optim:
            for var_name in self.fetch_list:
                var = self.env.main_program.global_block().vars[var_name]
                var.persistable = True

        if self.is_train_phase:
            with fluid.program_guard(self.env.main_program,
                                     self._base_startup_program):
                with fluid.unique_name.guard(self.env.UNG):
                    self.scheduled_lr, self.max_train_steps = self.config.strategy.execute(
                        self.loss, self._base_data_reader, self.config,
                        self.device_count)

        if self.is_train_phase:
            loss_name = self.env.loss.name
        else:
            loss_name = None

        share_vars_from = self._base_compiled_program

        if not self.config.use_data_parallel:
            self.env.main_program_compiled = None
        else:
            self.env.main_program_compiled = fluid.CompiledProgram(
                self.env.main_program).with_data_parallel(
                    loss_name=loss_name,
                    share_vars_from=share_vars_from,
                    build_strategy=self.build_strategy,
                    places=self.places)

        self.exe.run(self.env.startup_program)
        self._build_env_end_event()
Example #24
    def train_one_step(self, batch):

        if not self._dist_train_init:
            self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=self._loss_var.name)
            self._dist_train_init = True

        exe = self._exe
        distribute_train_prog = self._distribute_train_prog
        fetch_list = self._fetch_list

        if gpu_dev_count > 1:
            feed, mask = batch
            rt_outputs = exe.run(distribute_train_prog, feed=feed, fetch_list=fetch_list)
            num_fakes = decode_fake(len(rt_outputs[0]), mask, self._train_batch_size)
            if num_fakes:
                rt_outputs = [i[:-num_fakes] for i in rt_outputs]
        
        else:
            feed = self._feed_batch_process_fn(batch)
            rt_outputs = exe.run(distribute_train_prog, feed=feed, fetch_list=fetch_list)

        rt_outputs = {k:v for k,v in zip(self._fetch_names, rt_outputs)}
        self._cur_train_step += 1
        self._check_save()
        self._cur_train_epoch = (self._cur_train_step-1) // self._steps_pur_epoch
        return rt_outputs
Example #25
    def run_main(self, num_workers, places, persistent_workers, use_pe=True):
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            startup_prog, main_prog, image, label, loss = simple_fc_net_static()

            dataset = RandomDataset(SAMPLE_NUM, CLASS_NUM)
            dataloader = DataLoader(
                dataset,
                feed_list=[image, label],
                places=places,
                num_workers=num_workers,
                batch_size=BATCH_SIZE,
                return_list=False,
                drop_last=True,
                persistent_workers=persistent_workers)
            assert len(dataloader) == int(SAMPLE_NUM / BATCH_SIZE)

            exe = fluid.Executor(place=places[0])
            exe.run(startup_prog)

            if use_pe:
                prog = fluid.CompiledProgram(main_prog)
                if len(places) > 1:
                    prog = prog.with_data_parallel(
                        loss_name=loss.name, places=places)
            else:
                prog = main_prog

            step_list = []
            loss_list = []
            start_t = time.time()
            for _ in six.moves.range(EPOCH_NUM):
                step = 0
                for d in dataloader:
                    assert len(d) == len(places), "{} != {}".format(
                        len(d), len(places))
                    for i, item in enumerate(d):
                        image = item['image']
                        label = item['label']
                        assert image.shape() == [BATCH_SIZE, IMAGE_SIZE]
                        assert label.shape() == [BATCH_SIZE, 1]
                        assert image._place()._equals(places[i])
                        assert label._place()._equals(places[i])
                    L, = exe.run(program=prog,
                                 feed=d,
                                 fetch_list=[loss],
                                 use_program_cache=True)
                    loss_list.append(np.mean(L))
                    step += 1
                step_list.append(step)

        end_t = time.time()
        ret = {
            "time": end_t - start_t,
            "step": step_list,
            "loss": np.array(loss_list)
        }
        print("time cost", ret['time'], 'step_list', ret['step'])
        return ret
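A plausible invocation of run_main, comparing persistent and non-persistent workers; the parameter values are illustrative only:

    places = fluid.cpu_places(1)
    ret_regular = self.run_main(num_workers=2, places=places, persistent_workers=False)
    ret_persistent = self.run_main(num_workers=2, places=places, persistent_workers=True)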
Example #26
    def compile_program_not_compiled(self):
        with fluid.program_guard(fluid.Program()):
            # build model
            self.build_simple_model()
            # compile program
            program = fluid.default_main_program()
            compiled_program = fluid.CompiledProgram(
                program).with_data_parallel()
            return compiled_program
Example #27
def best_strategy_compiled(args,
                           program,
                           loss,
                           exe,
                           mode="train",
                           share_prog=None):
    """make a program which wrapped by a compiled program
    """

    if os.getenv('FLAGS_use_ngraph'):
        return program
    else:
        build_strategy = fluid.compiler.BuildStrategy()
        try:
            fluid.require_version(min_version='1.7.0')
            build_strategy.fuse_bn_act_ops = args.fuse_bn_act_ops
        except Exception as e:
            logger.info(
                "PaddlePaddle version 1.7.0 or higher is "
                "required when you want to fuse batch_norm and activation_op.")
        build_strategy.fuse_elewise_add_act_ops = args.fuse_elewise_add_act_ops

        try:
            build_strategy.fuse_bn_add_act_ops = args.fuse_bn_add_act_ops
        except Exception as e:
            logger.info(
                "PaddlePaddle 2.0-rc or higher is "
                "required when you want to enable fuse_bn_add_act_ops strategy."
            )
        try:
            build_strategy.enable_addto = args.enable_addto
        except Exception as e:
            logger.info("PaddlePaddle 2.0-rc or higher is "
                        "required when you want to enable addto strategy.")

        exec_strategy = fluid.ExecutionStrategy()

        if args.use_gpu:
            exec_strategy.num_threads = fluid.core.get_cuda_device_count()

        exec_strategy.num_iteration_per_drop_scope = 10000 if args.use_pure_fp16 else 10

        num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
        if num_trainers > 1 and args.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy, program)
            # NOTE: the process is fast when num_threads is 1
            # for multi-process training.
            exec_strategy.num_threads = 1

        compiled_program = fluid.CompiledProgram(program).with_data_parallel(
            loss_name=loss.name if mode == "train" else None,
            share_vars_from=share_prog if mode == "val" else None,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

        return compiled_program
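For evaluation during training, the same helper can share variables with the compiled training program; a sketch assuming both programs and losses were built by the caller:

    compiled_train = best_strategy_compiled(args, train_prog, train_loss, exe)
    compiled_eval = best_strategy_compiled(
        args, eval_prog, eval_loss, exe, mode="val", share_prog=compiled_train)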
Example #28
    def test_get_valid_program_error(self):
        # case 1: CompiledProgram built from a Graph, so it has no Program
        graph = core.Graph(core.ProgramDesc())
        compiled_program = fluid.CompiledProgram(graph)
        with self.assertRaises(TypeError):
            fluid.io._get_valid_program(compiled_program)

        # case 2: main_program type error
        with self.assertRaises(TypeError):
            fluid.io._get_valid_program("program")
Example #29
def create_multi_devices_program(program, loss_var_name):
    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    compile_program = fluid.CompiledProgram(program).with_data_parallel(
        loss_name=loss_var_name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
    return compile_program
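A minimal sketch of driving create_multi_devices_program, assuming main_program, loss, exe, and feed_dict exist:

    multi_dev_prog = create_multi_devices_program(main_program, loss.name)
    loss_val, = exe.run(multi_dev_prog, feed=feed_dict, fetch_list=[loss.name])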
Example #30
    def calc_sub_out(self, place=None, parallel=None):
        x = fluid.layers.ones(shape=[2, 2], dtype='float32')
        y = fluid.layers.ones(shape=[2, 2], dtype='float32')
        out = fluid.layers.elementwise_sub(x=x, y=y)
        program = fluid.default_main_program()
        if parallel:
            program = fluid.CompiledProgram(program).with_data_parallel(
                places=place)
        exe = fluid.Executor(place)
        out = exe.run(program, fetch_list=[out], return_numpy=False)
        return out