Example #1
0
    def save_persistables(self, executor, dirname, main_program=None):
        """
        Validate the arguments, then save the persistable variables of a
        distributed program under `dirname`.

        The saving itself is delegated to `io.save_persistables`, which is
        always called with its fourth argument set to None.

        Args:
            executor: must be an `Executor`; a `ParallelExecutor` is rejected.
            dirname: folder handed over to `io.save_persistables`.
            main_program: program whose variables are saved. When None,
                `self.main_program` is used. A `CompiledProgram` is rejected.

        Raises:
            TypeError: if `executor` is a `ParallelExecutor` or is not an
                `Executor`, or if the program is a `CompiledProgram`.
            ValueError: if the program's `_is_distributed` flag is falsy.
        """
        # Pick the executor complaint first, raise once below. The
        # ParallelExecutor test must stay ahead of the generic Executor test
        # so that it keeps its dedicated message.
        executor_problem = None
        if isinstance(executor, ParallelExecutor):
            executor_problem = "in fleet.save_persistables() function, executor must be as Executor type, ParallelExecutor is not allowed"
        elif not isinstance(executor, Executor):
            executor_problem = "in fleet.save_persistables() function, executor must be as Executor type"
        if executor_problem is not None:
            raise TypeError(executor_problem)

        # self.main_program is only consulted when no program was passed in.
        program = self.main_program if main_program is None else main_program

        if isinstance(program, CompiledProgram):
            raise TypeError(
                "in fleet.save_persistables() function, main_program must be as Program type, CompiledProgram is not allowed"
            )

        if not program._is_distributed:
            raise ValueError(
                "main_program is for local, may not use fleet.save_persistables"
            )

        io.save_persistables(executor, dirname, program, None)
Example #2
0
def save_train_snapshot(executor, program, file_name="", train_info=None):
    """
    Save a timestamped training snapshot: a JSON info file plus the
    persistable variables of `program`.

    The snapshot name is `file_name` followed by "_" and the current local
    time formatted as `%Y-%m-%d_%H-%M-%S`.

    Args:
        executor: executor forwarded to `io.save_persistables`.
        program: program forwarded as `main_program`.
        file_name: prefix of the snapshot name (default: empty string).
        train_info: content written through `file_utils.save_file` in
            "json" format. Defaults to a fresh empty dict on every call.

    Returns:
        The value returned by `file_utils.get_fullurl("model", name, "dir")`,
        which is also used as the `dirname` for the saved variables.
    """
    # A `{}` default would be one dict object shared by every call, so a
    # None sentinel is used and a new dict is created per call instead.
    if train_info is None:
        train_info = {}

    # Without an explicit time tuple, strftime formats the current local time.
    timestamp = time.strftime('%Y-%m-%d_%H-%M-%S')
    file_name = file_name + "_" + timestamp
    file_path = file_utils.get_fullurl("model", file_name, "dir")
    file_utils.save_file(content=train_info, file_type="model", file_name=file_name, file_format="json")
    io.save_persistables(executor=executor, dirname=file_path, main_program=program)
    return file_path
Example #3
0
    def save_persistables(self,
                          executor,
                          dirname,
                          main_program=None,
                          filename=None):
        """
        This function filters out all variables with `persistable==True` from
        the given `main_program` and then saves these variables to the folder
        `dirname` or file `filename`.

        The `dirname` is used to specify the folder where persistable variables
        are going to be saved. If you would like to save variables in separate
        files, set `filename` None; if you would like to save all variables in a
        single file, use `filename` to specify the file name.

        Args:
            executor: must be an `Executor` instance.
            dirname: folder forwarded to `io.save_persistables`.
            main_program: must be a `Program`; when None,
                `self._origin_program` is used instead.
            filename: forwarded unchanged to `io.save_persistables`.

        Raises:
            AssertionError: if `executor` is not an `Executor` or the
                resolved program is not a `Program`.
        """
        # NOTE: the checks stay as `assert` so callers keep seeing
        # AssertionError; be aware that they are skipped under `python -O`.
        # The messages now name this function (they used to mention
        # save_inference_model).
        assert isinstance(executor, Executor), \
            "In fleet.save_persistables() function, executor must be as" \
            " Executor type."

        if main_program is None:
            main_program = self._origin_program

        assert isinstance(main_program, Program), \
            "In fleet.save_persistables() function, main_program " \
            "must be as Program type."

        io.save_persistables(executor,
                             dirname,
                             main_program,
                             filename=filename)
Example #4
0
    def save_persistables(self, executor, dirname, main_program=None):
        """
        Save the variables of `main_program` that have `persistable==True`
        into the folder `dirname`.

        This is a thin wrapper: the arguments are forwarded unchanged to
        `io.save_persistables`, and `filename` is always None, i.e. the
        variables are saved in separate files rather than in one single file.

        Args:
            executor: executor forwarded to `io.save_persistables`.
            dirname: folder where the persistable variables are saved.
            main_program: program forwarded as-is (None is not replaced here).
        """
        io.save_persistables(executor, dirname, main_program, filename=None)
Example #5
0
    def test_fit_line_inference_model(self):
        """
        Train a one-layer regression model, save it as an inference model in
        two layouts, load both back and check that they reproduce the loss.

        The first layout is written to MODEL_DIR with default file names, the
        second to UNI_MODEL_DIR with explicit 'model' and 'params' names.
        The second model is loaded from in-memory strings instead of from a
        directory. Finally, loading with a model string but without a params
        string is expected to raise ValueError.
        """
        MODEL_DIR = "./tmp/inference_model"
        UNI_MODEL_DIR = "./tmp/inference_model1"

        init_program = Program()
        program = Program()

        # Network: a single fc layer (size 1, no activation) trained with a
        # mean squared-error cost and SGD.
        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='float32')

            y_predict = layers.fc(input=x, size=1, act=None)

            cost = layers.square_error_cost(input=y_predict, label=y)
            avg_cost = layers.mean(cost)

            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
            sgd_optimizer.minimize(avg_cost, init_program)

        place = core.CPUPlace()
        exe = executor.Executor(place)

        # Run init_program once before the training loop.
        exe.run(init_program, feed={}, fetch_list=[])

        # 100 training steps on the same fixed 4-sample batch.
        for i in six.moves.xrange(100):
            tensor_x = np.array([[1, 1], [1, 2], [3, 4],
                                 [5, 2]]).astype("float32")
            tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

            exe.run(program,
                    feed={
                        'x': tensor_x,
                        'y': tensor_y
                    },
                    fetch_list=[avg_cost])

        # Separated model and unified model
        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
        save_inference_model(UNI_MODEL_DIR, ["x", "y"], [avg_cost], exe,
                             program, 'model', 'params')
        # Prune a clone of the program to the same feeds/targets, then call
        # save_persistables with dirname=None and filename=None; its return
        # value is kept and later passed to load_inference_model as the
        # params string.
        main_program = program.clone()._prune_with_input(
            feeded_var_names=["x", "y"], targets=[avg_cost])
        params_str = save_persistables(exe, None, main_program, None)

        # Reference loss from the original program, computed on the last
        # training batch.
        expected = exe.run(program,
                           feed={
                               'x': tensor_x,
                               'y': tensor_y
                           },
                           fetch_list=[avg_cost])[0]

        six.moves.reload_module(executor)  # reload to build a new scope

        # model_0 is loaded from the directory; model_1 from the serialized
        # model bytes read from disk plus the in-memory params string.
        model_0 = InferModel(load_inference_model(MODEL_DIR, exe))
        with open(os.path.join(UNI_MODEL_DIR, 'model'), "rb") as f:
            model_str = f.read()
        model_1 = InferModel(
            load_inference_model(None, exe, model_str, params_str))

        # Both loaded models must expose the same feeds, a single fetch
        # target, and reproduce the reference loss.
        for model in [model_0, model_1]:
            outs = exe.run(model.program,
                           feed={
                               model.feed_var_names[0]: tensor_x,
                               model.feed_var_names[1]: tensor_y
                           },
                           fetch_list=model.fetch_vars)
            actual = outs[0]

            self.assertEqual(model.feed_var_names, ["x", "y"])
            self.assertEqual(len(model.fetch_vars), 1)
            print("fetch %s" % str(model.fetch_vars[0]))
            self.assertEqual(expected, actual)

        # A model string without a params string must be rejected.
        self.assertRaises(ValueError, fluid.io.load_inference_model, None, exe,
                          model_str, None)
Example #6
0
    def run_trainer(self, args):
        """
        Run the trainer side of the save test and write a pickled result to
        stdout.

        Environment variables read:
            SAVE: integer flag (default "0"); a non-zero value enables saving.
            MODEL_DIR: directory passed to `io.save_persistables`
                (default: empty string, which disables saving).
            SAVE_MODE: "LOCAL" or "DIST".
            SKIP_STEPS: integer, read in "DIST" mode only (no default).

        "LOCAL" mode: when SAVE is set, RUN_STEP steps are trained and the
        persistables are saved if MODEL_DIR is non-empty. The flattened
        `__fc_b__` variable from the global scope is then pickled to stdout.

        "DIST" mode: 8 step indices are iterated. With SAVE set, every step
        is trained and trainer 0 saves right after step index SKIP_STEPS.
        Without it, batches up to and including index SKIP_STEPS are fetched
        but not trained on. The last fetched loss is pickled to stdout.

        Raises:
            Exception: if SAVE_MODE is neither "LOCAL" nor "DIST".
        """
        (test_program, avg_cost, train_reader, test_reader, batch_acc,
         predict) = self.get_model(batch_size=2)

        # Only the "pserver" update method goes through the transpiler.
        if args.update_method != "pserver":
            trainer_prog = fluid.default_main_program()
        else:
            transpiler = self.get_transpiler(args.trainer_id,
                                             fluid.default_main_program(),
                                             args.endpoints, args.trainers,
                                             args.sync_mode)
            trainer_prog = transpiler.get_trainer_program()

        place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

        startup_exe = fluid.Executor(place)
        startup_exe.run(fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1

        build_strategy = fluid.BuildStrategy()
        if args.use_reduce:
            chosen_reduce = fluid.BuildStrategy.ReduceStrategy.Reduce
        else:
            chosen_reduce = fluid.BuildStrategy.ReduceStrategy.AllReduce
        build_strategy.reduce_strategy = chosen_reduce

        exe = fluid.ParallelExecutor(args.use_cuda,
                                     loss_name=avg_cost.name,
                                     exec_strategy=exec_strategy,
                                     build_strategy=build_strategy)

        # Every variable of the global block flagged as data is fed.
        feed_var_list = []
        for candidate in trainer_prog.global_block().vars.values():
            if candidate.is_data:
                feed_var_list.append(candidate)

        feeder = fluid.DataFeeder(feed_var_list, place)
        reader_generator = train_reader()

        def get_data():
            # Next batch; with reader allocation under pserver, keep only the
            # items whose offset parity equals this trainer's id.
            batch = next(reader_generator)
            if not (args.update_method == "pserver" and args.use_reader_alloc):
                return batch
            return [
                item for offset, item in enumerate(batch)
                if offset % 2 == args.trainer_id
            ]

        def run_step(batch):
            # One training step; the executor must return exactly one value.
            fetched, = exe.run(fetch_list=[avg_cost.name],
                               feed=feeder.feed(batch))
            return fetched

        def emit(payload):
            # Pickle the payload and write it to stdout.
            dumped = pickle.dumps(payload)
            if six.PY2:
                print(dumped)
            else:
                sys.stdout.buffer.write(dumped)

        need_save = bool(int(os.getenv("SAVE", "0")))
        model_dir = os.getenv("MODEL_DIR", "")
        save_mode = os.getenv("SAVE_MODE", "")

        if save_mode != "LOCAL" and save_mode != "DIST":
            raise Exception("save_mode must be LOCAL or DIST")

        if save_mode == "LOCAL":
            if need_save:
                for _ in six.moves.xrange(RUN_STEP):
                    run_step(get_data())
                if model_dir:
                    io.save_persistables(startup_exe, model_dir, trainer_prog)

            var = np.array(
                fluid.global_scope().find_var('__fc_b__').get_tensor())
            emit(np.ravel(var).tolist())
            return

        # save_mode == "DIST"
        skip_steps = int(os.getenv("SKIP_STEPS"))
        loss = None
        for idx in six.moves.xrange(8):
            if need_save:
                loss = run_step(get_data())
                if model_dir and idx == skip_steps and args.trainer_id == 0:
                    io.save_persistables(startup_exe, model_dir, trainer_prog)
            else:
                # The batch is always consumed, even when the step is skipped.
                data = get_data()
                if idx > skip_steps:
                    loss = run_step(data)
        emit(loss.tolist())
Example #7
0
 def save_persistables(self, executor, dirname, main_program=None):
     """
     Forward the call to `io.save_persistables`.

     `executor`, `dirname` and `main_program` are passed through unchanged
     and the fourth argument (presumably `filename` -- confirm against the
     `io` module) is always None.
     """
     io.save_persistables(executor, dirname, main_program, filename=None)
Example #8
0
    def run_trainer(self, args):
        """
        Run the trainer side of the save test and write the pickled,
        flattened `__fc_b__` variable to stdout.

        Environment variables read:
            SAVE: integer flag (default "0"); a non-zero value enables the
                training loop and saving.
            MODEL_DIR: directory passed to `io.save_persistables`
                (default: empty string, which disables saving).

        When SAVE is set, RUN_STEP steps are trained and the persistables of
        the trainer program are saved if MODEL_DIR is non-empty. In every
        case `__fc_b__` is then read from the global scope and printed.
        """
        (test_program, avg_cost, train_reader, test_reader, batch_acc,
         predict) = self.get_model(batch_size=2)

        if args.mem_opt:
            fluid.memory_optimize(fluid.default_main_program(),
                                  skip_grads=True)

        # Only distributed runs go through the transpiler.
        if not args.is_dist:
            trainer_prog = fluid.default_main_program()
        else:
            transpiler = self.get_transpiler(args.trainer_id,
                                             fluid.default_main_program(),
                                             args.endpoints, args.trainers,
                                             args.sync_mode)
            trainer_prog = transpiler.get_trainer_program()

        place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

        startup_exe = fluid.Executor(place)
        startup_exe.run(fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1
        exec_strategy.allow_op_delay = False

        build_strategy = fluid.BuildStrategy()
        if args.use_reduce:
            chosen_reduce = fluid.BuildStrategy.ReduceStrategy.Reduce
        else:
            chosen_reduce = fluid.BuildStrategy.ReduceStrategy.AllReduce
        build_strategy.reduce_strategy = chosen_reduce

        exe = fluid.ParallelExecutor(args.use_cuda,
                                     loss_name=avg_cost.name,
                                     exec_strategy=exec_strategy,
                                     build_strategy=build_strategy)

        # Every variable of the global block flagged as data is fed.
        feed_var_list = []
        for candidate in trainer_prog.global_block().vars.values():
            if candidate.is_data:
                feed_var_list.append(candidate)

        feeder = fluid.DataFeeder(feed_var_list, place)
        reader_generator = train_reader()

        def get_data():
            # Next batch; with reader allocation in distributed mode, keep
            # only the items whose offset parity equals this trainer's id.
            batch = next(reader_generator)
            if not (args.is_dist and args.use_reader_alloc):
                return batch
            return [
                item for offset, item in enumerate(batch)
                if offset % 2 == args.trainer_id
            ]

        need_save = bool(int(os.getenv("SAVE", "0")))
        model_dir = os.getenv("MODEL_DIR", "")

        if need_save:
            for _ in six.moves.xrange(RUN_STEP):
                # The executor must return exactly one fetched value.
                loss, = exe.run(fetch_list=[avg_cost.name],
                                feed=feeder.feed(get_data()))
            if model_dir:
                io.save_persistables(startup_exe, model_dir, trainer_prog)

        var = np.array(fluid.global_scope().find_var('__fc_b__').get_tensor())
        dumped = pickle.dumps(np.ravel(var).tolist())
        if six.PY2:
            print(dumped)
        else:
            sys.stdout.buffer.write(dumped)