Code example #1
    def test_pslib_1(self):
        """Test cases for pslib."""
        import paddle.fluid as fluid
        from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
        from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib
        from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker

        os.environ["POD_IP"] = "127.0.0.1"
        os.environ["PADDLE_PORT"] = "36001"
        os.environ["TRAINING_ROLE"] = "TRAINER"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
        os.environ["PADDLE_TRAINER_ID"] = "0"
        role_maker = GeneralRoleMaker(
            init_timeout_seconds=100,
            run_timeout_seconds=100,
            http_ip_port="127.0.0.1:36003")
        #role_maker.generate_role()
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        #fleet.init(role_maker)
        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        with fluid.program_guard(train_program, startup_program):
            show = fluid.layers.data(name="show", shape=[-1, 1], \
                dtype="float32", lod_level=1, append_batch_size=False)
            fc = fluid.layers.fc(input=show, size=1, act=None)
            label = fluid.layers.data(name="click", shape=[-1, 1], \
                dtype="int64", lod_level=1, append_batch_size=False)
            label_cast = fluid.layers.cast(label, dtype='float32')
            cost = fluid.layers.log_loss(fc, label_cast)
        try:
            adam = fluid.optimizer.Adam(learning_rate=0.000005)
            adam = fleet.distributed_optimizer(adam)
            adam.minimize([cost], [scope])
            fleet.run_server()
            http_server_d = {}
            http_server_d["running"] = False
            size_d = {}
            role_maker._GeneralRoleMaker__start_kv_server(http_server_d, size_d)
        except:
            print("do not support pslib test, skip")
            return

        from paddle.fluid.incubate.fleet.base.role_maker import MockBarrier
        mb = MockBarrier()
        mb.barrier()
        mb.barrier_all()
        mb.all_reduce(1)
        mb.all_gather(1)
        os.environ["POD_IP"] = "127.0.0.1"
        os.environ["PADDLE_PORT"] = "36005"
        os.environ["TRAINING_ROLE"] = "TRAINER"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36005"
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36006"
        os.environ["PADDLE_IS_BARRIER_ALL_ROLE"] = "0"
        role_maker = GeneralRoleMaker(path="test_mock1")
        role_maker.generate_role()
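
All of these tests drive a single-process setup through environment variables before constructing GeneralRoleMaker. A minimal sketch of that setup as a reusable helper is below; the helper name and default ports are assumptions, while the variable names are exactly the ones the example above sets.

    import os

    def set_single_node_env(trainer_port="36001", pserver_port="36002"):
        # Hypothetical helper, not part of the Paddle test suite: collects the
        # environment variables the example above assigns one by one.
        os.environ["POD_IP"] = "127.0.0.1"
        os.environ["PADDLE_PORT"] = trainer_port
        os.environ["TRAINING_ROLE"] = "TRAINER"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:" + trainer_port
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:" + pserver_port
        os.environ["PADDLE_TRAINER_ID"] = "0"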
Code example #2
    def test_pslib_1(self):
        """Test cases for pslib."""
        import paddle.fluid as fluid
        from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
        from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib
        from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
        try:
            import netifaces
        except:
            print("warning: no netifaces, skip test_pslib_1")
            return
        os.environ["POD_IP"] = "127.0.0.1"
        os.environ["PADDLE_PORT"] = "36001"
        os.environ["TRAINING_ROLE"] = "TRAINER"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
        os.environ["PADDLE_TRAINER_ID"] = "0"
        role_maker = GeneralRoleMaker()
        role_maker.generate_role()
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        fleet.init(role_maker)
        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        with fluid.program_guard(train_program, startup_program):
            show = fluid.layers.data(name="show", shape=[-1, 1], \
                dtype="int64", lod_level=1, append_batch_size=False)
            emb = fluid.layers.embedding(input=show, size=[1, 1], \
                is_sparse=True, is_distributed=True, \
                param_attr=fluid.ParamAttr(name="embedding"))
            fc = fluid.layers.fc(input=emb, size=1, act=None)
            label = fluid.layers.data(name="click", shape=[-1, 1], \
                dtype="int64", lod_level=1, append_batch_size=False)
            label_cast = fluid.layers.cast(label, dtype='float32')
            cost = fluid.layers.log_loss(fc, label_cast)

        strategy = {}
        strategy["embedding"] = {}
        strategy["embedding"]["sparse_accessor_class"] = "DownpourUnitAccessor"
        strategy["embedding"]["embed_sparse_optimizer"] = "naive"
        try:
            adam1 = fluid.optimizer.Adam(learning_rate=0.000005)
            adam1 = fleet.distributed_optimizer(adam1, strategy=strategy)
            adam1.minimize([cost], [scope])

            strategy["embedding"]["embed_sparse_optimizer"] = "adagrad"
            adam2 = fluid.optimizer.Adam(learning_rate=0.000005)
            adam2 = fleet.distributed_optimizer(adam2, strategy=strategy)
            adam2.minimize([cost], [scope])

            strategy["embedding"]["embed_sparse_optimizer"] = "adam"
            adam3 = fluid.optimizer.Adam(learning_rate=0.000005)
            adam3 = fleet.distributed_optimizer(adam3, strategy=strategy)
            adam3.minimize([cost], [scope])
        except:
            print("do not support pslib test, skip")
            return
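
The three optimizer blocks above differ only in the embed_sparse_optimizer value, so they can be read as one loop. A compressed sketch, assuming a pslib-enabled build and the fleet, cost and scope objects prepared earlier in the example:

    strategy = {"embedding": {"sparse_accessor_class": "DownpourUnitAccessor"}}
    for sparse_opt in ("naive", "adagrad", "adam"):
        strategy["embedding"]["embed_sparse_optimizer"] = sparse_opt
        try:
            opt = fluid.optimizer.Adam(learning_rate=0.000005)
            opt = fleet.distributed_optimizer(opt, strategy=strategy)
            opt.minimize([cost], [scope])   # pslib takes lists of losses and scopes
        except Exception:
            print("do not support pslib test, skip")
            break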
Code example #3
File: test_fleet.py  Project: wuhuachaocoding/Paddle
    def test_pslib_1(self):
        """Test cases for pslib."""
        import paddle.fluid as fluid
        from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
        from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib
        from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker

        os.environ["POD_IP"] = "127.0.0.1"
        os.environ["PADDLE_PORT"] = "36001"
        os.environ["TRAINING_ROLE"] = "TRAINER"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
        os.environ["PADDLE_TRAINER_ID"] = "0"
        role_maker = GeneralRoleMaker()
        #role_maker.generate_role()
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        #fleet.init(role_maker)
        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        with fluid.program_guard(train_program, startup_program):
            show = fluid.layers.data(name="show", shape=[-1, 1], \
                dtype="int64", lod_level=1, append_batch_size=False)
            emb = fluid.layers.embedding(input=show, size=[1, 1], \
                is_sparse=True, is_distributed=True, \
                param_attr=fluid.ParamAttr(name="embedding"))
            bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
            bow = fluid.layers.data_norm(input=bow, epsilon=1e-4, name="norm")
            fc = fluid.layers.fc(input=bow, size=1, act=None)
            label = fluid.layers.data(name="click", shape=[-1, 1], \
                dtype="int64", lod_level=1, append_batch_size=False)
            label_cast = fluid.layers.cast(label, dtype='float32')
            cost = fluid.layers.log_loss(fc, label_cast)
        try:
            adam = fluid.optimizer.Adam(learning_rate=0.000005)
            adam = fleet.distributed_optimizer(
                adam,
                strategy={
                    "embedding": {
                        "sparse_accessor_class": "DownpourSparseValueAccessor"
                    }
                })
            adam.minimize([cost], [scope])
            fleet.run_server()
        except:
            print("do not support pslib test, skip")
            return
        try:
            # worker should call these methods instead of server
            # the following is only for test when with_pslib=off
            def test_func():
                """
                it is only a test function
                """
                return True

            fleet._role_maker.is_first_worker = test_func
            fleet._role_maker._barrier_worker = test_func
            fleet.save_model("./model_000")
            fleet.save_one_table(0, "./model_001")
            fleet.save_one_table(0, "./model_002", prefix="hahaha")
            fleet.load_model("./model_0003")
            fleet.load_one_table(0, "./model_004")
        except:
            print("do not support pslib test, skip")
            return
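
The monkeypatching of is_first_worker and _barrier_worker above is what lets the save/load calls run on a single process. A hedged sketch of the same trick written as a helper that restores the original methods afterwards; the helper name and structure are illustrative, not part of the test:

    def save_model_single_process(path):
        rm = fleet._role_maker
        saved = (rm.is_first_worker, rm._barrier_worker)
        rm.is_first_worker = lambda: True   # pretend to be worker 0
        rm._barrier_worker = lambda: None   # no-op barrier
        try:
            fleet.save_model(path)
        finally:
            rm.is_first_worker, rm._barrier_worker = saved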
Code example #4
 def test_pslib_1(self):
     """Test cases for pslib."""
     import paddle.fluid as fluid
     from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
     from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib
     from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
     try:
         import netifaces
     except:
         print("warning: no netifaces, skip test_pslib_1")
         return
     os.environ["POD_IP"] = "127.0.0.1"
     os.environ["PADDLE_PORT"] = "36001"
     os.environ["TRAINING_ROLE"] = "TRAINER"
     os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
     os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
     os.environ["PADDLE_TRAINER_ID"] = "0"
     role_maker = GeneralRoleMaker()
     role_maker.generate_role()
     place = fluid.CPUPlace()
     exe = fluid.Executor(place)
     fleet.init(role_maker)
     train_program = fluid.Program()
     startup_program = fluid.Program()
     scope = fluid.Scope()
     with fluid.program_guard(train_program, startup_program):
         show = fluid.layers.data(name="show", shape=[-1, 1], \
             dtype="float32", lod_level=1, append_batch_size=False)
         fc = fluid.layers.fc(input=show, size=1, act=None)
         label = fluid.layers.data(name="click", shape=[-1, 1], \
             dtype="int64", lod_level=1, append_batch_size=False)
         label_cast = fluid.layers.cast(label, dtype='float32')
         cost = fluid.layers.log_loss(fc, label_cast)
     try:
         adam = fluid.optimizer.Adam(learning_rate=0.000005)
         adam = fleet.distributed_optimizer(adam)
         adam.minimize([cost], [scope])
         fleet.run_server()
     except:
         print("do not support pslib test, skip")
         return
     fleet.clear_one_table(0)
     from paddle.fluid.incubate.fleet.base.role_maker import \
         MPISymetricRoleMaker
     try:
         role = MPISymetricRoleMaker()
         role._all_reduce([1], [2])
     except:
         print("catch expected error of not inited")
     try:
         role = MPISymetricRoleMaker()
         role._all_reduce([1], [2], "min")
     except:
         print("catch expected error of not inited")
     try:
         role = MPISymetricRoleMaker()
         role._all_reduce([1], [2], "max")
     except:
         print("catch expected error of not inited")
     try:
         role = MPISymetricRoleMaker()
         role._all_reduce([1], [2], "unknown")
     except:
         print("catch expected error of unknown type")
Code example #5
    def test_pslib_2(self):
        """Test cases for pslib."""
        import paddle.fluid as fluid
        from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
        from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
        from paddle.fluid.incubate.fleet.base.role_maker import RoleMakerBase

        os.environ["POD_IP"] = "127.0.0.1"
        os.environ["PADDLE_PORT"] = "36001"
        os.environ["TRAINING_ROLE"] = "TRAINER"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
        os.environ["PADDLE_TRAINER_ID"] = "0"
        os.environ["PADDLE_TRAINERS_NUM"] = "1"
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        try:
            fleet.init(None)
        except:
            print("no mpi4py, skip test_pslib_2")
            return
        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        with fluid.program_guard(train_program, startup_program):
            show = fluid.layers.data(name="show", shape=[-1, 1], \
                                     dtype="float32", lod_level=1, append_batch_size=False)
            fc = fluid.layers.fc(input=show, size=1, act=None)
            label = fluid.layers.data(name="click", shape=[-1, 1], \
                                      dtype="int64", lod_level=1, append_batch_size=False)
            label_cast = fluid.layers.cast(label, dtype='float32')
            cost = fluid.layers.log_loss(fc, label_cast)
        try:
            adam = fluid.optimizer.Adam(learning_rate=0.000005)
            adam = fleet.distributed_optimizer(adam)
            adam.minimize([cost], [scope])
            fleet.run_server()
        except:
            print("do not support pslib test, skip")
            return
        os.environ["TRAINING_ROLE"] = "wrong"
        try:
            role1 = GeneralRoleMaker(path="./test_gloo_1")
            role1.generate_role()
        except:
            print("catch expected error of wrong TRAINING_ROLE")
        os.environ["TRAINING_ROLE"] = "PSERVER"
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001"
        role2 = GeneralRoleMaker(path="./test_gloo_2")
        role2._finalize()
        role2._all_gather(1)
        role2._all_gather(1)
        role2._barrier_server()
        role2._all_gather(1)
        role3 = GeneralRoleMaker(path="./test_gloo_3")
        role3._worker_gather(1)
        role3._worker_gather(1)
        os.environ["TRAINING_ROLE"] = "TRAINER"
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
        role4 = GeneralRoleMaker(path="./test_gloo_4")
        role4._worker_gather(1)
        role4._get_rank()
        role4._get_size()
        role4._all_comm.init()
        role5 = GeneralRoleMaker(path="./test_gloo_5")
        role5.get_local_endpoint()
        role5.get_local_endpoint()
        role6 = GeneralRoleMaker(path="./test_gloo_6")
        role6.get_trainer_endpoints()
        role6.get_trainer_endpoints()
        role7 = GeneralRoleMaker(path="./test_gloo_7")
        role7.get_pserver_endpoints()
        role7.get_pserver_endpoints()
        role8 = GeneralRoleMaker(path="./test_gloo_8")
        role8.is_worker()
        role8.is_worker()
        role9 = GeneralRoleMaker(path="./test_gloo_9")
        role9.is_server()
        role9.is_server()
        role10 = GeneralRoleMaker(path="./test_gloo_10")
        role10.is_first_worker()
        role10.is_first_worker()
        role11 = GeneralRoleMaker(path="./test_gloo_11")
        role11.worker_index()
        role11.worker_index()
        role12 = GeneralRoleMaker(path="./test_gloo_12")
        role12.server_index()
        role12.server_index()
        role13 = GeneralRoleMaker(path="./test_gloo_13")
        role13.worker_num()
        role13.worker_num()
        role14 = GeneralRoleMaker(path="./test_gloo_14")
        role14.server_num()
        role14.server_num()
        role15 = GeneralRoleMaker(path="./test_gloo_15")
        role15._barrier_worker()
        role15._barrier_worker()
        role16 = GeneralRoleMaker(path="./test_gloo_16")
        role16._barrier_all()
        role16._barrier_all()
        role17 = GeneralRoleMaker(path="./test_gloo_17")
        role17._barrier_server()
        role17._barrier_server()
        role18 = GeneralRoleMaker(path="./test_gloo_18")
        role18._worker_num()
        role18._worker_num()
        role19 = GeneralRoleMaker(path="./test_gloo_19")
        role19._server_num()
        role19._server_num()
        role20 = GeneralRoleMaker(path="./test_gloo_20")
        a = [1]
        b = [0]
        role20._all_reduce(a, b)
        role21 = GeneralRoleMaker(path="./test_gloo_21")
        role21.all_reduce_worker([], [])
        role21.all_reduce_worker([], [])
        role21.barrier_worker()
        role21.barrier_all()
        role22 = GeneralRoleMaker(path="./test_gloo_22")
        role22._get_rank()
        role22._get_rank()
        os.environ["PADDLE_PSERVER_ID"] = "0"
        role23 = GeneralRoleMaker(path="./test_gloo_23")
        role23._get_size()
        role23._get_size()
        with open("test_fleet_gloo_role_maker_1.txt", "w") as f:
            data = "1 1 1 1\n"
            f.write(data)

        dataset = paddle.distributed.InMemoryDataset()
        dataset.set_filelist(["test_fleet_gloo_role_maker_1.txt"])
        dataset._set_use_var([show, label])
        dataset.load_into_memory()
        dataset.get_memory_data_size(fleet)
        dataset.get_shuffle_data_size(fleet)
        os.remove("./test_fleet_gloo_role_maker_1.txt")

        class TmpClass():
            """
            dummy tmp class
            """
            def __init__(self):
                pass

            def all_reduce_worker(self, input, output):
                """
                dummy all reduce worker

                Args:
                    input(None): fake input
                    output(None): fake output
                """
                pass

            def barrier_worker(self):
                """
                dummy barrier worker
                """
                pass

        from paddle.fluid.incubate.fleet.base.fleet_base import Fleet

        class TmpFleet(Fleet):
            """
            dummy tmp fleet
            """
            def __init__(self):
                super(TmpFleet, self).__init__()
                self._role_maker = None

            def init_worker(self):
                """
                dummy init worker
                """
                pass

            def init_server(self, model_dir=None):
                """
                dummy init server

                Args:
                    model_dir(None): fake model_dir
                """
                pass

            def run_server(self):
                """
                dummy run server
                """
                pass

            def stop_worker(self):
                """
                dummy stop worker
                """
                pass

            def distributed_optimizer(self, optimizer, strategy=None):
                """
                dummy distributed optimizer

                Args:
                    optimizer(None): fake optimizer
                    strategy(None): fake strategy
                """
                pass

            def save_inference_model(self):
                """
                dummy save inference model
                """
                pass

            def save_persistables(self):
                """
                dummy save persistables
                """
                pass

        os.environ["TRAINING_ROLE"] = "TRAINER"
        tmp = TmpFleet()
        tmp._role_maker = TmpClass()
        tmp.all_reduce_worker([], [])
        tmp.barrier_worker()
        from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
        tmp = RoleMakerBase()
        tmp.all_gather(1)
        tmp.all_reduce_worker([], [])
        tmp.barrier_worker()
        tmp.barrier_all()
        from paddle.fluid.incubate.fleet.base.role_maker import \
            MPISymetricRoleMaker
        tmp1 = MPISymetricRoleMaker()
        tmp1.all_gather(1)
        tmp1.all_gather(1)
        tmp2 = MPISymetricRoleMaker()
        tmp2.all_reduce_worker([], [])
        tmp3 = MPISymetricRoleMaker()
        tmp3.barrier_worker()
        tmp3.barrier_worker()
        tmp4 = MPISymetricRoleMaker()
        tmp4.barrier_all()
        tmp4.barrier_all()
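
TmpClass works as a stand-in role maker because, as the last few calls suggest, the Fleet base class forwards all_reduce_worker and barrier_worker to whatever object is stored in _role_maker. A minimal sketch of that delegation, assuming TmpFleet as defined above:

    class StubRoleMaker(object):
        # Any object exposing these two methods is enough for the delegation test.
        def all_reduce_worker(self, input, output):
            pass

        def barrier_worker(self):
            pass

    tmp = TmpFleet()
    tmp._role_maker = StubRoleMaker()
    tmp.all_reduce_worker([], [])   # forwarded to StubRoleMaker.all_reduce_worker
    tmp.barrier_worker()            # forwarded to StubRoleMaker.barrier_worker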
Code example #6
File: test_fleet_1.py  Project: iducn/Paddle
    def test_pslib_1(self):
        """Test cases for pslib."""
        import paddle.fluid as fluid
        from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
        from paddle.fluid.incubate.fleet.parameter_server.pslib import \
            fleet_embedding, _prepare_params, _fleet_embedding, \
            _fleet_embedding_v2, FLEET_GLOBAL_DICT
        from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
        try:
            import netifaces
        except:
            print("warning: no netifaces, skip test_pslib_1")
            return
        os.environ["POD_IP"] = "127.0.0.1"
        os.environ["PADDLE_PORT"] = "36001"
        os.environ["TRAINING_ROLE"] = "TRAINER"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
        os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002"
        os.environ["PADDLE_TRAINER_ID"] = "0"
        role_maker = GeneralRoleMaker()
        role_maker.generate_role()
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        fleet.init(role_maker)
        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        global FLEET_GLOBAL_DICT
        with fluid.program_guard(train_program, startup_program):
            show = fluid.layers.data(name="show", shape=[-1, 1], \
                dtype="int64", lod_level=1, append_batch_size=False)
            click = fluid.layers.data(name="click", shape=[-1, 1], \
                dtype="int64", lod_level=1, append_batch_size=False)
            with fleet_embedding(click_name=click.name):
                emb = fluid.layers.embedding(input=show, size=[1, 1], \
                    is_sparse=True, is_distributed=True, \
                    param_attr=fluid.ParamAttr(name="embedding"))
            emb = fluid.layers.data_norm(input=emb,
                                         name="a",
                                         epsilon=1e-4,
                                         param_attr={
                                             "batch_size": 1e4,
                                             "batch_sum_default": 0.0,
                                             "batch_square": 1e4
                                         })
            fc = fluid.layers.fc(input=emb, size=1, act=None)
            label = fluid.layers.data(name="click", shape=[-1, 1], \
                dtype="int64", lod_level=1, append_batch_size=False)
            label_cast = fluid.layers.cast(label, dtype='float32')
            cost = fluid.layers.log_loss(fc, label_cast)
        try:
            adam = fluid.optimizer.Adam(learning_rate=0.000005)
            adam = fleet.distributed_optimizer(
                adam,
                strategy={
                    "embedding": {
                        "sparse_accessor_class": "DownpourSparseValueAccessor"
                    }
                })
            adam.minimize([cost], [scope])
        except:
            print("do not support pslib test, skip")
            return
        FLEET_GLOBAL_DICT["cur_accessor"] = "DownpourCtrAccessor"
        try:
            _prepare_params(input=show, size=[1, 1])
        except:
            print("catch expected exception of param_attr=None")
        try:
            _prepare_params(input=show,
                            size=[1, 1],
                            param_attr=fluid.ParamAttr())
        except:
            print("catch expected exception of name=None")
        try:
            tmp = fluid.ParamAttr(name="embedding")
            _prepare_params(input=show, size=1, param_attr=tmp)
        except:
            print("catch expected exception of size not list")
        try:
            tmp = fluid.ParamAttr(name="embedding")
            _prepare_params(input=show, size=[-1, 12], param_attr=tmp)
        except:
            print("catch expected exception of size not equal")
        try:
            tmp = fluid.ParamAttr(name="embedding")
            _prepare_params(input=show,
                            size=[-1, 1],
                            param_attr=tmp,
                            is_sparse=False)
        except:
            print("catch expected exception of is_sparse=False")
        try:
            tmp = fluid.ParamAttr(name="embedding")
            _prepare_params(input=show, size=[-1, 1], param_attr=tmp, \
                            is_sparse=True, is_distributed=False)
        except:
            print("catch expected exception of is_distributed=False")
        try:
            _prepare_params(input=show, size=[-1, 1], \
                            param_attr=fluid.ParamAttr(name="embedding"), \
                            is_sparse=True, is_distributed=True, dtype="abc")
        except:
            print("catch expected exception of unknown dtype")
        try:
            FLEET_GLOBAL_DICT["emb_to_accessor"]["embedding"] = "unknown"
            tmp = fluid.ParamAttr(name="embedding")
            _prepare_params(input=show, size=[-1, 1], param_attr=tmp)
        except:
            print("catch expected exception of unknown accessor")
        FLEET_GLOBAL_DICT["cur_accessor"] = "DownpourCtrAccessor"
        try:
            _fleet_embedding(input=show, size=[-1, 1], is_sparse=True, \
                             is_distributed=True, dtype="float32", \
                             param_attr=fluid.ParamAttr(name="embedding"))
        except:
            print("catch expected exception of unknown accessor")
        try:
            _fleet_embedding_v2(input=show, size=[-1, 1], is_sparse=True, \
                                is_distributed=True, dtype="float32", \
                                param_attr=fluid.ParamAttr(name="embedding"))
        except:
            print("catch expected exception of unknown accessor")

        adam1 = fluid.optimizer.Adam(learning_rate=0.000005)
        adam1 = fleet.distributed_optimizer(
            adam1,
            strategy={
                "embedding": {
                    "sparse_accessor_class": "DownpourSparseValueAccessor"
                }
            })
        try:
            pre = FLEET_GLOBAL_DICT["emb_to_table"]
            FLEET_GLOBAL_DICT["emb_to_table"] = {}
            adam1.minimize([cost], [scope])
        except:
            FLEET_GLOBAL_DICT["emb_to_table"] = pre
            print("catch expected exception of empty emb_to_table")
        try:
            pre = FLEET_GLOBAL_DICT["emb_to_table"]
            FLEET_GLOBAL_DICT["emb_to_table"] = {}
            FLEET_GLOBAL_DICT["emb_to_table"]["emb1"] = 0
            adam1.minimize([cost], [scope])
        except:
            FLEET_GLOBAL_DICT["emb_to_table"] = pre
            print("catch expected exception of error emb_to_table")
        try:
            adam2 = fluid.optimizer.Adam(learning_rate=0.000005)
            adam2 = fleet.distributed_optimizer(adam2)
            adam2.supported_embedding_types = []
            adam2.minimize([cost], [scope])
        except:
            print("catch expected exception of embedding_types")
        try:
            adam3 = fluid.optimizer.Adam(learning_rate=0.000005)
            adam3 = fleet.distributed_optimizer(
                adam3,
                strategy={
                    "embedding": {
                        "sparse_accessor_class": "DownpourSparseValueAccessor",
                        "sparse_embedx_dim": 999
                    }
                })
            adam3.minimize([cost], [scope])
        except:
            print("catch expected exception of embedx_dim error")

        try:
            adam4 = fluid.optimizer.Adam(learning_rate=0.000005)
            adam4 = fleet.distributed_optimizer(
                adam4,
                strategy={
                    "embedding": {
                        "sparse_accessor_class": "DownpourCtrAccessor",
                        "sparse_embedx_dim": 999
                    }
                })
            adam4.minimize([cost], [scope])
        except:
            print("catch expected exception of embedx_dim error")
        train_program1 = fluid.Program()
        startup_program1 = fluid.Program()
        FLEET_GLOBAL_DICT["emb_to_accessor"] = {}
        with fluid.program_guard(train_program1, startup_program1):
            show = fluid.layers.data(name="show", shape=[-1, 1], \
                dtype="int64", lod_level=1, append_batch_size=False)
            with fleet_embedding(click_name=click.name):
                emb = fluid.layers.embedding(input=show, size=[1, 1], \
                    is_sparse=True, is_distributed=True, \
                    param_attr=fluid.ParamAttr(name="embedding"))
            with fleet_embedding(click_name=click.name):
                emb1 = fluid.embedding(input=show, size=[1, 1], \
                    is_sparse=True, is_distributed=True, \
                    param_attr=fluid.ParamAttr(name="embedding"))
Code example #7
    def init(self, context):
        """R
        """
        role_maker = None
        if self.global_config.get('process_mode', 'mpi') == 'brilliant_cpu':
            afs_config = self.global_config['io']['afs']
            role_maker = GeneralRoleMaker(
                hdfs_name=afs_config['fs_name'],
                hdfs_ugi=afs_config['fs_ugi'],
                path=self.global_config['output_path'] + "/gloo",
                init_timeout_seconds=1200,
                run_timeout_seconds=1200)
        fleet.init(role_maker)
        data_var_list = []
        data_var_name_dict = {}
        runnnable_scope = []
        runnnable_cost_op = []
        context['status'] = 'startup'

        for executor in self.global_config['executor']:
            scope = fluid.Scope()
            self._exector_context[executor['name']] = {}
            self._exector_context[executor['name']]['scope'] = scope
            self._exector_context[
                executor['name']]['model'] = model_basic.create(executor)
            model = self._exector_context[executor['name']]['model']
            self._metrics.update(model.get_metrics())
            runnnable_scope.append(scope)
            runnnable_cost_op.append(model.get_cost_op())
            for var in model._data_var:
                if var.name in data_var_name_dict:
                    continue
                data_var_list.append(var)
                data_var_name_dict[var.name] = var

        optimizer = model_basic.YamlModel.build_optimizer({
            'metrics':
            self._metrics,
            'optimizer_conf':
            self.global_config['optimizer']
        })
        optimizer.minimize(runnnable_cost_op, runnnable_scope)
        for executor in self.global_config['executor']:
            scope = self._exector_context[executor['name']]['scope']
            model = self._exector_context[executor['name']]['model']
            program = model._build_param['model']['train_program']
            if not executor['is_update_sparse']:
                program._fleet_opt["program_configs"][str(
                    id(model.get_cost_op().block.program)
                )]["push_sparse"] = []
            if 'train_thread_num' not in executor:
                executor['train_thread_num'] = self.global_config[
                    'train_thread_num']
            with fluid.scope_guard(scope):
                self._exe.run(model._build_param['model']['startup_program'])
            model.dump_model_program('./')

        # server init done
        if fleet.is_server():
            return 0

        self._dataset = {}
        for dataset_item in self.global_config['dataset']['data_list']:
            dataset_item['data_vars'] = data_var_list
            dataset_item.update(self.global_config['io']['afs'])
            dataset_item["batch_size"] = self.global_config['batch_size']
            self._dataset[dataset_item[
                'name']] = dataset.FluidTimeSplitDataset(dataset_item)
        # if config.need_reqi_changeslot and config.reqi_dnn_plugin_day >= last_day and config.reqi_dnn_plugin_pass >= last_pass:
        #    util.reqi_changeslot(config.hdfs_dnn_plugin_path, join_save_params, common_save_params, update_save_params, scope2, scope3)
        fleet.init_worker()
        pass
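
This init() reads a handful of keys from self.global_config. A hedged sketch of the shape it appears to expect follows; the surrounding framework owns the real schema, so every value here is a placeholder:

    global_config = {
        "process_mode": "brilliant_cpu",          # any other value skips the gloo role maker
        "io": {"afs": {"fs_name": "<afs cluster>", "fs_ugi": "<user,passwd>"}},
        "output_path": "<output dir>",            # "/gloo" is appended for the rendezvous path
        "executor": [{"name": "join", "is_update_sparse": True}],
        "optimizer": {},                          # forwarded to YamlModel.build_optimizer
        "train_thread_num": 12,
        "batch_size": 32,
        "dataset": {"data_list": [{"name": "train"}]},
    }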