Example 1
    def test_param_sharing_with_create_parameter(self):
        """
        Test case for parameter sharing of create_parameter op
        """
        net = MyNetWork()

        main_program1 = fluid.Program()
        with fluid.program_guard(main_program1):
            x = layers.data(name='x', shape=[100], dtype="float32")
            out1 = x + net.created_param()

        main_program2 = fluid.Program()
        with fluid.program_guard(main_program2):
            x = layers.data(name='x', shape=[100], dtype="float32")
            out2 = x + net.created_param()

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        input_np = np.random.uniform(0, 1, [1, 100]).astype("float32")
        out1_np = exe.run(main_program1,
                          feed={"x": input_np},
                          fetch_list=[out1])[0]
        out2_np = exe.run(main_program2,
                          feed={"x": input_np},
                          fetch_list=[out2])[0]
        self.assertEqual(np.sum(out1_np.flatten()), np.sum(out2_np.flatten()))
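MyNetWork itself is not part of this listing. As a rough sketch only, assuming it is a parl.Model whose layers come from PARL's parameter-sharing layers module (the base class and the fc size are assumptions, not taken from the original tests):

import parl
from parl import layers  # PARL's wrappers around fluid.layers; a wrapped layer
                         # reuses its parameters in every Program it is called from

class MyNetWork(parl.Model):
    def __init__(self):
        # a learnable parameter; created_param() returns the same parameter
        # no matter which fluid.Program is currently being built
        self.created_param = layers.create_parameter(
            shape=[100], dtype='float32')
        # layers used by the batch_norm test below (the size is an assumption)
        self.fc1 = layers.fc(size=64)
        self.batch_norm = layers.batch_norm()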
Example 2
    def test_param_sharing_with_batch_norm(self):
        """
        Test case for batch_norm layer
        """
        net = MyNetWork()

        main_program1 = fluid.Program()
        with fluid.program_guard(main_program1):
            x = layers.data(name='x', shape=[32, 128, 128], dtype="float32")
            hid1 = net.fc1(x)
            out1 = net.batch_norm(hid1)

        main_program2 = fluid.Program()
        with fluid.program_guard(main_program2):
            x = layers.data(name='x', shape=[32, 128, 128], dtype="float32")
            hid1 = net.fc1(x)
            out2 = net.batch_norm(hid1)

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        input_np = np.random.uniform(0, 1, [1, 32, 128, 128]).astype("float32")
        out1_np = exe.run(main_program1,
                          feed={"x": input_np},
                          fetch_list=[out1])[0]
        out2_np = exe.run(main_program2,
                          feed={"x": input_np},
                          fetch_list=[out2])[0]
        self.assertEqual(np.sum(out1_np.flatten()), np.sum(out2_np.flatten()))
Example 3
    def build_program(self):
        self.predict_program = fluid.Program()
        self.learn_program = fluid.Program()
        with fluid.program_guard(self.predict_program):
            obs = layers.data(name='obs', shape=[10], dtype='float32')
            output = self.alg.predict(obs)
        self.predict_output = [output]

        with fluid.program_guard(self.learn_program):
            obs = layers.data(name='obs', shape=[10], dtype='float32')
            label = layers.data(name='label', shape=[1], dtype='float32')
            cost = self.alg.learn(obs, label)
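The programs built here are executed with a fluid.Executor, just as in the test cases above. A minimal sketch of running the prediction program; the agent variable, the batch size, and the observation values are assumptions made for illustration:

import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())  # initialize all parameters once

# agent: an instance of the class that defines build_program() above (assumed)
# hypothetical batch of 3 observations matching shape=[10]
obs_batch = np.random.uniform(-1, 1, size=(3, 10)).astype('float32')
predictions = exe.run(agent.predict_program,
                      feed={'obs': obs_batch},
                      fetch_list=agent.predict_output)[0]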
Example 4
    def test_sync_weights_with_batch_norm(self):
        model = TestModel3()
        target_model = deepcopy(model)

        program1 = fluid.Program()
        program2 = fluid.Program()
        with fluid.program_guard(program1):
            obs = layers.data(name='obs',
                              shape=[32, 128, 128],
                              dtype="float32")
            model_output = model.predict(obs)
            loss = layers.reduce_mean(model_output)
            optimizer = fluid.optimizer.AdamOptimizer(1e-3)
            optimizer.minimize(loss)

        with fluid.program_guard(program2):
            obs = layers.data(name='obs',
                              shape=[32, 128, 128],
                              dtype="float32")
            model_output = model.predict(obs)
            target_model_output = target_model.predict(obs)
        self.executor.run(fluid.default_startup_program())

        N = 10
        random_obs = np.random.random(size=(N, 32, 128, 128)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            outputs = self.executor.run(
                program2,
                feed={'obs': x},
                fetch_list=[model_output, target_model_output])
            self.assertNotEqual(np.sum(outputs[0].flatten()),
                                np.sum(outputs[1].flatten()))

        # run the optimizer so that the batch_norm parameters of model and target_model diverge
        N = 100
        random_obs = np.random.random(size=(N, 32, 128, 128)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            self.executor.run(program1, feed={'obs': x})

        model.sync_weights_to(target_model)

        random_obs = np.random.random(size=(N, 32, 128, 128)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            outputs = self.executor.run(
                program2,
                feed={'obs': x},
                fetch_list=[model_output, target_model_output])
            self.assertEqual(np.sum(outputs[0].flatten()),
                             np.sum(outputs[1].flatten()))
Example 5
    def build_program(self):
        self.predict_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.predict_program):
            obs = layers.data(name='obs', shape=[4], dtype='float32')
            self.predict_actions = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(name='obs', shape=[4], dtype='float32')
            actions = layers.data(name='actions', shape=[], dtype='int64')
            behaviour_logits = layers.data(name='behaviour_logits',
                                           shape=[2],
                                           dtype='float32')
            rewards = layers.data(name='rewards', shape=[], dtype='float32')
            dones = layers.data(name='dones', shape=[], dtype='float32')
            lr = layers.data(name='lr',
                             shape=[1],
                             dtype='float32',
                             append_batch_size=False)
            entropy_coeff = layers.data(name='entropy_coeff',
                                        shape=[1],
                                        dtype='float32',
                                        append_batch_size=False)

            vtrace_loss, kl = self.alg.learn(obs, actions, behaviour_logits,
                                             rewards, dones, lr, entropy_coeff)
            self.learn_outputs = [
                vtrace_loss.total_loss, vtrace_loss.pi_loss,
                vtrace_loss.vf_loss, vtrace_loss.entropy, kl
            ]
Example 6
    def build_program(self):
        self.predict_program = fluid.Program()
        self.value_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.predict_program):
            obs = layers.data(name='obs', shape=[4], dtype='float32')
            self.predict_actions = self.alg.predict(obs)

        with fluid.program_guard(self.value_program):
            obs = layers.data(name='obs', shape=[4], dtype='float32')
            self.values = self.alg.value(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(name='obs', shape=[4], dtype='float32')
            actions = layers.data(name='actions', shape=[], dtype='int64')
            advantages = layers.data(name='advantages',
                                     shape=[],
                                     dtype='float32')
            target_values = layers.data(name='target_values',
                                        shape=[],
                                        dtype='float32')
            lr = layers.data(name='lr',
                             shape=[1],
                             dtype='float32',
                             append_batch_size=False)
            entropy_coeff = layers.data(name='entropy_coeff',
                                        shape=[1],
                                        dtype='float32',
                                        append_batch_size=False)

            total_loss, pi_loss, vf_loss, entropy = self.alg.learn(
                obs, actions, advantages, target_values, lr, entropy_coeff)
            self.learn_outputs = [total_loss, pi_loss, vf_loss, entropy]
Example 7
    def build_program(self):
        self.policy_predict_program = fluid.Program()
        self.policy_sample_program = fluid.Program()
        self.policy_learn_program = fluid.Program()
        self.value_predict_program = fluid.Program()
        self.value_learn_program = fluid.Program()

        with fluid.program_guard(self.policy_sample_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            print(obs)
            sampled_act = self.alg.sample(obs)
            self.policy_sample_output = [sampled_act]

        with fluid.program_guard(self.policy_predict_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            means = self.alg.predict(obs)
            self.policy_predict_output = [means]

        with fluid.program_guard(self.policy_learn_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            actions = layers.data(name='actions',
                                  shape=[self.act_dim],
                                  dtype='float32')
            advantages = layers.data(name='advantages',
                                     shape=[1],
                                     dtype='float32')
            if self.loss_type == 'KLPEN':
                beta = layers.data(name='beta', shape=[], dtype='float32')
                loss, kl = self.alg.policy_learn(obs, actions, advantages,
                                                 beta)
            else:
                loss, kl = self.alg.policy_learn(obs, actions, advantages)

            self.policy_learn_output = [loss, kl]

        with fluid.program_guard(self.value_predict_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            value = self.alg.value_predict(obs)
            self.value_predict_output = [value]

        with fluid.program_guard(self.value_learn_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            val = layers.data(name='val', shape=[], dtype='float32')
            value_loss = self.alg.value_learn(obs, val)
            self.value_learn_output = [value_loss]
Example 8
    def build_program(self):
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):  # build the graph that predicts actions; define its inputs/outputs
            obs = layers.data(name='obs', shape=self.obs_dim, dtype='float32')
            self.act_prob = self.alg.predict(obs)

        with fluid.program_guard(
                self.learn_program):  # build the graph that updates the policy network; define its inputs/outputs
            obs = layers.data(name='obs', shape=self.obs_dim, dtype='float32')
            act = layers.data(name='act', shape=[1], dtype='int64')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            self.cost = self.alg.learn(obs, act, reward)
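A sketch of how this learn_program could be fed through an already initialized executor; the agent object, the executor exe, the batch size, and an obs_dim of [4] are illustrative assumptions:

import numpy as np

# hypothetical batch of 8 transitions; self.obs_dim is assumed to be [4]
batch_obs = np.random.randn(8, 4).astype('float32')
batch_act = np.random.randint(0, 2, size=(8, 1)).astype('int64')
batch_reward = np.random.randn(8).astype('float32')

cost = exe.run(agent.learn_program,
               feed={'obs': batch_obs,
                     'act': batch_act,
                     'reward': batch_reward},
               fetch_list=[agent.cost])[0]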
Example 9
    def build_program(self):
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):  # build the graph that predicts actions; define its inputs/outputs
            last_obs = layers.data(name='last_obs',
                                   shape=[self.obs_dim],
                                   dtype='float32')
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            self.value = self.alg.predict(last_obs, obs)

        with fluid.program_guard(self.learn_program):  # build the graph that updates the Q network; define its inputs/outputs
            last_obs = layers.data(name='last_obs',
                                   shape=[self.obs_dim],
                                   dtype='float32')
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            action = layers.data(name='act', shape=[1], dtype='int32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(name='next_obs',
                                   shape=[self.obs_dim],
                                   dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            self.cost = self.alg.learn(last_obs, obs, action, reward, next_obs,
                                       terminal)
Example 10
    def build_program(self):
        self.sample_program = fluid.Program()
        self.predict_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.sample_program):
            obs = layers.data(name='obs',
                              shape=self.obs_shape,
                              dtype='float32')
            self.sample_actions, self.behaviour_logits = self.alg.sample(obs)

        with fluid.program_guard(self.predict_program):
            obs = layers.data(name='obs',
                              shape=self.obs_shape,
                              dtype='float32')
            self.predict_actions = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(name='obs',
                              shape=self.obs_shape,
                              dtype='float32')
            actions = layers.data(name='actions', shape=[], dtype='int64')
            behaviour_logits = layers.data(name='behaviour_logits',
                                           shape=[self.act_dim],
                                           dtype='float32')
            rewards = layers.data(name='rewards', shape=[], dtype='float32')
            dones = layers.data(name='dones', shape=[], dtype='float32')
            lr = layers.data(name='lr',
                             shape=[1],
                             dtype='float32',
                             append_batch_size=False)
            entropy_coeff = layers.data(name='entropy_coeff',
                                        shape=[1],
                                        dtype='float32',
                                        append_batch_size=False)

            self.learn_reader = fluid.layers.create_py_reader_by_data(
                capacity=32,
                feed_list=[
                    obs, actions, behaviour_logits, rewards, dones, lr,
                    entropy_coeff
                ])

            obs, actions, behaviour_logits, rewards, dones, lr, entropy_coeff = fluid.layers.read_file(
                self.learn_reader)

            vtrace_loss, kl = self.alg.learn(obs, actions, behaviour_logits,
                                             rewards, dones, lr, entropy_coeff)
            self.learn_outputs = [
                vtrace_loss.total_loss, vtrace_loss.pi_loss,
                vtrace_loss.vf_loss, vtrace_loss.entropy, kl
            ]
        self.learn_program = parl.compile(self.learn_program,
                                          vtrace_loss.total_loss)
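Unlike the earlier agents, this one pulls its training batch through a py_reader instead of a feed dict. A rough sketch of the consuming side, under the assumption that the reader follows the usual fluid 1.x py_reader interface (decorate_tensor_provider / start / reset); agent, the executor exe, and the batch arrays are placeholders:

import paddle.fluid as fluid

def batch_generator():
    # hypothetical: yield one tuple of numpy arrays per training batch,
    # in the same order as the feed_list above
    while True:
        yield (obs_np, actions_np, behaviour_logits_np, rewards_np,
               dones_np, lr_np, entropy_coeff_np)

agent.learn_reader.decorate_tensor_provider(batch_generator)
agent.learn_reader.start()
try:
    while True:
        # no feed dict: the py_reader supplies the inputs
        outputs = exe.run(agent.learn_program,
                          fetch_list=agent.learn_outputs)
except fluid.core.EOFException:
    agent.learn_reader.reset()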
Example 11
    def build_program(self):
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()  # train_program

        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            self.act_prob = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            act = layers.data(name='act', shape=[1], dtype='int64')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            self.cost = self.alg.learn(obs, act, reward)
Example 12
    def test_sync_weights_with_decay(self):
        pred_program = fluid.Program()
        with fluid.program_guard(pred_program):
            obs = layers.data(name='obs', shape=[4], dtype='float32')
            model_output = self.model.predict(obs)
            target_model_output = self.target_model.predict(obs)

        self.executor.run(fluid.default_startup_program())

        decay = 0.9
        # compute the expected decayed target parameters in NumPy
        (target_model_fc1_w, target_model_fc1_b, target_model_fc2_w,
         target_model_fc2_b, target_model_fc3_w,
         target_model_fc3_b) = self._numpy_update(self.target_model, decay)

        self.model.sync_weights_to(self.target_model, decay=decay)

        N = 10
        random_obs = np.random.random(size=(N, 4)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            real_target_outputs = self.executor.run(
                pred_program,
                feed={'obs': x},
                fetch_list=[target_model_output])[0]

            # Ideal target output
            out_np = np.dot(x, target_model_fc1_w) + target_model_fc1_b
            out_np = np.dot(out_np, target_model_fc2_w) + target_model_fc2_b
            out_np = np.dot(out_np, target_model_fc3_w) + target_model_fc3_b

            self.assertLess(float(np.abs(real_target_outputs - out_np)), 1e-5)
Example 13
    def test_sync_weights_with_create_parameter(self):
        model = TestModel2()
        target_model = deepcopy(model)

        pred_program = fluid.Program()
        with fluid.program_guard(pred_program):
            obs = layers.data(name='obs', shape=[100], dtype='float32')
            model_output = model.predict(obs)
            target_model_output = target_model.predict(obs)
        self.executor.run(fluid.default_startup_program())

        N = 10
        random_obs = np.random.random(size=(N, 100)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            outputs = self.executor.run(
                pred_program,
                feed={'obs': x},
                fetch_list=[model_output, target_model_output])
            self.assertNotEqual(np.sum(outputs[0].flatten()),
                                np.sum(outputs[1].flatten()))

        model.sync_weights_to(target_model)

        random_obs = np.random.random(size=(N, 100)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            outputs = self.executor.run(
                pred_program,
                feed={'obs': x},
                fetch_list=[model_output, target_model_output])
            self.assertEqual(np.sum(outputs[0].flatten()),
                             np.sum(outputs[1].flatten()))
Example 14
    def create_inputs(self, mode):
        """create layers.data here"""
        inputs = OrderedDict()
        data_attributes = copy.deepcopy(self.data_attributes)
        data_attributes['click_id'] = {
            'shape': (-1, 1),
            'dtype': 'int64',
            'lod_level': 1
        }

        if mode in ['train', 'test']:
            list_names = self.item_slot_names + self.user_slot_names + [
                'click_id'
            ]
        elif mode in ['inference']:
            list_names = self.item_slot_names + self.user_slot_names
        else:
            raise NotImplementedError(mode)

        for name in list_names:
            p = data_attributes[name]
            inputs[name] = layers.data(name=name,
                                       shape=p['shape'],
                                       dtype=p['dtype'],
                                       lod_level=p['lod_level'])
        return inputs
Example 15
    def test_set_weights_between_different_models(self):
        model1 = TestModel4()
        model2 = TestModel4()

        pred_program = fluid.Program()
        with fluid.program_guard(pred_program):
            obs = layers.data(name='obs', shape=[4], dtype='float32')
            model1_output = model1.predict(obs)
            model2_output = model2.predict(obs)

        self.executor.run(fluid.default_startup_program())

        N = 10
        random_obs = np.random.random(size=(N, 4)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            outputs = self.executor.run(
                pred_program,
                feed={'obs': x},
                fetch_list=[model1_output, model2_output])
            self.assertNotEqual(outputs[0].flatten(), outputs[1].flatten())

        # copy the parameters of model1 to model2
        params = model1.get_weights()
        model2.set_weights(params)

        random_obs = np.random.random(size=(N, 4)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            outputs = self.executor.run(
                pred_program,
                feed={'obs': x},
                fetch_list=[model1_output, model2_output])
            self.assertEqual(outputs[0].flatten(), outputs[1].flatten())
Example 16
    def test_sync_weights_in_one_program(self):
        pred_program = fluid.Program()
        with fluid.program_guard(pred_program):
            obs = layers.data(name='obs', shape=[4], dtype='float32')
            model_output = self.model.predict(obs)
            target_model_output = self.target_model.predict(obs)
        self.executor.run(fluid.default_startup_program())

        N = 10
        random_obs = np.random.random(size=(N, 4)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            outputs = self.executor.run(
                pred_program,
                feed={'obs': x},
                fetch_list=[model_output, target_model_output])
            self.assertNotEqual(outputs[0].flatten(), outputs[1].flatten())

        self.model.sync_weights_to(self.target_model)

        random_obs = np.random.random(size=(N, 4)).astype('float32')
        for i in range(N):
            x = np.expand_dims(random_obs[i], axis=0)
            outputs = self.executor.run(
                pred_program,
                feed={'obs': x},
                fetch_list=[model_output, target_model_output])
            self.assertEqual(outputs[0].flatten(), outputs[1].flatten())
Example 17
    def build_program(self):
        self.predict_programs = []
        self.predict_outputs = []
        self.learn_programs = []
        self.learn_programs_output = []
        for i in range(self.ensemble_num):
            predict_program = fluid.Program()
            with fluid.program_guard(predict_program):
                obs = layers.data(
                    name='obs', shape=[self.obs_dim], dtype='float32')
                act = self.alg.predict(obs, model_id=i)
            self.predict_programs.append(predict_program)
            self.predict_outputs.append([act.name])

            learn_program = fluid.Program()
            with fluid.program_guard(learn_program):
                obs = layers.data(
                    name='obs', shape=[self.obs_dim], dtype='float32')
                act = layers.data(
                    name='act', shape=[self.act_dim], dtype='float32')
                reward = layers.data(name='reward', shape=[], dtype='float32')
                next_obs = layers.data(
                    name='next_obs', shape=[self.obs_dim], dtype='float32')
                terminal = layers.data(name='terminal', shape=[], dtype='bool')
                actor_lr = layers.data(
                    name='actor_lr',
                    shape=[1],
                    dtype='float32',
                    append_batch_size=False)
                critic_lr = layers.data(
                    name='critic_lr',
                    shape=[1],
                    dtype='float32',
                    append_batch_size=False)
                actor_loss, critic_loss = self.alg.learn(
                    obs,
                    act,
                    reward,
                    next_obs,
                    terminal,
                    actor_lr,
                    critic_lr,
                    model_id=i)
            self.learn_programs.append(learn_program)
            self.learn_programs_output.append([critic_loss.name])

        self.ensemble_predict_program = fluid.Program()
        with fluid.program_guard(self.ensemble_predict_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            act = self.alg.ensemble_predict(obs)
        self.ensemble_predict_output = [act.name]
Example 18
    def build_program(self):
        self.sample_program = fluid.Program()
        self.predict_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.sample_program):
            obs = layers.data(name='obs', shape=self.obs_dim, dtype='float32')
            self.sample_actions, self.action_mean, self.action_std = self.alg.sample(
                obs)

        # with fluid.program_guard(self.predict_program):
        #     obs = layers.data(
        #         name='obs', shape=[self.obs_dim], dtype='float32')
        #     self.predict_actions = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(name='obs', shape=self.obs_dim, dtype='float32')
            actions = layers.data(name='actions',
                                  shape=[self.act_dim],
                                  dtype='float32')
            mean = layers.data(name='mean',
                               shape=[self.act_dim],
                               dtype='float32')
            std = layers.data(name='std',
                              shape=[self.act_dim],
                              dtype='float32')
            rewards = layers.data(name='rewards', shape=[], dtype='float32')
            dones = layers.data(name='dones', shape=[], dtype='float32')
            lr = layers.data(name='lr',
                             shape=[1],
                             dtype='float32',
                             append_batch_size=False)
            entropy_coeff = layers.data(name='entropy_coeff',
                                        shape=[],
                                        dtype='float32')

            self.learn_reader = fluid.layers.create_py_reader_by_data(
                capacity=32,
                feed_list=[
                    obs, actions, mean, std, rewards, dones, lr, entropy_coeff
                ])

            obs, actions, mean, std, rewards, dones, lr, entropy_coeff = fluid.layers.read_file(
                self.learn_reader)

            vtrace_loss, kl = self.alg.learn(obs, actions, mean, std, rewards,
                                             dones, lr, entropy_coeff)
            self.learn_outputs = [
                vtrace_loss.total_loss, vtrace_loss.pi_loss,
                vtrace_loss.vf_loss, vtrace_loss.entropy, kl
            ]
Example 19
    def _define_program(self):
        self.ensemble_predict_program = fluid.Program()
        self.startup_program = fluid.Program()
        with fluid.program_guard(self.ensemble_predict_program,
                                 self.startup_program):
            obs = layers.data(name='obs', shape=[OBS_DIM], dtype='float32')
            action = self._ensemble_predict(obs)
            self.ensemble_predict_output = [action]
Example 20
    def build_program(self):
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            self.value = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            action = layers.data(name='act', shape=[1], dtype='int32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(name='next_obs',
                                   shape=[self.obs_dim],
                                   dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            sample_weight = layers.data(name='sample_weight',
                                        shape=[1],
                                        dtype='float32')
            self.cost, self.delta = self.alg.learn(obs, action, reward,
                                                   next_obs, terminal,
                                                   sample_weight)
Example 21
    def build_program(self):
        self.learn_programs = []
        self.predict_programs = []
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs',
                shape=[CONTEXT_LEN, IMAGE_SIZE[0], IMAGE_SIZE[1]],
                dtype='float32')
            self.value = self.alg.define_predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(
                name='obs',
                shape=[CONTEXT_LEN, IMAGE_SIZE[0], IMAGE_SIZE[1]],
                dtype='float32')
            action = layers.data(name='act', shape=[1], dtype='int32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(
                name='next_obs',
                shape=[CONTEXT_LEN, IMAGE_SIZE[0], IMAGE_SIZE[1]],
                dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            weight = layers.data(name='weight', shape=[], dtype='float32')
            self.cost, self.newTd = self.alg.define_learn(
                obs, action, reward, next_obs, terminal, weight)
        self.learn_programs.append(self.learn_program)
        self.predict_programs.append(self.pred_program)
Example 22
    def build_program(self):
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs',
                shape=[CONTEXT_LEN, IMAGE_SIZE[0], IMAGE_SIZE[1]],
                dtype='float32')
            self.value = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(
                name='obs',
                shape=[CONTEXT_LEN, IMAGE_SIZE[0], IMAGE_SIZE[1]],
                dtype='float32')
            action = layers.data(name='act', shape=[1], dtype='int32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(
                name='next_obs',
                shape=[CONTEXT_LEN, IMAGE_SIZE[0], IMAGE_SIZE[1]],
                dtype='float32')
            lr = layers.data(name='lr',
                             shape=[1],
                             dtype='float32',
                             append_batch_size=False)
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            self.cost = self.alg.learn(obs, action, reward, next_obs, terminal,
                                       lr)
Example 23
    def build_program(self):
        self.pred_program = fluid.Program()
        self.sample_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            self.pred_act = self.alg.predict(obs)

        with fluid.program_guard(self.sample_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            self.sample_act, _ = self.alg.sample(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            act = layers.data(name='act',
                              shape=[self.act_dim],
                              dtype='float32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(name='next_obs',
                                   shape=[self.obs_dim],
                                   dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            self.critic_cost, self.actor_cost = self.alg.learn(
                obs, act, reward, next_obs, terminal)
Example 24
    def build_program(self):
        self.predict_program = fluid.Program()

        with fluid.program_guard(self.predict_program):
            obs = layers.data(name='obs',
                              shape=[self.config['obs_dim']],
                              dtype='float32')
            self.predict_action = self.alg.predict(obs)
        self.predict_program = parl.compile(self.predict_program)
Example 25
    def build_program(self):
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()
        self.next_q_program = fluid.Program()
        self.next_a_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs',
                shape=[self.obs_dim_n[self.agent_index]],
                dtype='float32')
            self.pred_act = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs_n = [
                layers.data(
                    name='obs' + str(i),
                    shape=[self.obs_dim_n[i]],
                    dtype='float32') for i in range(self.n)
            ]
            act_n = [
                layers.data(
                    name='act' + str(i),
                    shape=[self.act_dim_n[i]],
                    dtype='float32') for i in range(self.n)
            ]
            target_q = layers.data(name='target_q', shape=[], dtype='float32')
            self.critic_cost = self.alg.learn(obs_n, act_n, target_q)

        with fluid.program_guard(self.next_q_program):
            obs_n = [
                layers.data(
                    name='obs' + str(i),
                    shape=[self.obs_dim_n[i]],
                    dtype='float32') for i in range(self.n)
            ]
            act_n = [
                layers.data(
                    name='act' + str(i),
                    shape=[self.act_dim_n[i]],
                    dtype='float32') for i in range(self.n)
            ]
            self.next_Q = self.alg.Q_next(obs_n, act_n)

        with fluid.program_guard(self.next_a_program):
            obs = layers.data(
                name='obs',
                shape=[self.obs_dim_n[self.agent_index]],
                dtype='float32')
            self.next_action = self.alg.predict_next(obs)

        if self.speedup:
            self.pred_program = parl.compile(self.pred_program)
            self.learn_program = parl.compile(self.learn_program,
                                              self.critic_cost)
            self.next_q_program = parl.compile(self.next_q_program)
            self.next_a_program = parl.compile(self.next_a_program)
Example 26
    def build_program(self):
        self.pred_program = fluid.Program()
        self.train_program = fluid.Program()

        fluid.default_startup_program().random_seed = self.seed
        self.train_program.random_seed = self.seed

        with fluid.program_guard(self.pred_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            self.act_prob = self.alg.define_predict(obs)

        with fluid.program_guard(self.train_program):
            obs = layers.data(name='obs',
                              shape=[self.obs_dim],
                              dtype='float32')
            act = layers.data(name='act', shape=[1], dtype='int64')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            self.cost = self.alg.define_learn(obs, act, reward)
Example 27
    def test_set_weights_with_wrong_params_num(self):
        pred_program = fluid.Program()
        with fluid.program_guard(pred_program):
            obs = layers.data(name='obs', shape=[4], dtype='float32')
            model_output = self.model.predict(obs)

        self.executor.run(fluid.default_startup_program())

        params = self.model.get_weights()

        with self.assertRaises(AssertionError):
            self.model.set_weights(params[1:])
Example 28
    def _define_program(self):
        self.actor_predict_programs = []
        self.actor_predict_outputs = []
        self.learn_programs = []
        self.learn_programs_output = []
        for i in range(self.ensemble_num):
            actor_predict_program = fluid.Program()
            with fluid.program_guard(actor_predict_program):
                obs = layers.data(name='obs',
                                  shape=[self.obs_dim],
                                  dtype='float32')
                action = self.alg.actor_predict(obs, model_id=i)
            self.actor_predict_programs.append(actor_predict_program)
            self.actor_predict_outputs.append([action])

            learn_program = fluid.Program()
            with fluid.program_guard(learn_program):
                obs = layers.data(name='obs',
                                  shape=[self.obs_dim],
                                  dtype='float32')
                action = layers.data(name='action',
                                     shape=[self.act_dim],
                                     dtype='float32')
                reward = layers.data(name='reward', shape=[], dtype='float32')
                next_obs = layers.data(name='next_obs',
                                       shape=[self.obs_dim],
                                       dtype='float32')
                terminal = layers.data(name='terminal', shape=[], dtype='bool')
                actor_lr = layers.data(name='actor_lr',
                                       shape=[1],
                                       dtype='float32',
                                       append_batch_size=False)
                critic_lr = layers.data(name='critic_lr',
                                        shape=[1],
                                        dtype='float32',
                                        append_batch_size=False)
                critic_loss = self.alg.learn(obs,
                                             action,
                                             reward,
                                             next_obs,
                                             terminal,
                                             actor_lr,
                                             critic_lr,
                                             model_id=i)
            if self._no_mem_allocation:
                for var in learn_program.blocks[0].vars:
                    if not learn_program.blocks[0].var(var).is_data:
                        learn_program.blocks[0].var(var).persistable = True
            self.learn_programs.append(learn_program)
            self.learn_programs_output.append([critic_loss.name])
Example 29
    def build_program(self):
        self._pred_program = fluid.Program()
        self._learn_program = fluid.Program()

        with fluid.program_guard(self._pred_program):
            obs = layers.data(
                name='obs', shape=[self._obs_dim], dtype='float32')
            self._value = self.alg.define_predict(obs)

        with fluid.program_guard(self._learn_program):
            obs = layers.data(
                name='obs', shape=[self._obs_dim], dtype='float32')
            action = layers.data(name='act', shape=[1], dtype='int32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(
                name='next_obs', shape=[self._obs_dim], dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            self._cost = self.alg.define_learn(obs, action, reward, next_obs,
                                               terminal)
Example 30
    def build_program(self):
        self.sample_program = fluid.Program()
        self.predict_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.sample_program):
            obs = layers.data(name='obs',
                              shape=self.obs_shape,
                              dtype='float32')
            sample_actions, values = self.alg.sample(obs)
            self.sample_outputs = [sample_actions.name, values.name]

        with fluid.program_guard(self.predict_program):
            obs = layers.data(name='obs',
                              shape=self.obs_shape,
                              dtype='float32')
            self.predict_actions = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(name='obs',
                              shape=self.obs_shape,
                              dtype='float32')
            actions = layers.data(name='actions', shape=[], dtype='int64')
            advantages = layers.data(name='advantages',
                                     shape=[],
                                     dtype='float32')
            target_values = layers.data(name='target_values',
                                        shape=[],
                                        dtype='float32')
            lr = layers.data(name='lr',
                             shape=[1],
                             dtype='float32',
                             append_batch_size=False)
            entropy_coeff = layers.data(name='entropy_coeff',
                                        shape=[],
                                        dtype='float32')

            self.learn_reader = fluid.layers.create_py_reader_by_data(
                capacity=32,
                feed_list=[
                    obs, actions, advantages, target_values, lr, entropy_coeff
                ])
            obs, actions, advantages, target_values, lr, entropy_coeff = fluid.layers.read_file(
                self.learn_reader)

            total_loss, pi_loss, vf_loss, entropy = self.alg.learn(
                obs, actions, advantages, target_values, lr, entropy_coeff)
            self.learn_outputs = [
                total_loss.name, pi_loss.name, vf_loss.name, entropy.name
            ]