예제 #1
0
    def init(self, taskspec):
        """Initialize the agent from a task specification.

        Parses the task specification, derives a scaling factor for every
        observation dimension, creates one ``MDPAction`` per discrete action
        (each backed by its own ``CsmlApproximator``) and stores a ``Learner``
        driving a ``PrioritizedSweeping`` planner in ``self._learner``.

        Parameters
        ----------
        taskspec : str
            The task specification.

        Raises
        ------
        TaskSpecError
            If the parser reports the task specification as invalid.
        AssertionError
            If the task has no discrete action dimension or has any
            continuous action dimension.

        """
        ts = TaskSpecParser(taskspec)
        if not ts.valid:
            raise TaskSpecError('TaskSpec Error: Invalid task spec version')

        _, maxval = ts.get_reward_range()

        # Find which optional descriptors occur in the extra section and
        # where; descriptors that are absent are removed from ``v``.
        extra = ts.get_extra()
        v = ['OBSDESCR', 'ACTDESCR', 'COPYRIGHT']
        pos = []
        for id_ in list(v):  # iterate over a copy because ``v`` is mutated
            try:
                pos.append(extra.index(id_))
            except ValueError:
                v.remove(id_)
        sorted_v = sorted(zip(pos, v))

        act_desc = {}
        for i, (_, id_) in enumerate(sorted_v):
            val = ts.get_value(i, extra, v)
            if id_ == 'ACTDESCR':
                # SECURITY: eval() executes arbitrary code contained in the
                # task specification; this is only safe for trusted task
                # specs. NOTE(review): if the descriptor is always a plain
                # literal, ast.literal_eval would be a safer choice - confirm.
                act_desc = eval(val)

        # One scaling factor per observation dimension: 1.0 for every integer
        # observation and 1 / (upper - lower) for every double observation.
        obs = ts.get_double_obs()
        dimensions = [1.0] * ts.get_num_int_obs()
        if len(obs) > 0:
            # Materialize the transposed limits so they can be indexed on
            # both Python 2 and Python 3 (zip() is an iterator on Python 3).
            obs_bounds = list(zip(*obs))
            lower = np.asarray(obs_bounds[0])
            upper = np.asarray(obs_bounds[1])
            dimensions += (1.0 / (upper - lower)).tolist()

        MDPState.set_feature_limits(obs)

        act_limits = ts.get_int_act()
        act_limits += ts.get_double_act()

        discrete_dim = ts.get_num_int_act()
        assert (discrete_dim > 0)
        continuous_dim = ts.get_num_double_act()
        assert (continuous_dim == 0)

        if discrete_dim > 1:
            import itertools

            # Joint actions: the Cartesian product of the per-dimension
            # integer ranges (+1 so that range() includes the upper limit).
            # NOTE(review): the joint actions are handed to MDPAction.create
            # as tuples - confirm that it accepts them.
            per_dim = [range(limits[0], limits[1] + 1) for limits in act_limits]
            action_ids = list(itertools.product(*per_dim))
        else:
            bounds = list(act_limits[0])
            bounds[1] += 1  # +1 so that range() includes the upper limit
            action_ids = range(*bounds)

        bb = self._config['modelbreadth'] * self._config['modelbreadth']
        # Distance at which the kernel below has decayed to ``minweight``.
        maxd = np.sqrt(-bb * np.log(self._config['minweight']))

        def kernelfn(x):
            """Gaussian-shaped kernel with squared breadth ``bb``."""
            return np.exp(-x * x / bb)

        def model_inclusion(approximator, state, delta):
            """Decide whether the case ``(state, delta)`` should be added.

            The case is added when no neighbor's stored delta lies within
            ``model_sigma`` of ``delta``; otherwise it is only added when the
            first element of the first neighbor entry exceeds ``model_tau``
            (presumably the distance to the closest neighbor - TODO confirm).
            """
            try:
                approx = approximator._queries[state]
            except KeyError:
                approx = approximator.Approximation(approximator, state, approximator._kernelfn)

            # If at least one case in the case base already estimates the
            # query case correctly, the query adds no new information.
            explained = any(
                np.linalg.norm(d - delta) <= self._config['model_sigma']
                for _, _, d in approx._neighbors)
            if not explained:
                return True
            return approx._neighbors[0][0] > self._config['model_tau']

        actions = []
        for i in action_ids:
            model_approximator = CsmlApproximator(self._config['minfraction'], maxd, dimensions, kernelfn,
                                                  self._config['n_components'], self._config['n_iter'], model_inclusion)
            actions.append(
                MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                                 name=act_desc[i] if i in act_desc else None,
                                 feature_limits=act_limits))

        # shuffle(actions)
        # NOTE(review): only the first three actions are kept; this looks like
        # a leftover of an experiment together with the disabled shuffle
        # above - confirm. Slicing avoids an IndexError for smaller action sets.
        actions = actions[:3]

        # Alternative kernel-based value approximator, currently disabled:
        # def value_inclusion(approximator, state):
        #     try:
        #         approx = approximator._queries[state]
        #     except KeyError:
        #         approx = approximator.Approximation(approximator, state, approximator._kernelfn)
        #
        #     return len(approx._weights) <= 0 or approx._neighbors[0][0] > self._config['value_tau']
        #
        # value_approximator = KernelApproximator(0.01, maxd, dimensions, lambda x: np.exp(-x ** 2), value_inclusion)
        value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)
        mdp = MDP(actions, value_approximator)
        planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0, self._config['discountfactor'],
                                      self._config['epsilon'])

        self._learner = Learner(planner)
예제 #2
0
    def init(self, taskspec):
        """Initialize the agent from a task specification.

        Parses the task specification, derives a scaling factor for every
        observation dimension, assembles the feature metadata for a single
        ``CasmlApproximator`` that is shared by all actions, creates one
        ``MDPAction`` per discrete action and stores a ``Learner`` driving a
        ``PrioritizedSweeping`` planner in ``self._learner``.

        Parameters
        ----------
        taskspec : str
            The task specification.

        Raises
        ------
        TaskSpecError
            If the parser reports the task specification as invalid.
        AssertionError
            If the task has no discrete action dimension or has any
            continuous action dimension.

        """
        ts = TaskSpecParser(taskspec)
        if not ts.valid:
            raise TaskSpecError('TaskSpec Error: Invalid task spec version')

        _, maxval = ts.get_reward_range()

        # Find which optional descriptors occur in the extra section and
        # where; descriptors that are absent are removed from ``v``.
        extra = ts.get_extra()
        v = ['OBSDESCR', 'ACTDESCR', 'COPYRIGHT']
        pos = []
        for id_ in list(v):  # iterate over a copy because ``v`` is mutated
            try:
                pos.append(extra.index(id_))
            except ValueError:
                v.remove(id_)
        sorted_v = sorted(zip(pos, v))

        act_desc = {}
        for i, (_, id_) in enumerate(sorted_v):
            val = ts.get_value(i, extra, v)
            if id_ == 'ACTDESCR':
                # SECURITY: eval() executes arbitrary code contained in the
                # task specification; this is only safe for trusted task
                # specs. NOTE(review): if the descriptor is always a plain
                # literal, ast.literal_eval would be a safer choice - confirm.
                act_desc = eval(val)

        # One scaling factor per observation dimension: 1.0 for every integer
        # observation and 1 / (upper - lower) for every double observation.
        obs = ts.get_double_obs()
        dimensions = [1.0] * ts.get_num_int_obs()
        if len(obs) > 0:
            # Materialize the transposed limits so they can be indexed on
            # both Python 2 and Python 3 (zip() is an iterator on Python 3).
            obs_bounds = list(zip(*obs))
            lower = np.asarray(obs_bounds[0])
            upper = np.asarray(obs_bounds[1])
            dimensions += (1.0 / (upper - lower)).tolist()

        MDPState.set_feature_limits(obs)

        act_limits = ts.get_int_act()
        act_limits += ts.get_double_act()

        discrete_dim = ts.get_num_int_act()
        assert (discrete_dim > 0)
        continuous_dim = ts.get_num_double_act()
        assert (continuous_dim == 0)

        if discrete_dim > 1:
            import itertools

            # Joint actions: the Cartesian product of the per-dimension
            # integer ranges (+1 so that range() includes the upper limit).
            # NOTE(review): the joint actions are handed to MDPAction.create
            # as tuples - confirm that it accepts them.
            per_dim = [range(limits[0], limits[1] + 1) for limits in act_limits]
            action_ids = list(itertools.product(*per_dim))
        else:
            bounds = list(act_limits[0])
            bounds[1] += 1  # +1 so that range() includes the upper limit
            action_ids = range(*bounds)

        bb = self._config['modelbreadth'] * self._config['modelbreadth']
        # Distance at which the kernel exp(-x * x / bb) has decayed to
        # ``minweight``.
        maxd = np.sqrt(-bb * np.log(self._config['minweight']))

        feature_metadata = {}
        feature_names = ['state', 'act', 'delta_state']
        for order, n in enumerate(feature_names):
            meta = {
                'type': 'float',
                'is_index': n != 'delta_state',
                'order': order,
            }
            feature_metadata[n] = meta

            # Retrieval settings are optional; without a configured method
            # the feature keeps only the basic metadata.
            try:
                meta['retrieval_method'] = self._config['retrieval'][n]['method']
            except KeyError:
                continue

            try:
                params = self._config['retrieval'][n]['method_params']
            except KeyError:
                continue
            meta['retrieval_method_params'] = params

            if n != 'state':
                continue

            if isinstance(params, dict):
                # Fill in the observation scaling unless one was configured.
                # The dict is the configuration's own object and is updated
                # in place.
                if 'scale' not in params:
                    params['scale'] = dimensions
            else:
                params = listify(params)
                meta['retrieval_method_params'] = params
                params[0] = maxd
                if len(params) <= 1:
                    params.insert(0, dimensions)

        model_approximator = CasmlApproximator(feature_metadata, self._config['minfraction'], dimensions,
                                               lambda x: np.exp(-x * x / bb), self._config['model_tau'],
                                               self._config['model_sigma'], self._config['n_components'],
                                               self._config['n_iter'])

        actions = []
        for i in action_ids:
            actions.append(
                MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                                 name=act_desc[i] if i in act_desc else None,
                                 feature_limits=act_limits))
        # shuffle(actions)
        # NOTE(review): only the first three actions are kept; this looks like
        # a leftover of an experiment together with the disabled shuffle
        # above - confirm. Slicing avoids an IndexError for smaller action sets.
        actions = actions[:3]

        # Alternative kernel-based value approximator, currently disabled:
        # def inclusion(approximator, state):
        #     try:
        #         approx = approximator._queries[state]
        #     except KeyError:
        #         approx = approximator.Approximation(approximator, state, approximator._kernelfn)
        #
        #     return len(approx._weights) <= 0 or approx._neighbors[0][0] > self._config['value_tau']
        #
        # value_approximator = KernelApproximator(0.01, maxd, dimensions, lambda x: np.exp(-x ** 2), inclusion)
        value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)
        mdp = MDP(actions, value_approximator)
        planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0, self._config['discountfactor'],
                                      self._config['epsilon'])

        self._learner = Learner(planner)
예제 #3
0
    def init(self, taskspec):
        """Initializes the agent.

        Parses the task specification, derives a scaling factor for every
        observation dimension, creates one ``MDPAction`` per discrete action
        (each backed by its own ``KernelApproximator``) and stores a
        ``Learner`` driving a ``PrioritizedSweeping`` planner in
        ``self._learner``.

        Parameters
        ----------
        taskspec : str
            The task specification.

        Raises
        ------
        TaskSpecError
            If the parser reports the task specification as invalid.
        AssertionError
            If the task has no discrete action dimension or has any
            continuous action dimension.

        """
        ts = TaskSpecParser(taskspec)
        if not ts.valid:
            raise TaskSpecError('TaskSpec Error: Invalid task spec version')

        _, maxval = ts.get_reward_range()

        # Find which optional descriptors occur in the extra section and
        # where; descriptors that are absent are removed from ``v``.
        extra = ts.get_extra()
        v = ['OBSDESCR', 'ACTDESCR', 'COPYRIGHT']
        pos = []
        for id_ in list(v):  # iterate over a copy because ``v`` is mutated
            try:
                pos.append(extra.index(id_))
            except ValueError:
                v.remove(id_)
        sorted_v = sorted(zip(pos, v))

        act_desc = {}
        for i, (_, id_) in enumerate(sorted_v):
            val = ts.get_value(i, extra, v)
            if id_ == 'ACTDESCR':
                # SECURITY: eval() executes arbitrary code contained in the
                # task specification; this is only safe for trusted task
                # specs. NOTE(review): if the descriptor is always a plain
                # literal, ast.literal_eval would be a safer choice - confirm.
                act_desc = eval(val)

        # One scaling factor per observation dimension: 1.0 for every integer
        # observation and 1 / (upper - lower) for every double observation.
        obs = ts.get_double_obs()
        dimensions = [1.0] * ts.get_num_int_obs()
        if len(obs) > 0:
            # Materialize the transposed limits so they can be indexed on
            # both Python 2 and Python 3 (zip() is an iterator on Python 3).
            obs_bounds = list(zip(*obs))
            lower = np.asarray(obs_bounds[0])
            upper = np.asarray(obs_bounds[1])
            dimensions += (1.0 / (upper - lower)).tolist()

        MDPState.set_feature_limits(obs)

        act_limits = ts.get_int_act()
        act_limits += ts.get_double_act()

        discrete_dim = ts.get_num_int_act()
        assert (discrete_dim > 0)
        continuous_dim = ts.get_num_double_act()
        assert (continuous_dim == 0)

        if discrete_dim > 1:
            import itertools

            # Joint actions: the Cartesian product of the per-dimension
            # integer ranges (+1 so that range() includes the upper limit).
            # NOTE(review): the joint actions are handed to MDPAction.create
            # as tuples - confirm that it accepts them.
            per_dim = [range(limits[0], limits[1] + 1) for limits in act_limits]
            action_ids = list(itertools.product(*per_dim))
        else:
            bounds = list(act_limits[0])
            bounds[1] += 1  # +1 so that range() includes the upper limit
            action_ids = range(*bounds)

        bb = self._config['modelbreadth'] * self._config['modelbreadth']
        # Distance at which the kernel below has decayed to ``minweight``.
        maxd = np.sqrt(-bb * np.log(self._config['minweight']))

        def kernelfn(x):
            """Gaussian-shaped kernel with squared breadth ``bb``."""
            return np.exp(-x * x / bb)

        actions = []
        for i in action_ids:
            model_approximator = KernelApproximator(self._config['minfraction'], maxd, dimensions, kernelfn)
            actions.append(
                MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                                 name=act_desc[i] if i in act_desc else None,
                                 feature_limits=act_limits))
        # shuffle(actions)
        # NOTE(review): only the first three actions are kept; this looks like
        # a leftover of an experiment together with the disabled shuffle
        # above - confirm. Slicing avoids an IndexError for smaller action sets.
        actions = actions[:3]

        value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)
        mdp = MDP(actions, value_approximator)
        planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0, self._config['discountfactor'],
                                      self._config['epsilon'])

        self._learner = Learner(planner)