def init(self):
        """Build the task specification for this task and return it.

        The specification is continuing, with a discount factor of 0.99 and
        a reward range of (-20, 20). Observations and actions are both
        declared as doubles with the range ``('NEGINF', 'POSINF')``,
        repeated 3 times when ``self._feature_rep`` is ``'larm'`` and 15
        times otherwise.

        Returns:
            The result of ``TaskSpec.to_taskspec()`` (presumably the
            serialized task specification -- confirm against ``TaskSpec``).
        """
        spec = TaskSpec(discount_factor=0.99, reward_range=(-20, 20))
        spec.set_continuing()

        # Observations and actions share the same range and repeat count.
        value_range = ('NEGINF', 'POSINF')
        count = 3 if self._feature_rep == 'larm' else 15
        spec.add_double_obs(value_range, repeat=count)
        spec.add_double_act(value_range, repeat=count)

        credit = "COPYRIGHT Penaltykick (Python) implemented by Astrid Jackson"
        spec.set_extra(self._ts_extra + credit)
        return spec.to_taskspec()
    def init(self):
        """Register the action descriptions and build the task specification.

        Sets the ``MDPAction`` description to the three named actions
        ``'out'``, ``'in'`` and ``'kick'``, then creates an episodic task
        specification with a discount factor of 0.99, a reward range of
        (-20, 20), a character-observation count of 0 and one double action
        with the range (-1.0, 0.004).

        The extra string passed to the specification is
        ``self._ts_extra`` followed by the action description and the
        copyright notice. It is composed in a local variable so that
        ``self._ts_extra`` is left untouched; appending to the attribute in
        place would add another ``ACTIONDESCR``/``COPYRIGHT`` segment on
        every repeated call of this method.

        Returns:
            The result of ``TaskSpec.to_taskspec()`` (presumably the
            serialized task specification -- confirm against ``TaskSpec``).
        """
        MDPAction.set_description({
            'out': {'value': [-0.004]},
            'in': {'value': [0.004]},
            'kick': {'value': [-1.0]}
        })

        ts = TaskSpec(discount_factor=0.99, reward_range=(-20, 20))
        ts.set_episodic()
        ts.set_charcount_obs(0)
        ts.add_double_act((-1.0, 0.004))

        # Do not mutate self._ts_extra: init may run more than once on the
        # same instance and must produce the same specification each time.
        extra = (
            self._ts_extra
            + "ACTIONDESCR %s " % str(MDPAction.description)
            + "COPYRIGHT Penaltykick (Python) implemented by Astrid Jackson"
        )
        ts.set_extra(extra)
        return ts.to_taskspec()
Exemple #3
0
    def init(self):
        """Build the Mountain Car task specification and return it.

        The specification is episodic, with a discount factor of 1.0 and a
        reward range of (-1.0, 0.0). One double observation is added per
        ``(min, max)`` pair in ``self._limits`` and a single integer action
        with the range (0, 2) is declared. The extra string carries the
        observation description, the action description and the copyright
        notice.

        Returns:
            The result of ``TaskSpec.to_taskspec()`` (presumably the
            serialized task specification -- confirm against ``TaskSpec``).
        """
        spec = TaskSpec(discount_factor=1.0, reward_range=(-1.0, 0.0))
        spec.set_episodic()

        # Infinite bounds are handed over as the 'NEGINF' / 'POSINF' markers
        # rather than as float infinities.
        for lower, upper in self._limits:
            lower = 'NEGINF' if lower == -np.inf else lower
            upper = 'POSINF' if upper == np.inf else upper
            spec.add_double_obs((lower, upper))

        spec.add_int_act((0, 2))

        observation_info = "OBSDESCR {'descr':['car position','car velocity']}"
        action_info = "ACTDESCR {2:'forward',1:'neutral',0:'reverse'}"
        credit = " COPYRIGHT Mountain Car (Python) implemented by Astrid Jackson."

        spec.set_extra(" ".join((observation_info, action_info)) + credit)
        return spec.to_taskspec()