def init(self):
    """Assemble the task specification for this environment.

    Creates a ``TaskSpec`` with a discount factor of 0.99 and a reward
    range of (-20, 20), marks it as continuing, and declares double-valued
    observations and actions bounded by ``('NEGINF', 'POSINF')``. The
    ``repeat`` count passed for both is 3 when ``self._feature_rep`` is
    ``'larm'`` and 15 otherwise. The extra field is ``self._ts_extra``
    followed by the copyright notice; ``self._ts_extra`` is not modified.

    Returns:
        Whatever ``TaskSpec.to_taskspec()`` returns for the configured spec.
    """
    spec = TaskSpec(discount_factor=0.99, reward_range=(-20, 20))
    spec.set_continuing()

    # Observations and actions share the same bounds and repeat count.
    unbounded = ('NEGINF', 'POSINF')
    n_repeat = 3 if self._feature_rep == 'larm' else 15
    spec.add_double_obs(unbounded, repeat=n_repeat)
    spec.add_double_act(unbounded, repeat=n_repeat)

    copyright_note = "COPYRIGHT Penaltykick (Python) implemented by Astrid Jackson"
    spec.set_extra(self._ts_extra + copyright_note)
    return spec.to_taskspec()
def init(self):
    """Register the action descriptions and build the task specification.

    Sets the ``MDPAction`` description to the three named actions
    (``'out'``, ``'in'``, ``'kick'``), then creates a ``TaskSpec`` with a
    discount factor of 0.99 and a reward range of (-20, 20), marks it as
    episodic, sets the char-count observation to 0 and declares one
    double-valued action in the range (-1.0, 0.004).

    The action description and copyright notice are appended to
    ``self._ts_extra`` and the result is used as the spec's extra field.
    The append is skipped when that exact text is already present, so
    calling ``init`` repeatedly does not duplicate it.

    Returns:
        Whatever ``TaskSpec.to_taskspec()`` returns for the configured spec.
    """
    MDPAction.set_description({
        'out': {'value': [-0.004]},
        'in': {'value': [0.004]},
        'kick': {'value': [-1.0]},
    })

    ts = TaskSpec(discount_factor=0.99, reward_range=(-20, 20))
    ts.set_episodic()
    ts.set_charcount_obs(0)
    ts.add_double_act((-1.0, 0.004))

    action_descr = "ACTIONDESCR %s " % str(MDPAction.description)
    copyright_note = "COPYRIGHT Penaltykick (Python) implemented by Astrid Jackson"
    suffix = action_descr + copyright_note

    # self._ts_extra persists across calls; appending unconditionally would
    # repeat the description and copyright each time init() is re-invoked.
    if suffix not in self._ts_extra:
        self._ts_extra += suffix

    ts.set_extra(self._ts_extra)
    return ts.to_taskspec()
def init(self):
    """Assemble the task specification for this environment.

    Creates a ``TaskSpec`` with a discount factor of 1.0 and a reward range
    of (-1.0, 0.0), marks it as episodic, and adds one double-valued
    observation per ``(min, max)`` pair in ``self._limits``. A lower bound
    equal to ``-np.inf`` is replaced by ``'NEGINF'`` and an upper bound
    equal to ``np.inf`` by ``'POSINF'``. One integer action in the range
    (0, 2) is declared, and the extra field carries the observation
    description, the action description and the copyright notice.

    Returns:
        Whatever ``TaskSpec.to_taskspec()`` returns for the configured spec.
    """
    spec = TaskSpec(discount_factor=1.0, reward_range=(-1.0, 0.0))
    spec.set_episodic()

    for lower, upper in self._limits:
        # Infinite bounds are passed as the string markers, not as floats.
        bounds = (
            'NEGINF' if lower == -np.inf else lower,
            'POSINF' if upper == np.inf else upper,
        )
        spec.add_double_obs(bounds)

    spec.add_int_act((0, 2))

    obs_note = "OBSDESCR {'descr':['car position','car velocity']}"
    act_note = "ACTDESCR {2:'forward',1:'neutral',0:'reverse'}"
    # The copyright text carries its own leading space as a separator.
    copyright_note = " COPYRIGHT Mountain Car (Python) implemented by Astrid Jackson."
    spec.set_extra(" ".join((obs_note, act_note)) + copyright_note)
    return spec.to_taskspec()