def init(self):
    """Build and return the task specification for the penalty-kick task.

    The task is declared as continuing, with a discount factor of 0.99 and
    a reward range of (-20, 20). Observations and actions are both doubles
    with no finite bounds ('NEGINF' to 'POSINF'); each is repeated 3 times
    when ``self._feature_rep`` is ``'larm'`` and 15 times otherwise.

    Returns:
        Whatever ``TaskSpec.to_taskspec()`` produces for this configuration
        (presumably the serialized task-spec string -- confirm against the
        ``TaskSpec`` implementation).
    """
    spec = TaskSpec(discount_factor=0.99, reward_range=(-20, 20))
    spec.set_continuing()

    # The same dimensionality applies to the observation and action spaces.
    if self._feature_rep == 'larm':
        num_dims = 3
    else:
        num_dims = 15

    unbounded = ('NEGINF', 'POSINF')
    spec.add_double_obs(unbounded, repeat=num_dims)
    spec.add_double_act(unbounded, repeat=num_dims)

    credit = "COPYRIGHT Penaltykick (Python) implemented by Astrid Jackson"
    spec.set_extra(self._ts_extra + credit)
    return spec.to_taskspec()
def init(self):
    """Build and return the task specification for the mountain-car task.

    The task is declared as episodic, with a discount factor of 1.0 and a
    reward range of (-1.0, 0.0). One double observation dimension is added
    per ``(min, max)`` pair in ``self._limits``; infinite bounds are
    replaced by the task-spec tokens ``'NEGINF'`` / ``'POSINF'``. A single
    integer action with range (0, 2) is added, and the extra section
    carries the observation/action descriptions plus the copyright note.

    Returns:
        Whatever ``TaskSpec.to_taskspec()`` produces for this configuration
        (presumably the serialized task-spec string -- confirm against the
        ``TaskSpec`` implementation).
    """
    ts = TaskSpec(discount_factor=1.0, reward_range=(-1.0, 0.0))
    ts.set_episodic()

    # Iterate the limit pairs directly; the positional index is not needed.
    for lower, upper in self._limits:
        # Infinite bounds are passed on as symbolic tokens, not float infs.
        lower_bound = 'NEGINF' if lower == -np.inf else lower
        upper_bound = 'POSINF' if upper == np.inf else upper
        ts.add_double_obs((lower_bound, upper_bound))

    ts.add_int_act((0, 2))

    extra = " COPYRIGHT Mountain Car (Python) implemented by Astrid Jackson."
    state_descr = "OBSDESCR {'descr':['car position','car velocity']}"
    action_descr = "ACTDESCR {2:'forward',1:'neutral',0:'reverse'}"
    ts.set_extra(state_descr + " " + action_descr + extra)

    return ts.to_taskspec()