def reset(self): #if self.state == feb.RUN: if self.state is None: return FurutaEnvTorqueDeepq.reset(self, position=0, velocity=0) # position 0rad is upright return FurutaEnvTorqueDeepq.reset( self, position=cm.sgn() * np.random.uniform(0, cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D)), #position=cm.sgn()*cm.deg2Rad(feb.ANGLE_TERMINAL_MIN_D), velocity=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE), arm_vel=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_ARM))
def reset(self): # position 0rad is upright return FurutaEnvTorqueDeepq.reset( self, #position=cm.sgn()*np.random.uniform(cm.deg2Rad(feb.ANGLE_TERMINAL_MIN_D), 2*feb.PI), #velocity=cm.sgn()*np.random.uniform(0, feb.VEL_MAX_POLE), #arm_vel=cm.sgn()*np.random.uniform(0, feb.VEL_MAX_ARM) # start in the spin we never want to see position=cm.sgn() * np.random.uniform(cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D), 2 * cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D)), #position=cm.sgn()*feb.PI, velocity=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE), arm_vel=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_ARM))
def reset(self): # position 0rad is upright if False: #self.state == feb.RUN: return FurutaEnvPosTrpo.reset(self, position=cm.deg2Rad( cm.ANGLE_TERMINAL_MIN_D / 2), velocity=0, arm_vel=0) return FurutaEnvPosTrpo.reset( self, position=cm.sgn() * np.random.uniform(0, cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D)), velocity=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE), arm_vel=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_ARM))
def haze(self): if abs(self.pole_angle_real) > cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D): return velocity = 0 if cm.TRAIN == self.state and POS_HAZE == cm.TRAIN: if self._envStepCounter % 500 == 0: velocity = cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE) * 0.3 if cm.RUN == self.state and POS_HAZE == cm.RUN: if self._envStepCounter % 500 == 0: velocity = cm.sgn() * cm.VEL_MAX_POLE * 0.2 if 0 != velocity: p.setJointMotorControl2(bodyUniqueId=self.botId, jointIndex=self.poleId, controlMode=p.VELOCITY_CONTROL, targetVelocity=self.pole_vel_real + velocity)
def randTraj(action): if np.random.uniform(0, 1) > 0.0: action += cm.sgn() * round(np.random.uniform(0, 1)**3 * 4, 0) action = cm.clamp(action, 0, len(action_table) - 1) return int(action)