"state": state, "action": action }) try: data[-2]["next_state"] = state print(data[-2]) except IndexError: pass if x < 0.2: command = min(command,0) if x > 0.8: command = max(command, 0) cart.setSpeed(command) commandqueue.put(command) commandqueue.get() print(commandqueue.queue) key = cv2.waitKey(1) if key & 0xFF == 32: print("toggle enable") old_data += data[0:-2] data = [] itheta = 0 cart.toggleEnable() elif key & 0xFF == ord('q'):
from sabretooth_command import CartCommand

# Manual tele-operation script: reads axis 0 of the first joystick and
# forwards it to the cart as a speed command, 30 times per second.

# Axis readings with a magnitude below this are treated as zero so the cart
# does not creep when the stick is released (sticks rarely rest at exactly 0).
DEADZONE = 0.2
# Speed sent to the cart at full stick deflection (pygame reports axis values
# in the range [-1, 1]).
MAX_COMMAND = 2046
# Upper bound on control-loop iterations per second.
LOOP_RATE_HZ = 30

pygame.init()

cart = CartCommand()
cart.toggleEnable()

pygame.joystick.init()
clock = pygame.time.Clock()

# Function-call form of print: identical output on Python 2 for a single
# argument, and valid syntax on Python 3 (the bare statement form is not).
print(pygame.joystick.get_count())

_joystick = pygame.joystick.Joystick(0)
_joystick.init()

while True:
    # Pump the event queue so pygame refreshes the joystick axis values.
    pygame.event.get()
    xdir = _joystick.get_axis(0)
    # Disabled inputs/outputs kept for reference:
    # rtrigger = _joystick.get_axis(5)
    # ltrigger = _joystick.get_axis(4)
    # print(xdir * 200)

    if abs(xdir) < DEADZONE:
        xdir = 0.0

    print(xdir * 100)
    cart.setSpeed(xdir * MAX_COMMAND)

    # Disabled UDP forwarding kept for reference:
    # MESSAGE = pickle.dumps([xdir,rtrigger,ltrigger])
    # sock.sendto(MESSAGE, (UDP_IP, UDP_PORT))

    clock.tick(LOOP_RATE_HZ)
class CartPoleEnv(gym.Env):
    """Gym environment driving a physical cart-pole rig.

    Observations come from an ``ImageAnalyzer`` (camera) and actions are
    turned into speed commands for a ``CartCommand`` motor controller.

    The observation is the 6-element vector
    ``[x, dx, xpole, dxpole, ypole, dypole]`` where ``xpole``/``ypole`` are
    the cosine/sine of the pole angle and every ``d*`` entry is the
    difference from the previous observation (not divided by elapsed time).

    There are two discrete actions: ``0`` raises the speed command by one
    step, any other value lowers it by one step.
    """

    # Largest speed magnitude ever sent to the cart.
    MAX_COMMAND = 2046
    # Minimum magnitude of the push-back command applied near the track ends.
    EDGE_COMMAND = 500
    # Cart positions outside this band trigger the push-back in ``_step``.
    EDGE_LOW = 0.35
    EDGE_HIGH = 0.65
    # Speed magnitude used to drive the cart back to the centre on reset.
    RESET_COMMAND = 1000

    def __init__(self, cartport="/dev/ttyACM0", imageport=1):
        """Open the camera and the motor controller and take a first reading.

        Args:
            cartport: Port passed to ``CartCommand``.
            imageport: Camera identifier passed to ``ImageAnalyzer``.
        """
        self.analyzer = ImageAnalyzer(imageport)
        self.cart = CartCommand(port=cartport)

        self.action_space = spaces.Discrete(2)
        # NOTE(review): the lower bound for xpole (index 2) is 0 although
        # cos(theta) can reach -1 -- confirm whether that is intentional.
        self.observation_space = spaces.Box(
            np.array([0., -50., 0., -50., -1., -50.]),
            np.array([1., 50., 1., 50., 1., 50.]))

        # Speed command accumulated across steps; the cart starts at rest.
        self.command = 0.0

        # With last_state unset, _getState() reports zero deltas.
        self.last_state = None
        self.state = self._getState()
        self.last_state = self._getState()

    @classmethod
    def _clamp(cls, command):
        """Limit ``command`` to ``[-MAX_COMMAND, MAX_COMMAND]``."""
        return min(max(command, -cls.MAX_COMMAND), cls.MAX_COMMAND)

    def _step(self, action):
        """Apply one action and return ``(observation, reward, done, info)``.

        Args:
            action: ``0`` to increase the speed command by one step, anything
                else to decrease it.

        Returns:
            A tuple of the new observation (numpy array), the reward for that
            observation, ``False`` (episodes never terminate on their own)
            and an empty info dict.
        """
        # Discrete spaces cannot be indexed; the first action is simply 0.
        d_command = 1. if action == 0 else -1.

        # NOTE(review): ``commandStep`` is not defined in this class; it is
        # expected to exist at module scope -- confirm.
        command = self._clamp(self.command + commandStep * d_command)

        # Near either end of the track, override the policy with a command
        # of at least EDGE_COMMAND magnitude, in the same direction _reset
        # uses to bring the cart back towards the centre.
        x = self.state[0]
        if x < self.EDGE_LOW:
            command = min(command, -self.EDGE_COMMAND)
        if x > self.EDGE_HIGH:
            command = max(command, self.EDGE_COMMAND)

        self.command = command
        self.cart.setSpeed(command)

        self.last_state = self.state
        self.state = self._getState()

        reward = self._getReward(self.state)
        done = False
        return np.array(self.state), reward, done, {}

    def _reset(self):
        """Drive the cart back to the middle of the track.

        Returns:
            The observation taken after the cart has stopped, as a numpy
            array with all delta entries set to zero.
        """
        x, dx, theta, dtheta = self.analyzer.analyzeFrame()
        self.cart.enabled = True
        while not 0.4 < x < 0.6:
            x, dx, theta, dtheta = self.analyzer.analyzeFrame()
            command = self._clamp(self.RESET_COMMAND * np.sign(x - 0.5))
            self.cart.setSpeed(command)
            cv2.waitKey(1)
        self.cart.setSpeed(0)
        # Give the cart a moment to actually come to rest.
        sleep(0.3)
        self.cart.enabled = False

        # Start the new episode from a clean slate: no accumulated command
        # and no deltas carried over from before the reset.
        self.command = 0.0
        self.last_state = None
        self.state = self._getState()
        self.last_state = self.state
        return np.array(self.state)

    def _getData(self):
        """Read one frame and return ``(x, cos(theta), sin(theta))``."""
        x, dx, theta, dtheta = self.analyzer.analyzeFrame()
        xpole = np.cos(theta)
        ypole = np.sin(theta)
        return x, xpole, ypole

    def _getState(self):
        """Build the observation list from a fresh camera reading.

        Deltas are taken against ``self.last_state``; when that is ``None``
        they are reported as zero.
        """
        x, xpole, ypole = self._getData()
        if self.last_state is None:
            return [x, 0, xpole, 0, ypole, 0]
        return [
            x, x - self.last_state[0],
            xpole, xpole - self.last_state[2],
            ypole, ypole - self.last_state[4],
        ]

    def _getReward(self, state):
        """Return the reward for ``state``.

        Args:
            state: A single observation (length-6 sequence) or a batch of
                observations with the 6 features on the last axis.

        Returns:
            A scalar for a single observation, or one reward per row for a
            batch.
        """
        # _getState() yields a plain list; convert so the feature index works
        # for both a single observation and a 2-D batch.
        state = np.asarray(state)
        rewards_pole = 0.0 * (state[..., 4] + 0.5)**2  # ypole height (weight currently 0)
        rewards_cart = -2.0 * np.power(state[..., 0], 2)  # xcart pos
        return rewards_cart + rewards_pole

    def _render(self, mode='human', close=False):
        """Do nothing; this environment draws no visualisation of its own."""
        pass