def __init__(self):
    """Create a one-layer hierarchy with three inputs and reset bookkeeping.

    The hierarchy takes three inputs, in this order:

    * a 4 x 3 input with ``ANGLE_RESOLUTION`` cells per column, typed
      ``pyaon.inputTypePrediction``;
    * a 3 x 1 input with ``COMMAND_RESOLUTION`` cells per column, typed
      ``pyaon.inputTypeNone``;
    * a 3 x 2 input with ``IMU_RESOLUTION`` cells per column, typed
      ``pyaon.inputTypeNone``.
    """
    pyaon.setNumThreads(8)

    # Only one layer is used; every radius shares the same value.
    layer = pyaon.LayerDesc()
    layer.hiddenSize = Int3(5, 5, 16)
    layer.ffRadius = 5
    layer.lRadius = 5
    layer.pRadius = 5
    layer.aRadius = 5
    layer.ticksPerUpdate = 2
    layer.temporalHorizon = 2
    layer_descs = [layer]

    input_sizes = [
        Int3(4, 3, ANGLE_RESOLUTION),
        Int3(3, 1, COMMAND_RESOLUTION),
        Int3(3, 2, IMU_RESOLUTION),
    ]
    input_types = [
        pyaon.inputTypePrediction,
        pyaon.inputTypeNone,
        pyaon.inputTypeNone,
    ]

    self.h = pyaon.Hierarchy(input_sizes, input_types, layer_descs)

    # Running state, all starting from neutral values.
    self.reward = 1.0
    self.direction = np.array([0.0, 0.0, 0.0])
    self.average_error = 0.0
    self.average_error_decay = 0.999
    self.num_samples = 0

    # Twelve per-motor offsets, stored as three groups of four values.
    self.offsets = np.array([
        -0.12295051, 0.12295051, -0.12295051, 0.12295051,
        0.77062617, 0.77062617, 0.77062617, 0.77062617,
        -0.845151, -0.845151, -0.845151, -0.845151,
    ])
def main(use_imu=False, default_velocity=np.zeros(2), default_yaw_rate=0.0, lock_frame_rate=True):
    """Run the simulated robot from a gamepad through a saved hierarchy.

    The hierarchy is loaded from ``"pupper.ohr"``. The physics simulation is
    stepped at 240 steps per simulated second. Whenever the accumulated
    simulation time exceeds ``config.dt`` a control tick runs: pending
    gamepad events are read, the hierarchy is stepped with the previous
    actions, the encoded command and a constant mid-range IMU encoding, and
    the predicted actions are converted into rate-limited joint angles that
    are sent to the hardware interface. Every 50000 simulation steps the
    hierarchy is saved to ``"pupper_rltrained.ohr"``.

    Args:
        use_imu: When true, an ``IMU`` handle is created. Its readings are
            not used anywhere in this function.
        default_velocity: Not used in this function.
        default_yaw_rate: Not used in this function.
        lock_frame_rate: When true, sleep after each simulation step so a
            step takes at least ``1 / 240`` seconds of wall time.

    The loop never exits on its own; ``pygame.quit()`` runs when it is
    interrupted by an exception (for example ``KeyboardInterrupt``).
    """
    device = evdev.InputDevice(list_devices()[0])
    print(device)

    # Create the simulation and the interface that applies joint angles.
    sim = Sim()
    hardware_interface = HardwareInterface(sim.model, sim.joint_indices)

    # Create imu handle (kept referenced, although nothing reads from it).
    if use_imu:
        imu = IMU()

    # Load hierarchy
    pyaon.setNumThreads(4)
    h = pyaon.Hierarchy("pupper.ohr")

    # Current (rate-limited) angle of each of the 12 motors.
    angles = 12 * [0.0]

    # Sim seconds per sim step
    sim_steps_per_sim_second = 240
    sim_dt = 1.0 / sim_steps_per_sim_second
    sim_time_elapsed = 0.0

    # Reward accumulated over the sim steps between two control ticks.
    control_reward_accum = 0.0
    control_reward_accum_steps = 0

    vels = ([0, 0, 0], [0, 0, 0])
    steps = 0

    actions = list(h.getPredictionCs(0))

    # Create config
    config = Configuration()
    config.z_clearance = 0.05

    # Create controller and user input handles
    controller = Controller(
        config,
        four_legs_inverse_kinematics,
    )
    state = State()
    state.behavior_state = BehaviorState.TROT

    max_speed = 0.5
    max_yaw_rate = 1.5

    offsets = np.array([
        -0.12295051, 0.12295051, -0.12295051, 0.12295051,
        0.77062617, 0.77062617, 0.77062617, 0.77062617,
        -0.845151, -0.845151, -0.845151, -0.845151,
    ])

    try:
        while True:
            start_step_time = time.time()

            sim_time_elapsed += sim_dt

            if sim_time_elapsed > config.dt:
                # NOTE(review): `ls` and `rs` are not defined in this
                # function; presumably module-level two-element stick-state
                # lists -- confirm against the rest of the file.
                try:
                    for event in device.read():
                        if event.type == ecodes.EV_ABS:
                            if event.code == ecodes.ABS_X:
                                ls[0] = event.value / 32767.0
                            elif event.code == ecodes.ABS_Y:
                                ls[1] = event.value / 32767.0
                            elif event.code == ecodes.ABS_RX:
                                rs[0] = event.value / 32767.0
                            elif event.code == ecodes.ABS_RY:
                                rs[1] = event.value / 32767.0
                except OSError:
                    # Best effort: having no pending events (BlockingIOError)
                    # or a device read failure keeps the previous stick
                    # values. Unlike a bare `except`, this lets
                    # KeyboardInterrupt through.
                    pass

                sim_time_elapsed = sim_time_elapsed % config.dt

                # The IMU input is fed a constant mid-range value; the
                # measured velocities only determine how many columns there
                # are.
                imu_vals = list(vels[0]) + list(vels[1])
                imu_SDR = [IMU_RESOLUTION // 2 for _ in imu_vals]

                direction = np.array([-ls[1], -ls[0], -rs[0]])

                sim.set_direction(np.array([
                    direction[0] * max_speed,
                    direction[1] * max_speed,
                    direction[2] * max_yaw_rate,
                ]))

                # Map each direction component from [-1, 1] onto a cell index.
                command_SDR = [
                    int((direction[i] * 0.5 + 0.5) * (COMMAND_RESOLUTION - 1) + 0.5)
                    for i in range(3)
                ]

                # The reward passed in is the mean over the sim steps since
                # the previous control tick.
                h.step(
                    [actions, command_SDR, imu_SDR],
                    False,
                    control_reward_accum / max(1, control_reward_accum_steps),
                )

                actions = list(h.getPredictionCs(0))

                control_reward_accum = 0.0
                control_reward_accum_steps = 0

                joint_angles = np.zeros((3, 4))

                motor_index = 0

                for segment_index in range(3):
                    for leg_index in range(4):
                        # Decode the cell index to +/- pi/4 around the
                        # motor's offset.
                        target_angle = (
                            actions[motor_index] / float(ANGLE_RESOLUTION - 1) * 2.0 - 1.0
                        ) * (0.25 * np.pi) + offsets[motor_index]

                        # Move 30% of the way to the target, limited to
                        # max_delta per control tick.
                        delta = 0.3 * (target_angle - angles[motor_index])

                        max_delta = 0.05

                        if abs(delta) > max_delta:
                            delta = max_delta if delta > 0.0 else -max_delta

                        angles[motor_index] += delta

                        joint_angles[segment_index, leg_index] = angles[motor_index]

                        motor_index += 1

                # Command derived from the stick direction.
                command = Command()
                command.horizontal_velocity = direction[0:2] * max_speed
                command.yaw_rate = direction[2] * max_yaw_rate

                quat_orientation = np.array([1, 0, 0, 0])
                state.quat_orientation = quat_orientation

                # Step the controller forward by dt. Its resulting
                # state.joint_angles are not what is sent below; the
                # hierarchy-derived joint_angles are.
                controller.run(state, command)

                # Update the pwm widths going to the servos
                hardware_interface.set_actuator_postions(joint_angles)

            # Simulate physics for 1/240 seconds (the default timestep)
            reward, vels = sim.step()

            control_reward_accum += reward
            control_reward_accum_steps += 1

            if steps % 50000 == 49999:
                print("Saving...")
                h.save("pupper_rltrained.ohr")

            steps += 1

            # Performance testing
            step_elapsed = time.time() - start_step_time

            # Keep framerate
            if lock_frame_rate:
                time.sleep(max(0, sim_dt - step_elapsed))
    finally:
        # The loop has no normal exit, so clean up when it is interrupted.
        pygame.quit()
def main():
    """Pre-train a hierarchy to reproduce the manual controller's commands.

    A three-layer hierarchy with three inputs (two typed ``pyaon.none`` and
    one typed ``pyaon.action``) is created with random initialisation. In
    each iteration the manual controller is stepped with a constant pad
    value, the inverse-kinematics results are mapped to 8 motor angles, and
    the hierarchy is stepped with random values on the first two inputs and
    the encoded motor command (8 angles followed by 8 kP values) on the
    third. A running average of the squared prediction error is printed
    roughly every 1000 iterations, and the hierarchy is saved to
    ``"lorcan_mini_pretrained.ohr"`` roughly every 10000 iterations.

    The loop ends when any ``Exception`` is raised inside it; the exception
    is printed and the function returns after a final message.
    """
    pyaon.setNumThreads(8)

    lds = []

    for _ in range(3):
        ld = pyaon.LayerDesc()
        ld.hiddenSize = (3, 3, 16)
        lds.append(ld)

    h = pyaon.Hierarchy()
    h.initRandom(
        [
            pyaon.IODesc((2, 4, sensorRes), pyaon.none, 2, 2, 2, 32),  # Priop
            pyaon.IODesc((2, 3, sensorRes), pyaon.none, 2, 2, 2, 32),  # IMU
            pyaon.IODesc((4, 4, motorRes), pyaon.action, 2, 2, 2, 32),  # Motor control
        ],
        lds,
    )

    iks = [DeltaIK() for _ in range(4)]

    controller = ManualController(iks)

    angles = 8 * [0.0]
    kPs = 8 * [1.0]

    frametime = 1.0 / 30.0

    print("Ready.")

    saveTimer = 0
    errorTimer = 0
    averageError = 0.0

    while True:
        try:
            pad_y = 1.0

            controller.step(pad_y, frametime)

            # Map IK leg results to motors
            for i in range(4):
                spread = np.pi / 2.0 - iks[i].A
                angles[legMap[i][0]] = legDirs[i][0] * (iks[i].angle - spread)
                angles[legMap[i][1]] = legDirs[i][1] * (iks[i].angle + spread)

            # Train with noise on the first input
            priopSDR = [np.random.randint(0, sensorRes) for _ in range(8)]

            # Random IMU training
            imuSDR = [np.random.randint(0, sensorRes) for _ in range(6)]

            # Train with commands from manual controller: columns 0-7 hold
            # the angles (clamped to [0, 1] after scaling by maxAngleRange),
            # columns 8-15 hold the kP values (maximum by default).
            angleCells = [
                int(min(1.0, max(0.0, angles[i] / maxAngleRange * 0.5 + 0.5)) * (motorRes - 1) + 0.5)
                for i in range(8)
            ]
            kPCells = [int(kPs[i] * (motorRes - 1) + 0.5) for i in range(8)]
            motorSDR = angleCells + kPCells

            # Fetch the prediction once per step instead of once per column.
            predicted = h.getPredictionCIs(2)

            error = 0.0

            for i, target in enumerate(motorSDR):
                delta = (target - predicted[i]) / float(motorRes - 1)
                error += delta * delta

            averageError = 0.99 * averageError + 0.01 * error

            h.step([priopSDR, imuSDR, motorSDR], True, 0.0, True)

            # Show error rate occasionally
            if errorTimer >= 1000:
                errorTimer = 0
                print("Error: " + str(averageError))

            errorTimer += 1

            # Save occasionally
            if saveTimer >= 10000:
                saveTimer = 0
                print("Saving...")
                h.saveToFile("lorcan_mini_pretrained.ohr")
                print("Saved.")

            saveTimer += 1
        except Exception as e:
            # Top-level boundary: report the failure and leave the loop.
            print(e)
            break

    print("-- Program at End --")
# Layer descriptors hold the parameters each layer is created with.
# Layers have exponential memory; Cart-Pole needs little of it, so two
# layers are enough.
lds = []

for i in range(2):
    ld = pyaon.LayerDesc(hiddenSize=(3, 3, 16))
    ld.eRadius = 1
    ld.dRadius = 1

    lds.append(ld)

# Input/output descriptors: a 3 x 3 predicted input with `res` cells per
# column, and a single action column with `numActions` cells.
_io_descs = [
    pyaon.IODesc((3, 3, res), pyaon.prediction, eRadius=1, dRadius=1),
    pyaon.IODesc(
        (1, 1, numActions),
        pyaon.action,
        eRadius=0,
        dRadius=1,
        historyCapacity=64,
    ),
]

# Create the hierarchy with random initialisation.
h = pyaon.Hierarchy()
h.initRandom(_io_descs, lds)

# Settings for input index 1 (the action input).
h.setAVLR(1, 0.01)
h.setAALR(1, 0.01)
h.setADiscount(1, 0.99)
h.setAHistoryIters(1, 16)

reward = 0.0
def __init__(self,
             env,
             layerSizes=2 * [(4, 4, 16)],
             layerRadius=2,
             hiddenSize=(8, 8, 16),
             imageRadius=8,
             imageScale=1.0,
             obsResolution=32,
             actionResolution=16,
             rewardScale=1.0,
             terminalReward=0.0,
             infSensitivity=1.0,
             nThreads=8):
    """Build a hierarchy whose inputs mirror a gym environment's spaces.

    The observation space contributes one input (or, for 2-D / 3-D ``Box``
    observations, an image routed through an image encoder whose hidden
    layer becomes the input). The action space contributes one input typed
    ``pyaon.action``. Random initial actions are drawn for it.

    Args:
        env: Environment exposing ``observation_space`` and
            ``action_space``.
        layerSizes: Hidden size of each hierarchy layer, one entry per
            layer. The default list is only read, never modified.
        layerRadius: Used as ``eRadius`` and ``dRadius`` of every layer and
            passed positionally (twice) to every ``IODesc``.
        hiddenSize: Hidden size of the image encoder, and the size of the
            input it feeds.
        imageRadius: Second argument of each image-encoder visible layer
            descriptor.
        imageScale: Factor applied to the first two dimensions of an image
            observation.
        obsResolution: Cells per column for a 0-D / 1-D ``Box`` observation.
        actionResolution: Cells per column for a ``Box`` action.
        rewardScale: Stored on the instance.
        terminalReward: Stored on the instance.
        infSensitivity: Stored on the instance.
        nThreads: Passed to ``pyaon.setNumThreads``.

    Raises:
        Exception: If the observation or action space is neither
            ``Discrete`` nor ``Box``, or is a ``Box`` with too many
            dimensions.
    """
    self.env = env

    pyaon.setNumThreads(nThreads)

    self.imEnc = None
    self.imEncIndex = -1

    self.inputSizes = []
    self.inputLows = []
    self.inputHighs = []
    self.inputTypes = []
    self.imageSizes = []
    self.imgsPrev = []
    self.actionIndices = []

    self.rewardScale = rewardScale
    self.terminalReward = terminalReward

    self.infSensitivity = infSensitivity

    # Observations. isinstance (rather than an exact type match) also
    # accepts subclasses of the supported space types.
    obsSpace = self.env.observation_space

    if isinstance(obsSpace, gym.spaces.Discrete):
        self.inputSizes.append((1, 1, obsSpace.n))
        self.inputTypes.append(pyaon.prediction)
        self.inputLows.append([0.0])
        self.inputHighs.append([0.0])
    elif isinstance(obsSpace, gym.spaces.Box):
        obsDims = len(obsSpace.shape)

        if obsDims == 1 or obsDims == 0:
            # Lay the values out on the smallest square that holds them.
            squareSize = int(np.ceil(np.sqrt(len(obsSpace.low))))

            self.inputSizes.append((squareSize, squareSize, obsResolution))
            self.inputTypes.append(pyaon.prediction)

            lows = list(obsSpace.low)
            highs = list(obsSpace.high)

            # Detect large numbers/inf
            for i, (low, high) in enumerate(zip(lows, highs)):
                if abs(low) > 100000 or abs(high) > 100000:
                    # Indicate inf by making low greater than high
                    lows[i] = 1.0
                    highs[i] = -1.0

            self.inputLows.append(lows)
            self.inputHighs.append(highs)
        elif obsDims == 2:
            # Image with a single channel.
            scaledSize = (int(obsSpace.shape[0] * imageScale),
                          int(obsSpace.shape[1] * imageScale), 1)

            self.imageSizes.append(scaledSize)
        elif obsDims == 3:
            # Image with three channels.
            scaledSize = (int(obsSpace.shape[0] * imageScale),
                          int(obsSpace.shape[1] * imageScale), 3)

            self.imageSizes.append(scaledSize)
        else:
            raise Exception("Unsupported Box input: Dimensions too high " +
                            str(obsSpace.shape))
    else:
        raise Exception("Unsupported input type " + str(type(obsSpace)))

    # Images go through an encoder; its hidden layer is the hierarchy input.
    if len(self.imageSizes) > 0:
        vlds = []

        for imageSize in self.imageSizes:
            vlds.append(
                pyaon.ImageEncoderVisibleLayerDesc(
                    (imageSize[0], imageSize[1], imageSize[2]), imageRadius))

            self.imgsPrev.append(np.zeros(imageSize))

        self.imEnc = pyaon.ImageEncoder()
        self.imEnc.initRandom(hiddenSize, vlds)

        self.imEncIndex = len(self.inputSizes)
        self.inputSizes.append(hiddenSize)
        self.inputTypes.append(pyaon.prediction)
        self.inputLows.append([0.0])
        self.inputHighs.append([1.0])

    # Actions
    actSpace = self.env.action_space

    if isinstance(actSpace, gym.spaces.Discrete):
        self.actionIndices.append(len(self.inputSizes))
        self.inputSizes.append((1, 1, actSpace.n))
        self.inputTypes.append(pyaon.action)
        self.inputLows.append([0.0])
        self.inputHighs.append([0.0])
    elif isinstance(actSpace, gym.spaces.Box):
        if len(actSpace.shape) < 3:
            if len(actSpace.shape) == 2:
                # 2-D actions keep their own layout.
                actionSize = (actSpace.shape[0], actSpace.shape[1],
                              actionResolution)
            else:
                # Lay the values out on the smallest square that holds them.
                squareSize = int(np.ceil(np.sqrt(len(actSpace.low))))
                actionSize = (squareSize, squareSize, actionResolution)

            self.actionIndices.append(len(self.inputSizes))
            self.inputSizes.append(actionSize)
            self.inputTypes.append(pyaon.action)

            self.inputLows.append(list(actSpace.low))
            self.inputHighs.append(list(actSpace.high))
        else:
            raise Exception(
                "Unsupported Box action: Dimensions too high " +
                str(actSpace.shape))
    else:
        raise Exception("Unsupported action type " + str(type(actSpace)))

    # Hierarchy layers
    lds = []

    for layerSize in layerSizes:
        ld = pyaon.LayerDesc(hiddenSize=layerSize)
        ld.eRadius = layerRadius
        ld.dRadius = layerRadius

        lds.append(ld)

    self.h = pyaon.Hierarchy()

    ioDescs = [
        pyaon.IODesc(inputSize, inputType, layerRadius, layerRadius, 64)
        for inputSize, inputType in zip(self.inputSizes, self.inputTypes)
    ]

    self.h.initRandom(ioDescs, lds)

    # Random starting actions: one value per entry of the action input's
    # low-bound list, each below that input's third size component.
    self.actions = []

    for index in self.actionIndices:
        size = len(self.inputLows[index])

        self.actions.append([
            np.random.randint(0, self.inputSizes[index][2])
            for _ in range(size)
        ])