def create_reward(screen, ai_settings, rock, rewards):
    # create a new reward at the same position where a designated rock is hit
    reward = Reward(screen, ai_settings, rock.reward_flag, rock.x_speed)
    reward.rect.centerx = rock.rect.centerx
    reward.rect.centery = rock.rect.centery
    reward.x = float(reward.rect.x)
    rewards.add(reward)
def create_reward(screen, ai_settings, reward_flag, rewards, alien):
    # create a new reward at the same position where a designated alien is hit
    reward = Reward(screen, ai_settings, reward_flag)
    reward.rect.x = alien.rect.x
    reward.y = alien.rect.y
    reward.rect.y = reward.y
    rewards.add(reward)
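# The two create_reward() helpers above assume a pygame-style Reward sprite whose
# constructor builds a rect and whose position can then be synced to the hit rock or
# alien. A minimal sketch of such a class, assuming pygame; the placeholder Surface
# and attribute defaults here are illustrative, not the original projects' code.
import pygame
from pygame.sprite import Sprite


class Reward(Sprite):
    def __init__(self, screen, ai_settings, reward_flag, x_speed=0):
        super(Reward, self).__init__()
        self.screen = screen
        self.ai_settings = ai_settings
        self.reward_flag = reward_flag
        self.x_speed = x_speed
        # hypothetical placeholder image; the real projects load their own reward art
        self.image = pygame.Surface((20, 20))
        self.rect = self.image.get_rect()
        # float copies of the position for smooth per-frame movement
        self.x = float(self.rect.x)
        self.y = float(self.rect.y)

    def blitme(self):
        self.screen.blit(self.image, self.rect)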
def step(self, prices):
    """
    - get what the controller would output
    - controller.update to pass in the reward
    - controller initialization
    """
    # get the controller's points
    controller = self.controller
    controllers_points = controller.get_points(prices)
    end = False
    energy_dict = {}
    rewards_dict = {}
    for player_name in self.players_dict:
        # get the points output from players
        player = self.players_dict.get(player_name)
        player_energy = player.threshold_exp_response(controllers_points.numpy())
        last_player_energy = player_energy
        energy_dict[player_name] = player_energy

        # get the reward from the player's output
        player_min_demand = player.get_min_demand()
        player_max_demand = player.get_max_demand()
        player_reward = Reward(player_energy, prices, player_min_demand, player_max_demand)
        player_ideal_demands = player_reward.ideal_use_calculation()
        last_player_ideal = player_ideal_demands

        # either distance from ideal or cost distance
        # distance = player_reward.neg_distance_from_ideal(player_ideal_demands)
        # print("Ideal demands: ", player_ideal_demands)
        # print("Actual demands: ", player_energy)
        reward = player_reward.scaled_cost_distance_neg(player_ideal_demands)
        rewards_dict[player_name] = reward

    total_reward = sum(rewards_dict.values())

    # the reward goes back into the controller as the controller update
    controller.update(total_reward, prices, controllers_points)

    self._timestep = self._timestep + self._time_interval
    if self._timestep > self._end_timestamp:
        self._timestep = self._start_timestamp
        if self.current_iter >= self.num_iters:
            end = True
        self.current_iter += 1

    return controllers_points, last_player_energy, last_player_ideal, total_reward, end
def initialize(self):
    self.dt = self.options['dt']
    self.controllerTypeOrder = [
        'defaultRandom', 'learnedRandom', 'learned', 'default'
    ]

    self.setDefaultOptions()

    self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'],
                            numRays=self.options['Sensor']['numRays'])
    self.Controller = ControllerObj(self.Sensor)
    self.Car = CarPlant(controller=self.Controller,
                        velocity=self.options['Car']['velocity'])
    self.Reward = Reward(
        self.Sensor,
        collisionThreshold=self.collisionThreshold,
        actionCost=self.options['Reward']['actionCost'],
        collisionPenalty=self.options['Reward']['collisionPenalty'],
        raycastCost=self.options['Reward']['raycastCost'])
    self.setSARSA()

    # create the things needed for simulation
    om.removeFromObjectModel(om.findObjectByName('world'))
    self.world = World.buildCircleWorld(
        percentObsDensity=self.options['World']['percentObsDensity'],
        circleRadius=self.options['World']['circleRadius'],
        nonRandom=self.options['World']['nonRandomWorld'],
        scale=self.options['World']['scale'],
        randomSeed=self.options['World']['randomSeed'],
        obstaclesInnerFraction=self.options['World']['obstaclesInnerFraction'])

    om.removeFromObjectModel(om.findObjectByName('robot'))
    self.robot, self.frame = World.buildRobot()
    self.locator = World.buildCellLocator(self.world.visObj.polyData)
    self.Sensor.setLocator(self.locator)

    self.frame = self.robot.getChildFrame()
    self.frame.setProperty('Scale', 3)
    self.frame.setProperty('Edit', True)
    self.frame.widget.HandleRotationEnabledOff()
    rep = self.frame.widget.GetRepresentation()
    rep.SetTranslateAxisEnabled(2, False)
    rep.SetRotateAxisEnabled(0, False)
    rep.SetRotateAxisEnabled(1, False)

    self.supervisedTrainingTime = self.options['runTime']['supervisedTrainingTime']
    self.learningRandomTime = self.options['runTime']['learningRandomTime']
    self.learningEvalTime = self.options['runTime']['learningEvalTime']
    self.defaultControllerTime = self.options['runTime']['defaultControllerTime']

    self.Car.setFrame(self.frame)
    print "Finished initialization"
def execute(self, action):
    #print('execute')
    run = Assembler(self.carID)
    self.output = run.getTensor()
    if self.output is None:
        term = True
        #print('is none')
        return self.getObservation(), term, 0  # 20000

    rew = Reward('ego', run.getTraffic(), self.output, run.getPrioritizedTraffic())
    coll, term = rew.collision()
    if term is True:
        cost = coll
        return self.getObservation(), term, cost

    # over, term = rew.overtime()
    # if term is True:
    #     cost = over
    #     return self.getObservation(), term, cost

    traci.vehicle.setSpeedMode('ego', 0)
    num_vec = traci.vehicle.getIDList()
    for i in range(len(num_vec)):
        if num_vec[i] != 'ego':
            traci.vehicle.setLaneChangeMode(num_vec[i], 512)

    carID = 'ego'
    act = Action(carID)
    #print(action)
    if action == 0:
        act.decelerate()
        #print('dec')
    elif action == 1:
        act.accelerate()
        #print('acc')
    #elif action == 2:
    #    act.emergency()
    else:
        act.remain()
        #print('rem')

    gap, term = rew.emergency_gap()
    #wary = rew.wary_before_intersection()
    #brake = rew.emergency_brake()
    #print(self.output[0:20])
    #print(gap)
    cost = rew.optimum_speed_deviation() + gap  #+ over
    traci.simulationStep()
    #print(self.getObservation())
    #print(term)
    return self.getObservation(), term, cost
def random_encounter(
        agent: Agent,
        foe: Foe,
        max_turns: int,
) -> Tuple[Agent, Foe, pd.DataFrame]:
    """
    Run an encounter in which the agent picks a random action each turn.

    :param agent: the acting Agent
    :param foe: the opposing Foe
    :param max_turns: maximum number of turns before the encounter is cut off
    :return: the (possibly modified) agent and foe, plus per-turn encounter stats
    """
    reward = Reward(agent, foe)
    utility = reward.get_reward(agent, foe)

    # Arrays to hold encounter_stats
    agent_policies = []
    agent_spell_slots = []
    agent_shields = []
    agent_healths = []
    foe_healths = []
    rewards = []

    for i in range(max_turns):
        agent_policy = agent.random_action()
        agent_action, __ = turn(agent, agent_policy, foe)
        utility = reward.get_reward(agent, foe)

        # Collect turn data into encounter_stats
        agent_policies.append(agent_policy)
        agent_spell_slots.append(agent.states["spell slots"])
        agent_shields.append(agent.states["shield"])
        agent_healths.append(agent.hp)
        foe_healths.append(foe.hp)
        rewards.append(utility)

        if agent.hp <= 0 or foe.hp <= 0:  # end encounter if either dies
            break

    encounter_stats = pd.DataFrame({
        "agent actions": agent_policies,
        "agent spell slots": agent_spell_slots,
        "agent shield": agent_shields,
        "agent health": agent_healths,
        "foe health": foe_healths,
        "reward": rewards,
    })
    return agent, foe, encounter_stats
def execute(self, action):
    #traci.vehicle.setSpeedMode('ego', 0)
    #print(action)
    run = Assembler(self.carID)
    # self.output = tensor.executeTensor()
    self.output = run.getTensor()
    # print(self.output)
    if self.output is None:
        term = True
        return self.getObservation(), term, 0

    # rew = Reward('ego', self.output)
    rew = Reward('ego')
    coll, term = rew.collision()
    if term is True:
        cost = coll
        return self.getObservation(), term, cost

    traci.vehicle.setSpeedMode('ego', 0)
    num_vec = traci.vehicle.getIDList()
    for i in range(len(num_vec)):
        if num_vec[i] != 'ego':
            traci.vehicle.setLaneChangeMode(num_vec[i], 512)

    # print(action)
    carID = 'ego'
    act = Action(carID)
    if action == 0:
        pass
        #act.decelerate()
        #print('dec')
    elif action == 1:
        pass
        #act.accelerate()
        #print('acc')
    else:
        pass
        #act.remain()
        #print('rem')

    traci.simulationStep()
    #net = sumolib.net.readNet('huge_crossing/huge_crossing.net.xml')
    #tensor = Tensor('ego', net, self.conflictory_list[0], self.connection_list[0])
    #self.output = tensor.executeTensor()
    #rew = Reward('ego', self.output)
    win = rew.target()
    cost = rew.optimum_speed_deviation() + win  # + brake
    traci.simulationStep()
    return self.getObservation(), term, cost
def __init__(self, env_config={}):
    pomme_config = pommerman.configs.ffa_competition_env()
    self.reward = Reward(env_config.get("reward"))
    self.pomme = Pomme(**pomme_config['env_kwargs'])
    self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
    self.action_space = self.pomme.action_space
    if not env_config or (env_config and env_config.get("is_training", True)):
        # initializing the env twice could raise an error here.
        self.init(pomme_config)
def __lookahead(
        agent: Agent,  # Agent and foe represent the full state
        foe: Foe,
        policy_step: str,
        reward: Reward,
        discount: float) -> float:
    agent_copy = copy.deepcopy(agent)
    foe_copy = copy.deepcopy(foe)

    utility = reward.get_reward(agent_copy, foe_copy)
    turn(agent_copy, policy_step, foe_copy, "expectation")
    return utility + discount * reward.get_reward(agent_copy, foe_copy)
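# random_encounter() and __lookahead() above rely only on Reward(agent, foe) plus a
# get_reward(agent, foe) method returning a scalar utility for the current state.
# A minimal sketch under that assumption; the health-difference heuristic below is
# illustrative, not the project's actual reward definition.
class Reward:
    def __init__(self, agent, foe):
        # remember starting hit points so the utility can be normalized (assumption)
        self.agent_start_hp = agent.hp
        self.foe_start_hp = foe.hp

    def get_reward(self, agent, foe):
        # reward damage dealt to the foe, penalize damage taken by the agent
        damage_dealt = (self.foe_start_hp - foe.hp) / max(self.foe_start_hp, 1)
        damage_taken = (self.agent_start_hp - agent.hp) / max(self.agent_start_hp, 1)
        return damage_dealt - damage_taken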
def get_balance(pk_hex):
    balance = 0.0
    # TODO implement balance loading
    for block in blockchain.blocks:
        try:
            blk_data = json.loads(block.data)
            blk_transaction = Transaction.from_json(blk_data[0])
            blk_reward = Reward.from_json(blk_data[1])
            if (blk_transaction.sender_pub_key == pk_hex):
                balance = balance - float(blk_transaction.amnt)
                balance = balance - float(blk_reward.reward_amnt)
            if (blk_transaction.address == pk_hex):
                balance = balance + float(blk_transaction.amnt)
            if (blk_reward.recipient == pk_hex):
                balance = balance + blk_reward.block_reward
        except json.decoder.JSONDecodeError:
            continue
    return balance
def create_block(block_return, transaction):
    print("mining block...")
    iteration = len(blockchain.blocks)
    prev_block = blockchain.blocks[iteration - 1]
    miner_secret = get_miner_secret()
    miner_address = get_miner_address()
    reward = Reward(miner_address, transaction.amnt,
                    block_iteration=iteration, private_key=miner_secret)
    data = json.dumps([str(transaction), str(reward)])
    block_data = {}
    block_data['index'] = iteration
    block_data['timestamp'] = date.datetime.now()
    block_data['data'] = str(data)
    block_data['prev_hash'] = prev_block.hash
    block_data['hash'] = None
    block_data['nonce'] = 0
    mined_block_data = mine(block_data)
    new_block = Block.from_dict(mined_block_data)
    print(new_block)
    block_return[0] = new_block
    return block_return
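# get_balance() and create_block() above only touch a few pieces of the blockchain
# Reward object: a JSON round-trip, the fee charged against the sender (reward_amnt),
# the recipient, and the block_reward paid to the miner. A hedged sketch of a container
# with just those fields; the 50-coin constant and the unused private_key handling are
# assumptions for illustration, not the original chain's parameters.
import json


class Reward:
    BLOCK_REWARD = 50.0  # assumed fixed mining reward

    def __init__(self, recipient, reward_amnt, block_iteration=0, private_key=None):
        self.recipient = recipient
        self.reward_amnt = reward_amnt
        self.block_iteration = block_iteration
        self.block_reward = self.BLOCK_REWARD
        # private_key is kept for signature compatibility; a real implementation
        # would use it to sign the reward (not modeled here)

    def __str__(self):
        return json.dumps({
            "recipient": self.recipient,
            "reward_amnt": self.reward_amnt,
            "block_iteration": self.block_iteration,
            "block_reward": self.block_reward,
        })

    @classmethod
    def from_json(cls, data):
        # accept either a JSON string (as stored in block.data) or a parsed dict
        if isinstance(data, str):
            data = json.loads(data)
        reward = cls(data["recipient"], data["reward_amnt"], data["block_iteration"])
        reward.block_reward = data["block_reward"]
        return reward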
def main(_):
    with tf.Session() as sess:
        env = gym.make(ENV_NAME)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)
        env.seed(RANDOM_SEED)

        print(env.observation_space)
        print(env.action_space)

        state_dim = env.observation_space.shape[0]
        try:
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure action bound is symmetric
            assert (env.action_space.high == -env.action_space.low)
            discrete = False
            print('Continuous Action Space')
        except AttributeError:
            action_dim = env.action_space.n
            action_bound = 1
            discrete = True
            print('Discrete Action Space')

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             ACTOR_LEARNING_RATE, TAU)
        critic = CriticNetwork(sess, state_dim, action_dim,
                               CRITIC_LEARNING_RATE, TAU,
                               actor.get_num_trainable_vars())

        noise = Noise(DELTA, SIGMA, OU_A, OU_MU)
        reward = Reward(REWARD_FACTOR, GAMMA)
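# Several of the training scripts here construct Reward(REWARD_FACTOR, GAMMA) and hand
# it to train(). A minimal sketch of what such a helper could look like: scale raw
# rewards by a factor and compute discounted returns over an episode. The method name
# discount() and its exact behavior are assumptions for illustration only.
import numpy as np


class Reward:
    def __init__(self, reward_factor, gamma):
        self.reward_factor = reward_factor
        self.gamma = gamma

    def discount(self, rewards):
        # backward pass over an episode's rewards to build discounted returns
        discounted = np.zeros(len(rewards), dtype=np.float64)
        running = 0.0
        for t in reversed(range(len(rewards))):
            running = rewards[t] * self.reward_factor + self.gamma * running
            discounted[t] = running
        return discounted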
def main(_):
    with tf.compat.v1.Session() as sess:
        env = StageWorld(LASER_BEAM, map_type)
        np.random.seed(RANDOM_SEED)
        tf.compat.v1.set_random_seed(RANDOM_SEED)

        state_dim = LASER_BEAM * LASER_HIST + SPEED + TARGET
        action_dim = ACTION
        #action_bound = [0.25, np.pi/6]  #bounded acceleration
        action_bound = [0.5, np.pi / 3]  #bounded velocity
        switch_dim = SWITCH

        discrete = False
        print('Continuous Action Space')

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             ACTOR_LEARNING_RATE, TAU)
        critic = CriticNetwork(sess, state_dim, action_dim, switch_dim,
                               CRITIC_LEARNING_RATE, TAU,
                               actor.get_num_trainable_vars())
        noise = Noise(DELTA, SIGMA, OU_A, OU_MU)
        reward = Reward(REWARD_FACTOR, GAMMA)

        try:
            train(sess, env, actor, critic, noise, reward, discrete, action_bound)
        except KeyboardInterrupt:
            pass
def step(self, prices):
    """
    - get what the controller would output
    - controller.update to pass in the reward
    - controller initialization
    """
    # get the controller's points
    controller = self.controller
    controllers_points = controller.get_points(prices)
    end = False
    energy_dict = {}
    rewards_dict = {}
    for player_name in self.players_dict:
        # get the points output from players
        player = self.players_dict.get(player_name)
        player_energy = player.energy_output_simple_linear(controllers_points)
        energy_dict[player_name] = player_energy

        # get the reward from the player's output
        player_min_demand = player.get_min_demand()
        player_max_demand = player.get_max_demand()
        player_reward = Reward(player_energy, prices, player_min_demand, player_max_demand)
        player_ideal_demands = player_reward.ideal_use_calculation()
        distance_from_ideal = player_reward.neg_distance_from_ideal(player_ideal_demands)
        rewards_dict[player_name] = distance_from_ideal

    total_distance = sum(rewards_dict.values())

    # the reward goes back into the controller as the controller update
    # controller.update(reward = total_distance)

    self._timestep = self._timestep + self._time_interval
    if self._timestep > self._end_timestamp:
        end = True

    return total_distance, end
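# Both step() variants above use the same small surface of the demand-response Reward
# class: ideal_use_calculation() to get a per-hour ideal demand profile, and either
# neg_distance_from_ideal() or scaled_cost_distance_neg() to turn the deviation into a
# (negative) reward. A hedged sketch of that interface; the greedy "cheapest hours
# first" allocation below is an assumption, not the project's actual method.
import numpy as np


class Reward:
    def __init__(self, energy_use, prices, min_demand, max_demand):
        self.energy_use = np.asarray(energy_use, dtype=float)
        self.prices = np.asarray(prices, dtype=float)
        self.min_demand = min_demand
        self.max_demand = max_demand

    def ideal_use_calculation(self):
        # start every hour at the minimum demand, then assign the remaining energy
        # budget to the cheapest hours (illustrative allocation rule)
        total_budget = float(np.sum(self.energy_use))
        ideal = np.full_like(self.prices, self.min_demand)
        remaining = max(total_budget - float(ideal.sum()), 0.0)
        for hour in np.argsort(self.prices):
            extra = min(self.max_demand - self.min_demand, remaining)
            ideal[hour] += extra
            remaining -= extra
            if remaining <= 0:
                break
        return ideal

    def neg_distance_from_ideal(self, ideal):
        # negative squared distance between actual and ideal demand
        return -float(np.sum((self.energy_use - ideal) ** 2))

    def scaled_cost_distance_neg(self, ideal):
        # weight the deviation by price so expensive hours matter more
        return -float(np.sum(self.prices * np.abs(self.energy_use - ideal)))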
def main(_):
    with tf.Session() as sess:
        env = gym.make(ENV_NAME)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)
        env.seed(RANDOM_SEED)

        print("[Main start]")
        print("Env observation space ::", env.observation_space, env.observation_space.shape)
        print("Env action space ::", env.action_space, env.action_space.shape)

        state_dim = env.observation_space.shape[0]
        try:
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure action bound is symmetric
            assert (env.action_space.high == -env.action_space.low)
            discrete = False
            print('Continuous Action Space')
        except AttributeError:
            action_dim = env.action_space.n
            action_bound = None
            discrete = True
            print('Discrete Action Space')

        actor = actorNet(sess, state_dim, action_dim, action_bound, TAU)
        critic = criticNet(sess, state_dim, action_dim, TAU, actor.get_num_vars())

        SAVER_DIR = "./save/pend"
        saver = tf.train.Saver()
        checkpoint_path = os.path.join(SAVER_DIR, "model")
        ckpt = tf.train.get_checkpoint_state(SAVER_DIR)

        noise = Noise(DELTA, SIGMA, OU_A, OU_MU)
        reward = Reward(REWARD_FACTOR, GAMMA)

        try:
            if ckpt and ckpt.model_checkpoint_path:
                print("[Restore Model]")
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("[Initialize Model]")
                sess.run(tf.global_variables_initializer())
            train(sess, env, actor, critic, noise, reward, discrete, saver, checkpoint_path)
        except KeyboardInterrupt:
            pass

        if GYM_MONITOR_EN:
            env.monitor.close()
def __init__(self):
    SEED = datetime.now()
    SEED = 0
    self.n = 100
    self.tolerance = 1e-2
    self.bandwidth = 0.3
    self.maxNegAccMin = 3.0
    self.maxNegAccMax = 6.0
    self.run_cnt = 3600
    self.params_template = {
        "length": 5.0,
        "width": 2.0,
        "maxPosAcc": 3.0,
        "maxNegAcc": 4.5,
        "usualPosAcc": 2.0,
        "usualNegAcc": 2.5,
        "minGap": 2.5,
        "maxSpeed": 50 / 3,
        "headwayTime": 1.5
    }
    self.memo = "1_3"
    roadnet_file = 'data/roadnet/roadnet_{0}.json'.format(self.memo)
    flow_file = 'data/flow/flow_{0}.json'.format(self.memo)
    signal_file = 'data/signal/signal_{0}.json'.format(self.memo)
    self.observed_file = "data/gmsa_observed_%s.txt" % self.memo

    self.rand = Random(SEED)
    self.accepted = []
    self.kde = KernelDensity(self.bandwidth)
    self.reward = Reward()
    self.gen = DataGenerator(flow_file, signal_file)
    self.eng = engine.Engine(1.0, 8, True, True)
    self.eng.load_roadnet(roadnet_file)
    self.f = open(self.observed_file)
def main(_):
    with tf.Session() as sess:
        env = gym.make(ENV_NAME)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)
        env.seed(RANDOM_SEED)

        print(env.observation_space)
        print(env.action_space)

        state_dim = env.observation_space.shape[0]
        try:
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure action bound is symmetric
            assert (env.action_space.high == -env.action_space.low)
            discrete = False
            print('Continuous Action Space')
        except:  # the original AttributeError clause did not catch the exception raised here, so catch everything
            action_dim = env.action_space.n
            action_bound = 1
            discrete = True
            print('Discrete Action Space')

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             ACTOR_LEARNING_RATE, TAU)
        critic = CriticNetwork(sess, state_dim, action_dim,
                               CRITIC_LEARNING_RATE, TAU,
                               actor.get_num_trainable_vars())
        noise = Noise(DELTA, SIGMA, OU_A, OU_MU)
        reward = Reward(REWARD_FACTOR, GAMMA)

        if GYM_MONITOR_EN:
            if not RENDER_ENV:
                gym.wrappers.Monitor(env, MONITOR_DIR, video_callable=False, force=True)  # switched to the newer wrapper API
                # env.monitor.start(MONITOR_DIR, video_callable=False, force=True)
            else:
                gym.wrappers.Monitor(env, force=True)  # switched to the newer wrapper API
                # env.monitor.start(MONITOR_DIR, force=True)

        try:
            train(sess, env, actor, critic, noise, reward, discrete)
        except KeyboardInterrupt:
            pass

        if GYM_MONITOR_EN:
            env.monitor.close()
def initialize(self):
    self.dt = self.options["dt"]
    self.controllerTypeOrder = ["defaultRandom", "learnedRandom", "learned", "default"]

    self.setDefaultOptions()

    self.Sensor = SensorObj(
        rayLength=self.options["Sensor"]["rayLength"],
        numRays=self.options["Sensor"]["numRays"]
    )
    self.Controller = ControllerObj(self.Sensor)
    self.Car = CarPlant(controller=self.Controller, velocity=self.options["Car"]["velocity"])
    self.Reward = Reward(
        self.Sensor,
        collisionThreshold=self.collisionThreshold,
        actionCost=self.options["Reward"]["actionCost"],
        collisionPenalty=self.options["Reward"]["collisionPenalty"],
        raycastCost=self.options["Reward"]["raycastCost"],
    )
    self.setSARSA()

    # create the things needed for simulation
    om.removeFromObjectModel(om.findObjectByName("world"))
    self.world = World.buildCircleWorld(
        percentObsDensity=self.options["World"]["percentObsDensity"],
        circleRadius=self.options["World"]["circleRadius"],
        nonRandom=self.options["World"]["nonRandomWorld"],
        scale=self.options["World"]["scale"],
        randomSeed=self.options["World"]["randomSeed"],
        obstaclesInnerFraction=self.options["World"]["obstaclesInnerFraction"],
    )

    om.removeFromObjectModel(om.findObjectByName("robot"))
    self.robot, self.frame = World.buildRobot()
    self.locator = World.buildCellLocator(self.world.visObj.polyData)
    self.Sensor.setLocator(self.locator)

    self.frame = self.robot.getChildFrame()
    self.frame.setProperty("Scale", 3)
    self.frame.setProperty("Edit", True)
    self.frame.widget.HandleRotationEnabledOff()
    rep = self.frame.widget.GetRepresentation()
    rep.SetTranslateAxisEnabled(2, False)
    rep.SetRotateAxisEnabled(0, False)
    rep.SetRotateAxisEnabled(1, False)

    self.supervisedTrainingTime = self.options["runTime"]["supervisedTrainingTime"]
    self.learningRandomTime = self.options["runTime"]["learningRandomTime"]
    self.learningEvalTime = self.options["runTime"]["learningEvalTime"]
    self.defaultControllerTime = self.options["runTime"]["defaultControllerTime"]

    self.Car.setFrame(self.frame)
    print "Finished initialization"
def __init__(self):
    # initialize baxter node in ROS
    rospy.init_node('baxter_node')
    # create instances of baxter_interface's Limb class
    self.limb_right = baxter_interface.Limb('right')
    self.limb_left = baxter_interface.Limb('left')
    # load the gazebo model for simulation
    block_sample.load_gazebo_models()
    # create a new action for both arms
    self.action = Action(self.limb_right.joint_angles(), self.limb_left.joint_angles())
    self.neutral()
    # initialize the reward with goal location, param 2 is block location in sample
    # TODO add a 2nd block, have each hand work towards its own goal.
    # TODO use input from robot vision to get goal position
    self.right_reward = Reward(self.limb_right.endpoint_pose(), (0.6725, 0.1265, 0.7825))
    self.left_reward = Reward(self.limb_left.endpoint_pose(), (0.6725, 0.1265, 0.7825))
def main(_):
    with tf.Session() as sess:
        env = gym.make(ENV_NAME)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)
        env.seed(RANDOM_SEED)

        print env.observation_space
        print env.action_space

        state_dim = env.observation_space.shape[0]
        try:
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure action bound is symmetric
            assert (env.action_space.high == -env.action_space.low)
            discrete = False
            print "Continuous Action Space"
        except AttributeError:
            action_dim = env.action_space.n
            action_bound = 1
            discrete = True
            print "Discrete Action Space"

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             ACTOR_LEARNING_RATE, TAU)
        critic = CriticNetwork(sess, state_dim, action_dim,
                               CRITIC_LEARNING_RATE, TAU,
                               actor.get_num_trainable_vars())
        noise = Noise(DELTA, SIGMA, OU_A, OU_MU)
        reward = Reward(REWARD_FACTOR, GAMMA)

        if GYM_MONITOR_EN:
            if not RENDER_ENV:
                env = wrappers.Monitor(env, MONITOR_DIR, force=True)
            else:
                env = wrappers.Monitor(env, MONITOR_DIR, force=True)

        try:
            train(sess, env, actor, critic, noise, reward, discrete)
        except KeyboardInterrupt:
            pass

        #if GYM_MONITOR_EN:
        #    env.monitor.close()
        env.close()
        gym.upload(MONITOR_DIR, api_key="sk_JObiOSHpRjw48FpWvI1GA")
def __init__(self, env_config=None):
    pomme_config = pommerman.configs.ffa_competition_env()
    if env_config:
        for k, v in env_config.items():
            if k in pomme_config['env_kwargs']:
                pomme_config['env_kwargs'][k] = v
        self.reward = Reward(env_config.get("reward"))
    else:
        self.reward = Reward()

    print("Pommerman Config:", pomme_config['env_kwargs'])
    self.pomme = Pomme(**pomme_config['env_kwargs'])
    self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
    self.action_space = self.pomme.action_space
    if not env_config or (env_config and env_config.get("is_training", True)):
        # initializing the env twice could raise an error here.
        self.init(pomme_config)
def initialize(self):
    self.dt = self.options['dt']
    self.controllerTypeOrder = ['defaultRandom', 'learnedRandom', 'learned', 'default']

    self.setDefaultOptions()

    self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'],
                            numRays=self.options['Sensor']['numRays'])
    self.Controller = ControllerObj(self.Sensor)
    self.Car = CarPlant(controller=self.Controller,
                        velocity=self.options['Car']['velocity'])
    self.Reward = Reward(self.Sensor,
                         collisionThreshold=self.collisionThreshold,
                         actionCost=self.options['Reward']['actionCost'],
                         collisionPenalty=self.options['Reward']['collisionPenalty'],
                         raycastCost=self.options['Reward']['raycastCost'])
    self.setSARSA()

    # create the things needed for simulation
    om.removeFromObjectModel(om.findObjectByName('world'))
    self.world = World.buildCircleWorld(percentObsDensity=self.options['World']['percentObsDensity'],
                                        circleRadius=self.options['World']['circleRadius'],
                                        nonRandom=self.options['World']['nonRandomWorld'],
                                        scale=self.options['World']['scale'],
                                        randomSeed=self.options['World']['randomSeed'],
                                        obstaclesInnerFraction=self.options['World']['obstaclesInnerFraction'])

    om.removeFromObjectModel(om.findObjectByName('robot'))
    self.robot, self.frame = World.buildRobot()
    self.locator = World.buildCellLocator(self.world.visObj.polyData)
    self.Sensor.setLocator(self.locator)

    self.frame = self.robot.getChildFrame()
    self.frame.setProperty('Scale', 3)
    self.frame.setProperty('Edit', True)
    self.frame.widget.HandleRotationEnabledOff()
    rep = self.frame.widget.GetRepresentation()
    rep.SetTranslateAxisEnabled(2, False)
    rep.SetRotateAxisEnabled(0, False)
    rep.SetRotateAxisEnabled(1, False)

    self.supervisedTrainingTime = self.options['runTime']['supervisedTrainingTime']
    self.learningRandomTime = self.options['runTime']['learningRandomTime']
    self.learningEvalTime = self.options['runTime']['learningEvalTime']
    self.defaultControllerTime = self.options['runTime']['defaultControllerTime']

    self.Car.setFrame(self.frame)
    print "Finished initialization"
def execute(self, action):
    run = Assembler(self.carID)
    self.output = run.getTensor()
    #print(run.getTensor())
    if self.output is None:
        term = True
        return self.getObservation(), term, 0

    rew = Reward('ego', run.getTraffic(), self.output)
    coll, term = rew.collision()
    if term is True:
        cost = coll
        return self.getObservation(), term, cost

    traci.vehicle.setSpeedMode('ego', 0)
    num_vec = traci.vehicle.getIDList()
    for i in range(len(num_vec)):
        if num_vec[i] != 'ego':
            traci.vehicle.setLaneChangeMode(num_vec[i], 512)
        ###
        #if i % 2 == 0:
        #    traci.vehicle.setSpeedMode(num_vec[i], 23)

    carID = 'ego'
    act = Action(carID)
    if action == 0:
        act.decelerate()
        # print('dec')
    elif action == 1:
        act.accelerate()
        # print('acc')
    else:
        act.remain()
        # print('rem')

    gap, term = rew.emergency_gap()
    wary = rew.wary_before_intersection()
    #win = rew.target()
    brake = rew.emergency_brake()
    cost = rew.optimum_speed_deviation() + brake + gap + wary
    traci.simulationStep()
    #print(self.output)
    return self.getObservation(), term, cost
def update_screen(ai_settings, screen, stats, sb, ship, aliens, bullets, play_button,
                  exit_button, start_background, die_aliens):
    # Update the images on the screen and flip to the new screen.
    # Redraw the screen on every pass through the loop.
    screen.fill(ai_settings.bg_color)
    image_path_list = ['', 'images/reward1.bmp', 'images/reward2.bmp',
                       'images/reward3.bmp', 'images/reward4.bmp', 'images/wingame.bmp']
    if stats.score == 800 or stats.score == 1600 or stats.score == 2400 or stats.score == 3200:
        if stats.level < 5:
            # create a reward
            reward = Reward(screen, ai_settings, image_path_list[stats.level])
            reward.blitme()
            ship.blitme()
            sb.show_score()
        else:
            # create the final "game cleared" screen
            reward = Reward(screen, ai_settings, image_path_list[stats.level])
            reward.blitme()
        # If the game is inactive, draw the Play button
        if not stats.game_active:
            exit_button.draw_button()
            play_button.draw_button()
        update_reward(ai_settings, sb, screen, ship, reward, stats, bullets, aliens)
    else:
        ship.blitme()
        aliens.draw(screen)
        sb.show_score()
        # Redraw all bullets behind the ship and the aliens
        for bullet in bullets.sprites():
            bullet.draw_bullet()
        for die_alien in die_aliens:
            if die_alien.down_index == 0:
                pass
            if die_alien.down_index > 7:
                die_aliens.remove(die_alien)
                continue
            # show the collision image
            screen.blit(pygame.image.load('images/bump.png'), die_alien.rect)
            die_alien.down_index += 1

    # Make the most recently drawn screen visible
    pygame.display.flip()
def create_start_block(self, transaction):
    iteration = len(self.blockchain.blocks)
    prev_block = self.blockchain.blocks[iteration - 1]
    reward = Reward(self.miner_address, self.transaction.amnt,
                    block_iteration=iteration, private_key=self.miner_secret)
    data = json.dumps([str(transaction), str(reward)])
    block_data = {}
    block_data['index'] = iteration
    block_data['timestamp'] = date.datetime.now()
    block_data['data'] = str(data)
    block_data['prev_hash'] = prev_block.hash
    block_data['hash'] = None
    block_data['nonce'] = 0
    self.unmined_block_dict = block_data
def main(_):
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
    # with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    with tf.Session() as sess:
        env = StageWorld(LASER_BEAM)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)

        state_dim = LASER_BEAM * LASER_HIST + SPEED + TARGET
        action_dim = ACTION
        action_bound = [0.5, np.pi / 3]
        switch_dim = SWITCH

        discrete = False
        print('Continuous Action Space')

        with tf.name_scope("Actor"):
            actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                                 ACTOR_LEARNING_RATE, TAU)
        with tf.name_scope("Critic"):
            critic = CriticNetwork(sess, state_dim, action_dim, switch_dim,
                                   CRITIC_LEARNING_RATE, TAU,
                                   actor.get_num_trainable_vars(),
                                   baseline_rate=10.,
                                   control_variance_flag=CONTROL_VARIANCE)
        noise = Noise(DELTA, SIGMA, OU_A, OU_MU)
        reward = Reward(REWARD_FACTOR, GAMMA)

        try:
            train(sess, env, actor, critic, noise, reward, discrete, action_bound)
        except KeyboardInterrupt:
            pass
class Simulator(object): def __init__(self, percentObsDensity=20, endTime=40, randomizeControl=False, nonRandomWorld=False, circleRadius=0.7, worldScale=1.0, supervisedTrainingTime=500, autoInitialize=True, verbose=True, sarsaType="discrete"): self.verbose = verbose self.randomizeControl = randomizeControl self.startSimTime = time.time() self.collisionThreshold = 1.3 self.randomSeed = 5 self.Sarsa_numInnerBins = 4 self.Sarsa_numOuterBins = 4 self.Sensor_rayLength = 8 self.sarsaType = sarsaType self.percentObsDensity = percentObsDensity self.supervisedTrainingTime = 10 self.learningRandomTime = 10 self.learningEvalTime = 10 self.defaultControllerTime = 10 self.nonRandomWorld = nonRandomWorld self.circleRadius = circleRadius self.worldScale = worldScale # create the visualizer object self.app = ConsoleApp() # view = app.createView(useGrid=False) self.view = self.app.createView(useGrid=False) self.initializeOptions() self.initializeColorMap() if autoInitialize: self.initialize() def initializeOptions(self): self.options = dict() self.options['Reward'] = dict() self.options['Reward']['actionCost'] = 0.1 self.options['Reward']['collisionPenalty'] = 100.0 self.options['Reward']['raycastCost'] = 20.0 self.options['SARSA'] = dict() self.options['SARSA']['type'] = "discrete" self.options['SARSA']['lam'] = 0.7 self.options['SARSA']['alphaStepSize'] = 0.2 self.options['SARSA']['useQLearningUpdate'] = False self.options['SARSA']['epsilonGreedy'] = 0.2 self.options['SARSA']['burnInTime'] = 500 self.options['SARSA']['epsilonGreedyExponent'] = 0.3 self.options['SARSA'][ 'exponentialDiscountFactor'] = 0.05 #so gamma = e^(-rho*dt) self.options['SARSA']['numInnerBins'] = 5 self.options['SARSA']['numOuterBins'] = 4 self.options['SARSA']['binCutoff'] = 0.5 self.options['World'] = dict() self.options['World']['obstaclesInnerFraction'] = 0.85 self.options['World']['randomSeed'] = 40 self.options['World']['percentObsDensity'] = 7.5 self.options['World']['nonRandomWorld'] = True self.options['World']['circleRadius'] = 1.75 self.options['World']['scale'] = 1.0 self.options['Sensor'] = dict() self.options['Sensor']['rayLength'] = 10 self.options['Sensor']['numRays'] = 20 self.options['Car'] = dict() self.options['Car']['velocity'] = 12 self.options['dt'] = 0.05 self.options['runTime'] = dict() self.options['runTime']['supervisedTrainingTime'] = 10 self.options['runTime']['learningRandomTime'] = 10 self.options['runTime']['learningEvalTime'] = 10 self.options['runTime']['defaultControllerTime'] = 10 def setDefaultOptions(self): defaultOptions = dict() defaultOptions['Reward'] = dict() defaultOptions['Reward']['actionCost'] = 0.1 defaultOptions['Reward']['collisionPenalty'] = 100.0 defaultOptions['Reward']['raycastCost'] = 20.0 defaultOptions['SARSA'] = dict() defaultOptions['SARSA']['type'] = "discrete" defaultOptions['SARSA']['lam'] = 0.7 defaultOptions['SARSA']['alphaStepSize'] = 0.2 defaultOptions['SARSA']['useQLearningUpdate'] = False defaultOptions['SARSA']['useSupervisedTraining'] = True defaultOptions['SARSA']['epsilonGreedy'] = 0.2 defaultOptions['SARSA']['burnInTime'] = 500 defaultOptions['SARSA']['epsilonGreedyExponent'] = 0.3 defaultOptions['SARSA'][ 'exponentialDiscountFactor'] = 0.05 #so gamma = e^(-rho*dt) defaultOptions['SARSA']['numInnerBins'] = 5 defaultOptions['SARSA']['numOuterBins'] = 4 defaultOptions['SARSA']['binCutoff'] = 0.5 defaultOptions['SARSA']['forceDriveStraight'] = True defaultOptions['World'] = dict() defaultOptions['World']['obstaclesInnerFraction'] = 0.85 
defaultOptions['World']['randomSeed'] = 40 defaultOptions['World']['percentObsDensity'] = 7.5 defaultOptions['World']['nonRandomWorld'] = True defaultOptions['World']['circleRadius'] = 1.75 defaultOptions['World']['scale'] = 1.0 defaultOptions['Sensor'] = dict() defaultOptions['Sensor']['rayLength'] = 10 defaultOptions['Sensor']['numRays'] = 20 defaultOptions['Car'] = dict() defaultOptions['Car']['velocity'] = 12 defaultOptions['dt'] = 0.05 defaultOptions['runTime'] = dict() defaultOptions['runTime']['supervisedTrainingTime'] = 10 defaultOptions['runTime']['learningRandomTime'] = 10 defaultOptions['runTime']['learningEvalTime'] = 10 defaultOptions['runTime']['defaultControllerTime'] = 10 for k in defaultOptions: self.options.setdefault(k, defaultOptions[k]) for k in defaultOptions: if not isinstance(defaultOptions[k], dict): continue for j in defaultOptions[k]: self.options[k].setdefault(j, defaultOptions[k][j]) def initializeColorMap(self): self.colorMap = dict() self.colorMap['defaultRandom'] = [0, 0, 1] self.colorMap['learnedRandom'] = [1.0, 0.54901961, 0.] # this is orange self.colorMap['learned'] = [0.58039216, 0.0, 0.82745098] # this is yellow self.colorMap['default'] = [0, 1, 0] def initialize(self): self.dt = self.options['dt'] self.controllerTypeOrder = [ 'defaultRandom', 'learnedRandom', 'learned', 'default' ] self.setDefaultOptions() self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'], numRays=self.options['Sensor']['numRays']) self.Controller = ControllerObj(self.Sensor) self.Car = CarPlant(controller=self.Controller, velocity=self.options['Car']['velocity']) self.Reward = Reward( self.Sensor, collisionThreshold=self.collisionThreshold, actionCost=self.options['Reward']['actionCost'], collisionPenalty=self.options['Reward']['collisionPenalty'], raycastCost=self.options['Reward']['raycastCost']) self.setSARSA() # create the things needed for simulation om.removeFromObjectModel(om.findObjectByName('world')) self.world = World.buildCircleWorld( percentObsDensity=self.options['World']['percentObsDensity'], circleRadius=self.options['World']['circleRadius'], nonRandom=self.options['World']['nonRandomWorld'], scale=self.options['World']['scale'], randomSeed=self.options['World']['randomSeed'], obstaclesInnerFraction=self.options['World'] ['obstaclesInnerFraction']) om.removeFromObjectModel(om.findObjectByName('robot')) self.robot, self.frame = World.buildRobot() self.locator = World.buildCellLocator(self.world.visObj.polyData) self.Sensor.setLocator(self.locator) self.frame = self.robot.getChildFrame() self.frame.setProperty('Scale', 3) self.frame.setProperty('Edit', True) self.frame.widget.HandleRotationEnabledOff() rep = self.frame.widget.GetRepresentation() rep.SetTranslateAxisEnabled(2, False) rep.SetRotateAxisEnabled(0, False) rep.SetRotateAxisEnabled(1, False) self.supervisedTrainingTime = self.options['runTime'][ 'supervisedTrainingTime'] self.learningRandomTime = self.options['runTime']['learningRandomTime'] self.learningEvalTime = self.options['runTime']['learningEvalTime'] self.defaultControllerTime = self.options['runTime'][ 'defaultControllerTime'] self.Car.setFrame(self.frame) print "Finished initialization" def setSARSA(self, type=None): if type is None: type = self.options['SARSA']['type'] if type == "discrete": self.Sarsa = SARSADiscrete( sensorObj=self.Sensor, actionSet=self.Controller.actionSet, collisionThreshold=self.collisionThreshold, alphaStepSize=self.options['SARSA']['alphaStepSize'], 
useQLearningUpdate=self.options['SARSA']['useQLearningUpdate'], lam=self.options['SARSA']['lam'], numInnerBins=self.options['SARSA']['numInnerBins'], numOuterBins=self.options['SARSA']['numOuterBins'], binCutoff=self.options['SARSA']['binCutoff'], burnInTime=self.options['SARSA']['burnInTime'], epsilonGreedy=self.options['SARSA']['epsilonGreedy'], epsilonGreedyExponent=self.options['SARSA'] ['epsilonGreedyExponent'], forceDriveStraight=self.options['SARSA']['forceDriveStraight']) elif type == "continuous": self.Sarsa = SARSAContinuous( sensorObj=self.Sensor, actionSet=self.Controller.actionSet, lam=self.options['SARSA']['lam'], alphaStepSize=self.options['SARSA']['alphaStepSize'], collisionThreshold=self.collisionThreshold, burnInTime=self.options['SARSA']['burnInTime'], epsilonGreedy=self.options['SARSA']['epsilonGreedy'], epsilonGreedyExponent=self.options['SARSA'] ['epsilonGreedyExponent']) else: raise ValueError( "sarsa type must be either discrete or continuous") def runSingleSimulation(self, updateQValues=True, controllerType='default', simulationCutoff=None): if self.verbose: print "using QValue based controller = ", useQValueController self.setCollisionFreeInitialState() currentCarState = np.copy(self.Car.state) nextCarState = np.copy(self.Car.state) self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) nextRaycast = np.zeros(self.Sensor.numRays) self.Sarsa.resetElibilityTraces() # record the reward data runData = dict() reward = 0 discountedReward = 0 avgReward = 0 startIdx = self.counter while (self.counter < self.numTimesteps - 1): idx = self.counter currentTime = self.t[idx] self.stateOverTime[idx, :] = currentCarState x = self.stateOverTime[idx, 0] y = self.stateOverTime[idx, 1] theta = self.stateOverTime[idx, 2] self.setRobotFrameState(x, y, theta) # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) self.raycastData[idx, :] = currentRaycast S_current = (currentCarState, currentRaycast) if controllerType not in self.colorMap.keys(): print raise ValueError("controller of type " + controllerType + " not supported") if controllerType in ["default", "defaultRandom"]: if controllerType == "defaultRandom": controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=True) else: controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False) if controllerType in ["learned", "learnedRandom"]: if controllerType == "learned": randomizeControl = False else: randomizeControl = True counterForGreedyDecay = self.counter - self.idxDict[ "learnedRandom"] controlInput, controlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy( S_current, counter=counterForGreedyDecay, randomize=randomizeControl) self.emptyQValue[idx] = emptyQValue if emptyQValue and self.options['SARSA'][ 'useSupervisedTraining']: controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False) self.controlInputData[idx] = controlInput nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt) # want to compute nextRaycast so we can do the SARSA algorithm x = nextCarState[0] y = nextCarState[1] theta = nextCarState[2] self.setRobotFrameState(x, y, theta) 
nextRaycast = self.Sensor.raycastAll(self.frame) # Compute the next control input S_next = (nextCarState, nextRaycast) if controllerType in ["default", "defaultRandom"]: if controllerType == "defaultRandom": nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=True) else: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False) if controllerType in ["learned", "learnedRandom"]: if controllerType == "learned": randomizeControl = False else: randomizeControl = True counterForGreedyDecay = self.counter - self.idxDict[ "learnedRandom"] nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy( S_next, counter=counterForGreedyDecay, randomize=randomizeControl) if emptyQValue and self.options['SARSA'][ 'useSupervisedTraining']: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False) # if useQValueController: # nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(S_next) # # if not useQValueController or emptyQValue: # nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(nextCarState, currentTime, self.frame, # raycastDistance=nextRaycast) # compute the reward reward = self.Reward.computeReward(S_next, controlInput) self.rewardData[idx] = reward discountedReward += self.Sarsa.gamma**(self.counter - startIdx) * reward avgReward += reward ###### SARSA update if updateQValues: self.Sarsa.sarsaUpdate(S_current, controlInputIdx, reward, S_next, nextControlInputIdx) #bookkeeping currentCarState = nextCarState currentRaycast = nextRaycast self.counter += 1 # break if we are in collision if self.checkInCollision(nextRaycast): if self.verbose: print "Had a collision, terminating simulation" break if self.counter >= simulationCutoff: break # fill in the last state by hand self.stateOverTime[self.counter, :] = currentCarState self.raycastData[self.counter, :] = currentRaycast # return the total reward avgRewardNoCollisionPenalty = avgReward - reward avgReward = avgReward * 1.0 / max(1, self.counter - startIdx) avgRewardNoCollisionPenalty = avgRewardNoCollisionPenalty * 1.0 / max( 1, self.counter - startIdx) # this extra multiplication is so that it is in the same "units" as avgReward runData['discountedReward'] = discountedReward * (1 - self.Sarsa.gamma) runData['avgReward'] = avgReward runData['avgRewardNoCollisionPenalty'] = avgRewardNoCollisionPenalty # this just makes sure we don't get stuck in an infinite loop. 
if startIdx == self.counter: self.counter += 1 return runData def setNumpyRandomSeed(self, seed=1): np.random.seed(seed) def runBatchSimulation(self, endTime=None, dt=0.05): # for use in playback self.dt = self.options['dt'] self.Sarsa.setDiscountFactor(dt) self.endTime = self.supervisedTrainingTime + self.learningRandomTime + self.learningEvalTime + self.defaultControllerTime self.t = np.arange(0.0, self.endTime, dt) maxNumTimesteps = np.size(self.t) self.stateOverTime = np.zeros((maxNumTimesteps + 1, 3)) self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays)) self.controlInputData = np.zeros(maxNumTimesteps + 1) self.rewardData = np.zeros(maxNumTimesteps + 1) self.emptyQValue = np.zeros(maxNumTimesteps + 1, dtype='bool') self.numTimesteps = maxNumTimesteps self.controllerTypeOrder = [ 'defaultRandom', 'learnedRandom', 'learned', 'default' ] self.counter = 0 self.simulationData = [] self.initializeStatusBar() self.idxDict = dict() numRunsCounter = 0 # three while loops for different phases of simulation, supervisedTraining, learning, default self.idxDict['defaultRandom'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.supervisedTrainingTime / dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.supervisedTrainingTime / dt) and self.counter < self.numTimesteps): self.printStatusBar() useQValueController = False startIdx = self.counter runData = self.runSingleSimulation(updateQValues=True, controllerType='defaultRandom', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "defaultRandom" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict['learnedRandom'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.learningRandomTime / dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.learningRandomTime / dt) and self.counter < self.numTimesteps): self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(updateQValues=True, controllerType='learnedRandom', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "learnedRandom" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict['learned'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.learningEvalTime / dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.learningEvalTime / dt) and self.counter < self.numTimesteps): self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(updateQValues=False, controllerType='learned', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "learned" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict['default'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.defaultControllerTime / dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.defaultControllerTime / dt) and self.counter < self.numTimesteps - 1): self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(updateQValues=False, controllerType='default', simulationCutoff=simCutoff) 
runData['startIdx'] = startIdx runData['controllerType'] = "default" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) # BOOKKEEPING # truncate stateOverTime, raycastData, controlInputs to be the correct size self.numTimesteps = self.counter + 1 self.stateOverTime = self.stateOverTime[0:self.counter + 1, :] self.raycastData = self.raycastData[0:self.counter + 1, :] self.controlInputData = self.controlInputData[0:self.counter + 1] self.rewardData = self.rewardData[0:self.counter + 1] self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime def initializeStatusBar(self): self.numTicks = 10 self.nextTickComplete = 1.0 / float(self.numTicks) self.nextTickIdx = 1 print "Simulation percentage complete: (", self.numTicks, " # is complete)" def printStatusBar(self): fractionDone = float(self.counter) / float(self.numTimesteps) if fractionDone > self.nextTickComplete: self.nextTickIdx += 1 self.nextTickComplete += 1.0 / self.numTicks timeSoFar = time.time() - self.startSimTime estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0 print "#" * self.nextTickIdx, "-" * ( self.numTicks - self.nextTickIdx ), "estimated", estimatedTimeLeft_minutes, "minutes left" def setCollisionFreeInitialState(self): tol = 5 while True: x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol, 1)[0] y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol, 1)[0] theta = np.random.uniform(0, 2 * np.pi, 1)[0] self.Car.setCarState(x, y, theta) self.setRobotFrameState(x, y, theta) if not self.checkInCollision(): break # if self.checkInCollision(): # print "IN COLLISION" # else: # print "COLLISION FREE" return x, y, theta def setupPlayback(self): self.timer = TimerCallback(targetFps=30) self.timer.callback = self.tick playButtonFps = 1.0 / self.dt print "playButtonFPS", playButtonFps self.playTimer = TimerCallback(targetFps=playButtonFps) self.playTimer.callback = self.playTimerCallback self.sliderMovedByPlayTimer = False panel = QtGui.QWidget() l = QtGui.QHBoxLayout(panel) playButton = QtGui.QPushButton('Play/Pause') playButton.connect('clicked()', self.onPlayButton) slider = QtGui.QSlider(QtCore.Qt.Horizontal) slider.connect('valueChanged(int)', self.onSliderChanged) self.sliderMax = self.numTimesteps slider.setMaximum(self.sliderMax) self.slider = slider l.addWidget(playButton) l.addWidget(slider) w = QtGui.QWidget() l = QtGui.QVBoxLayout(w) l.addWidget(self.view) l.addWidget(panel) w.showMaximized() self.frame.connectFrameModified(self.updateDrawIntersection) self.updateDrawIntersection(self.frame) applogic.resetCamera(viewDirection=[0.2, 0, -1]) self.view.showMaximized() self.view.raise_() panel = screengrabberpanel.ScreenGrabberPanel(self.view) panel.widget.show() elapsed = time.time() - self.startSimTime simRate = self.counter / elapsed print "Total run time", elapsed print "Ticks (Hz)", simRate print "Number of steps taken", self.counter self.app.start() def run(self, launchApp=True): self.counter = 1 self.runBatchSimulation() # self.Sarsa.plotWeights() if launchApp: self.setupPlayback() def updateDrawIntersection(self, frame): origin = np.array(frame.transform.GetPosition()) #print "origin is now at", origin d = DebugData() sliderIdx = self.slider.value controllerType = self.getControllerTypeFromCounter(sliderIdx) colorMaxRange = self.colorMap[controllerType] # if the QValue was 
empty then color it green if self.emptyQValue[sliderIdx]: colorMaxRange = [1, 1, 0] # this is yellow for i in xrange(self.Sensor.numRays): ray = self.Sensor.rays[:, i] rayTransformed = np.array(frame.transform.TransformNormal(ray)) #print "rayTransformed is", rayTransformed intersection = self.Sensor.raycast( self.locator, origin, origin + rayTransformed * self.Sensor.rayLength) if intersection is not None: d.addLine(origin, intersection, color=[1, 0, 0]) else: d.addLine(origin, origin + rayTransformed * self.Sensor.rayLength, color=colorMaxRange) vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255') #camera = self.view.camera() #camera.SetFocalPoint(frame.transform.GetPosition()) #camera.SetPosition(frame.transform.TransformPoint((-30,0,10))) def getControllerTypeFromCounter(self, counter): name = self.controllerTypeOrder[0] for controllerType in self.controllerTypeOrder[1:]: if counter >= self.idxDict[controllerType]: name = controllerType return name def setRobotFrameState(self, x, y, theta): t = vtk.vtkTransform() t.Translate(x, y, 0.0) t.RotateZ(np.degrees(theta)) self.robot.getChildFrame().copyFrame(t) # returns true if we are in collision def checkInCollision(self, raycastDistance=None): if raycastDistance is None: self.setRobotFrameState(self.Car.state[0], self.Car.state[1], self.Car.state[2]) raycastDistance = self.Sensor.raycastAll(self.frame) if np.min(raycastDistance) < self.collisionThreshold: return True else: return False def tick(self): #print timer.elapsed #simulate(t.elapsed) x = np.sin(time.time()) y = np.cos(time.time()) self.setRobotFrameState(x, y, 0.0) if (time.time() - self.playTime) > self.endTime: self.playTimer.stop() def tick2(self): newtime = time.time() - self.playTime print time.time() - self.playTime x = np.sin(newtime) y = np.cos(newtime) self.setRobotFrameState(x, y, 0.0) # just increment the slider, stop the timer if we get to the end def playTimerCallback(self): self.sliderMovedByPlayTimer = True currentIdx = self.slider.value nextIdx = currentIdx + 1 self.slider.setSliderPosition(nextIdx) if currentIdx >= self.sliderMax: print "reached end of tape, stopping playTime" self.playTimer.stop() def onSliderChanged(self, value): if not self.sliderMovedByPlayTimer: self.playTimer.stop() numSteps = len(self.stateOverTime) idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax))) idx = min(idx, numSteps - 1) x, y, theta = self.stateOverTime[idx] self.setRobotFrameState(x, y, theta) self.sliderMovedByPlayTimer = False def onPlayButton(self): if self.playTimer.isActive(): self.onPauseButton() return print 'play' self.playTimer.start() self.playTime = time.time() def onPauseButton(self): print 'pause' self.playTimer.stop() def computeRunStatistics(self): numRuns = len(self.simulationData) runStart = np.zeros(numRuns) runDuration = np.zeros(numRuns) grid = np.arange(1, numRuns + 1) discountedReward = np.zeros(numRuns) avgReward = np.zeros(numRuns) avgRewardNoCollisionPenalty = np.zeros(numRuns) idxMap = dict() for controllerType, color in self.colorMap.iteritems(): idxMap[controllerType] = np.zeros(numRuns, dtype=bool) for idx, val in enumerate(self.simulationData): runStart[idx] = val['startIdx'] runDuration[idx] = val['duration'] discountedReward[idx] = val['discountedReward'] avgReward[idx] = val['avgReward'] avgRewardNoCollisionPenalty[idx] = val[ 'avgRewardNoCollisionPenalty'] controllerType = val['controllerType'] idxMap[controllerType][idx] = True def plotRunData(self, controllerTypeToPlot=None, showPlot=True, onlyLearned=False): if 
controllerTypeToPlot == None: controllerTypeToPlot = self.colorMap.keys() if onlyLearned: controllerTypeToPlot = ['learnedRandom', 'learned'] numRuns = len(self.simulationData) runStart = np.zeros(numRuns) runDuration = np.zeros(numRuns) grid = np.arange(1, numRuns + 1) discountedReward = np.zeros(numRuns) avgReward = np.zeros(numRuns) avgRewardNoCollisionPenalty = np.zeros(numRuns) idxMap = dict() for controllerType, color in self.colorMap.iteritems(): idxMap[controllerType] = np.zeros(numRuns, dtype=bool) for idx, val in enumerate(self.simulationData): runStart[idx] = val['startIdx'] runDuration[idx] = val['duration'] discountedReward[idx] = val['discountedReward'] avgReward[idx] = val['avgReward'] avgRewardNoCollisionPenalty[idx] = val[ 'avgRewardNoCollisionPenalty'] controllerType = val['controllerType'] idxMap[controllerType][idx] = True # usedQValueController[idx] = (val['controllerType'] == "QValue") # usedDefaultController[idx] = (val['controllerType'] == "default") # usedDefaultRandomController[idx] = (val['controllerType'] == "defaultRandom") # usedQValueRandomController[idx] = (val['controllerType'] == "QValueRandom") self.runStatistics = dict() dataMap = { 'duration': runDuration, 'discountedReward': discountedReward, 'avgReward': avgReward, 'avgRewardNoCollisionPenalty': avgRewardNoCollisionPenalty } def computeRunStatistics(dataMap): for controllerType, idx in idxMap.iteritems(): d = dict() for dataName, dataSet in dataMap.iteritems(): # average the appropriate values in dataset d[dataName] = np.sum( dataSet[idx]) / (1.0 * np.size(dataSet[idx])) self.runStatistics[controllerType] = d computeRunStatistics(dataMap) if not showPlot: return plt.figure() # # idxDefaultRandom = np.where(usedDefaultRandomController==True)[0] # idxQValueController = np.where(usedQValueController==True)[0] # idxQValueControllerRandom = np.where(usedQValueControllerRandom==True)[0] # idxDefault = np.where(usedDefaultController==True)[0] # # plotData = dict() # plotData['defaultRandom'] = {'idx': idxDefaultRandom, 'color': 'b'} # plotData['QValue'] = {'idx': idxQValueController, 'color': 'y'} # plotData['default'] = {'idx': idxDefault, 'color': 'g'} def scatterPlot(dataToPlot): for controllerType in controllerTypeToPlot: idx = idxMap[controllerType] plt.scatter(grid[idx], dataToPlot[idx], color=self.colorMap[controllerType]) def barPlot(dataName): plt.title(dataName) barWidth = 0.5 barCounter = 0 index = np.arange(len(controllerTypeToPlot)) for controllerType in controllerTypeToPlot: val = self.runStatistics[controllerType] plt.bar(barCounter, val[dataName], barWidth, color=self.colorMap[controllerType], label=controllerType) barCounter += 1 plt.xticks(index + barWidth / 2.0, controllerTypeToPlot) plt.subplot(4, 1, 1) plt.title('run duration') scatterPlot(runDuration) # for controllerType, idx in idxMap.iteritems(): # plt.scatter(grid[idx], runDuration[idx], color=self.colorMap[controllerType]) # plt.scatter(runStart[idxDefaultRandom], runDuration[idxDefaultRandom], color='b') # plt.scatter(runStart[idxQValueController], runDuration[idxQValueController], color='y') # plt.scatter(runStart[idxDefault], runDuration[idxDefault], color='g') plt.xlabel('run #') plt.ylabel('episode duration') plt.subplot(2, 1, 2) plt.title('discounted reward') scatterPlot(discountedReward) # for key, val in plotData.iteritems(): # plt.scatter(grid[idx], discountedReward[idx], color=self.colorMap[controllerType]) # plt.subplot(3,1,3) # plt.title("average reward") # scatterPlot(avgReward) # for key, val in plotData.iteritems(): 
# plt.scatter(grid[val['idx']],avgReward[val['idx']], color=val['color']) # plt.subplot(4,1,4) # plt.title("average reward no collision penalty") # scatterPlot(avgRewardNoCollisionPenalty) # # for key, val in plotData.iteritems(): # # plt.scatter(grid[val['idx']],avgRewardNoCollisionPenalty[val['idx']], color=val['color']) ## plot summary statistics plt.figure() plt.subplot(2, 1, 1) barPlot("duration") plt.subplot(2, 1, 2) barPlot("discountedReward") # plt.subplot(3,1,3) # barPlot("avgReward") # # plt.subplot(4,1,4) # barPlot("avgRewardNoCollisionPenalty") plt.show() def plotMultipleRunData(self, simList, toPlot=['duration', 'discountedReward'], controllerType='learned'): plt.figure() numPlots = len(toPlot) grid = np.arange(len(simList)) def plot(fieldToPlot, plotNum): plt.subplot(numPlots, 1, plotNum) plt.title(fieldToPlot) val = 0 * grid barWidth = 0.5 barCounter = 0 for idx, sim in enumerate(simList): value = sim.runStatistics[controllerType][fieldToPlot] plt.bar(idx, value, barWidth) counter = 1 for fieldToPlot in toPlot: plot(fieldToPlot, counter) counter += 1 plt.show() def saveToFile(self, filename): # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime filename = 'data/' + filename + ".out" my_shelf = shelve.open(filename, 'n') my_shelf['options'] = self.options if self.options['SARSA']['type'] == "discrete": my_shelf['SARSA_QValues'] = self.Sarsa.QValues my_shelf['simulationData'] = self.simulationData my_shelf['stateOverTime'] = self.stateOverTime my_shelf['raycastData'] = self.raycastData my_shelf['controlInputData'] = self.controlInputData my_shelf['emptyQValue'] = self.emptyQValue my_shelf['numTimesteps'] = self.numTimesteps my_shelf['idxDict'] = self.idxDict my_shelf['counter'] = self.counter my_shelf.close() @staticmethod def loadFromFile(filename): filename = 'data/' + filename + ".out" sim = Simulator(autoInitialize=False, verbose=False) my_shelf = shelve.open(filename) sim.options = my_shelf['options'] sim.initialize() if sim.options['SARSA']['type'] == "discrete": sim.Sarsa.QValues = np.array(my_shelf['SARSA_QValues']) sim.simulationData = my_shelf['simulationData'] # sim.runStatistics = my_shelf['runStatistics'] sim.stateOverTime = np.array(my_shelf['stateOverTime']) sim.raycastData = np.array(my_shelf['raycastData']) sim.controlInputData = np.array(my_shelf['controlInputData']) sim.emptyQValue = np.array(my_shelf['emptyQValue']) sim.numTimesteps = my_shelf['numTimesteps'] sim.idxDict = my_shelf['idxDict'] sim.counter = my_shelf['counter'] my_shelf.close() return sim
def main(params): start_time = time.time() output_path = params['output_path'] if 'gridsearch' in params.keys(): gridsearch_results_path = params['gridsearch_results_path'] gridsearch = params['gridsearch'] else: gridsearch = False plot_interval = 10 if gridsearch else 1 with open(os.path.join(output_path, 'parameters.yaml'), 'w') as f: yaml.dump(params, f) # LOGGING all_loggers = [] formatter = logging.Formatter( '%(asctime)s:%(msecs)03d-%(levelname)s-%(message)s', datefmt='%H:%M:%S') # General logger logger = logging.getLogger('general') logger.setLevel(logging.DEBUG) general_level_file_handler = logging.FileHandler( os.path.join(output_path, 'general_log.txt')) general_level_file_handler.setLevel(logging.INFO) general_level_file_handler.setFormatter(formatter) logger.addHandler(general_level_file_handler) if not gridsearch: # only save debug logs for real experiments (debug logs are huge) debug_level_file_handler = logging.FileHandler( os.path.join(output_path, 'debug_log.txt')) debug_level_file_handler.setLevel(logging.DEBUG) debug_level_file_handler.setFormatter(formatter) logger.addHandler(debug_level_file_handler) handler = logging.StreamHandler() handler.setLevel(logging.INFO) handler.setFormatter(formatter) logger.addHandler(handler) all_loggers.append(logger) # Received-measurement logger measurement_logger = logging.getLogger("measurement") handler = logging.FileHandler( os.path.join(output_path, 'measurement_logs.txt')) handler.setFormatter(formatter) measurement_logger.addHandler(handler) measurement_logger.setLevel(logging.DEBUG) all_loggers.append(measurement_logger) # Action logger action_logger = logging.getLogger("action") handler = logging.FileHandler(os.path.join(output_path, 'action_logs.txt')) handler.setFormatter(formatter) action_logger.addHandler(handler) action_logger.setLevel(logging.DEBUG) all_loggers.append(action_logger) # Trajectory logger trajectory_logger = logging.getLogger("trajectory") handler = logging.FileHandler( os.path.join(output_path, 'trajectory_logs.txt')) handler.setFormatter(formatter) trajectory_logger.addHandler(handler) trajectory_logger.setLevel(logging.DEBUG) all_loggers.append(trajectory_logger) # create results directories model_dir = os.path.join(output_path, 'models') data_basepath = os.path.join(output_path, 'data') all_paths = [model_dir, data_basepath] for _path in all_paths: if not os.path.exists(_path): os.makedirs(_path) # initialize/load program state start_program_state = { 'episode': -1, 'global_step': 0, 'first_success': None, 'best_episode_score': 0, 'best_episode_index': 0, 'output_path': output_path, 'git_head': util.get_git_revision_hash(), 'run_finished': False, 'start_time': time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(start_time)) } program_state_path = os.path.join(output_path, 'program_state.yaml') try: with open(program_state_path, 'r') as f: program_state = yaml.load(f) logger.info('Loaded program_state') except Exception: logger.debug( 'Did not find program_state.dump file to restore program state, starting new' ) program_state = start_program_state # Experience buffer exppath = os.path.join(output_path, 'experience') if not os.path.exists(exppath): os.makedirs(exppath) expbuffer_dtype = [('s1', np.ndarray), ('a1', np.int), ('r1', np.float), ('s2', np.ndarray), ('td', np.float), ('t2', np.bool)] exp = ExperienceBuffer(exppath, keep_n_episodes=params['keep_n_episodes'], buffer_dtype=expbuffer_dtype) if len(exp.episode_buffers) - 1 != program_state['episode']: error_msg = 'Episode index found in program_state 
does not match the episodes in ' + \ 'the experience buffer. Did you delete experience? In this case, you can ' + \ 'modify program_state.yaml accordingly. This involves setting the value ' + \ 'for episode to -1. Currently, we would see inconsistent indices in the ' + \ 'experience buffer and the log files. Should we continue anyways? (y/n)' if input(error_msg) != 'y': sys.exit() all_loggers.append(exp.logger) logger.info('Experience size: ' + str(len(exp))) # ACTIONS x_y_step = 0.3 z_step = 0.1 actions = np.array( [ [0., 0., z_step], # up [0., -x_y_step, z_step], # forward [0., x_y_step, z_step], # backward [-x_y_step, 0., z_step], # left [x_y_step, 0., z_step], # right ], dtype=np.float32) n_actions = len(actions) # setup models # Reward reward = Reward(step_reward=params['step_reward'], fail=params['fail_reward'], success=params['success_reward']) # QModels model_module = getattr(models, params['model']) model = model_module.Model(params, n_actions) target_model = model_module.Model(params, n_actions) current_model_path = os.path.join(output_path, 'models', 'current') previous_model_path = os.path.join(output_path, 'models', 'previous') try: logger.info('Loading model from: ' + current_model_path) model = torch.load(current_model_path) target_model = torch.load(current_model_path) except Exception: # no saved models found, saving randomly initiated ones model.save(current_model_path) model.save(previous_model_path) # environment if params['environment'] == 'toy': env = environment.ToyTrajectoryEnvironment( os.path.join("trajectories", params['train_trajectory_name']), lower_hose_radius=params['lower_hose_radius'], project_angle=np.pi / 4) elif params['environment'] == 'toy_sophisticated': env = environment.SophisticatedToyTrajectoryEnvironment( os.path.join("trajectories", params['train_trajectory_name']), lower_hose_radius=params['lower_hose_radius'], upper_hose_radius=params['upper_hose_radius'], project_angle=np.pi / 4) elif params['environment'] == 'microscope': # if connected to the SPM env = environment.STMEnvironment(params['host'], params['port']) else: raise Exception('Unrecognized environment {} requested'.format( params['environment'])) # ruptures and successes arrays rupt_file = os.path.join(output_path, 'ruptures.npy') if os.path.exists(rupt_file): ruptures = np.load(rupt_file) else: ruptures = np.empty((0, 3)) succ_file = os.path.join(output_path, 'successes.npy') if os.path.exists(succ_file): successes = np.load(succ_file) else: successes = np.empty((0, 3)) # agent agent_module = getattr(agents, params['agent']) agent = agent_module.Agent(params=params, model=model, target_model=target_model, environment=env, experience=exp, actions=actions, reward=reward, ruptures=ruptures, successes=successes, global_step=program_state['global_step'], episode=program_state['episode']) logger.info('Starting at training step ' + str(agent.global_step)) best_reward = 0 while True: agent.reset() env.reset() agent.run_episode() exp.finish_episode() Q, V, A, R, actions = agent.get_episode_records() episode_status = agent.episode_status if agent.episode % plot_interval == 0 and agent.episode_steps > 0: agent.end_of_episode_plots() if episode_status == 'delete_last_episode': # SPM experimenter wants to delete the last episode logger.info( 'Deleting last episode and reload model from one episode before' ) exp.delete_episode(agent.episode) # if we received delete_last_episode while having 0 steps in the current episode, # then the protocol is to actually to delete the previous episode. 
if agent.episode_steps == 0: logger.info( 'I received delete_last_episode while the current episode still ' + 'had not started, which means I should actually delete the previous ' + 'episode. I will do that now') exp.delete_episode(agent.episode - 1) # reload previous model logger.info('Load model from: ' + previous_model_path) model = torch.load(previous_model_path) target_model = torch.load(previous_model_path) elif episode_status in ['fail', 'success']: # Training agent.train() # Logging logger.info('Global training step: {}'.format(agent.global_step)) # save best episode current_reward = np.sum(R) if current_reward > best_reward: best_reward = current_reward program_state['best_episode_score'] = agent.episode_steps program_state['best_episode_index'] = agent.episode # update program state program_state['episode'] = int(agent.episode) program_state['global_step'] = int(agent.global_step) if episode_status == 'success' and program_state[ 'first_success'] is None: program_state['first_success'] = int(agent.episode) # save everything to disk. That process should not be interrupted. with util.Ignore_KeyboardInterrupt(): data_path = os.path.join(data_basepath, str(agent.episode).zfill(3) + '_data') with open(program_state_path, 'w') as f: yaml.dump(program_state, f) # put program_state into gridsearch results folder if gridsearch: with open( os.path.join(gridsearch_results_path, 'program_state.yaml'), 'w') as f: yaml.dump(program_state, f) exp.save_episode() np.savez(data_path, Q=Q, A=A, V=V, actions=actions, reward=R) # on disk, copy the current model file to previous model, then save the current # in-memory model to disk as the current model if not episode_status == 'delete_last_episode': shutil.copy(current_model_path, previous_model_path) # save the model parameters both in the file for the current model, as well as # in the file for the model for the current episode model.save(current_model_path) model.save( os.path.join(output_path, 'models', 'episode_{}'.format(agent.episode))) np.save(rupt_file, agent.ruptures) np.save(succ_file, agent.successes) # handle end of run (lifting success, or more than stop_after_episode episodes) if episode_status == 'success' or agent.episode >= params[ 'stop_after_episode']: program_state['run_finished'] = True end_time = time.time() program_state['run_time'] = end_time - start_time program_state['end_time'] = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(end_time)) with open(program_state_path, 'w') as f: yaml.dump(program_state, f) # put program_state into gridsearch results folder if gridsearch: with open( os.path.join(gridsearch_results_path, 'program_state.yaml'), 'w') as f: yaml.dump(program_state, f) # if this is a gridsearch, pack the output files into a .tar.gz # and delete the output folder # close all logger filehandlers such that we can delete the folder later for logger in all_loggers: for handler in logger.handlers[:]: handler.close() logger.removeHandler(handler) tarfile_path = output_path + '.tar.gz' with tarfile.open(tarfile_path, "w:gz") as tar: tar.add(output_path, arcname=os.path.basename(output_path)) tar.close() # delete the output folder shutil.rmtree(output_path, ignore_errors=True) # end of "if gridsearch" print('Run finished at episode: {}'.format(agent.episode)) return
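main() is driven entirely by the params dictionary; the snippet does not show how it is invoked. A hypothetical entry point, assuming the parameters live in a YAML file (the file name and its exact keys are assumptions, not part of the original):

import yaml

if __name__ == '__main__':
    # Hypothetical launcher: main() only needs a dict with 'output_path',
    # the model/agent/environment names, reward settings, and so on.
    with open('parameters.yaml') as f:
        params = yaml.safe_load(f)
    main(params)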
class ABCAgent():
    def __init__(self):
        SEED = 0  # fixed seed for reproducibility (use datetime.now() for a varying seed)
        self.n = 100
        self.tolerance = 1e-2
        self.bandwidth = 0.3
        self.maxNegAccMin = 3.0
        self.maxNegAccMax = 6.0
        self.run_cnt = 3600
        self.params_template = {
            "length": 5.0,
            "width": 2.0,
            "maxPosAcc": 3.0,
            "maxNegAcc": 4.5,
            "usualPosAcc": 2.0,
            "usualNegAcc": 2.5,
            "minGap": 2.5,
            "maxSpeed": 50 / 3,
            "headwayTime": 1.5
        }
        self.memo = "1_3"
        roadnet_file = 'data/roadnet/roadnet_{0}.json'.format(self.memo)
        flow_file = 'data/flow/flow_{0}.json'.format(self.memo)
        signal_file = 'data/signal/signal_{0}.json'.format(self.memo)
        self.observed_file = "data/gmsa_observed_%s.txt" % self.memo

        self.rand = Random(SEED)
        self.accepted = []
        self.kde = KernelDensity(bandwidth=self.bandwidth)
        self.reward = Reward()
        self.gen = DataGenerator(flow_file, signal_file)
        self.eng = engine.Engine(1.0, 8, True, True)
        self.eng.load_roadnet(roadnet_file)
        self.f = open(self.observed_file)

    def get_params(self):
        # Draw maxNegAcc uniformly from the prior [maxNegAccMin, maxNegAccMax].
        params = self.params_template.copy()
        params["maxNegAcc"] = self.maxNegAccMin + self.rand.random() * (
            self.maxNegAccMax - self.maxNegAccMin)
        return params

    def get_observed(self):
        return self.f.readline()

    def run_single(self, params):
        # Replay the observed file from the start for every candidate parameter set.
        self.f = open(self.observed_file)
        self.eng.reset()
        losses = []
        for t in range(self.run_cnt):
            self.gen.simulate_data(self.eng, params, t)
            self.eng.next_step()
            generated = self.reward.get_output(self.eng)
            observed = self.get_observed()
            # get loss
            loss = -self.reward.process(generated, observed)["lane_speed"]
            losses.append(loss)
        return np.mean(losses)

    def plot_density(self, fname):
        x = np.linspace(self.maxNegAccMin, self.maxNegAccMax, 100)
        plt.plot(x, np.exp(self.kde.score_samples(x[:, np.newaxis])))
        plt.savefig(fname)

    def run(self):
        for _ in range(self.n):
            params = self.get_params()
            print("MaxNegAcc %.6f: " % params["maxNegAcc"], end="")
            loss = self.run_single(params)
            print("Loss %f" % loss)
            if loss < self.tolerance:
                self.accepted.append(params["maxNegAcc"])
        print(self.accepted)
        self.kde.fit(np.array(self.accepted).reshape((-1, 1)))
        self.plot_density('data/kde.png')
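ABCAgent.run() implements rejection-sampling ABC: draw maxNegAcc from a uniform prior, simulate, keep the draw only if the loss against the observed data is below the tolerance, and finally fit a kernel density estimate to the accepted draws. A stripped-down sketch of the same pattern with a synthetic stand-in for the simulator (the quadratic loss and the "true" value 4.5 are illustrative assumptions, not part of the original):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(0)
prior_lo, prior_hi, tolerance = 3.0, 6.0, 1e-2

def simulate_loss(theta):
    # Stand-in for run_single(): distance between simulated and observed data.
    return (theta - 4.5) ** 2 + abs(rng.normal(scale=1e-3))

accepted = [theta for theta in rng.uniform(prior_lo, prior_hi, size=1000)
            if simulate_loss(theta) < tolerance]      # ABC rejection step

# Smooth the accepted draws into an approximate posterior over the parameter.
kde = KernelDensity(bandwidth=0.3).fit(np.array(accepted).reshape(-1, 1))
grid = np.linspace(prior_lo, prior_hi, 100).reshape(-1, 1)
posterior = np.exp(kde.score_samples(grid))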
def gen_reward(self):
    # Spawn a reward with a fresh id at a uniformly random position on the grid.
    reward = Reward(self.next_id,
                    random.randint(0, self.sx),
                    random.randint(0, self.sy))
    self.rewards.append(reward)
    self.next_id += 1
def encounter(
        agent: Agent,
        foe: Foe,
        max_turns: int,
        forward_search_and_reward_kwargs: dict = None,
):
    """
    Simulate a single agent-vs-foe encounter for up to max_turns turns,
    replanning with forward search each turn against a belief-state foe.

    :param agent: the agent (controlled by forward search)
    :param foe: the true foe
    :param max_turns: maximum number of turns before the encounter is cut off
    :param forward_search_and_reward_kwargs:
        - forward_search_kwargs:
            - depth: int = 3,
        - reward_kwargs:
            - reward_for_kill: float = 1000,
            - penalty_for_dying: float = -1000,
            - agent_hp_bonus: float = 2,
            - foe_hp_bonus: float = -1
    :return: (agent, foe, encounter_stats) where encounter_stats is a
        pd.DataFrame of per-turn records
    """
    # Handle kwargs (avoid a mutable default argument)
    if forward_search_and_reward_kwargs is None:
        forward_search_and_reward_kwargs = {}
    forward_search_kwargs, reward_kwargs = __get_kwargs(
        forward_search_and_reward_kwargs)

    reward = Reward(agent, foe, **reward_kwargs)
    utility = reward.get_reward(agent, foe)

    faux_foe = Foe()  # The belief state of our foe

    # Arrays to hold encounter_stats
    agent_policies = []
    agent_spell_slots = []
    agent_shields = []
    agent_healths = []
    foe_healths = []
    foe_reactions = []
    faux_foe_healths = []
    forward_search_utilities = []
    rewards = []

    for i in range(max_turns):
        agent_policy, forward_search_utility = forward_search(
            agent=copy.deepcopy(agent),
            foe=copy.deepcopy(faux_foe),
            reward=reward,
            utility=utility,
            **forward_search_kwargs)
        agent_action, foe_reaction = turn(agent, agent_policy, foe)
        faux_foe = update_foe_belief(faux_foe, foe_reaction)

        utility += reward.get_reward(agent, foe)

        # Collect turn data into encounter_stats
        agent_policies.append(agent_policy)
        agent_spell_slots.append(agent.states["spell slots"])
        agent_shields.append(agent.states["shield"])
        agent_healths.append(agent.hp)
        foe_healths.append(foe.hp)
        foe_reactions.append(foe_reaction)
        faux_foe_healths.append(faux_foe.hp)
        forward_search_utilities.append(forward_search_utility)
        rewards.append(utility)

        if agent.hp <= 0 or foe.hp <= 0:
            # end encounter if either dies
            break

    encounter_stats = pd.DataFrame({
        "agent actions": agent_policies,
        "agent spell slots": agent_spell_slots,
        "agent shield": agent_shields,
        "agent health": agent_healths,
        "foe health": foe_healths,
        "foe reactions": foe_reactions,
        "faux foe health": faux_foe_healths,
        "forward search utility": forward_search_utilities,
        "utility": rewards,
    })

    return agent, foe, encounter_stats
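A hedged usage sketch of encounter(), following the kwargs layout documented in the docstring; __get_kwargs is not shown here, so the nested key names are assumed to match the docstring, and the no-argument Agent()/Foe() construction is also an assumption:

# Illustrative call; max_turns and the nested kwargs follow the docstring above.
agent, foe, encounter_stats = encounter(
    agent=Agent(),
    foe=Foe(),
    max_turns=20,
    forward_search_and_reward_kwargs={
        "forward_search_kwargs": {"depth": 3},
        "reward_kwargs": {
            "reward_for_kill": 1000,
            "penalty_for_dying": -1000,
            "agent_hp_bonus": 2,
            "foe_hp_bonus": -1,
        },
    },
)
print(encounter_stats[["agent health", "foe health", "utility"]].tail())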
from game import Game
from agent import Agent
from reward import Reward

game_length = 10
num_colors = 6
guess_length = 4

game = Game(game_length, num_colors, guess_length)
agent = Agent(game_length, num_colors, guess_length)
reward = Reward()

while not game.is_over():
    game.print()
    game_state = game.getState()
    agent_guess = agent.guess(game_state, reward.compute(game_state))
    print(agent)
    game.guess(agent_guess)
class Simulator(object): def __init__( self, percentObsDensity=20, endTime=40, randomizeControl=False, nonRandomWorld=False, circleRadius=0.7, worldScale=1.0, supervisedTrainingTime=500, autoInitialize=True, verbose=True, sarsaType="discrete", ): self.verbose = verbose self.randomizeControl = randomizeControl self.startSimTime = time.time() self.collisionThreshold = 1.3 self.randomSeed = 5 self.Sarsa_numInnerBins = 4 self.Sarsa_numOuterBins = 4 self.Sensor_rayLength = 8 self.sarsaType = sarsaType self.percentObsDensity = percentObsDensity self.supervisedTrainingTime = 10 self.learningRandomTime = 10 self.learningEvalTime = 10 self.defaultControllerTime = 10 self.nonRandomWorld = nonRandomWorld self.circleRadius = circleRadius self.worldScale = worldScale # create the visualizer object self.app = ConsoleApp() # view = app.createView(useGrid=False) self.view = self.app.createView(useGrid=False) self.initializeOptions() self.initializeColorMap() if autoInitialize: self.initialize() def initializeOptions(self): self.options = dict() self.options["Reward"] = dict() self.options["Reward"]["actionCost"] = 0.1 self.options["Reward"]["collisionPenalty"] = 100.0 self.options["Reward"]["raycastCost"] = 20.0 self.options["SARSA"] = dict() self.options["SARSA"]["type"] = "discrete" self.options["SARSA"]["lam"] = 0.7 self.options["SARSA"]["alphaStepSize"] = 0.2 self.options["SARSA"]["useQLearningUpdate"] = False self.options["SARSA"]["epsilonGreedy"] = 0.2 self.options["SARSA"]["burnInTime"] = 500 self.options["SARSA"]["epsilonGreedyExponent"] = 0.3 self.options["SARSA"]["exponentialDiscountFactor"] = 0.05 # so gamma = e^(-rho*dt) self.options["SARSA"]["numInnerBins"] = 5 self.options["SARSA"]["numOuterBins"] = 4 self.options["SARSA"]["binCutoff"] = 0.5 self.options["World"] = dict() self.options["World"]["obstaclesInnerFraction"] = 0.85 self.options["World"]["randomSeed"] = 40 self.options["World"]["percentObsDensity"] = 7.5 self.options["World"]["nonRandomWorld"] = True self.options["World"]["circleRadius"] = 1.75 self.options["World"]["scale"] = 1.0 self.options["Sensor"] = dict() self.options["Sensor"]["rayLength"] = 10 self.options["Sensor"]["numRays"] = 20 self.options["Car"] = dict() self.options["Car"]["velocity"] = 12 self.options["dt"] = 0.05 self.options["runTime"] = dict() self.options["runTime"]["supervisedTrainingTime"] = 10 self.options["runTime"]["learningRandomTime"] = 10 self.options["runTime"]["learningEvalTime"] = 10 self.options["runTime"]["defaultControllerTime"] = 10 def setDefaultOptions(self): defaultOptions = dict() defaultOptions["Reward"] = dict() defaultOptions["Reward"]["actionCost"] = 0.1 defaultOptions["Reward"]["collisionPenalty"] = 100.0 defaultOptions["Reward"]["raycastCost"] = 20.0 defaultOptions["SARSA"] = dict() defaultOptions["SARSA"]["type"] = "discrete" defaultOptions["SARSA"]["lam"] = 0.7 defaultOptions["SARSA"]["alphaStepSize"] = 0.2 defaultOptions["SARSA"]["useQLearningUpdate"] = False defaultOptions["SARSA"]["useSupervisedTraining"] = True defaultOptions["SARSA"]["epsilonGreedy"] = 0.2 defaultOptions["SARSA"]["burnInTime"] = 500 defaultOptions["SARSA"]["epsilonGreedyExponent"] = 0.3 defaultOptions["SARSA"]["exponentialDiscountFactor"] = 0.05 # so gamma = e^(-rho*dt) defaultOptions["SARSA"]["numInnerBins"] = 5 defaultOptions["SARSA"]["numOuterBins"] = 4 defaultOptions["SARSA"]["binCutoff"] = 0.5 defaultOptions["SARSA"]["forceDriveStraight"] = True defaultOptions["World"] = dict() defaultOptions["World"]["obstaclesInnerFraction"] = 0.85 
defaultOptions["World"]["randomSeed"] = 40 defaultOptions["World"]["percentObsDensity"] = 7.5 defaultOptions["World"]["nonRandomWorld"] = True defaultOptions["World"]["circleRadius"] = 1.75 defaultOptions["World"]["scale"] = 1.0 defaultOptions["Sensor"] = dict() defaultOptions["Sensor"]["rayLength"] = 10 defaultOptions["Sensor"]["numRays"] = 20 defaultOptions["Car"] = dict() defaultOptions["Car"]["velocity"] = 12 defaultOptions["dt"] = 0.05 defaultOptions["runTime"] = dict() defaultOptions["runTime"]["supervisedTrainingTime"] = 10 defaultOptions["runTime"]["learningRandomTime"] = 10 defaultOptions["runTime"]["learningEvalTime"] = 10 defaultOptions["runTime"]["defaultControllerTime"] = 10 for k in defaultOptions: self.options.setdefault(k, defaultOptions[k]) for k in defaultOptions: if not isinstance(defaultOptions[k], dict): continue for j in defaultOptions[k]: self.options[k].setdefault(j, defaultOptions[k][j]) def initializeColorMap(self): self.colorMap = dict() self.colorMap["defaultRandom"] = [0, 0, 1] self.colorMap["learnedRandom"] = [1.0, 0.54901961, 0.0] # this is orange self.colorMap["learned"] = [0.58039216, 0.0, 0.82745098] # this is yellow self.colorMap["default"] = [0, 1, 0] def initialize(self): self.dt = self.options["dt"] self.controllerTypeOrder = ["defaultRandom", "learnedRandom", "learned", "default"] self.setDefaultOptions() self.Sensor = SensorObj( rayLength=self.options["Sensor"]["rayLength"], numRays=self.options["Sensor"]["numRays"] ) self.Controller = ControllerObj(self.Sensor) self.Car = CarPlant(controller=self.Controller, velocity=self.options["Car"]["velocity"]) self.Reward = Reward( self.Sensor, collisionThreshold=self.collisionThreshold, actionCost=self.options["Reward"]["actionCost"], collisionPenalty=self.options["Reward"]["collisionPenalty"], raycastCost=self.options["Reward"]["raycastCost"], ) self.setSARSA() # create the things needed for simulation om.removeFromObjectModel(om.findObjectByName("world")) self.world = World.buildCircleWorld( percentObsDensity=self.options["World"]["percentObsDensity"], circleRadius=self.options["World"]["circleRadius"], nonRandom=self.options["World"]["nonRandomWorld"], scale=self.options["World"]["scale"], randomSeed=self.options["World"]["randomSeed"], obstaclesInnerFraction=self.options["World"]["obstaclesInnerFraction"], ) om.removeFromObjectModel(om.findObjectByName("robot")) self.robot, self.frame = World.buildRobot() self.locator = World.buildCellLocator(self.world.visObj.polyData) self.Sensor.setLocator(self.locator) self.frame = self.robot.getChildFrame() self.frame.setProperty("Scale", 3) self.frame.setProperty("Edit", True) self.frame.widget.HandleRotationEnabledOff() rep = self.frame.widget.GetRepresentation() rep.SetTranslateAxisEnabled(2, False) rep.SetRotateAxisEnabled(0, False) rep.SetRotateAxisEnabled(1, False) self.supervisedTrainingTime = self.options["runTime"]["supervisedTrainingTime"] self.learningRandomTime = self.options["runTime"]["learningRandomTime"] self.learningEvalTime = self.options["runTime"]["learningEvalTime"] self.defaultControllerTime = self.options["runTime"]["defaultControllerTime"] self.Car.setFrame(self.frame) print "Finished initialization" def setSARSA(self, type=None): if type is None: type = self.options["SARSA"]["type"] if type == "discrete": self.Sarsa = SARSADiscrete( sensorObj=self.Sensor, actionSet=self.Controller.actionSet, collisionThreshold=self.collisionThreshold, alphaStepSize=self.options["SARSA"]["alphaStepSize"], 
useQLearningUpdate=self.options["SARSA"]["useQLearningUpdate"], lam=self.options["SARSA"]["lam"], numInnerBins=self.options["SARSA"]["numInnerBins"], numOuterBins=self.options["SARSA"]["numOuterBins"], binCutoff=self.options["SARSA"]["binCutoff"], burnInTime=self.options["SARSA"]["burnInTime"], epsilonGreedy=self.options["SARSA"]["epsilonGreedy"], epsilonGreedyExponent=self.options["SARSA"]["epsilonGreedyExponent"], forceDriveStraight=self.options["SARSA"]["forceDriveStraight"], ) elif type == "continuous": self.Sarsa = SARSAContinuous( sensorObj=self.Sensor, actionSet=self.Controller.actionSet, lam=self.options["SARSA"]["lam"], alphaStepSize=self.options["SARSA"]["alphaStepSize"], collisionThreshold=self.collisionThreshold, burnInTime=self.options["SARSA"]["burnInTime"], epsilonGreedy=self.options["SARSA"]["epsilonGreedy"], epsilonGreedyExponent=self.options["SARSA"]["epsilonGreedyExponent"], ) else: raise ValueError("sarsa type must be either discrete or continuous") def runSingleSimulation(self, updateQValues=True, controllerType="default", simulationCutoff=None): if self.verbose: print "using QValue based controller = ", useQValueController self.setCollisionFreeInitialState() currentCarState = np.copy(self.Car.state) nextCarState = np.copy(self.Car.state) self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) nextRaycast = np.zeros(self.Sensor.numRays) self.Sarsa.resetElibilityTraces() # record the reward data runData = dict() reward = 0 discountedReward = 0 avgReward = 0 startIdx = self.counter while self.counter < self.numTimesteps - 1: idx = self.counter currentTime = self.t[idx] self.stateOverTime[idx, :] = currentCarState x = self.stateOverTime[idx, 0] y = self.stateOverTime[idx, 1] theta = self.stateOverTime[idx, 2] self.setRobotFrameState(x, y, theta) # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) self.raycastData[idx, :] = currentRaycast S_current = (currentCarState, currentRaycast) if controllerType not in self.colorMap.keys(): print raise ValueError("controller of type " + controllerType + " not supported") if controllerType in ["default", "defaultRandom"]: if controllerType == "defaultRandom": controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=True ) else: controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False ) if controllerType in ["learned", "learnedRandom"]: if controllerType == "learned": randomizeControl = False else: randomizeControl = True counterForGreedyDecay = self.counter - self.idxDict["learnedRandom"] controlInput, controlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy( S_current, counter=counterForGreedyDecay, randomize=randomizeControl ) self.emptyQValue[idx] = emptyQValue if emptyQValue and self.options["SARSA"]["useSupervisedTraining"]: controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False ) self.controlInputData[idx] = controlInput nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt) # want to compute nextRaycast so we can do the SARSA algorithm x = nextCarState[0] y = nextCarState[1] theta = nextCarState[2] self.setRobotFrameState(x, y, theta) 
nextRaycast = self.Sensor.raycastAll(self.frame) # Compute the next control input S_next = (nextCarState, nextRaycast) if controllerType in ["default", "defaultRandom"]: if controllerType == "defaultRandom": nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=True ) else: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False ) if controllerType in ["learned", "learnedRandom"]: if controllerType == "learned": randomizeControl = False else: randomizeControl = True counterForGreedyDecay = self.counter - self.idxDict["learnedRandom"] nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy( S_next, counter=counterForGreedyDecay, randomize=randomizeControl ) if emptyQValue and self.options["SARSA"]["useSupervisedTraining"]: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False ) # if useQValueController: # nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(S_next) # # if not useQValueController or emptyQValue: # nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(nextCarState, currentTime, self.frame, # raycastDistance=nextRaycast) # compute the reward reward = self.Reward.computeReward(S_next, controlInput) self.rewardData[idx] = reward discountedReward += self.Sarsa.gamma ** (self.counter - startIdx) * reward avgReward += reward ###### SARSA update if updateQValues: self.Sarsa.sarsaUpdate(S_current, controlInputIdx, reward, S_next, nextControlInputIdx) # bookkeeping currentCarState = nextCarState currentRaycast = nextRaycast self.counter += 1 # break if we are in collision if self.checkInCollision(nextRaycast): if self.verbose: print "Had a collision, terminating simulation" break if self.counter >= simulationCutoff: break # fill in the last state by hand self.stateOverTime[self.counter, :] = currentCarState self.raycastData[self.counter, :] = currentRaycast # return the total reward avgRewardNoCollisionPenalty = avgReward - reward avgReward = avgReward * 1.0 / max(1, self.counter - startIdx) avgRewardNoCollisionPenalty = avgRewardNoCollisionPenalty * 1.0 / max(1, self.counter - startIdx) # this extra multiplication is so that it is in the same "units" as avgReward runData["discountedReward"] = discountedReward * (1 - self.Sarsa.gamma) runData["avgReward"] = avgReward runData["avgRewardNoCollisionPenalty"] = avgRewardNoCollisionPenalty # this just makes sure we don't get stuck in an infinite loop. 
if startIdx == self.counter: self.counter += 1 return runData def setNumpyRandomSeed(self, seed=1): np.random.seed(seed) def runBatchSimulation(self, endTime=None, dt=0.05): # for use in playback self.dt = self.options["dt"] self.Sarsa.setDiscountFactor(dt) self.endTime = ( self.supervisedTrainingTime + self.learningRandomTime + self.learningEvalTime + self.defaultControllerTime ) self.t = np.arange(0.0, self.endTime, dt) maxNumTimesteps = np.size(self.t) self.stateOverTime = np.zeros((maxNumTimesteps + 1, 3)) self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays)) self.controlInputData = np.zeros(maxNumTimesteps + 1) self.rewardData = np.zeros(maxNumTimesteps + 1) self.emptyQValue = np.zeros(maxNumTimesteps + 1, dtype="bool") self.numTimesteps = maxNumTimesteps self.controllerTypeOrder = ["defaultRandom", "learnedRandom", "learned", "default"] self.counter = 0 self.simulationData = [] self.initializeStatusBar() self.idxDict = dict() numRunsCounter = 0 # three while loops for different phases of simulation, supervisedTraining, learning, default self.idxDict["defaultRandom"] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.supervisedTrainingTime / dt, self.numTimesteps) while (self.counter - loopStartIdx < self.supervisedTrainingTime / dt) and self.counter < self.numTimesteps: self.printStatusBar() useQValueController = False startIdx = self.counter runData = self.runSingleSimulation( updateQValues=True, controllerType="defaultRandom", simulationCutoff=simCutoff ) runData["startIdx"] = startIdx runData["controllerType"] = "defaultRandom" runData["duration"] = self.counter - runData["startIdx"] runData["endIdx"] = self.counter runData["runNumber"] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict["learnedRandom"] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.learningRandomTime / dt, self.numTimesteps) while (self.counter - loopStartIdx < self.learningRandomTime / dt) and self.counter < self.numTimesteps: self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation( updateQValues=True, controllerType="learnedRandom", simulationCutoff=simCutoff ) runData["startIdx"] = startIdx runData["controllerType"] = "learnedRandom" runData["duration"] = self.counter - runData["startIdx"] runData["endIdx"] = self.counter runData["runNumber"] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict["learned"] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.learningEvalTime / dt, self.numTimesteps) while (self.counter - loopStartIdx < self.learningEvalTime / dt) and self.counter < self.numTimesteps: self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation( updateQValues=False, controllerType="learned", simulationCutoff=simCutoff ) runData["startIdx"] = startIdx runData["controllerType"] = "learned" runData["duration"] = self.counter - runData["startIdx"] runData["endIdx"] = self.counter runData["runNumber"] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict["default"] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.defaultControllerTime / dt, self.numTimesteps) while (self.counter - loopStartIdx < self.defaultControllerTime / dt) and self.counter < self.numTimesteps - 1: self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation( updateQValues=False, controllerType="default", simulationCutoff=simCutoff ) 
runData["startIdx"] = startIdx runData["controllerType"] = "default" runData["duration"] = self.counter - runData["startIdx"] runData["endIdx"] = self.counter runData["runNumber"] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) # BOOKKEEPING # truncate stateOverTime, raycastData, controlInputs to be the correct size self.numTimesteps = self.counter + 1 self.stateOverTime = self.stateOverTime[0 : self.counter + 1, :] self.raycastData = self.raycastData[0 : self.counter + 1, :] self.controlInputData = self.controlInputData[0 : self.counter + 1] self.rewardData = self.rewardData[0 : self.counter + 1] self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime def initializeStatusBar(self): self.numTicks = 10 self.nextTickComplete = 1.0 / float(self.numTicks) self.nextTickIdx = 1 print "Simulation percentage complete: (", self.numTicks, " # is complete)" def printStatusBar(self): fractionDone = float(self.counter) / float(self.numTimesteps) if fractionDone > self.nextTickComplete: self.nextTickIdx += 1 self.nextTickComplete += 1.0 / self.numTicks timeSoFar = time.time() - self.startSimTime estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0 print "#" * self.nextTickIdx, "-" * ( self.numTicks - self.nextTickIdx ), "estimated", estimatedTimeLeft_minutes, "minutes left" def setCollisionFreeInitialState(self): tol = 5 while True: x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol, 1)[0] y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol, 1)[0] theta = np.random.uniform(0, 2 * np.pi, 1)[0] self.Car.setCarState(x, y, theta) self.setRobotFrameState(x, y, theta) if not self.checkInCollision(): break # if self.checkInCollision(): # print "IN COLLISION" # else: # print "COLLISION FREE" return x, y, theta def setupPlayback(self): self.timer = TimerCallback(targetFps=30) self.timer.callback = self.tick playButtonFps = 1.0 / self.dt print "playButtonFPS", playButtonFps self.playTimer = TimerCallback(targetFps=playButtonFps) self.playTimer.callback = self.playTimerCallback self.sliderMovedByPlayTimer = False panel = QtGui.QWidget() l = QtGui.QHBoxLayout(panel) playButton = QtGui.QPushButton("Play/Pause") playButton.connect("clicked()", self.onPlayButton) slider = QtGui.QSlider(QtCore.Qt.Horizontal) slider.connect("valueChanged(int)", self.onSliderChanged) self.sliderMax = self.numTimesteps slider.setMaximum(self.sliderMax) self.slider = slider l.addWidget(playButton) l.addWidget(slider) w = QtGui.QWidget() l = QtGui.QVBoxLayout(w) l.addWidget(self.view) l.addWidget(panel) w.showMaximized() self.frame.connectFrameModified(self.updateDrawIntersection) self.updateDrawIntersection(self.frame) applogic.resetCamera(viewDirection=[0.2, 0, -1]) self.view.showMaximized() self.view.raise_() panel = screengrabberpanel.ScreenGrabberPanel(self.view) panel.widget.show() elapsed = time.time() - self.startSimTime simRate = self.counter / elapsed print "Total run time", elapsed print "Ticks (Hz)", simRate print "Number of steps taken", self.counter self.app.start() def run(self, launchApp=True): self.counter = 1 self.runBatchSimulation() # self.Sarsa.plotWeights() if launchApp: self.setupPlayback() def updateDrawIntersection(self, frame): origin = np.array(frame.transform.GetPosition()) # print "origin is now at", origin d = DebugData() sliderIdx = self.slider.value controllerType = self.getControllerTypeFromCounter(sliderIdx) colorMaxRange = self.colorMap[controllerType] # if the 
QValue was empty then color it green if self.emptyQValue[sliderIdx]: colorMaxRange = [1, 1, 0] # this is yellow for i in xrange(self.Sensor.numRays): ray = self.Sensor.rays[:, i] rayTransformed = np.array(frame.transform.TransformNormal(ray)) # print "rayTransformed is", rayTransformed intersection = self.Sensor.raycast(self.locator, origin, origin + rayTransformed * self.Sensor.rayLength) if intersection is not None: d.addLine(origin, intersection, color=[1, 0, 0]) else: d.addLine(origin, origin + rayTransformed * self.Sensor.rayLength, color=colorMaxRange) vis.updatePolyData(d.getPolyData(), "rays", colorByName="RGB255") # camera = self.view.camera() # camera.SetFocalPoint(frame.transform.GetPosition()) # camera.SetPosition(frame.transform.TransformPoint((-30,0,10))) def getControllerTypeFromCounter(self, counter): name = self.controllerTypeOrder[0] for controllerType in self.controllerTypeOrder[1:]: if counter >= self.idxDict[controllerType]: name = controllerType return name def setRobotFrameState(self, x, y, theta): t = vtk.vtkTransform() t.Translate(x, y, 0.0) t.RotateZ(np.degrees(theta)) self.robot.getChildFrame().copyFrame(t) # returns true if we are in collision def checkInCollision(self, raycastDistance=None): if raycastDistance is None: self.setRobotFrameState(self.Car.state[0], self.Car.state[1], self.Car.state[2]) raycastDistance = self.Sensor.raycastAll(self.frame) if np.min(raycastDistance) < self.collisionThreshold: return True else: return False def tick(self): # print timer.elapsed # simulate(t.elapsed) x = np.sin(time.time()) y = np.cos(time.time()) self.setRobotFrameState(x, y, 0.0) if (time.time() - self.playTime) > self.endTime: self.playTimer.stop() def tick2(self): newtime = time.time() - self.playTime print time.time() - self.playTime x = np.sin(newtime) y = np.cos(newtime) self.setRobotFrameState(x, y, 0.0) # just increment the slider, stop the timer if we get to the end def playTimerCallback(self): self.sliderMovedByPlayTimer = True currentIdx = self.slider.value nextIdx = currentIdx + 1 self.slider.setSliderPosition(nextIdx) if currentIdx >= self.sliderMax: print "reached end of tape, stopping playTime" self.playTimer.stop() def onSliderChanged(self, value): if not self.sliderMovedByPlayTimer: self.playTimer.stop() numSteps = len(self.stateOverTime) idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax))) idx = min(idx, numSteps - 1) x, y, theta = self.stateOverTime[idx] self.setRobotFrameState(x, y, theta) self.sliderMovedByPlayTimer = False def onPlayButton(self): if self.playTimer.isActive(): self.onPauseButton() return print "play" self.playTimer.start() self.playTime = time.time() def onPauseButton(self): print "pause" self.playTimer.stop() def computeRunStatistics(self): numRuns = len(self.simulationData) runStart = np.zeros(numRuns) runDuration = np.zeros(numRuns) grid = np.arange(1, numRuns + 1) discountedReward = np.zeros(numRuns) avgReward = np.zeros(numRuns) avgRewardNoCollisionPenalty = np.zeros(numRuns) idxMap = dict() for controllerType, color in self.colorMap.iteritems(): idxMap[controllerType] = np.zeros(numRuns, dtype=bool) for idx, val in enumerate(self.simulationData): runStart[idx] = val["startIdx"] runDuration[idx] = val["duration"] discountedReward[idx] = val["discountedReward"] avgReward[idx] = val["avgReward"] avgRewardNoCollisionPenalty[idx] = val["avgRewardNoCollisionPenalty"] controllerType = val["controllerType"] idxMap[controllerType][idx] = True def plotRunData(self, controllerTypeToPlot=None, showPlot=True, 
onlyLearned=False): if controllerTypeToPlot == None: controllerTypeToPlot = self.colorMap.keys() if onlyLearned: controllerTypeToPlot = ["learnedRandom", "learned"] numRuns = len(self.simulationData) runStart = np.zeros(numRuns) runDuration = np.zeros(numRuns) grid = np.arange(1, numRuns + 1) discountedReward = np.zeros(numRuns) avgReward = np.zeros(numRuns) avgRewardNoCollisionPenalty = np.zeros(numRuns) idxMap = dict() for controllerType, color in self.colorMap.iteritems(): idxMap[controllerType] = np.zeros(numRuns, dtype=bool) for idx, val in enumerate(self.simulationData): runStart[idx] = val["startIdx"] runDuration[idx] = val["duration"] discountedReward[idx] = val["discountedReward"] avgReward[idx] = val["avgReward"] avgRewardNoCollisionPenalty[idx] = val["avgRewardNoCollisionPenalty"] controllerType = val["controllerType"] idxMap[controllerType][idx] = True # usedQValueController[idx] = (val['controllerType'] == "QValue") # usedDefaultController[idx] = (val['controllerType'] == "default") # usedDefaultRandomController[idx] = (val['controllerType'] == "defaultRandom") # usedQValueRandomController[idx] = (val['controllerType'] == "QValueRandom") self.runStatistics = dict() dataMap = { "duration": runDuration, "discountedReward": discountedReward, "avgReward": avgReward, "avgRewardNoCollisionPenalty": avgRewardNoCollisionPenalty, } def computeRunStatistics(dataMap): for controllerType, idx in idxMap.iteritems(): d = dict() for dataName, dataSet in dataMap.iteritems(): # average the appropriate values in dataset d[dataName] = np.sum(dataSet[idx]) / (1.0 * np.size(dataSet[idx])) self.runStatistics[controllerType] = d computeRunStatistics(dataMap) if not showPlot: return plt.figure() # # idxDefaultRandom = np.where(usedDefaultRandomController==True)[0] # idxQValueController = np.where(usedQValueController==True)[0] # idxQValueControllerRandom = np.where(usedQValueControllerRandom==True)[0] # idxDefault = np.where(usedDefaultController==True)[0] # # plotData = dict() # plotData['defaultRandom'] = {'idx': idxDefaultRandom, 'color': 'b'} # plotData['QValue'] = {'idx': idxQValueController, 'color': 'y'} # plotData['default'] = {'idx': idxDefault, 'color': 'g'} def scatterPlot(dataToPlot): for controllerType in controllerTypeToPlot: idx = idxMap[controllerType] plt.scatter(grid[idx], dataToPlot[idx], color=self.colorMap[controllerType]) def barPlot(dataName): plt.title(dataName) barWidth = 0.5 barCounter = 0 index = np.arange(len(controllerTypeToPlot)) for controllerType in controllerTypeToPlot: val = self.runStatistics[controllerType] plt.bar(barCounter, val[dataName], barWidth, color=self.colorMap[controllerType], label=controllerType) barCounter += 1 plt.xticks(index + barWidth / 2.0, controllerTypeToPlot) plt.subplot(4, 1, 1) plt.title("run duration") scatterPlot(runDuration) # for controllerType, idx in idxMap.iteritems(): # plt.scatter(grid[idx], runDuration[idx], color=self.colorMap[controllerType]) # plt.scatter(runStart[idxDefaultRandom], runDuration[idxDefaultRandom], color='b') # plt.scatter(runStart[idxQValueController], runDuration[idxQValueController], color='y') # plt.scatter(runStart[idxDefault], runDuration[idxDefault], color='g') plt.xlabel("run #") plt.ylabel("episode duration") plt.subplot(2, 1, 2) plt.title("discounted reward") scatterPlot(discountedReward) # for key, val in plotData.iteritems(): # plt.scatter(grid[idx], discountedReward[idx], color=self.colorMap[controllerType]) # plt.subplot(3,1,3) # plt.title("average reward") # scatterPlot(avgReward) # for key, val in 
plotData.iteritems(): # plt.scatter(grid[val['idx']],avgReward[val['idx']], color=val['color']) # plt.subplot(4,1,4) # plt.title("average reward no collision penalty") # scatterPlot(avgRewardNoCollisionPenalty) # # for key, val in plotData.iteritems(): # # plt.scatter(grid[val['idx']],avgRewardNoCollisionPenalty[val['idx']], color=val['color']) ## plot summary statistics plt.figure() plt.subplot(2, 1, 1) barPlot("duration") plt.subplot(2, 1, 2) barPlot("discountedReward") # plt.subplot(3,1,3) # barPlot("avgReward") # # plt.subplot(4,1,4) # barPlot("avgRewardNoCollisionPenalty") plt.show() def plotMultipleRunData(self, simList, toPlot=["duration", "discountedReward"], controllerType="learned"): plt.figure() numPlots = len(toPlot) grid = np.arange(len(simList)) def plot(fieldToPlot, plotNum): plt.subplot(numPlots, 1, plotNum) plt.title(fieldToPlot) val = 0 * grid barWidth = 0.5 barCounter = 0 for idx, sim in enumerate(simList): value = sim.runStatistics[controllerType][fieldToPlot] plt.bar(idx, value, barWidth) counter = 1 for fieldToPlot in toPlot: plot(fieldToPlot, counter) counter += 1 plt.show() def saveToFile(self, filename): # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime filename = "data/" + filename + ".out" my_shelf = shelve.open(filename, "n") my_shelf["options"] = self.options if self.options["SARSA"]["type"] == "discrete": my_shelf["SARSA_QValues"] = self.Sarsa.QValues my_shelf["simulationData"] = self.simulationData my_shelf["stateOverTime"] = self.stateOverTime my_shelf["raycastData"] = self.raycastData my_shelf["controlInputData"] = self.controlInputData my_shelf["emptyQValue"] = self.emptyQValue my_shelf["numTimesteps"] = self.numTimesteps my_shelf["idxDict"] = self.idxDict my_shelf["counter"] = self.counter my_shelf.close() @staticmethod def loadFromFile(filename): filename = "data/" + filename + ".out" sim = Simulator(autoInitialize=False, verbose=False) my_shelf = shelve.open(filename) sim.options = my_shelf["options"] sim.initialize() if sim.options["SARSA"]["type"] == "discrete": sim.Sarsa.QValues = np.array(my_shelf["SARSA_QValues"]) sim.simulationData = my_shelf["simulationData"] # sim.runStatistics = my_shelf['runStatistics'] sim.stateOverTime = np.array(my_shelf["stateOverTime"]) sim.raycastData = np.array(my_shelf["raycastData"]) sim.controlInputData = np.array(my_shelf["controlInputData"]) sim.emptyQValue = np.array(my_shelf["emptyQValue"]) sim.numTimesteps = my_shelf["numTimesteps"] sim.idxDict = my_shelf["idxDict"] sim.counter = my_shelf["counter"] my_shelf.close() return sim
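A minimal usage sketch for the Simulator class above, assuming the director (ddapp) stack it relies on (om, World, vis, Qt) is importable and noting that the class is written for Python 2 (print statements). Overriding the per-phase run times before initialize() keeps the example short:

# Build with default options, shorten the four phases, run headless, summarize.
sim = Simulator(autoInitialize=False, verbose=False)
for phase in ("supervisedTrainingTime", "learningRandomTime",
              "learningEvalTime", "defaultControllerTime"):
    sim.options["runTime"][phase] = 20        # seconds of sim time per phase
sim.initialize()                              # builds sensor, car, world, SARSA
sim.run(launchApp=False)                      # batch simulation without playback UI
sim.plotRunData(onlyLearned=True)             # scatter/bar summaries of learned runs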