def cost_benchmark():
    """Print the extra running time each candidate feature adds to a base agent.

    A base agent is built from four features and run through
    ``rl.ArbitratorStandard`` twice; the elapsed time of the second pass is
    kept as the baseline.  Every candidate feature list is then added to a
    clone of the base agent, the clone is run, and its elapsed time minus the
    baseline is printed as "Overhead time".

    NOTE(review): the baseline measurement includes constructing the
    arbitrator, whereas the per-candidate measurement only covers
    ``arbitrator.run``.
    """
    index = MiniSoccerState.generate_start_state().index
    player = index['player']
    opponent = index['opponent']
    player_has_ball = index['player_has_ball']
    right_goal_center = index['rightgoalcenter']
    left_goal_center = index['leftgoalcenter']
    upper_left = index['upperleft']

    print("training the base agent...")
    base_agent = MiniSoccerAgent(rl.FeatureSet([
        rl.FeatureFlag(player_has_ball),
        rl.FeatureAngle(player, upper_left, left_goal_center),
        rl.FeatureDistY(player, right_goal_center),
        rl.FeaturePointXY(player),
    ]))

    def timed_base_run():
        # Arbitrator construction is deliberately inside the timed region,
        # matching how the baseline has always been measured here.
        started = time.clock()
        runner = rl.ArbitratorStandard(base_agent, NUM_TRIALS, NUM_EPISODES)
        runner.run(MAX_STEPS)
        return time.clock() - started

    print("Running time: %.1f" % timed_base_run())

    # Only the second pass is kept as the baseline.
    print("Do it again...")
    base_time = timed_base_run()
    print("Running time: %.1f" % base_time)

    def opponent_angle(*extra):
        return rl.FeatureAngle(opponent, left_goal_center, upper_left, *extra)

    def opponent_dist(*extra):
        return rl.FeatureDist(opponent, player, *extra)

    candidates = [
        [rl.FeatureFlag(player_has_ball)],
        [opponent_angle()],
        [opponent_angle(20)],
        [opponent_dist()],
        [opponent_dist(20)],
        [rl.FeatureDistX(opponent, player)],
        [rl.FeatureDistX(opponent, player, 20)],
        [rl.FeaturePointXY(opponent)],
        [rl.FeaturePointXY(opponent, 400)],
        [rl.FeatureInteraction([opponent_dist(), opponent_angle()])],
        [rl.FeatureInteraction([opponent_dist(20), opponent_angle()])],
        [rl.FeatureInteraction([opponent_dist(),
                                rl.FeaturePointXY(opponent)])],
        [rl.FeatureInteraction([opponent_dist(20),
                                rl.FeaturePointXY(opponent)])],
    ]

    for extra_features in candidates:
        agent = base_agent.clone()
        for extra in extra_features:
            agent.add_feature(extra)
        arbitrator = rl.ArbitratorStandard(agent, NUM_TRIALS, NUM_EPISODES)

        print("testing %s..." % extra_features)
        started = time.clock()
        arbitrator.run(MAX_STEPS)
        elapsed = time.clock() - started
        print("Overhead time: %.1f" % (elapsed - base_time))
        # Blank separator line between candidates.
        print("")
def learn_w_multitile_features():
    """Train a MiniSoccer agent on named features copied once per tile offset.

    Five named distance features and one named ball-possession flag are
    deep-copied for every offset in ``rl.TiledFeature.EVEN_OFFSETS``; each
    copy gets its ``offset`` attribute set to that offset.  The resulting
    list is wrapped in an ``rl.FeatureSet`` and handed to a
    ``MiniSoccerAgent``, which is driven by ``rl.ArbitratorStandard``.

    NOTE(review): this file defines ``learn_w_multitile_features`` more than
    once, so a later definition replaces this one at import time.  This
    variant also calls ``arbitrator.execute`` while other code in the file
    calls ``arbitrator.run`` -- confirm which entry point is current.
    """
    index = MiniSoccerState.generate_start_state().index
    player = index['player']
    opponent = index['opponent']
    has_ball = index['player_has_ball']
    right_goal = index['rightgoalcenter']
    left_goal = index['leftgoalcenter']

    templates = [
        rl.FeatureDist('dist-player-opponent', player, opponent),
        rl.FeatureDist('dist-player-rightgoalcenter', player, right_goal),
        rl.FeatureDist('dist-opponent-rightgoalcenter', opponent, right_goal),
        rl.FeatureDist('dist-player-leftgoalcenter', player, left_goal),
        rl.FeatureDist('dist-opponent-leftgoalcenter', opponent, left_goal),
        rl.FeatureFlag('flag-has-ball', has_ball),
    ]

    # One independent copy of every template per offset, offsets outermost.
    tiled = []
    for offset in rl.TiledFeature.EVEN_OFFSETS:
        for template in templates:
            shifted = copy.deepcopy(template)
            shifted.offset = offset
            tiled.append(shifted)

    agent = MiniSoccerAgent(rl.FeatureSet(tiled))
    rl.ArbitratorStandard(agent, NUM_TRIALS, NUM_EPISODES).execute(MAX_STEPS)
def learn_w_multitile_features():
    """Train a MiniSoccer agent on three untiled features.

    The feature set holds a ball-possession flag, the angle feature over
    (player, opponent, right goal centre) and the player-opponent distance.
    Despite the function name, no per-offset copies are made here.

    NOTE(review): this file defines ``learn_w_multitile_features`` more than
    once, so a later definition replaces this one at import time.  This
    variant also calls ``arbitrator.execute`` while other code in the file
    calls ``arbitrator.run`` -- confirm which entry point is current.
    """
    index = MiniSoccerState.generate_start_state().index
    player = index['player']
    opponent = index['opponent']
    has_ball = index['player_has_ball']
    right_goal = index['rightgoalcenter']

    feature_set = rl.FeatureSet([
        rl.FeatureFlag(has_ball),
        rl.FeatureAngle(player, opponent, right_goal),
        rl.FeatureDist(player, opponent),
    ])

    agent = MiniSoccerAgent(feature_set)
    rl.ArbitratorStandard(agent, NUM_TRIALS, NUM_EPISODES).execute(MAX_STEPS)
def learn_w_multitile_features():
    """Train a MiniSoccer agent on distance/angle features tiled per offset.

    Five distance features and two angle features are deep-copied once for
    every offset in ``rl.TiledFeature.EVEN_OFFSETS``, with each copy's
    ``offset`` attribute set to that offset.  A single ball-possession flag
    is placed at the front of the list and is not tiled.  The list is wrapped
    in an ``rl.FeatureSet`` for a ``MiniSoccerAgent`` that is then run by
    ``rl.ArbitratorStandard`` for ``MAX_STEPS``.

    NOTE(review): earlier definitions of ``learn_w_multitile_features`` in
    this file are replaced by this one at import time.
    """
    sample_state = MiniSoccerState.generate_start_state()
    player = sample_state.index['player']
    opponent = sample_state.index['opponent']
    player_has_ball = sample_state.index['player_has_ball']
    right_goal_center = sample_state.index['rightgoalcenter']
    left_goal_center = sample_state.index['leftgoalcenter']
    # Only the goal corners actually used by the angle features are looked up.
    right_goal_top = sample_state.index['rightgoaltop']
    left_goal_bottom = sample_state.index['leftgoalbottom']

    features = [
        rl.FeatureDist(player, opponent),
        rl.FeatureDist(player, right_goal_center),
        rl.FeatureDist(player, left_goal_center),
        rl.FeatureDist(opponent, right_goal_center),
        rl.FeatureDist(opponent, left_goal_center),
        rl.FeatureAngle(player, opponent, right_goal_top),
        rl.FeatureAngle(player, opponent, left_goal_bottom),
    ]

    # The flag goes in once, untiled; everything else is copied per offset.
    feature_list = [rl.FeatureFlag(player_has_ball)]
    for offset in rl.TiledFeature.EVEN_OFFSETS:
        for feature in features:
            # Deep copy so each offset gets its own independent feature object.
            the_feature = copy.deepcopy(feature)
            the_feature.offset = offset
            feature_list.append(the_feature)

    agent = MiniSoccerAgent(rl.FeatureSet(feature_list))
    arbitrator = rl.ArbitratorStandard(agent, NUM_TRIALS, NUM_EPISODES)
    arbitrator.run(MAX_STEPS)
from minisoccer import MiniSoccerState
import rl

# Quick check of how a flag feature encodes a state before and after the
# underlying flag is cleared.
sample_state = MiniSoccerState.generate_start_state()
player_has_ball = sample_state.index['player_has_ball']
feature = rl.FeatureFlag('flag-has-ball', player_has_ball)

# Encoding of the freshly generated start state.
print(feature.encode_state(sample_state))

# Clear the flag through the state's index, then encode the same state again.
sample_state.index['player_has_ball'].truth = False
print(feature.encode_state(sample_state))