def __init__(self):
    """Register the bundled map and launch a rendered SC2 env for one Terran agent."""
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, MAP_NAME)

    # Feature-layer-only interface: 84px screen, 64px minimap.
    interface = features.AgentInterfaceFormat(
        feature_dimensions=features.Dimensions(screen=84, minimap=64),
        action_space=actions.ActionSpace.FEATURES,
    )
    self.sc2_env = sc2_env.SC2Env(
        map_name=MAP_NAME,
        players=[sc2_env.Agent(sc2_env.Race.terran)],
        agent_interface_format=interface,
        step_mul=16,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True)

    # Per-episode bookkeeping, reset between episodes.
    self.current_obs = None
    self.actions_taken = 0
    self.last_mineral_count = 0
    self.reward = 0
    self.zergling_count = 0
    self.roach_count = 0
    self.last_reward = 0
    self.last2_reward = 0
    self.rewards = []
    self.decomposed_rewards = [0, 0]
    self.last_losses = 0
def make_sc2env(num_players, render=False):
    """Create an SC2Env with one or two Terran agents.

    Any num_players other than 1 yields exactly two agents. When render is
    True an RGB surface is added to the agent interface.
    """
    agent_count = 1 if num_players == 1 else 2
    players = [sc2_env.Agent(sc2_env.Race.terran) for _ in range(agent_count)]

    interface_kwargs = dict(
        feature_dimensions=sc2_env.Dimensions(
            screen=(MAP_SIZE, MAP_SIZE),
            minimap=(MAP_SIZE, MAP_SIZE)),
        action_space=actions.ActionSpace.FEATURES)
    if render:
        interface_kwargs['rgb_dimensions'] = sc2_env.Dimensions(
            screen=(RGB_SCREEN_WIDTH, RGB_SCREEN_HEIGHT),
            minimap=(RGB_SCREEN_WIDTH, RGB_SCREEN_HEIGHT))
    interface = sc2_env.AgentInterfaceFormat(**interface_kwargs)

    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, MAP_NAME, players=num_players)

    return sc2_env.SC2Env(
        agent_interface_format=interface,
        map_name=MAP_NAME,
        step_mul=FIVE_SECONDS,  # 17 is ~1 action per second
        players=players)
def __init__(self, map_name=None):
    """Register the requested map (default MAP_NAME) and start a rendered
    Protoss SC2 environment with decomposed-reward accumulators."""
    if map_name is None:
        map_name = MAP_NAME
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, map_name)

    interface = features.AgentInterfaceFormat(
        feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=30),
        action_space=actions.ActionSpace.FEATURES,
        camera_width_world_units=28,
    )
    self.sc2_env = sc2_env.SC2Env(
        map_name=map_name,
        players=[sc2_env.Agent(sc2_env.Race.protoss)],
        agent_interface_format=interface,
        step_mul=16,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True,
    )
    # lib.renderer_human.zoom(1.5)

    # Episode state.
    self.current_obs = None
    self.actions_taken = 0
    self.decomposed_rewards_all = []
    self.decomposed_rewards = []
    self.decomposed_rewards_mark = 0
    self.signal_of_finished = 1
    self.last_state = None

    # Superseded mapping of enemy type ids to reward channels, kept for reference.
    '''
    self.enemy_type_number_dict = {
        101 : 'damageToMarine',
        102 : 'damageByMarine',
        103 : 'damageToZergling',
        104 : 'damageByZergling',
        105 : 'damageToMarauder',
        106 : 'damageByMarauder',
        107 : 'damageToHydralisk',
        108 : 'damageByHydralisk',
        109 : 'damageToThor',
        110 : 'damageByThor',
        111 : 'damageToUltralisk',
        112 : 'damageByUltralisk',
        113 : 'penalty'
    }
    '''
    # One zeroed accumulator per decomposed reward channel.
    self.decomposed_reward_dict = dict.fromkeys([
        'damageToMarine', 'damageByMarine',
        'damageToZergling', 'damageByZergling',
        'damageToMarauder', 'damageByMarauder',
        'damageToHydralisk', 'damageByHydralisk',
        'damageToThor', 'damageByThor',
        'damageToUltralisk', 'damageByUltralisk',
        'penalty',
    ], 0)
def make_sc2env(map_name, render=False, screen_size=RGB_SCREEN_SIZE, map_size=MAP_SIZE):
    """Register map_name and build an SC2Env; render adds an RGB surface."""
    if render:
        rgb_dimensions = sc2_env.Dimensions(
            screen=(screen_size, screen_size),
            minimap=(screen_size, screen_size))
    else:
        # Falsy placeholder, exactly as the original call passed when not rendering.
        rgb_dimensions = False

    interface = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=(map_size, map_size),
            minimap=(map_size, map_size)),
        rgb_dimensions=rgb_dimensions,
        action_space=actions.ActionSpace.FEATURES,
    )

    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, map_name)

    return sc2_env.SC2Env(
        agent_interface_format=interface,
        map_name=map_name,
        step_mul=SIMULATION_STEP_MUL)
def __init__(self, generate_xai_replay):
    """Start a rendered Terran SC2 env; optionally add RGB output for XAI replays."""
    print(
        "================================WITHOUT RGB DIMENSION CONFIG ===================================="
    )
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, MAP_NAME)

    if generate_xai_replay:
        # Replay capture: add an RGB render surface and halve the step multiplier.
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=40, minimap=30),
            rgb_dimensions=sc2_env.Dimensions(
                screen=(1520, 1280),
                #screen=(2048, 2048),
                #screen=(256, 256),
                minimap=(64, 64),
            ),
            action_space=actions.ActionSpace.FEATURES,
        )
        step_mul_value = 8
    else:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=40, minimap=30),
            action_space=actions.ActionSpace.FEATURES,
        )
        step_mul_value = 16

    self.sc2_env = sc2_env.SC2Env(
        map_name=MAP_NAME,
        players=[sc2_env.Agent(sc2_env.Race.terran)],
        agent_interface_format=aif,
        step_mul=step_mul_value,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True)

    # Per-episode bookkeeping.
    self.current_obs = None
    self.actions_taken = 0
    self.last_mineral_count = 0
    self.reward = 0
    self.zergling_count = 0
    self.roach_count = 0
    self.last_reward = 0
    self.last2_reward = 0
    self.rewards = []
    self.decomposed_rewards = [0, 0]
    self.last_losses = 0
def make_sc2env():
    """Register MAP_NAME and return an SC2Env with feature and RGB surfaces."""
    feature_dims = sc2_env.Dimensions(
        screen=(MAP_SIZE, MAP_SIZE),
        minimap=(MAP_SIZE, MAP_SIZE))
    rgb_dims = sc2_env.Dimensions(
        screen=(RGB_SCREEN_SIZE, RGB_SCREEN_SIZE),
        minimap=(RGB_SCREEN_SIZE, RGB_SCREEN_SIZE))
    interface = sc2_env.AgentInterfaceFormat(
        feature_dimensions=feature_dims,
        rgb_dimensions=rgb_dims,
        action_space=actions.ActionSpace.FEATURES,
    )

    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, MAP_NAME)

    return sc2_env.SC2Env(
        agent_interface_format=interface,
        map_name=MAP_NAME,
        step_mul=170)  # 17 is ~1 action per second
def __init__(self, reward_types, map_name = None):
    """Register the map (default MAP_NAME) and start a rendered Protoss SC2
    environment, zeroing one decomposed-reward accumulator per reward type.

    Args:
        reward_types: iterable of reward-channel names to track.
        map_name: SC2 map to load; falls back to MAP_NAME when None.
    """
    if map_name is None:
        map_name = MAP_NAME
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    print(maps_dir)
    register_map(maps_dir, map_name)
    self.sc2_env = sc2_env.SC2Env(
        map_name = map_name,
        players = [sc2_env.Agent(sc2_env.Race.protoss)],
        agent_interface_format = features.AgentInterfaceFormat(
            feature_dimensions = features.Dimensions(screen = SCREEN_SIZE, minimap = 30),
            action_space = actions.ActionSpace.FEATURES,
            camera_width_world_units = 28
        ),
        step_mul = 16,
        game_steps_per_episode = 0,
        score_index = 0,
        visualize = True,)
    # lib.renderer_human.zoom(1.5)
    # Per-episode state.
    self.current_obs = None
    self.actions_taken = 0
    self.decomposed_rewards_all = []
    self.decomposed_rewards = []
    self.decomposed_rewards_mark = 0
    self.signal_of_finished = 1
    self.end_state = None
    self.reward_types = reward_types
    # One zeroed accumulator per requested reward channel.
    self.decomposed_reward_dict = {}
    for rt in reward_types:
        self.decomposed_reward_dict[rt] = 0
    # Screen feature layers fed to the agent; UNIT_TYPE lists raw SC2 unit ids.
    self.input_screen_features = {
        "PLAYER_RELATIVE":[1, 3, 4],
        "UNIT_TYPE": [48, 105, 73, 83, 52, 109, 51, 107],
        'HIT_POINT': 0,
        'HIT_POINT_RATIO': 0
    }
    # NOTE(review): this trailing triple-quote opens a commented-out block whose
    # closing quote lies beyond this view; left untouched.
    '''
def __init__(self, map_name=None, unit_type=None, generate_xai_replay=False, xai_replay_dimension=256, verbose=False):
    """Set up the tug-of-war SC2 environment: register the map, build the agent
    interface (optionally with an RGB surface for XAI replay capture), start
    the env, and initialize state/reward bookkeeping and the action dictionary.

    Args:
        map_name: SC2 map to load; falls back to MAP_NAME when None.
        unit_type: ignored — reassigned below to Marine/Baneling/Immortal ids.
            (Fix: default changed from mutable [] to None; behavior unchanged.)
        generate_xai_replay: when True, add RGB rendering and a smaller step_mul.
        xai_replay_dimension: base RGB frame height for replay capture.
        verbose: enable verbose printing elsewhere in the class.
    """
    if unit_type is None:
        unit_type = []  # kept for interface compatibility; overwritten below
    if map_name is None:
        map_name = MAP_NAME
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    print("map director: " + str(maps_dir))
    register_map(maps_dir, map_name)

    if generate_xai_replay:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            rgb_dimensions=sc2_env.Dimensions(
                # Fix: 1.5 * xai_replay_dimension is a float (384.0 for the
                # default 256); pixel dimensions must be integers.
                screen=(int(1.5 * xai_replay_dimension), xai_replay_dimension),
                minimap=(64, 64),
            ),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=28,
            #use_camera_position = True,
        )
        step_mul_value = 4
        # step_mul_value = 16
    else:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=100,
        )
        step_mul_value = 16

    np.set_printoptions(threshold=sys.maxsize, linewidth=sys.maxsize, precision=2)
    self.sc2_env = sc2_env.SC2Env(
        map_name=map_name,
        agent_interface_format=aif,
        step_mul=step_mul_value,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True,
    )

    # Episode state.
    self.current_obs = None
    self.decomposed_rewards = []
    self.verbose = verbose
    self.miner_index = 0
    self.reset_steps = -1
    self.mineral_limiation = 1500

    # Normalization constants for the flattened state vector, in layout order.
    self.norm_vector = np.array([
        1500,        # Player 1 unspent minerals
        30, 30, 10,  # Player 1 top lane building
        30, 30, 10,  # Player 1 bottom lane building
        3,           # Player 1 pylons
        30, 30, 10,  # Player 2 top lane building
        30, 30, 10,  # Player 2 bottom lane building
        3,           # Player 2 pylons
        30, 30, 10,  # Player 1 units top lane grid 1
        30, 30, 10,  # Player 1 units top lane grid 2
        30, 30, 10,  # Player 1 units top lane grid 3
        30, 30, 10,  # Player 1 units top lane grid 4
        30, 30, 10,  # Player 1 units bottom lane grid 1
        30, 30, 10,  # Player 1 units bottom lane grid 2
        30, 30, 10,  # Player 1 units bottom lane grid 3
        30, 30, 10,  # Player 1 units bottom lane grid 4
        30, 30, 10,  # Player 2 units top lane grid 1
        30, 30, 10,  # Player 2 units top lane grid 2
        30, 30, 10,  # Player 2 units top lane grid 3
        30, 30, 10,  # Player 2 units top lane grid 4
        30, 30, 10,  # Player 2 units bottom lane grid 1
        30, 30, 10,  # Player 2 units bottom lane grid 2
        30, 30, 10,  # Player 2 units bottom lane grid 3
        30, 30, 10,  # Player 2 units bottom lane grid 4
        2000, 2000,  # Player 1 Nexus HP (top, bottom)
        2000, 2000,  # Player 2 Nexus HP (top, bottom)
        40           # Wave Number
    ])

    self.decision_point = 1
    self.signal_of_end = False
    self.end_state = None
    self.maker_cost_np = np.zeros(len(maker_cost))
    # Have to change the combine func if this changed
    self.pylon_cost = 300
    self.pylon_index = 7
    for i, mc in enumerate(maker_cost.values()):
        self.maker_cost_np[i] = mc
    self.last_decomposed_reward_dict = {}
    self.decomposed_reward_dict = {}
    self.num_waves = 0

    # Load the precomputed action dictionary shipped alongside this module.
    action_dict_path = os.path.join(os.path.dirname(__file__), 'action_1500_tow_2L.pt')
    print("actions path:" + action_dict_path)
    self.a_dict = torch.load(action_dict_path)
    self.action_space = self.a_dict['actions']
    self.action_space_dict = self.a_dict['mineral']
    # print(self.a_dict.keys())

    # at the end of the reward type name:
    # 1 means for player 1 is positive, for player 2 is negative
    # 2 means for player 2 is positive, for player 1 is negative
    self.reward_types = list(reward_dict.values())
    # print(self.reward_types)
    for rt in self.reward_types:
        self.decomposed_reward_dict[rt] = 0
        self.last_decomposed_reward_dict[rt] = 0

    # The unit_type parameter is deliberately overridden: this env always
    # tracks exactly these three unit ids.
    unit_type = [
        UNIT_TYPES['Marine'],
        UNIT_TYPES['Baneling'],
        UNIT_TYPES['Immortal']
    ]
def __init__(self, reward_types, map_name=None, unit_type=None, generate_xai_replay=False, xai_replay_dimension=256, verbose=False):
    """Set up the tug-of-war SC2 environment: register the map, build the agent
    interface (optionally with RGB output for XAI replays), start the env, and
    initialize reward/state bookkeeping and the loaded action dictionary.

    Args:
        reward_types: iterable of reward-channel names to track.
        map_name: SC2 map to load; falls back to MAP_NAME when None.
        unit_type: ignored — reassigned below to Marine/Viking/Colossus ids.
            (Fix: default changed from mutable [] to None; behavior unchanged.)
        generate_xai_replay: when True, add RGB rendering and a smaller step_mul.
        xai_replay_dimension: RGB frame size for replay capture.
        verbose: enable verbose printing elsewhere in the class.
    """
    if unit_type is None:
        unit_type = []  # kept for interface compatibility; overwritten below
    if map_name is None:
        map_name = MAP_NAME
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    print("map director: " + str(maps_dir))
    register_map(maps_dir, map_name)

    if generate_xai_replay:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            rgb_dimensions=sc2_env.Dimensions(
                screen=(xai_replay_dimension, xai_replay_dimension),
                minimap=(64, 64),
            ),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=28,
            #use_camera_position = True,
        )
        step_mul_value = 4
    else:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=100,
        )
        step_mul_value = 16

    np.set_printoptions(threshold=sys.maxsize, linewidth=sys.maxsize, precision=1)
    self.sc2_env = sc2_env.SC2Env(
        map_name=map_name,
        agent_interface_format=aif,
        step_mul=step_mul_value,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True,)

    # Episode state.
    self.current_obs = None
    self.actions_taken = 0
    self.decomposed_rewards = []
    self.verbose = verbose
    self.decision_point = 1
    self.miner_index = 10
    self.reset_steps = -1
    self.fifo_player_1 = []
    self.fifo_player_2 = []
    self.building_limiation = 30
    self.mineral_limiation = 1500
    # Normalization constants for the flattened state vector.
    self.norm_vector = np.array([1, 1, 1, 1, 100, 1, 1, 1, 1, 100, 100, 1, 1, 1, 1, 1, 1])
    self.signal_of_end = False
    self.end_state = None
    self.maker_cost_np = np.zeros(len(maker_cost))
    for i, mc in enumerate(maker_cost.values()):
        self.maker_cost_np[i] = mc

    self.reward_types = reward_types
    self.last_decomposed_reward_dict = {}
    self.decomposed_reward_dict = {}

    # Load the precomputed action dictionary shipped alongside this module.
    action_dict_path = os.path.join(os.path.dirname(__file__), 'action_tug_of_war.pt')
    print("actions path:" + action_dict_path)
    self.a_dict = torch.load(action_dict_path)
    self.a_dict['actions'] = np.array(self.a_dict['actions'])
    # print(self.a_dict.keys())

    # One zeroed accumulator (current and previous) per reward channel.
    for rt in reward_types:
        self.decomposed_reward_dict[rt] = 0
        self.last_decomposed_reward_dict[rt] = 0

    # The unit_type parameter is deliberately overridden: this env always
    # tracks exactly these three unit ids.
    unit_type = [UNIT_TYPES['Marine'], UNIT_TYPES['Viking'], UNIT_TYPES['Colossus']]
    # Screen feature layers fed to the agent.
    self.input_screen_features = {
        "PLAYER_RELATIVE":[1, 4],
        "UNIT_TYPE": unit_type,
        'HIT_POINT': 0,
        'HIT_POINT_RATIO': 0,
        'SHIELD': 0,
        'SHIELD_RATIO': 0,
        'UNIT_DENSITY': 0
    }
def __init__(self, reward_types, map_name=None, unit_type=None, generate_xai_replay=False, xai_replay_dimension=256):
    """Register the map and start a rendered Protoss SC2 environment, with
    optional RGB output for XAI replay capture.

    Args:
        reward_types: iterable of reward-channel names to track.
        map_name: SC2 map to load; falls back to MAP_NAME when None.
        unit_type: SC2 unit-type ids for the UNIT_TYPE screen feature.
            (Fix: default changed from the mutable list [83, 52, 48] to None;
            the same list is now created fresh per call — behavior unchanged.)
        generate_xai_replay: when True, add RGB rendering and a smaller step_mul.
        xai_replay_dimension: RGB frame size for replay capture.
    """
    if unit_type is None:
        unit_type = [83, 52, 48]
    if map_name is None:
        map_name = MAP_NAME
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    print("map director: " + str(maps_dir))
    register_map(maps_dir, map_name)

    if generate_xai_replay:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            rgb_dimensions=sc2_env.Dimensions(
                screen=(xai_replay_dimension, xai_replay_dimension),
                minimap=(64, 64),
            ),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=28,
            #use_camera_position = True,
        )
        step_mul_value = 4
    else:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=28)
        step_mul_value = 16

    self.sc2_env = sc2_env.SC2Env(
        map_name=map_name,
        players=[sc2_env.Agent(sc2_env.Race.protoss)],
        agent_interface_format=aif,
        step_mul=step_mul_value,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True,
    )

    # Episode state.
    self.current_obs = None
    self.actions_taken = 0
    self.decomposed_rewards_all = []
    self.decomposed_rewards = []
    self.decomposed_rewards_mark = 0
    self.signal_of_finished = 1
    self.end_state = None
    #self.agentInterfaceFormat = features.AgentInterfaceFormat()

    # One zeroed accumulator per requested reward channel.
    self.reward_types = reward_types
    self.decomposed_reward_dict = {}
    for rt in reward_types:
        self.decomposed_reward_dict[rt] = 0

    # Screen feature layers fed to the agent.
    self.input_screen_features = {
        "PLAYER_RELATIVE": [1, 3, 4],
        "UNIT_TYPE": unit_type,
        'HIT_POINT': 0,
        'HIT_POINT_RATIO': 0
    }
def __init__(self, reward_types, map_name=None, unit_type=None, generate_xai_replay=False, xai_replay_dimension=256, verbose=False):
    """Register the map and start a rendered SC2 environment, with optional
    RGB output for XAI replay capture, plus decomposed-reward bookkeeping.

    Args:
        reward_types: iterable of reward-channel names to track.
        map_name: SC2 map to load; falls back to MAP_NAME when None.
        unit_type: ignored — reassigned below to Marine/Viking/Colossus ids.
            (Fix: default changed from mutable [] to None; behavior unchanged.)
        generate_xai_replay: when True, add RGB rendering and a smaller step_mul.
        xai_replay_dimension: RGB frame size for replay capture.
        verbose: enable verbose printing elsewhere in the class.
    """
    if unit_type is None:
        unit_type = []  # kept for interface compatibility; overwritten below
    if map_name is None:
        map_name = MAP_NAME
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, map_name)

    if generate_xai_replay:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            rgb_dimensions=sc2_env.Dimensions(
                screen=(xai_replay_dimension, xai_replay_dimension),
                minimap=(64, 64),
            ),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=28,
            #use_camera_position = True,
        )
        step_mul_value = 4
    else:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=50,
        )
        step_mul_value = 16

    self.sc2_env = sc2_env.SC2Env(
        map_name=map_name,
        agent_interface_format=aif,
        step_mul=step_mul_value,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True,
    )

    # Episode state.
    self.current_obs = None
    self.actions_taken = 0
    self.decomposed_rewards = []
    self.verbose = verbose
    self.signal_of_end = False
    self.end_state = None
    self.get_income_signal = 2

    # One zeroed accumulator (current and previous) per reward channel.
    self.reward_types = reward_types
    self.last_decomposed_reward_dict = {}
    self.decomposed_reward_dict = {}
    for rt in reward_types:
        self.decomposed_reward_dict[rt] = 0
        self.last_decomposed_reward_dict[rt] = 0

    # The unit_type parameter is deliberately overridden: this env always
    # tracks exactly these three unit ids.
    unit_type = [
        UNIT_TYPES['Marine'],
        UNIT_TYPES['Viking'],
        UNIT_TYPES['Colossus']
    ]
    # Screen feature layers fed to the agent.
    self.input_screen_features = {
        "PLAYER_RELATIVE": [1, 4],
        "UNIT_TYPE": unit_type,
        'HIT_POINT': 0,
        'HIT_POINT_RATIO': 0,
        'SHIELD': 0,
        'SHIELD_RATIO': 0,
        'UNIT_DENSITY': 0
    }