def __init__(self, HOST, PORT, debug=False):
    self.minerEnv = MinerEnv(HOST, PORT)
    self.minerEnv.start()
    self.action_space = spaces.Discrete(5)
    self.observation_space = spaces.Discrete(198)
    self.debug = debug
    self.view = None
    self.ob = None
    self.state = self.minerEnv.state
Example #2
	def __init__(self, host, port, debug = False):
		super(TFAgentsMiner, self).__init__()

		self.miner_env= MinerEnv(host, port)
		self.miner_env.start()
		self.debug = debug
		
		self._action_spec = array_spec.BoundedArraySpec(shape = (), dtype = np.int32, minimum = 0, maximum = 5, name = 'action')
		self._observation_spec = array_spec.BoundedArraySpec(shape = (MAP_MAX_X*5,MAP_MAX_Y*5,6), 
			dtype = np.float32, name = 'observation')
Example #3
def __init__(self, HOST, PORT, debug=False):
    self.minerEnv = MinerEnv(HOST, PORT)
    self.minerEnv.start()
    self.action_space = spaces.Discrete(6)
    self.observation_space = spaces.Discrete(198)
    self.action = None
    self.reward = None
    self.ob = None
    self.view = None
    self.state = self.minerEnv.state
    self.maxstep = self.minerEnv.state.mapInfo.maxStep
    self.img_array = []
Example #4
def __init__(self, env_config):
    self.env = MinerEnv(None, None)
    self.env.start()
    self.state = self.env.state
    self.width = 21
    self.height = 9
    self.action_space = Discrete(6)
    self.observation_space = Tuple((
        Box(low=0, high=np.inf, shape=(self.width, self.height, 1)),
        Box(low=-np.inf, high=np.inf, shape=(4,)),
        Box(low=-2, high=1, shape=(4,)),
    ))
Example #5
class MultiAgentsEnv(MultiAgentEnv):
    def __init__(self, env_config):
        self.env = MinerEnv(None, None)
        self.env.start()
        self.state = self.env.state
        self.width = 21
        self.height = 9
        self.action_space = Discrete(6)
        self.observation_space = Tuple((
            Box(low=0, high=np.inf, shape=(self.width, self.height, 1)),
            Box(low=-np.inf, high=np.inf, shape=(4,)),
            Box(low=-2, high=1, shape=(4,)),
        ))

    def reset(self):
        map_id = np.random.randint(1, 7)
        pos_x = np.random.randint(self.width)
        pos_y = np.random.randint(self.height)
        number_of_players = np.random.randint(1, 5)
        self.env.send_map_info(map_id, pos_x, pos_y,
                               number_of_players=number_of_players)
        self.env.reset()
        ids = list(range(2, 1 + number_of_players))
        self.bots = []
        if number_of_players > 1:
            for _ in range(np.random.randint(1, number_of_players)):
                if random.choice([1, 1, 2]) == 1:
                    self.bots.append(Bot1(ids.pop(random.choice(range(len(ids))))))
                else:
                    self.bots.append(Bot2(ids.pop(random.choice(range(len(ids)))), gamma=random.choice([1.0])))
        return self.get_state()

    def step(self, action):
        for bot in self.bots:
            action[str(bot.id)] = bot.compute_action(self.state)
        self.env.step(action)
        return self.get_state(), self.get_reward(), self.get_done(), {}

    def get_state(self):
        # Building the map
        view = np.zeros([self.width, self.height, 1], dtype=float)
        for obstacle in self.state.mapInfo.obstacles:
            obstacle_type = obstacle['type']
            x = obstacle['posx']
            y = obstacle['posy']
            value = obstacle['value']
            if obstacle_type == 3:
                if value == -5:
                    obstacle_type = 4
                elif value == -20:
                    obstacle_type = 5
                elif value == -40:
                    obstacle_type = 6
                elif value == -100:
                    obstacle_type = 7
                else:
                    raise Exception('No such obstacle')
            view[x, y, 0] = obstacle_type

        for gold in self.state.mapInfo.golds:
            gold_amount = gold['amount']
            x = gold['posx']
            y = gold['posy']
            if gold_amount > 0:
                view[x, y, 0] = min(7 + math.ceil(gold_amount / 50), 37)

        return {
            str(player_id): self.get_single_player_state(np.copy(view), player_id)
            for player_id in self.state.players.keys()
        }

    def get_single_player_state(self, view, playerId):
        players_pos = np.full(4, -1, dtype=int)
        energies = np.zeros(4)
        i = 1
        for player_id, player_state in self.state.players.items():
            x = player_state['posx']
            y = player_state['posy']
            if x < view.shape[0] and y < view.shape[1]:
                if player_id == playerId:
                    players_pos[0] = x * self.height + y
                    energies[0] = player_state['energy'] / 50
                else:
                    players_pos[i] = x * self.height + y
                    energies[i] = player_state['energy'] / 50
                    i += 1

        return (
            view,
            players_pos,
            energies,
        )

    def get_reward(self):
        return {
            str(player_id): self.get_single_player_reward(player_id)
            for player_id in self.state.players.keys()
        }
    
    def get_single_player_reward(self, playerId):
        # Calculate reward
        reward = 0
        player = self.state.players[playerId]
        player_pre = self.state.players_pre[playerId]
        score_action = player['score'] - player_pre['score']
        if score_action > 0:
            reward += score_action / 50

        consumed_energy = player_pre['energy'] - player['energy']
        if Action(player['lastAction']) == Action.CRAFT and consumed_energy == 10:
            reward += -1.0

        if player['status'] == self.state.STATUS_ELIMINATED_OUT_OF_ENERGY:
            reward += -1.0
        
        if Action(player['lastAction']) == Action.FREE and player_pre['energy'] == 50:
            reward += -0.1

        return reward
    
    def get_done(self):
        done = {'__all__': False}
        if all(map(lambda player_state: player_state['status'] != 0, self.state.players.values())):
            done['__all__'] = True
        return done
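A minimal rollout sketch for the multi-agent wrapper above (an illustration only, assuming MultiAgentsEnv and its dependencies MinerEnv, Bot1 and Bot2 are importable and a game socket is reachable); it shows the per-agent dict convention used by reset()/step():

import numpy as np

env = MultiAgentsEnv(env_config={})
obs = env.reset()                      # {player_id_str: (view, players_pos, energies)}
done = {'__all__': False}
while not done['__all__']:
    # one random action (0..5) per agent; step() overwrites the bot entries itself
    actions = {agent_id: int(np.random.randint(env.action_space.n)) for agent_id in obs}
    obs, rewards, done, info = env.step(actions)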
Example #6
# Create a header for saving the learning-log file
"""
now = datetime.datetime.now() #Getting the latest datetime
header = ["Ep", "Step", "Reward", "Total_reward", "Action", "Epsilon", "Done", "Termination_Code"] #Defining header for the save file
filename = "Data/data_" + now.strftime("%Y%m%d-%H%M") + ".csv" 
with open(filename, 'w') as f:
    pd.DataFrame(columns=header).to_csv(f, encoding='utf-8', index=False, header=True)
"""

# Initialize environment
HOST = "localhost"
PORT = 1111
if len(sys.argv) == 3:
    HOST = str(sys.argv[1])
    PORT = int(sys.argv[2])
minerEnv = MinerEnv(HOST, PORT)
minerEnv.start()
#train = False #The variable is used to indicate that the epsilon starts to decrease.

#Training Process
if __name__ == '__main__':
    gnet = Net(INPUTNUM, ACTIONNUM)  # global network
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=1e-4,
                     betas=(0.95, 0.999))  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i',
                                                 0), mp.Value('d',
                                                              0.), mp.Queue()

    # parallel training
    workers = [
Example #7
BATCH_SIZE = 32  # The number of experiences sampled for each replay (training) step
MEMORY_SIZE = 100000  # The capacity of the replay memory for storing experiences
SAVE_NETWORK = 100  # After this number of episodes, the DQN model is saved for later testing
INITIAL_REPLAY_SIZE = 1000  # The number of experiences stored in the memory before replay starts
INPUTNUM = 198  # The number of input values for the DQN model
ACTIONNUM = 6  # The number of actions output from the DQN model
MAP_MAX_X = 21  # Width of the map
MAP_MAX_Y = 9  # Height of the map

# Initialize a DQN model and a memory batch for storing experiences
DQNAgent = DQN(INPUTNUM, ACTIONNUM)
memory = Memory(MEMORY_SIZE)

# Initialize environment
minerEnv = MinerEnv(
    HOST, PORT
)  #Creating a communication environment between the DQN model and the game environment (GAME_SOCKET_DUMMY.py)
minerEnv.start()  # Connect to the game

train = False  # The variable indicates that replay has started and epsilon starts to decrease.
# Training Process
# the main part of the deep Q-learning algorithm
for episode_i in range(0, N_EPISODE):
    try:
        # Choosing a map in the list
        mapID = np.random.randint(
            1, 6)  # Choosing a map ID randomly from the 5 maps in the Maps folder
        posID_x = np.random.randint(
            MAP_MAX_X
        )  # Choosing an initial position of the DQN agent on the X-axis randomly
        posID_y = np.random.randint(
            MAP_MAX_Y
        )  # Choosing an initial position of the DQN agent on the Y-axis randomly
Example #8
policy = TD3_conv.TD3(**kwargs)
policy_file = "TD3_conv_Miner_0_2_scale3"
policy.load(f"./models_TD3_tensor/{policy_file}")
print("Loaded model from disk")
status_map = {
    0: "STATUS_PLAYING",
    1: "STATUS_ELIMINATED_WENT_OUT_MAP",
    2: "STATUS_ELIMINATED_OUT_OF_ENERGY",
    3: "STATUS_ELIMINATED_INVALID_ACTION",
    4: "STATUS_STOP_EMPTY_GOLD",
    5: "STATUS_STOP_END_STEP"
}
total_reward = 0
try:
    # Initialize environment
    minerEnv = MinerEnv(HOST, PORT)
    minerEnv.start()  # Connect to the game
    mapID = np.random.randint(
        1, 6)  # Choosing a map ID randomly from the 5 maps in the Maps folder
    posID_x = np.random.randint(
        MAP_MAX_X
    )  # Choosing an initial position of the DQN agent on the X-axis randomly
    posID_y = np.random.randint(
        MAP_MAX_Y
    )  # Choosing an initial position of the DQN agent on the Y-axis randomly
    # Creating a request that initializes the map, the initial position, the initial energy, and the maximum number of steps of the DQN agent
    request = ("map" + str(mapID) + "," + str(posID_x) + "," + str(posID_y) +
               ",50,100")
    #Send the request to the game environment (GAME_SOCKET_DUMMY.py)
    minerEnv.send_map_info(request)
    minerEnv.reset()
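The request string above ("map<ID>,<x>,<y>,50,100") packs the map ID, the start position, the initial energy and the maximum number of steps into one comma-separated message; a small helper (hypothetical name build_map_request, shown only as a sketch) makes that format explicit:

def build_map_request(map_id, pos_x, pos_y, init_energy=50, max_steps=100):
    # build_map_request(1, 3, 4) -> "map1,3,4,50,100"
    return "map{},{},{},{},{}".format(map_id, pos_x, pos_y, init_energy, max_steps)

# equivalent to the request built above:
# minerEnv.send_map_info(build_map_request(mapID, posID_x, posID_y))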
Example #9
json_file.close()
DQNAgent = model_from_json(loaded_model_json)
# load weights into new model
DQNAgent.load_weights("DQNmodel_Test.h5")
print("Loaded model from disk")
status_map = {
    0: "STATUS_PLAYING",
    1: "STATUS_ELIMINATED_WENT_OUT_MAP",
    2: "STATUS_ELIMINATED_OUT_OF_ENERGY",
    3: "STATUS_ELIMINATED_INVALID_ACTION",
    4: "STATUS_STOP_EMPTY_GOLD",
    5: "STATUS_STOP_END_STEP"
}
try:
    # Initialize environment
    minerEnv = MinerEnv(HOST, PORT)
    minerEnv.start()  # Connect to the game
    minerEnv.reset()
    state_map, state_users = minerEnv.get_state(
        initial_flag=True)  ##Getting an initial state
    while not minerEnv.check_terminate():
        try:
            if minerEnv.state.mapInfo.gold_amount(minerEnv.state.x,
                                                  minerEnv.state.y) > 0:
                if minerEnv.state.energy <= 5:
                    action = 4
                else:
                    action = 5
            else:
                action = np.argmax(
                    DQNAgent.predict({
Example #10
class MinerGymEnv(gym.Env):
    def __init__(self, HOST, PORT, debug=False):
        self.minerEnv = MinerEnv(HOST, PORT)
        self.minerEnv.start()
        self.action_space = spaces.Discrete(6)
        self.observation_space = spaces.Discrete(198)
        self.action = None
        self.reward = None
        self.ob = None
        self.view = None
        self.state = self.minerEnv.state
        self.maxstep = self.minerEnv.state.mapInfo.maxStep
        self.img_array = []

    def step(self, action):
        self.minerEnv.step(str(action))
        reward = self.get_reward()
        ob = self.get_state()
        episode_over = self.check_terminate()
        self.ob = ob
        self.action = action
        self.reward = reward
        return ob, reward, episode_over, {
            'score': self.minerEnv.state.score,
            'action': action
        }

    def render(self, mode='human'):
        img = cv2.imread("/content/map2.png")
        for player in self.minerEnv.state.players:
            if player['playerId'] in [1, 2]:
                id = player['playerId']
                score = player['score']
                energy = player['energy']
                free_count = player['freeCount']
                last_action = ACTIONS[player['lastAction']]
                # last_action = ACTIONS[self.action]
                status = player['status']

                x = player['posx']
                y = player['posy']

                if x >= 21 or y >= 9:
                    continue
                pos_img = (36 + x * 71, 36 + y * 71)
                cv2.circle(img, pos_img, 16, COLORS_ID[id], -1)
        self.img_array.append(img)

    def reset(self):
        mapID = 1
        posID_x = np.random.randint(21)
        posID_y = np.random.randint(9)
        request = ("map" + str(mapID) + "," + str(posID_x) + "," +
                   str(posID_y) + ",50,100")
        self.minerEnv.send_map_info(request)
        self.minerEnv.reset()
        state = self.get_state()
        return state

    def check_terminate(self):
        return self.minerEnv.check_terminate()

    def get_reward(self):
        return self.minerEnv.get_reward()

    def get_state(self):
        view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    view[i, j] = -TreeID
                if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    view[i, j] = -TrapID
                if self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    view[i, j] = -SwampID
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    view[i, j] = self.state.mapInfo.gold_amount(i, j)

        self.view = view
        return self.minerEnv.get_state()

    def close(self):
        self.minerEnv.end()

    def start(self):
        self.minerEnv.start()
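render() above only collects frames into img_array; a hedged sketch (assumed output path and fps, and assuming every frame has the same size) for writing those frames to a video with OpenCV:

import cv2

def save_frames(img_array, path="rollout.mp4", fps=10):
    if not img_array:
        return
    h, w = img_array[0].shape[:2]
    writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    for frame in img_array:
        writer.write(frame)
    writer.release()

# after an episode: save_frames(env.img_array)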
Example #11
        policy = newDDPG.DDPG(**kwargs)
    if args.policy == "TD3_conv":
        policy = TD3_conv.TD3(**kwargs)
    if args.policy == "A2C":
        policy = A2C.A2C(**kwargs)

    if args.load_model != "":
        policy_file = file_name if args.load_model == "default" else args.load_model
        policy.load(f"./models/{policy_file}")

    replay_buffer = utils.ReplayBuffer(state_dim,
                                       action_dim=action_dim,
                                       max_size=int(10000))

    # Initialize environment
    minerEnv = MinerEnv(HOST, PORT)
    minerEnv.start()
    #init environment

    # Evaluate untrained policy
    #evaluations = [eval_policy(policy, minerEnv)]
    train = False
    best_score = {1: 0, 2: 0, 3: 0, 4: 0}
    for episode_i in range(0, N_EPISODE):
        # Reset environment
        mapID = request_to_env(minerEnv, train)
        # init environment game
        minerEnv.reset()
        #action = policy.select_action(np.array(state))
        state = minerEnv.get_state_tensor2(scale_map)
        done = False
Example #12
json_file.close()
DQNAgent = model_from_json(loaded_model_json)
# load weights into new model
DQNAgent.load_weights("RLModelSample.h5")
print("Loaded model from disk")
status_map = {
    0: "STATUS_PLAYING",
    1: "STATUS_ELIMINATED_WENT_OUT_MAP",
    2: "STATUS_ELIMINATED_OUT_OF_ENERGY",
    3: "STATUS_ELIMINATED_INVALID_ACTION",
    4: "STATUS_STOP_EMPTY_GOLD",
    5: "STATUS_STOP_END_STEP"
}
try:
    # Initialize environment
    minerEnv = MinerEnv(HOST, PORT)
    minerEnv.start()  # Connect to the game
    minerEnv.reset()
    s = minerEnv.get_state()  ##Getting an initial state
    while not minerEnv.check_terminate():
        try:
            action = np.argmax(DQNAgent.predict(s.reshape(
                1, len(s))))  # Getting an action from the trained model
            print("next action = ", action)
            minerEnv.step(
                str(action)
            )  # Performing the action in order to obtain the new state
            s_next = minerEnv.get_state()  # Getting a new state
            s = s_next
        except Exception as e:
            import traceback
Example #13
    mem_size=50000,
    eps_min=0.1,
    replace=1000,
    eps_dec=1e-5,
    chkpt_dir="models/",
    algo="dqnagent",
    env_name="minerai",
    gamma=0.99,
    epsilon=1,
    lr=0.00001,
)
if load_checkpoint:
    DQNAgent.load_models()

# Initialize environment
minerEnv = MinerEnv(HOST, PORT)
minerEnv.start()

fname = (DQNAgent.algo + "_" + DQNAgent.env_name + "_lr" + str(DQNAgent.lr) +
         "_" + str(N_EPISODE) + "games")
figure_file = "plots/" + fname + ".png"

n_steps = -100
scores, eps_history, steps_array = [], [], []

# Training Process
# the main part of the deep Q-learning algorithm

best_score = -100
for episode_i in range(0, N_EPISODE):
    try:
Example #14
    kwargs1 = {
        "state_dim": 28,
        "action_dim": 6,
        "max_action": 1.0,
    }
    policy_bot = TD3_bot.TD3(**kwargs1)
    policy_bot_file = "TD3_Miner_0_2"
    policy_bot.load(f"./ref_policy/models/{policy_bot_file}")
    #####

    replay_buffer = utils.ReplayBuffer(state_dim,
                                       action_dim=1,
                                       max_size=int(10000))

    # Initialize environment
    minerEnv = MinerEnv(HOST, PORT)
    minerEnv.start()
    #init environment

    # Evaluate untrained policy
    #evaluations = [eval_policy(policy, minerEnv, scale_map)]
    train = False
    best_score = {1: 0, 2: 0, 3: 0, 4: 0}
    score, best_sc = [], 0
    for episode_i in range(0, N_EPISODE):
        # Reset environment
        mapID = request_to_env(minerEnv, True)
        # init environment game
        minerEnv.reset()
        #action = policy.select_action(np.array(state))
        #state = np.reshape([minerEnv.get_state_tensor(scale_map)], (6, INPUTNUM[1], INPUTNUM[2]))
Example #15
class TFAgentsMiner(pyenv.PyEnvironment):
	def __init__(self, host, port, debug = False):
		super(TFAgentsMiner, self).__init__()

		self.miner_env= MinerEnv(host, port)
		self.miner_env.start()
		self.debug = debug
		
		self._action_spec = array_spec.BoundedArraySpec(shape = (), dtype = np.int32, minimum = 0, maximum = 5, name = 'action')
		self._observation_spec = array_spec.BoundedArraySpec(shape = (MAP_MAX_X*5,MAP_MAX_Y*5,6), 
			dtype = np.float32, name = 'observation')

	def action_spec(self):
		return self._action_spec

	def observation_spec(self):
		return self._observation_spec

	def _reset(self):
		mapID = np.random.randint(1, 6)
		posID_x = np.random.randint(MAP_MAX_X)
		posID_y = np.random.randint(MAP_MAX_Y)
		request = ("map" + str(mapID) + "," + str(posID_x) + "," + str(posID_y) + ",50,100")
		self.miner_env.send_map_info(request)
		self.miner_env.reset()
		observation = self.miner_env.get_state()

		return time_step.restart(observation)

	def _log_info(self):
		info = self.miner_env.socket

		# print(f'Map size: {self.miner_env.state.mapInfo.max_x}, {self.miner_env.state.mapInfo.max_y}')
		print(f"Self  - Pos ({info.user.posx}, {info.user.posy}) - Energy {info.user.energy} - Status {info.user.status}")
		for bot in info.bots:
			print(f"Enemy  - Pos ({bot.info.posx}, {bot.info.posy}) - Energy {bot.info.energy} - Status {bot.info.status}")
				
	def _step(self, action):
		if self.debug:
			self._log_info()
			
		self.miner_env.step(str(action))
		observation = self.miner_env.get_state()
		reward = self.miner_env.get_reward()

		if not self.miner_env.check_terminate():
			return time_step.transition(observation, reward)
		else:
			self.reset()
			return time_step.termination(observation, reward)

	def render(self):
		pass
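A minimal sketch of using the PyEnvironment above with TF-Agents (assuming a game socket is reachable at HOST/PORT and that get_state() returns observations matching _observation_spec):

from tf_agents.environments import tf_py_environment, utils

py_env = TFAgentsMiner(HOST, PORT)
utils.validate_py_environment(py_env, episodes=1)   # sanity-check specs against actual time steps
tf_env = tf_py_environment.TFPyEnvironment(py_env)  # tensor-based wrapper usable by TF-Agents drivers
time_step = tf_env.reset()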
Example #16
                  n_actions=ACTION_NUM,
                  mem_size=MEMORY_SIZE,
                  eps_min=0.05,
                  batch_size=BATCH_SIZE,
                  replace=10000,
                  eps_dec=1e-5,
                  chkpt_dir=FILE_PATH + '/weights/',
                  algo='DDQNAgent',
                  env_name='miner')

load_checkpoint = False
if load_checkpoint:
    agent.load_models()
n_games = 20000
minerEnv = MinerEnv(
    HOST, PORT
)  #Creating a communication environment between the DQN model and the game environment (GAME_SOCKET_DUMMY.py)
minerEnv.start()  # Connect to the game
path = FILE_PATH + '/Maps/'

fname = agent.algo + '_' + agent.env_name + '_lr' + str(agent.lr) +'_' \
            + str(n_games) + 'games'

figure_file = FILE_PATH + '/' + fname + '.png'
best_score = -np.inf

n_steps = 0
scores, eps_history, steps_array = [], [], []

for episode_i in range(n_games):
    done = False
Example #17
class MinerGymEnv(gym.Env):
    def __init__(self, HOST, PORT, debug=False):
        self.minerEnv = MinerEnv(HOST, PORT)
        self.minerEnv.start()
        self.action_space = spaces.Discrete(5)
        self.observation_space = spaces.Discrete(198)
        self.debug = debug
        self.view = None
        self.ob = None
        self.state = self.minerEnv.state

    def print(self, message):
        if self.debug:
            print(message)

    def draw_text(self, mat, text):
        cv2_im_rgb = cv2.cvtColor(mat, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        draw = ImageDraw.Draw(pil_im)

        draw.text((10, 10), text, font=font)

        cv2_im_processed = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
        return cv2_im_processed
        # cv2.imwrite("result.png", cv2_im_processed)

    def step(self, action):
        self.minerEnv.step(str(action))
        self.status = self.minerEnv.get_state()
        reward = self.get_reward()
        ob = self.get_state()
        episode_over = self.check_terminate()
        self.ob = ob
        if self.debug:
            self.render()

        return ob, reward, episode_over, {}

    def check_terminate(self):
        return self.minerEnv.check_terminate()

    def send_map_info(self, request):
        return self.minerEnv.send_map_info(request)

    def get_state(self):
        view = np.zeros(
            [self.state.mapInfo.max_x + 1, self.state.mapInfo.max_y + 1],
            dtype=int)
        for i in range(self.state.mapInfo.max_x + 1):
            for j in range(self.state.mapInfo.max_y + 1):
                if self.state.mapInfo.get_obstacle(i, j) == TreeID:  # Tree
                    view[i, j] = -TreeID
                if self.state.mapInfo.get_obstacle(i, j) == TrapID:  # Trap
                    view[i, j] = -TrapID
                if self.state.mapInfo.get_obstacle(i, j) == SwampID:  # Swamp
                    view[i, j] = -SwampID
                if self.state.mapInfo.gold_amount(i, j) > 0:
                    view[i, j] = self.state.mapInfo.gold_amount(i, j)

        self.view = view
        return self.minerEnv.get_state()

    def reset(self):
        mapID = np.random.randint(1, 6)
        posID_x = np.random.randint(MAP_MAX_X)
        posID_y = np.random.randint(MAP_MAX_Y)
        request = ("map" + str(mapID) + "," + str(posID_x) + "," +
                   str(posID_y) + ",50,100")
        self.minerEnv.send_map_info(request)
        self.minerEnv.reset()
        state = self.get_state()  # read the state after the environment has been reset
        return state

    def render(self, mode='human'):
        if self.view is None:
            return
        h, w = self.view.shape
        mat = np.zeros(shape=(h, w, 3), dtype=np.uint8)

        mat[self.view == -1, 1] = 153
        mat[self.view == -3, 1] = 53
        mat[self.view == -2, 0] = 153

        mat[self.view > 0, 1:3] = np.array(
            [self.view[self.view > 0], self.view[self.view > 0]]).T
        remaining_gold = sum(self.view[self.view > 0].flatten())
        t = PrettyTable(['ID', 'Score', 'Energy', 'Free count'])
        for player in self.minerEnv.state.players:
            id = player['playerId']
            score = player['score']
            energy = player['energy']
            free_count = player['freeCount']

            x = player['posx']
            y = player['posy']

            if x >= h or y >= w:
                continue

            if player['playerId'] == self.minerEnv.state.id:
                mat[x, y, :] = 255
                t.add_row(['player', score, energy, free_count])
            else:
                mat[x, y, 2] = 153
                t.add_row(['bot {}'.format(id), score, energy, free_count])

        blank = np.zeros(shape=(h * 38, w * 38, 3), dtype=np.uint8)
        z = 'Remaining gold: {}\n'.format(remaining_gold)
        z += t.get_string()
        blank = self.draw_text(mat=blank, text=z)

        mat = cv2.resize(mat, (w * 38, h * 38), interpolation=cv2.INTER_AREA)
        mat = np.concatenate((mat, blank), 1)
        cv2.imshow('game view', mat)
        cv2.waitKey(1)

    def get_reward(self):
        return self.minerEnv.get_reward()

    def close(self):
        self.minerEnv.end()

    def start(self):
        return self.minerEnv.start()
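A minimal random-agent rollout against the Gym wrapper above (a sketch only, assuming a game or dummy socket is reachable at HOST and PORT):

env = MinerGymEnv(HOST, PORT, debug=False)
ob = env.reset()
done = False
total_reward = 0
while not done:
    action = env.action_space.sample()        # random action from Discrete(5)
    ob, reward, done, info = env.step(action)
    total_reward += reward
env.close()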
Example #18
        policy = DDPG.DDPG(**kwargs)
    if args.policy == "newDDPG":
        policy = newDDPG.DDPG(**kwargs)
    if args.policy == "newTD3":
        policy = newTD3.TD3(**kwargs)
    if args.policy == "A2C":
        policy = A2C.A2C(**kwargs)

    if args.load_model != "":
        policy_file = file_name if args.load_model == "default" else args.load_model
        policy.load(f"./models/{policy_file}")

    replay_buffer = utils.ReplayBuffer(state_dim, action_dim=action_dim)

    # Initialize environment
    minerEnv = MinerEnv(HOST, PORT)
    minerEnv.start()
    #init environment

    # Evaluate untrained policy
    evaluations = [eval_policy(policy, minerEnv)]
    train = False
    for episode_i in range(0, N_EPISODE):
        # Reset environment
        mapID = request_to_env(minerEnv, train)
        # init environment game
        minerEnv.reset()
        #action = policy.select_action(np.array(state))
        state = minerEnv.get_state2(int(args.limit_obs))
        done = False
        maxStep = minerEnv.state.mapInfo.maxStep
Example #19
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = 'Logs/' + current_time
summary_writer = tf.summary.FileWriter(log_dir)

#log_dir_2 = 'Logs/check_time_' + current_time
#summary_writer_time = tf.summary.FileWriter(log_dir_2)

# Initialize a DQN model and a memory batch for storing experiences
DQNAgent = DQN(INPUT_SHAPE_1, INPUT_SHAPE_2, ACTION_NUM, epsilon_decay=0.99999, epsilon_min=0.1)
DQNAgent.update_target_model()
memory = Memory(MEMORY_SIZE)
current_memory = Memory(32000)

# Initialize environment
minerEnv = MinerEnv(HOST, PORT) #Creating a communication environment between the DQN model and the game environment (GAME_SOCKET_DUMMY.py)
minerEnv.start()  # Connect to the game

train = False  # The variable indicates that replay has started and epsilon starts to decrease.
# Training Process
# the main part of the deep Q-learning algorithm

total_step = 0
loss1 = 0
loss2 = 0

for episode_i in range(0, N_EPISODE):
    try:
        # Choosing a map in the list
        #mapID = np.random.randint(1, 13)  # Choosing a map ID from 12 maps in Maps folder randomly
        mapID = 1  # Choosing a map ID from 12 maps in Maps folder randomly