def init_conn(self):
    """Init connection with the torchcraft server."""
    # Import torchcraft in this function so that torchcraft is not an
    # explicit dependency for projects importing this repo.
    import torchcraft as tc

    self.client1 = tc.Client()
    self.client1.connect(self.server_ip, self.server_port1)
    self.state1 = self.client1.init()

    self.client2 = tc.Client()
    self.client2.connect(self.server_ip, self.server_port2)
    self.state2 = self.client2.init()

    setup = [
        [tcc.set_combine_frames, 1],
        [tcc.set_speed, self.speed],
        [tcc.set_gui, self.set_gui],
        # NOTE: We use a custom frameskip method now, so keep the
        # built-in frameskip at 1.
        [tcc.set_frameskip, 1],
        [tcc.set_cmd_optim, 1],
    ]
    self.client1.send(setup)
    self.state1 = self.client1.recv()
    self.client2.send(setup)
    self.state2 = self.client2.recv()
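
# A minimal sketch of the "custom frameskip" the NOTE above refers to:
# with tcc.set_frameskip left at 1, each environment step can advance
# several frames by repeating the send/recv handshake. The method name,
# the default of 4, and the empty (no-op) command lists are illustrative
# assumptions, not part of the original code.
def _skip_frames(self, frame_skip=4):
    """Advance both clients frame_skip frames with no-op commands."""
    for _ in range(frame_skip):
        self.client1.send([])
        self.state1 = self.client1.recv()
        self.client2.send([])
        self.state2 = self.client2.recv()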
def __init__(self, server_ip, server_port, speed, frame_skip, self_play,
             max_episode_steps):
    self.ip = server_ip
    self.port = server_port
    self.client = tc.Client()
    self.client.connect(server_ip, server_port)
    self.state = self.client.init(micro_battles=True)

    self.speed = speed
    self.frame_skip = frame_skip
    self.self_play = self_play
    self.max_episode_steps = max_episode_steps
    self.step_limit = 300
    self.step_rate = 10

    self.episodes = 0
    self.episode_wins = 0
    self.episode_steps = 0

    self.action_space = self._action_space()
    self.observation_space = self._observation_space()

    self.state = None
    self.obs = None
    self.obs_pre = None
    self.advanced_termination = True
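
# Hypothetical construction of the environment whose __init__ is shown
# above; the class name StarCraftEnv and every argument value here are
# illustrative assumptions.
env = StarCraftEnv(server_ip='127.0.0.1', server_port=11111,
                   speed=0, frame_skip=9, self_play=False,
                   max_episode_steps=500)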
def dump_replay(path, dest, bwenv):
    print('>> Dumping {} -> {}'.format(path, dest))
    port = find_free_port()
    cmdline = OPENBW_REPLAY_TEMPLATE.format(port=port, bwenv=bwenv, map=path)
    openbw = subprocess.Popen(cmdline, shell=True, preexec_fn=os.setsid)

    cl = tc.Client()
    cl.connect('localhost', port)
    state = cl.init()

    skip_frames = 3
    cl.send([
        [tcc.set_speed, 0],
        [tcc.set_gui, 0],
        [tcc.set_combine_frames, skip_frames, skip_frames],
        [tcc.set_max_frame_time_ms, 0],
        [tcc.set_blocking, 0],
        [tcc.set_frameskip, 1000],
        [tcc.set_log, 0],
        [tcc.set_cmd_optim, 1],
    ])
    state = cl.recv()

    rep = tc.replayer.Replayer()
    rep.setMapFromState(state)
    while not state.game_ended:
        rep.push(state.frame)
        state = cl.recv()
    rep.setKeyFrame(-1)
    rep.save(dest, True)

    # Bye, bye
    os.killpg(os.getpgid(openbw.pid), signal.SIGTERM)
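
# A hypothetical driver for dump_replay(): converts every .rep file in a
# directory into a torchcraft dump. The glob pattern, the .tcr output
# extension and the bwenv path are illustrative assumptions.
import glob

if __name__ == '__main__':
    for rep in glob.glob('replays/*.rep'):
        dump_replay(rep, rep.replace('.rep', '.tcr'), bwenv='./bwenv')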
def __connect(self):
    '''Connect to the server and return the initial state.'''
    self.cl = tc.Client()
    dprint("connection start.....")
    connect_rt = self.cl.connect(self.default_config['hostname'],
                                 self.default_config['port'])
    dprint('connection rt: ', connect_rt)  # dprints True
    state = self.cl.init(micro_battles=True)
    for pid, player in state.player_info.items():
        dprint("player {} named {} is {}".format(
            player.id, player.name,
            tc.Constants.races._dict[player.race]), level=2)
    dprint(state.map_size, level=1)

    # Initial setup of the game
    _ = self.cl.send([
        [tcc.set_combine_frames, self.default_config['skip_frames']],
        [tcc.set_speed, self.default_config['set_speed']],
        [tcc.set_gui, 1],
        [tcc.set_cmd_optim, 1],
    ])
    # dprint('init set up: ', _)  # dprints True
    dprint("connection ended.....")
    return state
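
# For reference, the default_config keys that __connect() reads. The
# values shown are illustrative assumptions, not the project's defaults.
default_config = {
    'hostname': '127.0.0.1',  # torchcraft server address
    'port': 11111,            # torchcraft server port
    'skip_frames': 7,         # frames combined per recv()
    'set_speed': 0,           # game speed (0 = fastest)
}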
def __init__(self, server_ip, server_port):
    assert server_ip != '', "Server ip cannot be empty"
    assert server_port != '', "Server port cannot be empty"

    self.server_ip = server_ip
    self.server_port = server_port
    self.socket = tc.Client()
    self.message_just_sent = False
    self.state = ServerState()
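
# A minimal connect helper for the class above, sketched after the
# handshake used in the other snippets; this method is an assumption and
# not part of the original class.
def connect(self):
    """Open the torchcraft connection and return the initial state."""
    self.socket.connect(self.server_ip, self.server_port)
    return self.socket.init()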
def _launch_client(self):
    self.controller = tc.Client()
    self.controller.connect(self.hostname, self.port)
    self._obs = self.controller.init(micro_battles=self.micro_battles)
    self.controller.send([
        [tcc.set_combine_frames, self._step_mul],
        [tcc.set_speed, 0],
        [tcc.set_gui, 1],
        [tcc.set_cmd_optim, 1],
    ])
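
# A sketch of how a step could consume the client launched above,
# following the send/recv cadence of the other snippets; the method name
# and the no-op default are illustrative assumptions.
def _step(self, actions=None):
    self.controller.send(actions or [])
    self._obs = self.controller.recv()
    return self._obs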
        command = [
            tcc.command_openbw,
            tcc.openbwcommandtypes.KillUnit,
            u.id,
        ]
        commands.append(command)
    return commands


skip_frames = 1
nrestarts = 0
total_battles = 0
max_add_quantity = 20
tries = 5

cl = tc.Client()
cl.connect(args.hostname, args.port)
state = cl.init(micro_battles=args.micro_battles)
returned = cl.send([
    [tcc.set_combine_frames, skip_frames],
    [tcc.set_speed, 0],
    [tcc.set_gui, 1],
    [tcc.set_cmd_optim, 1],
    [tcc.map_hack],
])
state = cl.recv()

for i in range(tries):
    print("# try: {}".format(i))
    while state.game_ended or state.waiting_for_restart:
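
# For context, a self-contained version of the KillUnit fragment above;
# the function name and the player index parameter are illustrative
# assumptions.
def kill_units(state, player=0):
    """Build openbw KillUnit commands for every unit of a player."""
    commands = []
    for u in state.units[player]:
        commands.append([
            tcc.command_openbw,
            tcc.openbwcommandtypes.KillUnit,
            u.id,
        ])
    return commands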
def run_thread(agent, game_num, Synchronizer, difficulty, sc_port):
    global UPDATE_EVENT, ROLLING_EVENT, Counter, Waiting_Counter, Update_Counter, Result_List

    num = 0
    all_num = 0
    proc_name = mp.current_process().name
    while all_num != game_num * TRAIN_ITERS:
        env = tc.Client()
        env.connect(FLAGS.hostname, sc_port)
        state = env.init(micro_battles=False)
        for pid, player in state.player_info.items():
            pass
            # print("player {} named {} is {}".format(
            #     player.id, player.name, tc.Constants.races._dict[player.race]))

        # Initial setup
        env.send([
            [tcc.set_speed, 0],
            [tcc.set_gui, 0],
            [tcc.set_cmd_optim, 1],
            [tcc.set_frameskip, FLAGS.frame_skip],
        ])

        agent.set_env(env)
        agent.set_obs(state)
        agent.play()

        if FLAGS.training:
            # Check whether the number of episodes is enough to update
            num += 1
            all_num += 1
            reward = agent.result['reward']
            Counter += 1
            Result_List.append(reward)
            logging("(diff: %d) %d epoch: %s get %d/%d episodes! return: %d!"
                    % (int(difficulty), Update_Counter, proc_name,
                       len(Result_List), game_num * THREAD_NUM, reward))

            # Time to update
            if num == game_num:
                num = 0
                ROLLING_EVENT.clear()  # worker stops rolling, waits for update
                if agent.index != 0 and THREAD_NUM > 1:
                    Waiting_Counter += 1
                    if Waiting_Counter == THREAD_NUM - 1:  # wait for all the workers to stop
                        UPDATE_EVENT.set()
                    ROLLING_EVENT.wait()
                # update!
                else:
                    if THREAD_NUM > 1:
                        UPDATE_EVENT.wait()
                    Synchronizer.wait()  # wait for other processes to update
                    agent.update_network(Result_List)
                    Result_List.clear()
                    agent.global_buffer.reset()
                    Synchronizer.wait()
                    Update_Counter += 1

                    # Finish the update
                    UPDATE_EVENT.clear()
                    Waiting_Counter = 0
                    ROLLING_EVENT.set()

        if FLAGS.save_replay:
            env.save_replay(FLAGS.replay_dir)
        env.close()
        agent.reset()
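
# A hypothetical launcher for run_thread(): one process per agent, with
# a multiprocessing.Barrier serving as the Synchronizer. Agent creation,
# the port layout and base_port are illustrative assumptions.
def launch_workers(agents, game_num, difficulty, base_port=11111):
    sync = mp.Barrier(len(agents))
    procs = [mp.Process(target=run_thread,
                        args=(agent, game_num, sync, difficulty, base_port + i),
                        name='worker-{}'.format(i))
             for i, agent in enumerate(agents)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()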
def train(self):
    while self.episodes <= self.max_episodes:
        dprint("", 0)
        dprint("CTRL-C to stop", 0)
        dprint("", 0)

        nloop = 0  # step count within the current episode
        cl = tc.Client()
        cl.connect(args.hostname, args.port)
        state = cl.init(micro_battles=True)
        # dprint("image_size" + str(state.image_size), 0)
        # dprint("map_size" + str(state.map_size), 0)
        # dprint("visibility_size" + str(state.visibility_size), 0)  # 80, 128
        # dprint("start_locations" + str(state.start_locations[0].y), 0)
        env_utils._SCREEN_SIZE = np.array(state.map_size, np.int32) - 1

        for pid, player in state.player_info.items():
            dprint("player {} named {} is {}".format(
                player.id, player.name,
                tc.Constants.races._dict[player.race]), 0)

        # Initial setup of the game
        cl.send([
            [tcc.set_combine_frames, self.skip_frames],
            [tcc.set_speed, self.set_speed],
            [tcc.set_gui, 1],
            [tcc.set_cmd_optim, 1],
        ])

        while True:
            nloop += 1
            state = cl.recv()
            reward = None
            if state.game_ended:
                dprint("GAME ENDED", 0)
                break

            # Timed out: end the episode
            if state.battle_frame_count > FLAGS.max_step * self.skip_frames or self.part_end:  # 2 * 60 * 24
                self.model_total_episodes += 1  # count total episodes
                self.reset()
                cl.send([[tcc.restart]])
                dprint("Battle frame count: {} too large!".format(
                    state.battle_frame_count), 0)
                continue

            self.steps += 1

            # 1: First step: cache unit tags, id mappings and initial health
            if self.all_friends_tag is None and not state.waiting_for_restart:
                all_friend_units = state.units[0]
                all_enemy_units = state.units[1]
                raw_friends, raw_enemies = env_utils.get_units_info(
                    all_friend_units, all_enemy_units)
                self.all_friends_tag = raw_friends[:, 0]
                self.all_enemies_tag = raw_enemies[:, 0]
                self.friends_tag_2_id = {tag: id for id, tag in enumerate(self.all_friends_tag)}
                self.friends_id_2_tag = dict(enumerate(self.all_friends_tag))
                self.enemies_tag_2_id = {tag: id for id, tag in enumerate(self.all_enemies_tag)}
                self.enemies_id_2_tag = dict(enumerate(self.all_enemies_tag))
                self.friends_pre_health = {tag: health for tag, health in raw_friends[:, [0, 7]]}
                self.enemies_pre_health = {tag: health for tag, health in raw_enemies[:, [0, 7]]}
                dprint("init", 0)
            # 2: Not the first step: compute the reward of the previous step's actions
            elif self.all_friends_tag is not None:
                reward = self._calculated_reward(state)
                self.reward += reward
                # Accumulate the return of the current episode
                self.return_of_each_episode += reward

            # 3: Battle over (won or lost)
            if state.battle_just_ended:
                if state.battle_won:
                    self.model_total_episodes += 1  # count total episodes
                    self.model_win_episodes += 1    # count model wins
                    self.recent_100_episodes_win_cumulative += 1
                    self.cumulative_win_times += 1
                    print("win: episode {}, total step {}, return: {}, "
                          "cumulative win times: {}, recent {}/100".format(
                              self.episodes, self.steps, self.return_of_each_episode,
                              self.cumulative_win_times,
                              self.recent_100_episodes_win_cumulative))
                    if self.is_training:
                        # Store (s, a, r, s_, is_finished); the terminal state itself is irrelevant
                        self.save_transition(self.pre_all_alive_agents, reward, None)
                        self.record_recent_win_rate()
                else:
                    self.model_total_episodes += 1  # count total episodes
                    print("loss: episode {}, total step {}, return: {}, "
                          "cumulative win times: {}, recent {}/100".format(
                              self.episodes, self.steps, self.return_of_each_episode,
                              self.cumulative_win_times,
                              self.recent_100_episodes_win_cumulative))
                    if self.is_training:
                        # Store (s, a, r, s_, is_finished); the terminal state itself is irrelevant
                        self.save_transition(self.pre_all_alive_agents, reward, None)
                        self.record_recent_win_rate()
                self.reset()
                actions = [[tcc.restart]]
                cl.send(actions)
                continue
            elif state.waiting_for_restart:
                dprint("WAITING FOR RESTART", 0)
                continue
            # 4: First step or mid-game: choose an action for every alive friend
            else:
                actions = []
                alive_friends = state.units[0]
                alive_enemies = state.units[1]
                alive_friends, alive_enemies = env_utils.get_units_info(
                    alive_friends, alive_enemies)
                # dprint("friends: {} \n".format(state.battle_frame_count) + str(alive_friends), -3)
                # dprint("enemies: {} \n".format(state.battle_frame_count) + str(alive_enemies), -3)
                # dprint("alives: " + str(state.aliveUnits), 0)
                dprint("frame count: {}".format(state.battle_frame_count), 0)
                dprint("step: {}".format(self.steps), 0)

                all_alive_agents = {}
                select_actions_prob = []
                for friend in alive_friends:
                    agent_tuple = {}
                    local_observation, sequence_len, alive_friends_order = \
                        env_utils.cal_local_observation_for_unit(
                            friend, alive_friends, alive_enemies,
                            self.friends_tag_2_id, self.enemies_tag_2_id)
                    dprint(local_observation, 0)
                    if self.is_training:
                        selected_action_id, _ = self.actor.operation_choose_action(
                            1, [local_observation[0]], [local_observation[1]],
                            [sequence_len[0]], [sequence_len[1]], is_training=False)
                        select_actions_prob.append(_)
                    else:
                        selected_action_id = self.actor.operation_greedy_action(
                            1, [local_observation[0]], [local_observation[1]],
                            [sequence_len[0]], [sequence_len[1]], is_training=False)

                    # What is stored here is effectively r, s', finished and the actions of s'
                    agent_tuple['state_friend'] = local_observation[0]
                    agent_tuple['state_enemy'] = local_observation[1]
                    agent_tuple['sequence_friend'] = sequence_len[0]
                    agent_tuple['sequence_enemy'] = sequence_len[1]
                    agent_tuple['terminated'] = False
                    agent_tuple['action'] = env_utils.one_hot_action(
                        selected_action_id, FLAGS.action_dim)
                    # From this unit's point of view: the order of the other
                    # alive units' ids (not tags), excluding itself
                    agent_tuple['action_other_order'] = alive_friends_order
                    all_alive_agents[self.friends_tag_2_id[friend[0]]] = agent_tuple

                    action_sc1 = env_utils.convert_discrete_action_2_sc1_action(
                        friend, selected_action_id, alive_enemies, self.enemies_id_2_tag)
                    actions.extend(action_sc1)

                cl.send(actions)

                if self.steps % FLAGS.print_softmax_every_steps == 0:
                    print("select_actions", select_actions_prob)
                    self.append_log_to_file(
                        "{}/actions/select_actions.txt".format(FLAGS.map),
                        "episodes {}, steps {}, select actions: {}".format(
                            self.episodes, self.steps, select_actions_prob))

                if self.is_training and reward:
                    self.save_transition(self.pre_all_alive_agents, reward, all_alive_agents)

                # Update the previous-step bookkeeping
                self.pre_all_alive_agents = all_alive_agents

            # TODO: batch training
            if (self.is_training
                    and self.replay_buffer.length >= FLAGS.batch_size * 2
                    and self.steps % FLAGS.training_every_steps == 0):
                (fr_states, em_state, fr_seq_len, em_seq_len, ac_others, ac, reward,
                 nxt_fr_states, nxt_em_states, nxt_fr_sequence_len, nxt_em_sequence_len,
                 nxt_oth_fr_states, nxt_oth_em_states, nxt_oth_fr_seq_len,
                 nxt_oth_em_seq_len, terminated_batch) = \
                    self.replay_buffer.sample_batch(FLAGS.batch_size)

                # Training the critic
                # 1: prepare the batch data
                action_others_batch_s_ = []
                # TODO: there may be only a single entry (the current unit has already died)
                for nxt_fr_s, nxt_fr_seq, nxt_em_s, nxt_em_seq in zip(
                        nxt_oth_fr_states, nxt_oth_fr_seq_len,
                        nxt_oth_em_states, nxt_oth_em_seq_len):
                    # For each unit: the observations of all the other units
                    if nxt_fr_s is None:
                        action_others_s_ = None
                    else:
                        # Not yet one-hot encoded
                        action_others_s_ = self.actor.operation_greedy_action(
                            len(nxt_fr_s), nxt_fr_s, nxt_em_s,
                            nxt_fr_seq, nxt_em_seq, is_training=False)
                    action_per = []
                    if action_others_s_ is not None:
                        for action_id in action_others_s_:
                            one_hot_a = env_utils.one_hot_action(action_id, FLAGS.action_dim)
                            action_per.append(one_hot_a)
                    action_others_batch_s_.append(self._flatten_others_actions(action_per))

                # 2: compute the TD target
                batch_td_target = self.critic.operation_get_TDtarget(
                    len(nxt_fr_states), nxt_fr_states, nxt_em_states,
                    nxt_fr_sequence_len, nxt_em_sequence_len,
                    action_others_batch_s_,  # already aligned, 11 * 8
                    reward, terminated_batch, is_training=True)

                # 3: train the critic
                self.critic.operation_critic_learn(
                    len(fr_states), fr_states, em_state, fr_seq_len, em_seq_len,
                    ac_others, ac, batch_td_target, is_training=True)

                # Training the actor
                # 4: calculate the advantage
                actor_output_probability = self.actor.operation_cal_softmax_probablility(
                    len(fr_states), fr_states, em_state, fr_seq_len, em_seq_len,
                    is_training=True)
                batch_advantages = self.critic.operation_cal_advantage(
                    len(fr_states), fr_states, em_state, fr_seq_len, em_seq_len,
                    ac_others, ac, actor_output_probability, is_training=True)

                # 5: update the actor
                cost = self.actor.operation_actor_learn(
                    len(fr_states), fr_states, em_state, fr_seq_len, em_seq_len,
                    ac, batch_advantages, is_training=True)
                # self.new_state = final_state

                # =============================== summaries ===============================
                if self.steps % FLAGS.log_every_steps == 0:
                    feed_dict = {
                        self.actor.state_inputs_friends: fr_states,
                        self.actor.state_inputs_enemies: em_state,
                        self.actor.sequence_length_friends: fr_seq_len,
                        self.actor.sequence_length_enemies: em_seq_len,
                        self.actor.execute_action: ac,
                        self.actor.advantage: batch_advantages,
                        self.actor.is_training: True,
                        self.actor.keep_prob: 1.,
                        self.actor.batch_size: len(fr_states),
                        self.critic.state_input_friends: fr_states,
                        self.critic.state_input_enemies: em_state,
                        self.critic.sequence_length_friends: fr_seq_len,
                        self.critic.sequence_length_enemies: em_seq_len,
                        self.critic.other_units_action_input: ac_others,
                        self.critic.self_action_input: ac,
                        self.critic.Q_value_label_input: batch_td_target,
                        self.critic.is_training: True,
                        self.critic.keep_prob: 1.,
                        self.critic.batch_size: len(fr_states),
                        self.cumulative_reward_tensor: self.reward,
                        self.cumulative_win_times_tensor: self.cumulative_win_times,
                        self.return_of_each_episode_tensor: self.pre_return_of_each_episode,
                    }
                    rs = self.sess.run(self.merged, feed_dict=feed_dict)
                    self.writer.add_summary(rs, self.steps)
                # ============================= summaries END =============================

                # Soft-update the parameters of the two target networks
                # print("soft update parameters: episode {}, step {}, reward: {}".format(
                #     self.episodes, self.steps, reward))
                self.actor.operation_soft_update_TDnet()
                self.critic.operation_soft_update_TDnet()

            if (not self.is_training
                    and self.model_total_episodes == FLAGS.cal_win_rate_every_episodes):
                if not self.is_testing:
                    self.is_training = True
                print("model {} test end: "
                      "=========================================================".format(self.model_id))
                content = ("|| model {} test win rate : | episodes {} | steps {} "
                           "| win rate {}/{} ||".format(
                               self.model_id, self.episodes, self.steps,
                               self.model_win_episodes, self.model_total_episodes))
                self.append_log_to_file("{}/model/model.txt".format(FLAGS.map), content)
                self.model_id += 1
                self.model_win_episodes = 0
                self.model_total_episodes = 0

            # TODO: save the model every half hour
            # if self.is_training and (time.time() - self.pre_save_time) > 1800:
            if (self.is_training
                    and self.model_total_episodes == FLAGS.verify_every_episodes
                    and (time.time() - self.pre_save_time) > 10):  # guard against duplicate saves
                content = "model {}: episodes {}, steps {}, win rate {}/{}".format(
                    self.model_id, self.episodes, self.steps,
                    self.model_win_episodes, self.model_total_episodes)
                self.append_log_to_file("{}/model/model.txt".format(FLAGS.map), content)
                self.model_win_episodes = 0
                self.model_total_episodes = 0
                self.saver.save(self.sess, "{}/checkpoint_{}/model.ckpt".format(
                    FLAGS.map, self.model_id))
                self.pre_save_time = time.time()
                print("model {} test begin: "
                      "=========================================================".format(self.model_id))
                self.is_training = False

            # Base per-step update
            self.update_of_each_step(state)
            self.part_end = False

        cl.close()
        print(self.episodes)
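
# A sketch of the _calculated_reward() that train() calls. The original
# implementation is not shown, so this assumes the common delta-health
# shaping (damage dealt minus damage taken), read from the tag/health
# caches that train() maintains; column indices follow the
# raw_friends[:, [0, 7]] usage above.
def _calculated_reward(self, state):
    friends, enemies = env_utils.get_units_info(state.units[0], state.units[1])
    reward = 0.0
    for tag, health in enemies[:, [0, 7]]:
        reward += self.enemies_pre_health.get(tag, health) - health
        self.enemies_pre_health[tag] = health  # refresh the cache
    for tag, health in friends[:, [0, 7]]:
        reward -= self.friends_pre_health.get(tag, health) - health
        self.friends_pre_health[tag] = health  # refresh the cache
    return reward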