    def get_spatial(self, net):
        '''
        Gets the spatial action of the network
        '''
        if self.debug:
            log("getting spatial action")
            s = Stopwatch()
        net = tf.layers.conv2d(net, 32, [3, 3], strides=1, padding='SAME',
                               activation=tf.nn.relu, name="finalConv")
        net = tf.layers.conv2d(net, 1, [1, 1], strides=1, padding='SAME', name="conv1x1")
        flat = tf.layers.flatten(net)
        dist = tf.distributions.Categorical(logits=flat)
        sample = dist.sample()
        # integer division so unravel_index gets an integer grid size (half the full map width)
        coords = tf.unravel_index(sample, [self.rows, self.columns // 2])
        if self.debug:
            log("Finished spatial action inference. Took: " + s.delta)
        return coords
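# A quick eager-mode check of the flat-index decode used by get_spatial above: the sampled
# categorical index is mapped back onto the (row, column) grid with tf.unravel_index.
# For a 20x40 map the per-player grid is 20x20, so flat index 47 is row 2, column 7.
_decode_example = tf.unravel_index(indices=[47], dims=[20, 20])  # -> [[2], [7]]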
    def add_non_spatial(self, net):
        '''
        Infers the non-spatial action of the network
        '''
        if self.debug:
            log("Getting non-spatial action")
            s = Stopwatch()
        net = custom_layers.add_inception_resnet_B(net, '1a0')
        net = custom_layers.add_inception_resnet_B(net, '2a0')
        net = Layers.Conv2D(8, [1, 1], strides=1, padding='SAME',
                            activation=tf.nn.relu, name="non_spat_conv2")(net)
        flatten = Layers.Flatten()(net)
        non_spatial = Layers.Dense(256, activation=tf.nn.relu, name="non_spatial")(flatten)
        a0_logits = Layers.Dense(constants.n_base_actions, name="a0")(non_spatial)
        a0_sampled = SampleCategoricalLayer()(a0_logits)
        if self.debug:
            log("Finished non-spatial action. Took: " + s.delta)
        return a0_sampled
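# SampleCategoricalLayer is defined elsewhere in the project (it is one of the
# custom_keras_layers passed to load_model). A minimal sketch of the behaviour assumed
# above -- draw one index per batch row from a categorical distribution over the logits;
# the project's real layer may differ:
class SampleCategoricalLayerSketch(tf.keras.layers.Layer):
    def call(self, logits):
        dist = tf.distributions.Categorical(logits=logits)
        return dist.sample()  # one sampled action index per batch row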
    def generate_action(self):
        '''
        Scud model estimator
        '''
        if self.debug:
            log("Running conv2d on " + device)
        with tf.device('/' + device + ':0'):
            net = self.add_base()
            #print("state shape: ", net.shape)  # (1, 20, 20, 32)

            ## split into non-spatial and spatial action paths
            a0 = self.get_non_spatial(net)
            building = int(a0)  # now an int between 0 and 3
            if self.debug:
                log("a0 = " + str(a0))

            coords = self.get_spatial(net)
            x = int(coords[0])
            y = int(coords[1])
            if self.debug:
                log("x, y = " + str(x) + ", " + str(y))

        ## loading the state (for RNN stuffs)
        if self.debug:
            log("Loading state")
            sss = Stopwatch()
        _ = np.load('scudstate.npy')  # takes ~0.031s
        if self.debug:
            log("State loaded. Took: " + sss.delta)

        ## saving the state (for RNN stuffs)
        if self.debug:
            log("Saving state")
            ss = Stopwatch()
        new_state = net
        np.save('scudstate.npy', new_state)
        if self.debug:
            log("State saved. Took: " + ss.delta)

        #util.write_action(x, y, building)
        return x, y, building
    def add_base(self):
        if self.debug:
            log("Adding base")
            s = Stopwatch()
        with tf.name_scope("adding_base") as scope:
            net = self.spatial
            for i in range(2):
                net = tf.layers.conv2d(net, 32, [3, 3], strides=1, padding='SAME',
                                       activation=tf.nn.relu,
                                       name="conv" + str(i))  # ok well this takes 5 seconds
        if self.debug:
            log("Finished adding base. Took: " + s.delta)
        return net
    def get_non_spatial(self, net):
        '''
        Infers the non-spatial action of the network
        '''
        if self.debug:
            log("Getting non-spatial action")
            s = Stopwatch()
        non_spatial = tf.layers.dense(tf.layers.flatten(net), 256,
                                      activation=tf.nn.relu, name="non_spatial")
        a0 = tf.layers.dense(non_spatial, n_base_actions, name="a0")
        # TODO: possibly softmax this and then transform it into an int from 0 - 4
        # possibly use tf autoregressive distribution
        dist = tf.distributions.Categorical(logits=a0)
        sample = dist.sample()
        if self.debug:
            log("Finished non-spatial action. Took: " + s.delta)
        return sample
    def add_spatial(self, net, a0):
        '''
        Gets the spatial action of the network
        '''
        if self.debug:
            log("getting spatial action")
            s = Stopwatch()
        one_hot_a0 = OneHotLayer(constants.n_base_actions)(a0)
        k = net.get_shape().as_list()
        broadcast_stats = Layers.RepeatVector(int(k[1] * k[2]))(one_hot_a0)
        broadcast_stats2 = Layers.Reshape((k[1], k[2], constants.n_base_actions))(broadcast_stats)
        net = Layers.concatenate([net, broadcast_stats2], axis=-1)  # (?, 8, 8, 38)
        net = Layers.Conv2D(64, [3, 3], strides=1, padding='SAME',
                            activation=tf.nn.relu, name="finalConv")(net)
        net = custom_layers.add_inception_resnet_B(net, '1a1')
        net = custom_layers.add_inception_resnet_B(net, '2a1')
        net = Layers.Conv2D(1, [1, 1], strides=1, padding='SAME', name="conv1x1")(net)
        logits = Layers.Flatten()(net)
        a1_sampled = SampleCategoricalLayer()(logits)
        if self.debug:
            log("Finished spatial action inference. Took: " + s.delta)
        return a1_sampled
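# OneHotLayer is likewise provided by the project's custom layers; the behaviour assumed
# above is a one-hot encoding of the sampled base action to a fixed depth. A minimal
# sketch (not the project's actual implementation):
class OneHotLayerSketch(tf.keras.layers.Layer):
    def __init__(self, depth, **kwargs):
        super(OneHotLayerSketch, self).__init__(**kwargs)
        self.depth = depth

    def call(self, indices):
        return tf.one_hot(tf.cast(indices, tf.int32), depth=self.depth)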
                  str(path)[-50:])

    def load(self, filepath, savename):
        if savename is None:
            path = os.path.join(filepath, str(self.name) + '.h5')
        else:
            if not savename.endswith('.h5'):
                path = os.path.join(filepath, str(savename) + '.h5')
            else:
                path = os.path.join(filepath, str(savename))
        self.model = tf.keras.models.load_model(path, custom_objects=custom_keras_layers)
        if self.refbot_position != -1:
            print(">> SCUD >> ", self.name, "(refbot pos ", self.refbot_position,
                  ") had model restored from file ", str(path)[-50:])
        else:
            print(">> SCUD >> ", self.name, " had model restored from file ",
                  str(path)[-50:])

    def __str__(self):
        return "SCUD2 [Name: {:20} | Masking: {:3} | Refbot pos: {:2d}]".format(
            self.name, self.mask_output, self.refbot_position)


if __name__ == '__main__':
    k = Stopwatch()
    s = Scud('www', debug=True)
    we = s.get_flat_weights()
    log("Round-time was {}".format(k.delta))
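# The Stopwatch helper used for the timing logs throughout this code is not included in
# this section. A minimal sketch consistent with how it is called here (a settable
# `startime` attribute, reset(), deltaT() in seconds, and `delta` as a printable string);
# the real helper may differ:
import time


class StopwatchSketch(object):
    def __init__(self):
        self.startime = time.time()

    def reset(self):
        self.startime = time.time()

    def deltaT(self):
        # elapsed wall-clock time in seconds
        return time.time() - self.startime

    @property
    def delta(self):
        # elapsed time as a short human-readable string for log messages
        return "{:.3f}s".format(self.deltaT())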
def main(mode): sTime = Stopwatch() env_names = ['env' + str(i) for i in range(n_envs)] if mode in ['test', 'rank']: train = False else: train = True if mode == 'resume': resume_training = True else: resume_training = False def make_env(name): def env_fn(): env_inside = Env(name, console_debug) return env_inside return env_fn print('>> manager >> creating envs') s = Stopwatch() try: env = SubprocEnvManager([make_env(s) for s in env_names]) except EOFError as e: print("caught an EOFError ", e, '\nClosing the env now') env.close() return print('>> manager >> created envs. Took ', s.delta) no_act_vec = [constants.no_op_action for _ in range(n_envs)] # TODO: # obs = np.zeros() # some initial state if train: try: storm.train(env, n_envs, no_act_vec, resume_training) except Exception as err: try: exc_info = sys.exc_info() finally: traceback.print_exception(*exc_info) del exc_info finally: print('>> manager >> closing env. Total runtime: ', sTime.delta) env.close() sys.exit(0) elif mode == 'rank': try: print("Getting MMR ranks") runner.mmr_from_checkpoints(env) print("Finished getting ranks") except Exception as err: try: exc_info = sys.exc_info() finally: traceback.print_exception(*exc_info) del exc_info finally: print('>> manager >> closing env. Total runtime: ', sTime.delta) env.close() sys.exit(0) else: try: actions = no_act_vec agents = [Scud(name=str(i), debug=False) for i in range(n_envs)] print(agents[0].model.count_params()) # checkpoint_names = os.listdir(util.get_savedir('checkpoints')) # checkpoint_names = sorted(checkpoint_names, reverse=True) #agents[0].load(util.get_savedir('checkpoints'), 'gen50elite.h5') #agents[0].load(util.get_savedir(), 'scudsave') #agents[0].save(util.get_savedir(), 'scudsave') refbot = Scud('ref', False) env.reset() obs = util.get_initial_obs(n_envs) #print("manager obs shape = ", ob.shape) ref_act = None for i in range(5): ss = Stopwatch() #print(rews) #print("obs shape", obs.shape) #print("obs[:, 1] shape = ", obs[:, 1].shape) # the column of refbot obs try: sss = Stopwatch() actions = [ agent.step(obs[j][0]) for j, agent in enumerate(agents) ] print("running agents NN :", sss.delta) sss.reset() ref_act = refbot.step(obs[:, 1], batch_predict=True) #ref_act = [refbot.step(obs[i][1]) for i in range(len(agents))] print("running refbot NN :", sss.delta) #ref_act = [StarterBotPrime.step(obs[j][1]) for j in range(n_envs)] except TypeError as e: try: exc_info = sys.exc_info() finally: traceback.print_exception(*exc_info) del exc_info print("TypeError!!! ", e) break print( ">> manager >> step {}, taking actions: {} and refactions {}" .format(i, actions, ref_act)) ssss = Stopwatch() obs, rews, infos = env.step(actions, ref_act) # obs is n_envs x 1 print("Running env : ", ssss.delta) print('>> manager >> just took step {}. Took: {}'.format( i, ss.delta)) time.sleep(0.1) runner.run_battle(agents[0], refbot, env) except Exception as err: try: exc_info = sys.exc_info() finally: traceback.print_exception(*exc_info) del exc_info finally: print('>> manager >> closing env. Total runtime: ', sTime.delta) env.close() sys.exit(0) # gets all the variables of the model # all_variables = agents[0].model.get_weights() print('>> manager >> closing env. Total runtime: ', sTime.delta) env.close() sys.exit(0)
def train(env, n_envs, no_op_vec, resume_trianing): print(str('=' * 50) + '\n' + 'Initializing agents\n' + str('=' * 50)) ############################## ## Summary buckets #failed_episodes = 0 #early_episodes = 0 refbot_back_ind = 1 elite_overthrows = 0 elite = None starting_gen = 0 # default startin generation number. Is overwritten if resuming ## Setting up logs writer = summary.create_file_writer(util.get_logdir('train12A'), flush_millis=10000) writer.set_as_default() global_step = tf.train.get_or_create_global_step() ## TODO: change agent layers to use xavier initializer agents = [Scud(name=str(i), debug=scud_debug) for i in range(n_population)] total_steps = 0 elite_moving_average = metrics.MovingAverage( elite_score_moving_avg_periods) next_generation = [ Scud(name=str(i) + 'next', debug=scud_debug) for i in range(n_population) ] refbot_queue = [ Scud(name='refbot' + str(i), debug=scud_debug) for i in range(refbot_queue_length) ] for i, bot in enumerate(refbot_queue): bot.refbot_position = i refbot = refbot_queue[0] ## DOES NOT WORK WITH EAGER EXECUTION # with summary.always_record_summaries(): # summary.graph(agents[0].model.graph) total_s = Stopwatch() ######################################## ## Restoring from last training session if resume_trianing: # loading up config from last train finish print("Restoring progress config from last run...") config_path = os.path.join(util.get_savedir(), 'progress.json') conf = json.load(open(config_path, 'r')) starting_gen = conf['gen_at_end'] + 1 elite_overthrows = conf['elite_overthrows'] total_steps = conf['total_steps'] total_s.startime = conf['clock_start_time'] global_step.assign(starting_gen) # Loading truncs, elite and refbot print( str('=' * 50) + '\n' + '>> STORM >> Resuming training.\n' + str('=' * 50)) trunc_names = os.listdir(util.get_savedir('truncFinals')) trunc_names = sorted(trunc_names, reverse=True) for j in range(trunc_size): if j < len(trunc_names): agents[j + 1].load(util.get_savedir('truncFinals'), trunc_names[j]) else: print("Skipping loading trunc agent for j = ", j) refbot_names = os.listdir(util.get_savedir('refbots')) refbot_names = sorted(refbot_names, reverse=False) refbot_q_names = refbot_names[-refbot_queue_length:] # sec = 0 # for i in range(5, refbot_queue_length): # refbot_queue[i].load(util.get_savedir('refbots'), refbot_q_names[sec]) # refbot_queue[i].refbot_position = i # sec = sec + 1 for i in range(refbot_queue_length): refbot_queue[i].load(util.get_savedir('refbots'), refbot_q_names[i]) refbot_queue[i].refbot_position = i elite = agents[0] elite.load(util.get_savedir(), 'elite') print(">> STORM >> Successfully restored from last checkpoints") print( str('=' * 50) + '\n' + 'Beginning training (at gen ' + str(starting_gen) + ')\n' + str('=' * 50)) s = Stopwatch() #partition_stopwatch = Stopwatch() for g in range(starting_gen, starting_gen + n_generations): ##################### ## Hyperparameter annealing # gamma = gamma_func((g+1)/n_generations) ##################### ## GA Algorithm for i in range(n_population): if g == 0: break else: kappa = random.sample(agents[0:trunc_size], 1) mutate(kappa[0], next_generation[i], g) #partition_stopwatch.lap('mutation') # swap agents and the next gen's agents. 
i.e set next gen agents to be current agents to evaluate tmp = agents agents = next_generation next_generation = tmp # evaluate fitness on each agent in population try: agents, additional_steps, rollout_info = evaluate_fitness( env, agents, refbot, debug=False) except KeyboardInterrupt as e: print( "Received keyboard interrupt {}. Saving and then closing env.". format(e)) break total_steps += additional_steps # sort them based on final discounted reward agents = sorted(agents, key=lambda agent: agent.fitness_score, reverse=True) #partition_stopwatch.lap('fitness evaluation + sorting') ################################## ## Summary information with summary.always_record_summaries(): sc_vec = [a.fitness_score for a in agents] summary.scalar('rewards/mean', np.mean(sc_vec)) summary.scalar('rewards/max', agents[0].fitness_score) summary.scalar('rewards/min', agents[-1].fitness_score) summary.scalar('rewards/var', np.var(sc_vec)) summary.scalar('rewards/truc_mean', np.mean(sc_vec[:trunc_size])) summary.scalar('hyperparameters/gamma', gamma) summary.scalar('main_rollout/agentWins', rollout_info['agentWins']) summary.scalar('main_rollout/refbotWins', rollout_info['refbotWins']) summary.scalar('main_rollout/ties', rollout_info['ties']) summary.scalar('main_rollout/early_eps', rollout_info['early_eps']) summary.scalar('main_rollout/failed_eps', rollout_info['failed_eps']) if len(rollout_info['ep_lengths']) > 0: mean_ep_lengg = np.mean(rollout_info['ep_lengths']) summary.histogram('main_rollout/ep_lengths', rollout_info['ep_lengths']) summary.scalar('main_rollout/mean_ep_length', mean_ep_lengg) print("Mean ep length: ", mean_ep_lengg) if len(rollout_info['agent_actions']) > 0: summary.histogram('main_rollout/agent_a0', rollout_info['agent_actions']) summary.histogram('main_rollout/agent_a0_first15steps', rollout_info['agent_early_actions']) print("Main stats: agent wins - {} | refbot wins - {} | Early - {}". format(rollout_info['agentWins'], rollout_info['refbotWins'], rollout_info['early_eps'])) for a in agents[:5]: print(a.name, " with fitness score: ", a.fitness_score) ############################################ ## Evaluating elite candidates to find elite #partition_stopwatch.lap('summaries 1') # setup next generation parents / elite agents if g == 0: if resume_trianing == False: elite_candidates = set(agents[0:n_elite_in_royale]) else: elite_candidates = set(agents[0:n_elite_in_royale - 1]) | set([ elite, ]) else: elite_candidates = set(agents[0:n_elite_in_royale - 1]) | set([ elite, ]) # finding next elite by battling proposed elite candidates for some additional rounds #print("Evaluating elite agent...") inds = np.random.random_integers(0, refbot_queue_length - 1, 4) refbots_for_elite = [refbot_queue[lolno] for lolno in inds] elo_ags, additional_steps, rollout_info = evaluate_fitness( env, elite_candidates, refbots_for_elite, runs=elite_additional_episodes) total_steps += additional_steps elo_ags = sorted(elo_ags, key=lambda agent: agent.fitness_score, reverse=True) if elite != elo_ags[0]: elite_overthrows += 1 elite = elo_ags[0] #partition_stopwatch.lap('elite battle royale') try: agents.remove(elite) agents = [ elite, ] + agents except ValueError: agents = [ elite, ] + agents[:len(agents) - 1] print("Elite stats: agent wins - {} | refbot wins - {} | Early - {}". 
format(rollout_info['agentWins'], rollout_info['refbotWins'], rollout_info['early_eps'])) for i, a in enumerate(elo_ags): print('Elite stats: pos', i, '; name: ', a.name, " ; fitness score: ", a.fitness_score) ############################ ## Summary information 2 with summary.always_record_summaries(): elite_moving_average.push(elite.fitness_score) summary.scalar('rewards/elite_moving_average', elite_moving_average.value()) summary.scalar('rewards/elite_score', elite.fitness_score) summary.scalar('rewards/stable_mean', np.mean([a.fitness_score for a in elo_ags])) summary.scalar('time/wall_clock_time', total_s.deltaT()) summary.scalar('time/single_gen_time', s.deltaT()) summary.scalar('time/total_game_steps', total_steps) summary.scalar('time/elite_overthrows', elite_overthrows) summary.scalar('elite_rollout/agentWins', rollout_info['agentWins']) summary.scalar('elite_rollout/refbotWins', rollout_info['refbotWins']) summary.scalar('elite_rollout/ties', rollout_info['ties']) summary.scalar('elite_rollout/early_eps', rollout_info['early_eps']) summary.scalar('elite_rollout/failed_eps', rollout_info['failed_eps']) if len(rollout_info['ep_lengths']) > 0: mean_ep_lengE = np.mean(rollout_info['ep_lengths']) summary.histogram('elite_rollout/ep_lengths', rollout_info['ep_lengths']) summary.scalar('elite_rollout/mean_ep_length', mean_ep_lengE) print("Elite mean ep length: ", mean_ep_lengE) if len(rollout_info['agent_actions']) > 0: summary.histogram('elite_rollout/agent_a0', rollout_info['agent_actions']) summary.histogram('elite_rollout/agent_a0_first15steps', rollout_info['agent_early_actions']) summary.scalar('hyperparameters/refbot_back_ind', refbot_back_ind) ################################# ## Replacing reference bot if g % replace_refbot_every == 0: toback = refbot del refbot_queue[0] refbot_back_ind = np.random.random_integers( 0, refbot_queue_length - 1) print( str('=' * 50) + '\n' + '>> STORM >> Upgrading refbot (to pos ' + str(refbot_back_ind) + ') now.\n' + str('=' * 50)) #good_params = agents[trunc_size-1].get_flat_weights() good_params = agents[np.random.random_integers( 0, trunc_size - 1)].get_flat_weights() toback.set_flat_weights(good_params) refbot_queue.append(toback) #refbot = refbot_queue[0] ################ ## Sampling refbot uniformly from past <refbot_queue_length> generation's agents refbot = refbot_queue[refbot_back_ind] for meme_review, inner_refbot in enumerate(refbot_queue): inner_refbot.refbot_position = meme_review #for bot in refbot_queue: # print("Bot ", bot.name, ' now has refbot pos: ', bot.refbot_position) ################################# ## Saving agents periodically if g % save_elite_every == 0 and g != 0: elite.save(util.get_savedir('checkpoints'), 'gen' + str(g) + 'elite') if refbot_queue_length < 5: for refAgent in refbot_queue: refAgent.save( util.get_savedir('refbots'), 'gen' + str(g) + 'pos' + str(refAgent.refbot_position)) if trunc_size < 5: for i, truncAgent in enumerate(agents[:trunc_size]): truncAgent.save(util.get_savedir('truncs'), 'gen' + str(g) + 'agent' + str(i)) global_step.assign_add(1) print( str('=' * 50) + '\n' + 'Generation ' + str(g) + '. 
Took ' + s.delta + '(total: ' + total_s.delta + ')\n' + str('=' * 50)) s.reset() #partition_stopwatch.lap('summaries 2 and updates/saves') ############################### ## Shutdown behavior #print("PARTITION STOPWATCH RESULTS:") # last i checked runtime is *dominated* #partition_stopwatch.print_results() elite.save(util.get_savedir(), elite_savename) summary.flush() for i, ag in enumerate(agents[:trunc_size]): ag.save(util.get_savedir('truncFinals'), 'finalTrunc' + str(i)) print("End refbot queue: ", len(refbot_queue)) for identity, refAgent in enumerate(refbot_queue): refAgent.save(util.get_savedir('refbots'), 'finalRefbot{:03d}'.format(identity)) ########################## ## Saving progress.config conf = {} conf['gen_at_end'] = g conf['gamma_at_end'] = gamma conf['elite_overthrows'] = elite_overthrows conf['total_steps'] = total_steps conf['clock_start_time'] = total_s.startime path = os.path.join(util.get_savedir(), 'progress.json') with open(path, 'w') as config_file: config_file.write(json.dumps(conf)) print(">> STORM >> Saved progress.config to: ", path)
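# The GA's mutate() helper is called above but not shown in this section. A minimal
# sketch of what it is assumed to do, built on the get_flat_weights()/set_flat_weights()
# interface used elsewhere in this file; the noise scale and any generation-based
# annealing are assumptions, not the project's actual values:
import numpy as np


def mutate_sketch(parent, child, generation, sigma=0.005):
    # copy the parent's flat weight vector into the child with additive Gaussian noise
    flat = np.asarray(parent.get_flat_weights(), dtype=np.float32)
    noise = np.random.normal(0.0, sigma, size=flat.shape).astype(np.float32)
    child.set_flat_weights(flat + noise)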
def evaluate_fitness(env, agents, refbot, runs=1, debug=False):
    '''
    Function to run [agents] in the [env] for [runs] number of times each.
    i.e performs rollouts of each agent [runs] number of times.
    - agents : list of agents on which to evaluate rollouts
    - env : env to run agents through
    - refbot : agent which will be player B for all agents
    - runs : int number of times each agent should play a rollout
    '''
    if type(refbot) == np.ndarray:
        assert (runs * len(agents)) % len(refbot) == 0, \
            "Please don't be stupid. refbots={}".format(refbot)
    queue = list(agents)
    queue = runs * queue
    init_length = len(queue)
    n_envs = env.num_envs
    print(">> ROLLOUTS >> Running rollout wave with queue length ", init_length)
    pbar = metrics.ProgressBar(init_length)
    interior_steps = 0
    rollout_info = {
        'early_eps': 0,
        'failed_eps': 0,
        'agentWins': 0,
        'refbotWins': 0,
        'ties': 0,
        'ep_lengths': [],
        'agent_actions': [],
        'agent_early_actions': []
    }
    next_refbot = refbot
    while len(queue) > 0:
        pbar.show(init_length - len(queue))
        if len(queue) >= n_envs:
            cur_playing_agents = [queue.pop() for i in range(n_envs)]
        else:
            cur_playing_agents = [queue.pop() for i in range(len(queue))]
        step = 0
        dummy_actions = [(0, 0, 3,) for _ in range(n_envs - len(cur_playing_agents))]
        suc = env.reset()
        if all(suc) == False:
            print("something f****d out. Could not reset all envs.")
            return
        #obs = env.get_base_obs()
        obs = util.get_initial_obs(n_envs)
        if type(next_refbot) == np.ndarray or type(next_refbot) == list:
            next_refbot = refbot.pop()
        for a in cur_playing_agents:
            a.fitness_score = 0
            a.mask_output = False
        while step < max_episode_length:
            if debug:
                ss = Stopwatch()
            actions = [
                agent.step(obs[i][0])
                for i, agent in enumerate(cur_playing_agents)
            ]
            #ref_actions = [refbot.step(obs[i][1]) for i in range(len(obs))]
            ref_actions = next_refbot.step(obs[:, 1], batch_predict=True)
            if len(dummy_actions) > 0:
                actions.extend(dummy_actions)
            if len(actions) != len(ref_actions):
                print("LEN OF ACTIONS != LEN OF REF ACTIONS!!!!")
                raise ValueError
            if debug:
                print(">> storm >> taking actions: ", actions, ' and ref actions ', ref_actions)
            obs, rews, ep_infos = env.step(actions, p2_actions=ref_actions)
            interior_steps += n_envs
            ## TODO: loop through obs and check which one is a ControlObj, and stop
            ## processing the agents for the rest of that episode
            failure = False
            for i, a in enumerate(cur_playing_agents):
                if type(rews[i][0]) == util.ControlObject:
                    if rews[i][0].code == "EARLY":
                        a.mask_output = True
                        if step == max_episode_length - 1:
                            rollout_info['early_eps'] += 1
                    elif rews[i][0].code == "FAILURE":
                        # redo this whole batch
                        rollout_info['failed_eps'] += 1
                        failure = True
                        break
                else:
                    inner_rew = rews[i][0]
                    if 'valid' in ep_infos[i].keys():
                        if ep_infos[i]['valid'] == False:
                            if scoring_method == 'binary':
                                inner_rew += invalid_act_penalty_binary
                            else:
                                inner_rew += invalid_act_penalty_dense
                    a.fitness_score = inner_rew + gamma * a.fitness_score
                    _, _, building_act = actions[i]
                    rollout_info['agent_actions'].append(building_act)
                    if step < 15:
                        rollout_info['agent_early_actions'].append(building_act)
                    if 'winner' in ep_infos[i].keys():
                        if ep_infos[i]['winner'] == 'A':
                            rollout_info['agentWins'] += 1
                        elif ep_infos[i]['winner'] == 'B':
                            rollout_info['refbotWins'] += 1
                        else:
                            rollout_info['ties'] += 1
                        rollout_info['ep_lengths'].append(ep_infos[i]['n_steps'])
            if failure:
                curQlen = len(queue)
                queue = cur_playing_agents + queue
                print("Failure detected. Redoing last batch... (len Q before = ",
                      curQlen, ' ; after = ', len(queue), ')')
                break
            if debug:
                print("obs shape = ", obs.shape)
                print("rews shape = ", rews.shape)
                print('>> storm >> just took step {}. Took: {}'.format(step, ss.delta))
            step = step + 1
        for a in cur_playing_agents:
            a.fitness_averaging_list.append(a.fitness_score)
    for a in agents:
        a.squash_fitness_scores()
    pbar.close()
    return agents, interior_steps, rollout_info
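# evaluate_fitness() relies on a small fitness-averaging interface on each agent
# (fitness_averaging_list and squash_fitness_scores()). That code lives in the agent
# class and is not shown here; a sketch of the assumed behaviour -- average the scores
# collected over an agent's rollouts into a single fitness_score:
import numpy as np


class FitnessAveragingSketch(object):
    def __init__(self):
        self.fitness_averaging_list = []
        self.fitness_score = 0.0

    def squash_fitness_scores(self):
        if self.fitness_averaging_list:
            self.fitness_score = float(np.mean(self.fitness_averaging_list))
        self.fitness_averaging_list = []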
class Env(): def __init__(self, name, debug): self.name = name self.debug = debug self.setup_directory() self.score = 0 self.score_delta = 0 # setting up jar runner self.needs_reset = True self.pid = None self.done = False self.prev_obs = get_initial_obs(1)[0][0] self.clock = Stopwatch() self.step_num = 0 def setup_directory(self): # creates the dirs responsible for this env, # and moves a copy of the runner and config to that location print("Setting up file directory for " + self.name + " with pid " + str(os.getpid())) basedir = os.path.dirname(os.path.abspath(__file__)) # now in scudstorm dir self.run_path = os.path.join(basedir, 'runs', self.name) if os.path.isdir(self.run_path): shutil.rmtree(self.run_path) self.wrapper_path = os.path.join(self.run_path, 'jar_wrapper.py') os.makedirs(self.run_path, exist_ok=True) jarpath = os.path.join(basedir, jar_name) copy2(jarpath, self.run_path) config_path = os.path.join(basedir, config_name) copy2(config_path, self.run_path) wrapper_path = os.path.join(basedir, 'common', 'jar_wrapper.py') copy2(wrapper_path, self.run_path) botdir = os.path.join(basedir, bot_file_name) copy2(botdir, self.run_path) copy2(os.path.join(basedir, game_config_name), self.run_path) # Copying over reference bot self.refbot_path = os.path.join(self.run_path, 'refbot') if os.path.isdir(self.refbot_path): shutil.rmtree(self.refbot_path) refbotdir = os.path.join(basedir, 'refbot') shutil.copytree(refbotdir, self.refbot_path) self.in_file = os.path.join(self.run_path, wrapper_out_filename) self.state_file = os.path.join(self.run_path, state_name) self.bot_file = os.path.join(self.run_path, bot_file_name) self.proc = None self.refenv = RefEnv(self, debug=self.debug) with open(self.in_file, 'w') as f: f.write('0') # run path should now have the jar, config and jar wrapper files def step(self, action, ref_act): ############################# ## Maintenence on the process if self.needs_reset: self.reset() ####################### ## Debug messages if self.debug: with open(os.path.join(self.run_path, 'mylog.txt'), 'a') as f: f.write(str(time.time()) + "\t-->Wanting to do op:!!!\t" + str(action) + '\t' + str(ref_act) + '\n') with open(os.path.join(self.refbot_path, 'mylog.txt'), 'a') as f: f.write(str(time.time()) + "\t-->Wanting to do op:!!!\t" + str(ref_act) + '\n') ep_info = {} ####################### ## Writing actions x2, y2, build2 = ref_act write_prep_action(x2, y2, build2, path=self.refbot_path, debug=self.debug) x, y, build = action write_prep_action(x, y, build, path=self.run_path, debug=self.debug) ####################### ## Signalling to jar wrappers to begin their running step with open(self.in_file, 'w') as f: # we want start of a new step if self.debug: print(">> pyenv {} >> writing 2 to file {}".format(self.name, self.in_file)) f.write('2') with open(self.refenv.in_file, 'w') as f: # we want start of a new step if self.debug: print(">> pyenv {} >> writing 2 to file {}".format(self.refenv.name, self.refenv.in_file)) f.write('2') ####################### ## Checking if episode ended early if self.proc.poll() != None and self.done == True: # env ended last step, so reset: if self.debug: print(">> PYENV ", self.name ," >> Ended early") cntrl_obj = ControlObject('EARLY') tp = np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1) return tp, np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1), ep_info ####################### ## Taking step # Vars for Env obs = None should_load_obs = False reward = None # Vars for ref env 
ref_obs = None should_load_obs2 = False # Waiting for responses from the jar wrappers stopw = Stopwatch() failure = False while True: if should_load_obs == False: with open(self.in_file, 'r') as ff: k = ff.read() try: k = int(k) except ValueError: continue if k == 1: #print("just wrote 0 to the ", self.out_file) # a new turn has just been processed should_load_obs = True if should_load_obs2 == False: with open(self.refenv.in_file, 'r') as ff: k2 = ff.read() try: k2 = int(k2) except ValueError: continue if k2 == 1: #print("just wrote 0 to the ", self.out_file) # a new turn has just been processed should_load_obs2 = True if should_load_obs == True and should_load_obs2 == True: break if self.proc.poll() != None and self.done == False: #ep ended early. if self.debug: print("PYENV: >> GAME ENDING EARLY FOR THE FIRST TIME") self.done = True valid, reason = is_valid_action(action, self.prev_obs) obs = self.load_state() self.prev_obs = obs ep_info['n_steps'] = self.step_num if valid == True: ep_info['valid'] = True else: ep_info['valid'] = False ref_obs = self.refenv.load_state() if obs['players'][0]['playerType'] == 'A': a_hp = obs['players'][0]['health'] b_hp = obs['players'][1]['health'] else: a_hp = obs['players'][1]['health'] b_hp = obs['players'][0]['health'] k = np.asarray([obs,]) u = np.asarray([ref_obs,]) return_obs = np.concatenate([k, u], axis=-1) if reward_mode == 'dense': win_reward = dense_win_reward lose_reward = -1 * dense_win_reward else: win_reward = binary_win_reward lose_reward = -1 * binary_win_reward if a_hp > b_hp: # player a wins ep_info['winner'] = 'A' return return_obs, np.concatenate([np.asarray([win_reward,]), np.asarray([lose_reward,])], axis=-1), ep_info elif a_hp < b_hp: ep_info['winner'] = 'B' return return_obs, np.concatenate([np.asarray([lose_reward,]), np.asarray([win_reward,])], axis=-1), ep_info else: ep_info['winner'] = 'TIE' return return_obs, np.concatenate([np.asarray([0.0,]), np.asarray([0.0,])], axis=-1), ep_info if stopw.deltaT() > 3: # we have waited more than 3s, game clearly ended self.needs_reset = True failure = True print('pyenv: env ' + str(self.name) + ' with pid ' + str(self.pid) + ' encountered error. (', should_load_obs, ',',should_load_obs2, ')' , time.time()) break time.sleep(0.01) # TODO: possibly pre-parse obs here and derive a reward from it? ######################### ## Loading the obs if their jar's ended properly #ref_obs, _ = self.refenv.step(ref_act) if should_load_obs: valid, reason = is_valid_action(action, self.prev_obs) obs = self.load_state() self.prev_obs = obs if valid == True: ep_info['valid'] = True else: ep_info['valid'] = False if should_load_obs2: ref_obs = self.refenv.load_state() if obs is None and self.debug == True: print(">> PY_ENV >> MAIN OBS IS NONE (", self.name, ")") if ref_obs is None: print(">> PY_ENV >> REF OBS IS NONE. 
(", self.name, ")") if failure == True: cntrl_obj = ControlObject('FAILURE') tp = np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1) return tp, np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1), ep_info # print('-----A------------->', obs['players'][0]['health']) # print('-----B------------->', obs['players'][1]['health']) self.step_num += 1 ######################## ## Forming rewards and packaging the obs into a good numpy form if obs is not None: # Infer reward: #reward = float(obs['players'][0]['score']) - float(obs['players'][1]['score']) curS = float(obs['players'][0]['score']) * general_reward_scaling_factor self.score_delta = curS - self.score reward = self.score_delta + per_step_reward_penalty self.score = curS if ref_obs is not None: curS2 = float(ref_obs['players'][0]['score']) * general_reward_scaling_factor self.refenv.score_delta = curS2 - self.refenv.score ref_reward = self.refenv.score_delta + per_step_reward_penalty self.refenv.score = curS2 k = np.asarray([obs,]) u = np.asarray([ref_obs,]) return_obs = np.concatenate([k, u], axis=-1) if reward_mode == 'dense': return return_obs, np.concatenate([np.asarray([reward,]), np.asarray([ref_reward,])], axis=-1), ep_info elif reward_mode == 'binary': return return_obs, np.concatenate([np.asarray([binary_step_penalty,]), np.asarray([binary_step_penalty,])], axis=-1), ep_info def load_state(self): ''' Gets the current Game State json file. ''' while os.path.isfile(self.state_file) == False: if self.debug: print(">> PYENV >> waiting for state file ", self.state_file, ' to appear') time.sleep(0.01) flag = False while flag == False: try: k = json.load(open(self.state_file,'r')) flag = True break except json.decoder.JSONDecodeError as e: k = None if self.debug: print(">> PYENV >> Failed to decode json state! 
Got error ", e) time.sleep(0.01) return k def get_obs(self): this_obs = self.load_state() refbot_obs = self.refenv.load_state() x = np.asarray([this_obs,]) y = np.asarray([refbot_obs,]) return np.concatenate([x, y], axis=-1) def reset(self): self.step_num = 0 if self.debug: with open(os.path.join(self.run_path, 'mylog.txt'), 'a') as f: f.write(str(time.time()) + "\t-->RESETTING!!!\n") with open(os.path.join(self.refbot_path, 'mylog.txt'), 'a') as f: f.write(str(time.time()) + "\t-->RESETTING!!!\n") if self.proc is not None: self.proc.terminate() self.proc.wait() self.needs_reset = False self.done = False time.sleep(0.01) # trying to kill jar wrapper of this env pid_file = os.path.join(self.run_path, 'wrapper_pid.txt') if os.path.isfile(pid_file): flag = False while flag == False: with open(pid_file, 'r') as f: try: wrapper_pid = int(f.read()) except ValueError: continue if wrapper_pid == 0: flag = True return None else: flag = True try: os.kill(wrapper_pid, signal.SIGTERM) except (PermissionError, ProcessLookupError) as e: if self.debug: print(">> PYENV ", self.name, " >> Attempted to close wrapper pid ", wrapper_pid, " but got ERROR ", e) break else: if self.debug: print(">> PYENV >> Attempted to close wrapper pid but the wrapper pid file was not found ") ## Trying to prevent reset bugs from propping up # if os.path.isdir(self.refbot_path): # shutil.rmtree(self.refbot_path) # refbotdir = os.path.join(basedir, 'refbot') # shutil.copytree(refbotdir, self.refbot_path) ## Trying to kill jar wrapper of ref env refpid_file = os.path.join(self.refbot_path, 'wrapper_pid.txt') if os.path.isfile(refpid_file): flag = False while flag == False: with open(refpid_file, 'r') as f: try: wrapper_pid2 = int(f.read()) except ValueError: continue if wrapper_pid2 == 0: flag = True return None else: flag = True try: os.kill(wrapper_pid2, signal.SIGTERM) except (PermissionError, ProcessLookupError) as e: if self.debug: print(">> PYENV ", self.name, " >> Attempted to close refbot wrapper pid ", wrapper_pid2, " but got ERROR ", e) else: if self.debug: print(">> PYENV >> Attempted to close refbot wrapper pid but the wrapper pid file was not found ") time.sleep(0.01) ####################### ## Flushing matchlogs folder if env alive for over 1h if self.clock.deltaT() >= 1800: print(">> PYENV {} >> Env alive for over half an hour, flushing (deleting) matchlogs folder".format(self.name)) self.cleanup() self.clock.reset() print("Cleand.") command = 'java -jar ' + os.path.join(self.run_path, jar_name) if sys.platform == "win32": she = False else: she = True if self.debug: self.proc = subprocess.Popen(command, shell=she , stdout=subprocess.PIPE, cwd=self.run_path) print("Opened process: ", str(command), " with pid ", self.proc.pid) else: self.proc = subprocess.Popen(command, shell=she, stdout=subprocess.DEVNULL, cwd=self.run_path) self.pid = self.proc.pid time.sleep(0.01) return True def close(self): if self.debug: print("Closing env ", self.name) # clean up after itself if self.pid is not None: self.needs_reset = True self.proc.terminate() self.proc.wait() else: return None time.sleep(0.1) pid_file = os.path.join(self.run_path, 'wrapper_pid.txt') if os.path.isfile(pid_file): flag = False while flag == False: with open(pid_file, 'r') as f: try: wrapper_pid = int(f.read()) except ValueError: continue if wrapper_pid == 0: flag = True return None else: flag = True try: os.kill(wrapper_pid, signal.SIGTERM) except (PermissionError, ProcessLookupError) as e: if self.debug: print(">> PYENV ", self.name, " >> Attempted to close 
wrapper pid ", wrapper_pid, " but got ERROR ", e) break else: print(">> PYENV >> Attempted to close wrapper pid but the wrapper pid file was not found ") time.sleep(0.1) refpid_file = os.path.join(self.refbot_path, 'wrapper_pid.txt') if os.path.isfile(refpid_file): flag = False while flag == False: with open(refpid_file, 'r') as f: try: wrapper_pid2 = int(f.read()) except ValueError: continue if wrapper_pid2 == 0: flag = True return None else: flag = True try: os.kill(wrapper_pid2, signal.SIGTERM) except (PermissionError, ProcessLookupError) as e: if self.debug: print(">> PYENV ", self.name, " >> Attempted to close refbot wrapper pid ", wrapper_pid2, " but got ERROR ", e) else: if self.debug: print(">> PYENV >> Attempted to close refbot wrapper pid but the wrapper pid file was not found ") time.sleep(0.1) self.pid = None return True def cleanup(self): log_path = os.path.join(self.run_path, 'matchlogs') if self.debug: print("Removing folder: ", log_path) try: if keep_log_folder_override == False: shutil.rmtree(log_path) else: print(">> PYENV >> OVERRIDE - Keeping log files.") time.sleep(0.1) except Exception: print(">> PYENV >> Exception occured while removing matchlogs folder")
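# The Env above drives each match through a small file-based handshake: it writes '2'
# into a wrapper's signal file to request a turn, then polls until the wrapper writes
# '1' back, at which point the new state file can be loaded. jar_wrapper.py itself is
# not part of this section; a minimal sketch of the loop it is assumed to run (the
# signal-file name and turn function are placeholders):
import os
import time


def jar_wrapper_sketch(run_path, signal_filename, run_one_turn):
    signal_file = os.path.join(run_path, signal_filename)
    # publish our pid so the Env can terminate us on reset()/close()
    with open(os.path.join(run_path, 'wrapper_pid.txt'), 'w') as f:
        f.write(str(os.getpid()))
    while True:
        try:
            with open(signal_file, 'r') as f:
                flag = f.read().strip()
        except IOError:
            flag = ''
        if flag == '2':              # the Env requested the next step
            run_one_turn()           # advance the game and write a fresh state file
            with open(signal_file, 'w') as f:
                f.write('1')         # tell the Env the new turn is ready
        time.sleep(0.01)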
def fight(env, agent1, agent2, n_fights, max_steps, debug=False): ''' Function to run [agents] in the [env] for [runs] number of times each. i.e performs rollouts of each agent [runs] number of times. - agents : list of agents on which to evaluate rollouts - env : env to run agents through - refbot : agent which will be player B for all agents - runs : int number of times each agent should play a rollout ''' queue = list([ agent1, ]) queue = n_fights * queue init_length = len(queue) n_envs = env.num_envs print(">> ROLLOUTS >> Running rollout wave with queue length ", init_length) pbar = metrics.ProgressBar(init_length) interior_steps = 0 early_eps = 0 failed_eps = 0 agent1Wins = 0 agent2Wins = 0 ties = 0 while len(queue) > 0: # KEEP THIS THERE OTHERWISE SHIT BREAKS pbar.show(init_length - len(queue)) if len(queue) >= n_envs: cur_playing_agents = [queue.pop() for i in range(n_envs)] else: cur_playing_agents = [queue.pop() for i in range(len(queue))] step = 0 dummy_actions = [( 0, 0, 3, ) for _ in range(n_envs - len(cur_playing_agents))] suc = env.reset() if all(suc) == False: print("something f****d out. Could not reset all envs.") return #obs = env.get_base_obs() obs = util.get_initial_obs(n_envs) for a in cur_playing_agents: a.fitness_score = 0 a.mask_output = False agent2.mask_output = False ## TODO: Modify this for loop to be able to end early for games which finish early while step < max_steps: if debug: ss = Stopwatch() actions = [ agent.step(obs[i][0]) for i, agent in enumerate(cur_playing_agents) ] ref_actions = [agent2.step(obs[i][1]) for i in range(len(obs))] if len(dummy_actions) > 0: actions.extend(dummy_actions) if len(actions) != len(ref_actions): print("LEN OF ACTIONS != LEN OF REF ACTIONS!!!!") raise ValueError if debug: print(">> storm >> taking actions: ", actions, ' and ref actions ', ref_actions) obs, rews, ep_infos = env.step(actions, p2_actions=ref_actions) interior_steps += n_envs ## TODO: loop through obs and check which one is a ControlObj, and stop processing the agents for the rest of that episode failure = False for i, a in enumerate(cur_playing_agents): if type(rews[i][0]) == util.ControlObject: if rews[i][0].code == "EARLY": a.mask_output = True if step == max_steps - 1: early_eps += 1 #a.fitness_score = a.fitness_score + 1 elif rews[i][0].code == "FAILURE": # redo this whole f*****g batch failed_eps += 1 failure = True break else: #print(rews) # if rews[i][0] >= 0.95: # agent1Wins += 1 # elif rews[i][1] >= 0.95: # agent2Wins += 1 pass #a.fitness_score = rews[i][0] + gamma*a.fitness_score if 'winner' in ep_infos[i].keys(): if ep_infos[i]['winner'] == 'A': agent1Wins += 1 elif ep_infos[i]['winner'] == 'B': agent2Wins += 1 elif ep_infos[i]['winner'] == 'TIE': ties += 1 if failure: curQlen = len(queue) queue = cur_playing_agents + queue print( "Failure detected. Redoing last batch... (len Q before = ", curQlen, ' ; after = ', len(queue)) break if debug: print("obs shape = ", obs.shape) print("rews shape = ", rews.shape) print('>> storm >> just took step {}. Took: {}'.format( step, ss.delta)) step = step + 1 pbar.close() return agent1Wins, agent2Wins, early_eps, failed_eps, ties
Net = tf.contrib.eager.Network
'''
Internal agent config
'''
debug = True
n_base_actions = 4  # number of base actions -- 0=NO OP, 1=DEFENSE, 2=OFFENSE, 3=ENERGY...
debug_verbose = False
endpoints = {}
device = 'cpu'

tf.enable_eager_execution()

# let an example map size be 20x40, so each player's building area is 20x20
if debug_verbose and debug:
    log("Testing tensorflow")
    s = Stopwatch()
    print("TensorFlow version: {}".format(tf.VERSION))
    print("Eager execution: {}".format(tf.executing_eagerly()))
    log("Finished, took: " + s.delta)


class Scud(object):
    def add_base(self):
        if self.debug:
            log("Adding base")
            s = Stopwatch()
        with tf.name_scope("adding_base") as scope:
            net = self.input
            net = Layers.Conv2D(32, [3, 3], strides=1, padding='SAME',
                                activation=tf.nn.relu, name="baseConv1")(net)
            net = Layers.Conv2D(32, [3, 3], strides=1, padding='SAME',
                                activation=tf.nn.relu, name="baseConv2")(net)
            net = Layers.Conv2D(64, [3, 3], strides=1, padding='SAME',
                                activation=tf.nn.relu, name="baseConv3")(net)
            # Note: the two parallel branches below are built but never merged back into
            # `net` as written, so they do not contribute to the output of this method.
            net_a = Layers.Conv2D(64, [1, 1], strides=1, padding='SAME',
                                  activation=tf.nn.relu, name="baseConv4a")(net)
            net_a = Layers.Conv2D(96, [3, 3], strides=1, padding='SAME',
                                  activation=tf.nn.relu, name="baseConv5a")(net_a)
            net_b = Layers.Conv2D(64, [1, 1], strides=1, padding='SAME',
                                  activation=tf.nn.relu, name="baseConv4b")(net)
            net_b = Layers.Conv2D(64, [8, 1], strides=1, padding='SAME',
                                  activation=tf.nn.relu, name="baseConv5b")(net_b)
            net_b = Layers.Conv2D(64, [1, 8], strides=1, padding='SAME',
                                  activation=tf.nn.relu, name="baseConv6b")(net_b)
            net_b = Layers.Conv2D(96, [3, 3], strides=1, padding='SAME',
                                  activation=tf.nn.relu, name="baseConv7b")(net_b)
            net = custom_layers.add_inception_resnet_A(net, 'A1')
            net = custom_layers.add_inception_resnet_A(net, 'A2')
        if self.debug:
            log("Finished adding base. Took: " + s.delta)
        return net
    def __init__(self, obs, name, debug=False):
        '''
        Initialize Bot.
        Load all game state information.
        '''
        self.debug = debug
        try:
            self.game_state = obs[0]
        except IOError:
            print("Cannot load Game State")
        self.full_map = self.game_state['gameMap']
        self.rows = self.game_state['gameDetails']['mapHeight']
        self.columns = self.game_state['gameDetails']['mapWidth']
        self.command = ''
        self.player_buildings = self.getPlayerBuildings()
        self.opponent_buildings = self.getOpponentBuildings()
        self.projectiles = self.getProjectiles()
        self.player_info = self.getPlayerInfo('A')
        self.opponent_info = self.getPlayerInfo('B')
        self.round = self.game_state['gameDetails']['round']
        self.prices = {
            "ATTACK": self.game_state['gameDetails']['buildingPrices']['ATTACK'],
            "DEFENSE": self.game_state['gameDetails']['buildingPrices']['DEFENSE'],
            "ENERGY": self.game_state['gameDetails']['buildingPrices']['ENERGY']
        }
        if self.debug and debug_verbose:
            log("rows: " + str(self.rows))
            log("columns: " + str(self.columns))
            log("player_buildings: " + str(self.player_buildings))
            log("opp_buildings: " + str(self.opponent_buildings))
            log("projectiles: " + str(self.projectiles))
            log("player_info: " + str(self.player_info))
            log("opp_info: " + str(self.opponent_info))
            log("Round: " + str(self.round))
            log("Prices: " + str(self.prices))

        # getting inputs
        with tf.name_scope("shaping_inputs") as scope:
            if self.debug:
                log("Shaping inputs...")
                s = Stopwatch()

            pb = tf.one_hot(indices=self.player_buildings, depth=4, axis=-1,
                            name="player_buildings")  # 20x20x4
            ob = tf.one_hot(indices=self.opponent_buildings, depth=4, axis=-1,
                            name="opp_buildings")  # 20x20x4
            proj = tf.one_hot(indices=self.projectiles, depth=3, axis=-1,
                              name='projectiles')  # 20x40x3

            k = proj.get_shape().as_list()
            # integer division: the projectile map spans both halves of the board
            proj = tf.reshape(proj, [k[0], k[1] // 2, 6])  # 20x20x6. Only works for single missiles

            self.non_spatial = list(self.player_info.values())[1:] + list(
                self.opponent_info.values())[1:] + list(self.prices.values())  # 11x1
            self.non_spatial = tf.cast(self.non_spatial, dtype=tf.float32)

            # broadcasting the non-spatial features to the channel dimension
            broadcast_stats = tf.tile(
                tf.expand_dims(tf.expand_dims(self.non_spatial, axis=0), axis=0),
                [k[0], k[1] // 2, 1])  # now 20x20x11

            # adding all the inputs together via the channel dimension
            self.spatial = tf.concat([pb, ob, proj, broadcast_stats], axis=-1)  # 20x20x(14 + 11)
            self.spatial = tf.expand_dims(self.spatial, axis=0)

            if self.debug:
                log("Finished shaping inputs. Took " + s.delta)
        return None
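# custom_layers.add_inception_resnet_A/B are used throughout the model-building code
# above but are defined in a separate module. They are assumed to follow the standard
# Inception-ResNet residual pattern (parallel conv branches, concatenated, projected
# back to the input depth, then added to the input). A simplified sketch, not the
# project's actual block; it assumes the same `Layers = tf.keras.layers` alias used above:
def add_inception_resnet_B_sketch(net, name):
    depth = net.get_shape().as_list()[-1]
    b0 = Layers.Conv2D(32, [1, 1], padding='SAME', activation=tf.nn.relu,
                       name=name + '_b0')(net)
    b1 = Layers.Conv2D(32, [1, 1], padding='SAME', activation=tf.nn.relu,
                       name=name + '_b1a')(net)
    b1 = Layers.Conv2D(32, [1, 7], padding='SAME', activation=tf.nn.relu,
                       name=name + '_b1b')(b1)
    b1 = Layers.Conv2D(32, [7, 1], padding='SAME', activation=tf.nn.relu,
                       name=name + '_b1c')(b1)
    mixed = Layers.concatenate([b0, b1], axis=-1)
    up = Layers.Conv2D(depth, [1, 1], padding='SAME', name=name + '_proj')(mixed)
    return Layers.add([net, up])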
def parallel_fight(env, matchups, max_steps, debug=False):
    a1s = []
    a2s = []
    for a, b in matchups:
        a1s.append(a)
        a2s.append(b)
    #a1s, a2s = [(e, f,) for e, f in zip(*matchups)]
    n_envs = env.num_envs
    assert len(matchups) == n_envs, "agent lengths must be same as env"
    print(">> PARALLEL FIGHT >> Running rollouts with {} games ".format(n_envs))
    pbar = metrics.ProgressBar(max_steps)
    early_eps = 0
    failed_eps = 0
    games = []
    ties = 0
    step = 0
    suc = env.reset()
    if all(suc) == False:
        print("something f****d out. Could not reset all envs.")
        return
    #obs = env.get_base_obs()
    obs = util.get_initial_obs(n_envs)
    for aa, bb in matchups:
        aa.mask_output = False
        bb.mask_output = False
    while step < max_steps:
        pbar.show(step)
        if debug:
            ss = Stopwatch()
        actions = [agent.step(obs[i][0]) for i, agent in enumerate(a1s)]
        ref_actions = [agen2.step(obs[i][1]) for i, agen2 in enumerate(a2s)]
        if len(actions) != len(ref_actions):
            print("LEN OF ACTIONS != LEN OF REF ACTIONS!!!!")
            raise ValueError
        if debug:
            print(">> storm >> taking actions: ", actions, ' and ref actions ', ref_actions)
        obs, rews, ep_infos = env.step(actions, p2_actions=ref_actions)
        failure = False
        for i in range(n_envs):
            if type(rews[i][0]) == util.ControlObject:
                if rews[i][0].code == "EARLY":
                    a1s[i].mask_output = True
                    a2s[i].mask_output = True
                elif rews[i][0].code == "FAILURE":
                    # redo this whole batch
                    failed_eps += 1
                    failure = True
                    break
            if 'winner' in ep_infos[i].keys():
                early_eps += 1
                if ep_infos[i]['winner'] == 'A':
                    #matchup_dict[a.name] = 'A'
                    games.append(Game(a1s[i].name, a2s[i].name, winner=a1s[i].name))
                elif ep_infos[i]['winner'] == 'B':
                    #matchup_dict[a.name] = 'B'
                    games.append(Game(a1s[i].name, a2s[i].name, winner=a2s[i].name))
                elif ep_infos[i]['winner'] == 'TIE':
                    #matchup_dict[a.name] = 'TIE'
                    games.append(Game(a1s[i].name, a2s[i].name, winner='TIE'))
                    ties += 1
        if failure:
            print("Failure detected. Skipping batch")
            break
        if debug:
            print("obs shape = ", obs.shape)
            print("rews shape = ", rews.shape)
            print('>> storm >> just took step {}. Took: {}'.format(step, ss.delta))
        step = step + 1
    pbar.close()
    return games, early_eps, failed_eps, ties
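# Game is used above to record a single match result but is not defined in this section.
# A plausible minimal definition (an assumption) that supports the positional/keyword
# construction used above:
import collections

GameSketch = collections.namedtuple('GameSketch', ['player_a', 'player_b', 'winner'])
# e.g. GameSketch('agent0', 'refbot', winner='agent0')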
def parse_obs(game_state):
    full_map = game_state['gameMap']
    rows = game_state['gameDetails']['mapHeight']
    columns = game_state['gameDetails']['mapWidth']
    player_buildings = getPlayerBuildings(full_map, rows, columns)
    opponent_buildings = getOpponentBuildings(full_map, rows, columns)
    projectiles = getProjectiles(full_map, rows, columns)
    player_info = getPlayerInfo('A', game_state)
    opponent_info = getPlayerInfo('B', game_state)
    round_num = game_state['gameDetails']['round']

    # works for jar v1.1.2
    prices = {
        "ATTACK": game_state['gameDetails']['buildingsStats']['ATTACK']['price'],
        "DEFENSE": game_state['gameDetails']['buildingsStats']['DEFENSE']['price'],
        "ENERGY": game_state['gameDetails']['buildingsStats']['ENERGY']['price'],
        "TESLA": game_state['gameDetails']['buildingsStats']['TESLA']['price'],
    }

    with tf.name_scope("shaping_inputs") as scope:
        if debug:
            print("Shaping inputs...")
            s = Stopwatch()

        pb = tf.one_hot(indices=player_buildings, depth=5, axis=-1,
                        name="player_buildings")  # 20x20x5
        ob = tf.one_hot(indices=opponent_buildings, depth=5, axis=-1,
                        name="opp_buildings")  # 20x20x5
        proj = tf.one_hot(indices=projectiles, depth=3, axis=-1,
                          name='projectiles')  # 20x40x3

        k = proj.get_shape().as_list()
        proj = tf.reshape(proj, [int(k[0]), int(k[1] / 2), 6])  # 20x20x6. Only works for single missiles

        non_spatial = list(player_info.values())[1:] + list(
            opponent_info.values())[1:] + list(prices.values())  # 12x1
        non_spatial = tf.cast(non_spatial, dtype=tf.float32)

        # broadcasting the non-spatial features to the channel dimension
        broadcast_stats = tf.tile(
            tf.expand_dims(tf.expand_dims(non_spatial, axis=0), axis=0),
            [int(k[0]), int(k[1] / 2), 1])  # now 20x20x12

        # adding all the inputs together via the channel dimension
        spatial = tf.concat([pb, ob, proj, broadcast_stats], axis=-1)  # 20x20x(16 + 12)

        if debug:
            print("Finished shaping inputs. Took " + s.delta +
                  "\nShape of inputs:" + str(spatial.shape))

    return spatial, rows, columns
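# Hypothetical usage of parse_obs (the file name is illustrative): load one game-state
# JSON written by the runner and shape it for the network. For a 20x40 map this yields
# a 20x20 grid with 28 channels: 5 own-building + 5 opponent-building + 6 projectile
# planes plus 12 broadcast stats.
import json

with open('state.json', 'r') as f:          # assumed path to a saved game state
    example_state = json.load(f)

example_spatial, example_rows, example_columns = parse_obs(example_state)
print(example_spatial.shape, example_rows, example_columns)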