def get_spatial(self, net):
    '''
    Gets the spatial action of the network.

    Applies a 3x3 ReLU convolution ("finalConv") and a 1x1 convolution
    ("conv1x1") to ``self.spatial``, flattens the single-channel result into
    logits, samples one flat index from a categorical distribution over them
    and unravels that index onto a ``[self.rows, self.columns // 2]`` grid.

    Args:
        net: Passed by the caller but not read here.
            NOTE(review): the first convolution consumes ``self.spatial``
            rather than ``net`` -- confirm whether that is intentional.

    Returns:
        The result of ``tf.unravel_index`` for the sampled flat index.
    '''
    if self.debug:
        log("getting spatial action")
        s = Stopwatch()

    net = tf.layers.conv2d(self.spatial,
                           32, [3, 3],
                           strides=1,
                           padding='SAME',
                           activation=tf.nn.relu,
                           name="finalConv")
    net = tf.layers.conv2d(net,
                           1, [1, 1],
                           strides=1,
                           padding='SAME',
                           name="conv1x1")

    flat = tf.layers.flatten(net)
    dist = tf.distributions.Categorical(logits=flat)
    sample = dist.sample()

    # Floor division keeps the grid dimensions integral; true division would
    # hand tf.unravel_index a float dimension under Python 3.
    coords = tf.unravel_index(sample, [self.rows, self.columns // 2])

    if self.debug:
        log("Finished spatial action inference. Took: " + s.delta)
    return coords
def __init__(self, name, debug=False):
    '''
    Build agent graph.

    Sets the bookkeeping attributes, then wires the Keras model: an input
    layer, the shared base, the non-spatial head and the spatial head (which
    is conditioned on the non-spatial output).

    Args:
        name: Identifier of this agent; also used to name the TF scope.
        debug: When true, log progress and print the parameter count.
    '''
    self.name, self.debug = name, debug

    # Bookkeeping state, independent of the network itself.
    self.fitness_score = 0
    self.fitness_averaging_list = []
    self.refbot_position = -1
    self.mask_output = False

    if self.debug:
        log("Running conv2d on " + device)

    with tf.name_scope(str(self.name) + 'Model'):
        input_shape = (int(constants.map_width / 2),
                       constants.map_height,
                       input_channels)
        self.input = Layers.Input(shape=input_shape)

        self.base = self.add_base()
        self.get_non_spatial = self.add_non_spatial(self.base)
        self.get_spatial = self.add_spatial(self.base, self.get_non_spatial)

        heads = [self.get_non_spatial, self.get_spatial]
        self.model = tf.keras.models.Model(inputs=self.input, outputs=heads)

        # NOTE: the model is left uncompiled; per the original author,
        # compile(optimizer='rmsprop') fails with 'only tf native optimizers
        # are supported in eager mode'.
        if self.debug:
            # currently 561 711, max 4 000 000 in paper
            print(">> SCUD2 >> Total number of parameters: ",
                  self.model.count_params())

    self.tau_lineage = []
def add_non_spatial(self, net):
    '''
    Infers the non-spatial action of the network.

    Stacks two inception-resnet-B blocks, a 1x1 convolution, a 256-unit
    dense layer and the "a0" logits layer on top of ``net``, then samples
    from those logits.

    Args:
        net: Output tensor of the shared base.

    Returns:
        The output of ``SampleCategoricalLayer`` applied to the "a0" logits.
    '''
    if self.debug:
        log("Getting non-spatial action")
        s = Stopwatch()

    head = net
    for block_id in ('1a0', '2a0'):
        head = custom_layers.add_inception_resnet_B(head, block_id)

    head = Layers.Conv2D(8, [1, 1],
                         strides=1,
                         padding='SAME',
                         activation=tf.nn.relu,
                         name="non_spat_conv2")(head)
    flat = Layers.Flatten()(head)
    hidden = Layers.Dense(256, activation=tf.nn.relu,
                          name="non_spatial")(flat)
    a0_logits = Layers.Dense(constants.n_base_actions, name="a0")(hidden)
    sampled_action = SampleCategoricalLayer()(a0_logits)

    if self.debug:
        log("Finished non-spatial action. Took: " + s.delta)
    return sampled_action
def step(self, inputs, batch_predict=False):
    '''
    Takes a step of the Scud model.

    If batch_predict is set to True, we assume inputs is a batch of all env
    obs and return an array of the corresponding actions.

    Args:
        inputs: A single observation, or a sequence of observations when
            ``batch_predict`` is true. ``util.ControlObject`` entries carry
            no observation and are answered with the no-op ``(0, 0, 3)``.
        batch_predict: Whether ``inputs`` is a batch.

    Returns:
        ``(x, y, building)`` for a single observation, or a list of such
        tuples (one per entry of ``inputs``, in order) for a batch.
    '''
    no_op = (0, 0, 3)

    if batch_predict:
        control_flags = []  # one flag per input, True for control objects
        batch_list = []
        for game_state in inputs:
            is_control = isinstance(game_state, util.ControlObject)
            control_flags.append(is_control)
            if is_control:
                continue
            k, self.rows, self.columns = obs_parsing.parse_obs(game_state)
            batch_list.append(k)

        if not batch_list:
            # Nothing to feed the model: every entry was a control object.
            return [no_op] * len(control_flags)
        spatial = tf.stack(batch_list, axis=0)
    else:
        if self.mask_output or isinstance(inputs, util.ControlObject):
            if self.debug:
                print("scud ", self.name, 'output masked')
            return no_op
        k, self.rows, self.columns = obs_parsing.parse_obs(inputs)
        spatial = tf.expand_dims(
            k, axis=0)  # now should have shape (1, 8, 8, 25)
        control_flags = [False]

    a0, a1 = self.model.predict(spatial)

    # Floor division keeps the grid dimensions integral; true division would
    # hand tf.unravel_index a float dimension under Python 3.
    grid_dims = [self.rows, self.columns // 2]

    arr = []
    sep_cnt = 0  # index into the predictions; control objects have none
    for is_control in control_flags:
        if is_control:
            arr.append(no_op)
            continue

        building = a0[sep_cnt]
        coords = tf.unravel_index(a1[sep_cnt], grid_dims)
        x = int(coords[0])
        y = int(coords[1])
        if self.debug:
            log("x, y = " + str(x) + ", " + str(y))
        arr.append((x, y, building))
        sep_cnt += 1

    return arr if batch_predict else arr[0]
def add_base(self):
    '''
    Build the shared base of the network.

    Applies two stacked 3x3, 32-filter ReLU convolutions ("conv0" and
    "conv1") to ``self.spatial``.

    Returns:
        The output tensor of the second convolution.
    '''
    if self.debug:
        log("Adding base")
        s = Stopwatch()

    with tf.name_scope("adding_base"):
        features = self.spatial
        for layer_name in ("conv0", "conv1"):
            features = tf.layers.conv2d(features,
                                        32, [3, 3],
                                        strides=1,
                                        padding='SAME',
                                        activation=tf.nn.relu,
                                        name=layer_name)
        # Original author's note: this takes about 5 seconds.

    if self.debug:
        log("Finished adding base. Took: " + s.delta)
    return features
def get_non_spatial(self, net):
    '''
    Infers the non-spatial action of the network.

    Flattens ``net``, passes it through a 256-unit ReLU dense layer and an
    ``n_base_actions``-unit logits layer ("a0"), then samples one action
    from a categorical distribution over those logits.

    Args:
        net: Output tensor of the shared base.

    Returns:
        The sampled action tensor.
    '''
    if self.debug:
        log("Getting non-spatial action")
        s = Stopwatch()

    flattened = tf.layers.flatten(net)
    hidden = tf.layers.dense(flattened,
                             256,
                             activation=tf.nn.relu,
                             name="non_spatial")
    a0 = tf.layers.dense(hidden, n_base_actions, name="a0")

    # TODO: possibly softmax this and then transform it into an int from 0 - 4
    # possibly use tf autoregressive distribution
    sample = tf.distributions.Categorical(logits=a0).sample()

    if self.debug:
        log("Finished non-spatial action. Took: " + s.delta)
    return sample
def add_spatial(self, net, a0):
    '''
    Gets the spatial action of the network.

    One-hot encodes ``a0``, repeats it once per spatial cell of ``net`` and
    concatenates it onto ``net`` along the channel axis. The result goes
    through a 3x3 convolution, two inception-resnet-B blocks and a 1x1
    convolution; the flattened output is used as logits for sampling.

    Args:
        net: Output tensor of the shared base.
        a0: Sampled non-spatial action tensor.

    Returns:
        The output of ``SampleCategoricalLayer`` applied to the flattened
        spatial logits.
    '''
    if self.debug:
        log("getting spatial action")
        s = Stopwatch()

    n_actions = constants.n_base_actions
    one_hot_a0 = OneHotLayer(n_actions)(a0)

    # Broadcast the one-hot action over every spatial cell of `net`.
    dims = net.get_shape().as_list()
    height, width = dims[1], dims[2]
    tiled = Layers.RepeatVector(int(height * width))(one_hot_a0)
    action_planes = Layers.Reshape((height, width, n_actions))(tiled)

    merged = Layers.concatenate([net, action_planes],
                                axis=-1)  # (?, 8, 8, 38)
    merged = Layers.Conv2D(64, [3, 3],
                           strides=1,
                           padding='SAME',
                           activation=tf.nn.relu,
                           name="finalConv")(merged)
    for block_id in ('1a1', '2a1'):
        merged = custom_layers.add_inception_resnet_B(merged, block_id)
    merged = Layers.Conv2D(1, [1, 1],
                           strides=1,
                           padding='SAME',
                           name="conv1x1")(merged)

    logits = Layers.Flatten()(merged)
    a1_sampled = SampleCategoricalLayer()(logits)

    if self.debug:
        log("Finished spatial action inference. Took: " + s.delta)
    return a1_sampled
def add_base(self):
    '''
    Build the shared base of the model on top of ``self.input``.

    Three 3x3 ReLU convolutions (32, 32 and 64 filters) are followed by two
    inception-resnet-A blocks ('A1', 'A2').

    Returns:
        The output tensor of the second inception-resnet-A block.
    '''

    def relu_conv(tensor, filters, kernel, layer_name):
        # Stride-1, SAME-padded ReLU convolution shared by every layer here.
        return Layers.Conv2D(filters,
                             kernel,
                             strides=1,
                             padding='SAME',
                             activation=tf.nn.relu,
                             name=layer_name)(tensor)

    if self.debug:
        log("Adding base")
        s = Stopwatch()

    with tf.name_scope("adding_base"):
        stem = self.input
        for filters, layer_name in ((32, "baseConv1"),
                                    (32, "baseConv2"),
                                    (64, "baseConv3")):
            stem = relu_conv(stem, filters, [3, 3], layer_name)

        # NOTE(review): the two branches below are constructed but their
        # outputs are never read afterwards -- confirm whether they were
        # meant to be merged into the stem.
        branch_a = relu_conv(stem, 64, [1, 1], "baseConv4a")
        branch_a = relu_conv(branch_a, 96, [3, 3], "baseConv5a")

        branch_b = relu_conv(stem, 64, [1, 1], "baseConv4b")
        branch_b = relu_conv(branch_b, 64, [8, 1], "baseConv5b")
        branch_b = relu_conv(branch_b, 64, [1, 8], "baseConv6b")
        branch_b = relu_conv(branch_b, 96, [3, 3], "baseConv7b")

        base_out = stem
        for block_id in ('A1', 'A2'):
            base_out = custom_layers.add_inception_resnet_A(base_out,
                                                            block_id)

    if self.debug:
        log("Finished adding base. Took: " + s.delta)
    return base_out
str(path)[-50:])


def load(self, filepath, savename):
    '''
    Restore ``self.model`` from an ``.h5`` file inside ``filepath``.

    Args:
        filepath: Directory that contains the saved model.
        savename: File name to load. ``None`` selects
            ``str(self.name) + '.h5'``; a name without the ``.h5`` suffix
            gets it appended.
    '''
    if savename is None:
        path = os.path.join(filepath, str(self.name) + '.h5')
    else:
        if savename.endswith('.h5') == False:
            path = os.path.join(filepath, str(savename) + '.h5')
        else:
            path = os.path.join(filepath, str(savename))

    # custom_objects lets Keras resolve the project's own layer classes.
    self.model = tf.keras.models.load_model(
        path, custom_objects=custom_keras_layers)

    # Only the last 50 characters of the path are printed.
    if self.refbot_position != -1:
        print(">> SCUD >> ", self.name, "(refbot pos ", self.refbot_position,
              ") had model restored from file ",
              str(path)[-50:])
    else:
        print(">> SCUD >> ", self.name, " had model restored from file ",
              str(path)[-50:])


def __str__(self):
    '''Return a one-line summary: name, masking flag and refbot position.'''
    return "SCUD2 [Name: {:20} | Masking: {:3} | Refbot pos: {:2d}]".format(
        self.name, self.mask_output, self.refbot_position)


if __name__ == '__main__':
    # Manual smoke run: build one agent, fetch its weights, log elapsed time.
    k = Stopwatch()
    s = Scud('www', debug=True)
    we = s.get_flat_weights()
    log("Round-time was {}".format(k.delta))
def __init__(self, obs, name, debug=False):
    '''
    Initialize Bot. Load all game state information.

    Reads the game state from ``obs[0]``, caches map, player and price
    details on the instance, and assembles ``self.spatial``: the one-hot
    building/projectile planes concatenated with the non-spatial features
    broadcast over every cell, plus a leading batch axis.

    Args:
        obs: Indexable whose first element is the game-state dict.
        name: Not read by this constructor.
        debug: When true, log progress and timings.
    '''
    self.debug = debug

    # NOTE(review): when this handler runs, self.game_state stays unset and
    # the next statement fails -- confirm the intended failure mode.
    try:
        self.game_state = obs[0]
    except IOError:
        print("Cannot load Game State")

    self.full_map = self.game_state['gameMap']
    details = self.game_state['gameDetails']
    self.rows = details['mapHeight']
    self.columns = details['mapWidth']
    self.command = ''

    self.player_buildings = self.getPlayerBuildings()
    self.opponent_buildings = self.getOpponentBuildings()
    self.projectiles = self.getProjectiles()

    self.player_info = self.getPlayerInfo('A')
    self.opponent_info = self.getPlayerInfo('B')

    self.round = details['round']

    building_prices = details['buildingPrices']
    self.prices = {
        kind: building_prices[kind]
        for kind in ("ATTACK", "DEFENSE", "ENERGY")
    }

    if self.debug and debug_verbose:
        log("rows: " + str(self.rows))
        log("columns: " + str(self.columns))
        log("player_buildings: " + str(self.player_buildings))
        log("opp_buildings: " + str(self.opponent_buildings))
        log("projectiles: " + str(self.projectiles))
        log("player_info: " + str(self.player_info))
        log("opp_info: " + str(self.opponent_info))
        log("Round: " + str(self.round))
        log("Prices: " + str(self.prices))

    # getting inputs (shape comments assume the example 20x40 map)
    with tf.name_scope("shaping_inputs"):
        if self.debug:
            log("Shaping inputs...")
            s = Stopwatch()

        pb = tf.one_hot(indices=self.player_buildings,
                        depth=4,
                        axis=-1,
                        name="player_buildings")  # 20x20x4
        ob = tf.one_hot(indices=self.opponent_buildings,
                        depth=4,
                        axis=-1,
                        name="opp_buildings")  # 20x20x4
        proj = tf.one_hot(indices=self.projectiles,
                          depth=3,
                          axis=-1,
                          name='projectiles')  # 20x40x3

        k = proj.get_shape().as_list()
        # Floor division keeps the dimension an int; true division would
        # put a float into the shape/multiples lists under Python 3.
        half_width = k[1] // 2
        proj = tf.reshape(
            proj,
            [k[0], half_width, 6])  # 20x20x6. Only works for single missiles

        self.non_spatial = list(self.player_info.values())[1:] + list(
            self.opponent_info.values())[1:] + list(
                self.prices.values())  # 11x1
        self.non_spatial = tf.cast(self.non_spatial, dtype=tf.float32)

        # broadcasting the non-spatial features to the channel dimension
        broadcast_stats = tf.tile(
            tf.expand_dims(tf.expand_dims(self.non_spatial, axis=0), axis=0),
            [k[0], half_width, 1])  # now 20x20x11

        # adding all the inputs together via the channel dimension
        self.spatial = tf.concat([pb, ob, proj, broadcast_stats],
                                 axis=-1)  # 20x20x(14 + 11)
        self.spatial = tf.expand_dims(self.spatial, axis=0)

        if self.debug:
            log("Finished shaping inputs. Took " + s.delta)
def generate_action(self):
    '''
    Scud model estimator.

    Builds the base network, samples the non-spatial action (building) and
    the spatial action (x, y), then loads and re-saves 'scudstate.npy'.

    Returns:
        Tuple ``(x, y, building)`` of ints.
    '''
    state_file = 'scudstate.npy'

    if self.debug:
        log("Running conv2d on " + device)

    with tf.device('/' + device + ':0'):
        net = self.add_base()  # original author's note: (1, 20, 20, 32)

        # Split into the non-spatial and spatial action paths.
        a0 = self.get_non_spatial(net)
        building = int(a0)  # now an int between 0 and 3
        if self.debug:
            log("a0 = " + str(a0))

        coords = self.get_spatial(net)
        x, y = int(coords[0]), int(coords[1])
        if self.debug:
            log("x, y = " + str(x) + ", " + str(y))

        # Load the previous state (for RNN stuffs); the value is discarded.
        if self.debug:
            log("Loading state")
            load_timer = Stopwatch()
        np.load(state_file)  # takes ~ 0.031s
        if self.debug:
            log("State loaded. Took: " + load_timer.delta)

        # Save the base output as the new state (for RNN stuffs).
        if self.debug:
            log("Saving state")
            save_timer = Stopwatch()
        np.save(state_file, net)
        if self.debug:
            log("State saved. Took: " + save_timer.delta)

    return x, y, building
import common.util as util

# Shorthand alias for the eager Network class from tf.contrib.
Net = tf.contrib.eager.Network
''' Internal agent config '''
# Module-wide debug switch; together with debug_verbose it gates the
# TensorFlow self-check below.
debug = True
n_base_actions = 4  # number of base actions -- 0=NO OP, 1=DEFENSE, 2=OFFENSE, 3=ENERGY...
debug_verbose = False
endpoints = {}
# Device type; it is spliced into '/<device>:0' strings for tf.device.
device = 'cpu'

# Import-time side effect: switches TensorFlow to eager execution.
tf.enable_eager_execution()

# let an example map size be 20x40, so each player's building area is 20x20

if debug_verbose and debug:
    # Self-check: print the TF version and eager status, and time it.
    log("Testing tensorflow")
    s = Stopwatch()
    print("TensorFlow version: {}".format(tf.VERSION))
    print("Eager execution: {}".format(tf.executing_eagerly()))
    log("Finished, took: " + s.delta)


class Scud(object):

    def __init__(self, obs, name, debug=False):
        ''' Initialize Bot. Load all game state information. '''
        self.debug = debug