import logging
import os
import sys
import traceback

import gym
import numpy as np
import tensorflow as tf

# Helpers such as shape_list, stable_masked_softmax, normc_initializer, and
# replace_base_scope are assumed to be defined elsewhere in this package.


def entity_concat(inps):
    '''
        Concat 4D tensors along the entity dimension (third axis). If a 3D
        tensor is in the list, treat it as a single entity and expand its
        third dimension.
        Args:
            inps (list of tensors): tensors to concatenate
    '''
    with tf.variable_scope('concat_entities'):
        shapes = [shape_list(_x) for _x in inps]
        # For inputs that don't have an entity dimension, add one.
        inps = [_x if len(_shape) == 4 else tf.expand_dims(_x, 2)
                for _x, _shape in zip(inps, shapes)]
        shapes = [shape_list(_x) for _x in inps]
        assert np.all([_shape[-1] == shapes[0][-1] for _shape in shapes]),\
            f"Some entities don't have the same feature dimension: {shapes}"
        # Concatenate along the entity dimension
        out = tf.concat(inps, -2)
        return out
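# A minimal usage sketch for entity_concat (illustrative only; all shapes and
# tensor names below are made up):
def _example_entity_concat():
    entities = tf.zeros((2, 5, 8, 64))  # (bs, T, NE, features) entity tensor
    pooled = tf.zeros((2, 5, 64))       # (bs, T, features) -- gains an entity dim of 1
    # Result has shape (2, 5, 9, 64): 8 entities plus the expanded pooled "entity"
    return entity_concat([entities, pooled])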
def self_attention(inp, mask, heads, n_embd, layer_norm=False, qk_w=1.0, v_w=0.01,
                   scope='', reuse=False):
    '''
        Self attention over entities.
        Notation:
            T  - Time
            NE - Number of entities
        Args:
            inp (tf) -- tensor w/ shape (bs, T, NE, features)
            mask (tf) -- binary tensor with shape (bs, T, NE). For each batch x time,
                a 1 indicates that the corresponding entity can be attended to
            heads (int) -- number of attention heads
            n_embd (int) -- total embedding size; each head gets n_embd / heads
                dimensions for its queries, keys, and values
            layer_norm (bool) -- normalize embedding prior to computing qkv
            qk_w, v_w (float) -- scale for gaussian init for keys/queries and values.
                Std will be sqrt(scale / #input features)
            scope (string) -- tf scope
            reuse (bool) -- tf reuse
    '''
    with tf.variable_scope(scope, reuse=reuse):
        bs, T, NE, features = shape_list(inp)
        # Put mask into the format expected by the logit matrix
        entity_mask = None
        if mask is not None:
            with tf.variable_scope('expand_mask'):
                assert np.all(np.array(mask.get_shape().as_list()) == np.array(inp.get_shape().as_list()[:3])),\
                    f"Mask and input should have the same first 3 dimensions. {shape_list(mask)} -- {shape_list(inp)}"
                entity_mask = mask
                mask = tf.expand_dims(mask, -2)  # (bs, T, 1, NE)

        query, key, value = qkv_embed(inp, heads, n_embd, layer_norm=layer_norm,
                                      qk_w=qk_w, v_w=v_w, reuse=reuse)
        logits = tf.matmul(query, key, name="matmul_qk_parallel")  # (bs, T, heads, NE, NE)
        logits /= np.sqrt(n_embd / heads)
        softmax = stable_masked_softmax(logits, mask)
        att_sum = tf.matmul(softmax, value, name="matmul_softmax_value")  # (bs, T, heads, NE, n_embd / heads)
        with tf.variable_scope('flatten_heads'):
            out = tf.transpose(att_sum, (0, 1, 3, 2, 4))  # (bs, T, NE, heads, n_embd / heads)
            n_output_entities = shape_list(out)[2]
            out = tf.reshape(out, (bs, T, n_output_entities, n_embd))  # (bs, T, NE, n_embd)
        return out
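# A minimal usage sketch for self_attention (illustrative only; the entity
# counts and hyperparameters are made up). With heads=4 and n_embd=64, each
# head attends with 16-dimensional queries/keys/values, and the output keeps
# the input's (bs, T, NE) layout with n_embd features per entity:
def _example_self_attention():
    inp = tf.zeros((2, 5, 8, 64))  # (bs, T, NE, features)
    mask = tf.ones((2, 5, 8))      # all entities visible at every timestep
    return self_attention(inp, mask, heads=4, n_embd=64, scope='sa_example')  # (2, 5, 8, 64)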
def concat_entity_masks(inps, masks):
    '''
        Concats masks together. If a mask is None, creates a tensor of 1's
        matching the corresponding input's (bs, T, NE) shape.
        Args:
            inps (list of tensors): tensors that masks apply to
            masks (list of tensors): corresponding masks
    '''
    assert len(inps) == len(masks), "There should be the same number of inputs as masks"
    with tf.variable_scope('concat_masks'):
        new_masks = []
        for inp, mask in zip(inps, masks):
            if mask is None:
                inp_shape = shape_list(inp)
                if len(inp_shape) == 4:  # this is an entity tensor
                    new_masks.append(tf.ones(inp_shape[:3]))
                elif len(inp_shape) == 3:  # this is a pooled or main tensor; set NE (outer dimension) to 1
                    new_masks.append(tf.ones(inp_shape[:2] + [1]))
            else:
                new_masks.append(mask)
        new_mask = tf.concat(new_masks, -1)
        return new_mask
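# A minimal sketch pairing concat_entity_masks with entity_concat (made-up
# shapes): the pooled tensor has no mask, so a (bs, T, 1) block of ones is
# created for it, and the combined mask lines up with the concatenated entities:
def _example_concat_entity_masks():
    entities = tf.zeros((2, 5, 8, 64))
    pooled = tf.zeros((2, 5, 64))
    ent_mask = tf.ones((2, 5, 8))
    new_inp = entity_concat([entities, pooled])                           # (2, 5, 9, 64)
    new_mask = concat_entity_masks([entities, pooled], [ent_mask, None])  # (2, 5, 9)
    return new_inp, new_mask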
def circ_conv1d(inp, **conv_kwargs):
    '''
        Circular 1D convolution over the entity dimension: the input is padded
        with wrap-around entities from the opposite end, so the first and last
        entities are treated as neighbours.
        Args:
            inp (tf) -- tensor w/ shape (bs, T, NE, features)
            conv_kwargs -- passed through to tf.layers.conv1d; must include
                'kernel_size', 'filters', and 'activation' ('relu', 'tanh', or '')
    '''
    valid_activations = {'relu': tf.nn.relu, 'tanh': tf.tanh, '': None}
    assert 'kernel_size' in conv_kwargs, "Kernel size needs to be specified for circular convolution layer."
    conv_kwargs['activation'] = valid_activations[conv_kwargs['activation']]

    # Pad the input with wrap-around entities for circular convolution
    kernel_size = conv_kwargs['kernel_size']
    num_pad = kernel_size // 2
    inp_shape = shape_list(inp)
    inp_rs = tf.reshape(inp, shape=[inp_shape[0] * inp_shape[1]] + inp_shape[2:])  # (bs * T, NE, feats)
    inp_padded = tf.concat([inp_rs[..., -num_pad:, :], inp_rs, inp_rs[..., :num_pad, :]], -2)
    out = tf.layers.conv1d(inp_padded,
                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                           padding='valid',
                           **conv_kwargs)
    out = tf.reshape(out, shape=inp_shape[:3] + [conv_kwargs['filters']])
    return out
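# A minimal usage sketch for circ_conv1d (made-up shapes). With kernel_size=3,
# one entity is copied from each end before the convolution, so entity 0 and
# entity NE-1 see each other as neighbours, as if the entities lay on a ring:
def _example_circ_conv1d():
    inp = tf.zeros((2, 5, 8, 64))  # (bs, T, NE, features)
    return circ_conv1d(inp, filters=32, kernel_size=3, activation='relu')  # (2, 5, 8, 32)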
def qkv_embed(inp, heads, n_embd, layer_norm=False, qk_w=1.0, v_w=0.01, reuse=False):
    '''
        Compute queries, keys, and values.
        Args:
            inp (tf) -- tensor w/ shape (bs, T, NE, features)
            heads (int) -- number of attention heads
            n_embd (int) -- total embedding size; each head gets n_embd / heads
                dimensions for its queries, keys, and values
            qk_w (float) -- Initialization scale for keys and queries. Actual scale
                will be sqrt(qk_w / #input features)
            v_w (float) -- Initialization scale for values. Actual scale will be
                sqrt(v_w / #input features)
            layer_norm (bool) -- normalize embedding prior to computing qkv
            reuse (bool) -- tf reuse
    '''
    with tf.variable_scope('qkv_embed'):
        bs, T, NE, features = shape_list(inp)
        if layer_norm:
            with tf.variable_scope('pre_sa_layer_norm'):
                inp = tf.contrib.layers.layer_norm(inp, begin_norm_axis=3)

        # qk shape (bs x T x NE x h x n_embd/h)
        qk_scale = np.sqrt(qk_w / features)
        qk = tf.layers.dense(inp,
                             n_embd * 2,
                             kernel_initializer=tf.random_normal_initializer(stddev=qk_scale),
                             reuse=reuse,
                             name="qk_embed")  # (bs, T, NE, n_embd * 2)
        qk = tf.reshape(qk, (bs, T, NE, heads, n_embd // heads, 2))
        # Each of query, key: (bs, T, NE, heads, n_embd / heads)
        query, key = [tf.squeeze(x, -1) for x in tf.split(qk, 2, -1)]

        v_scale = np.sqrt(v_w / features)
        value = tf.layers.dense(inp,
                                n_embd,
                                kernel_initializer=tf.random_normal_initializer(stddev=v_scale),
                                reuse=reuse,
                                name="v_embed")  # (bs, T, NE, n_embd)
        value = tf.reshape(value, (bs, T, NE, heads, n_embd // heads))

        query = tf.transpose(query, (0, 1, 3, 2, 4),
                             name="transpose_query")  # (bs, T, heads, NE, n_embd / heads)
        key = tf.transpose(key, (0, 1, 3, 4, 2),
                           name="transpose_key")  # (bs, T, heads, n_embd / heads, NE)
        value = tf.transpose(value, (0, 1, 3, 2, 4),
                             name="transpose_value")  # (bs, T, heads, NE, n_embd / heads)
        return query, key, value
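# A shape trace for qkv_embed (made-up sizes): with heads=4 and n_embd=64,
# queries and values come back as (bs, T, 4, NE, 16) and keys as
# (bs, T, 4, 16, NE), so the tf.matmul(query, key) in self_attention directly
# yields (bs, T, 4, NE, NE) attention logits:
def _example_qkv_embed():
    inp = tf.zeros((2, 5, 8, 64))  # (bs, T, NE, features)
    return qkv_embed(inp, heads=4, n_embd=64)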
def load_variables(policy, weights):
    '''
        Assign the values in a name -> array dict to a policy's variables.
        Variables missing from the dict are reinitialized instead.
    '''
    weights = {os.path.normpath(key): value for key, value in weights.items()}
    weights = {replace_base_scope(key, policy.scope): value for key, value in weights.items()}
    assign_ops = []
    for var in policy.get_variables():
        var_name = os.path.normpath(var.name)
        if var_name not in weights:
            logging.warning(f"{var_name} was not found in weights dict. This will be reinitialized.")
            tf.get_default_session().run(var.initializer)
        else:
            try:
                assert np.all(np.array(shape_list(var)) == np.array(weights[var_name].shape))
                assign_ops.append(var.assign(weights[var_name]))
            except Exception:
                traceback.print_exc(file=sys.stdout)
                print(f"Error assigning weights of shape {weights[var_name].shape} to {var}")
                sys.exit()
    tf.get_default_session().run(assign_ops)
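# A minimal usage sketch for load_variables (hypothetical path; assumes a
# default TF1 session is active and the file holds a {var_name: ndarray} dict):
#
#   import pickle
#   with open('/path/to/weights.pkl', 'rb') as f:
#       weights = pickle.load(f)
#   load_variables(policy, weights)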
def _init_policy_out(self, pi, taken_actions):
    with tf.variable_scope('policy_out', reuse=self.reuse):
        self.pdparams = {}
        for k in self.pdtypes.keys():
            with tf.variable_scope(k, reuse=self.reuse):
                if self.gaussian_fixed_var and isinstance(self.ac_space.spaces[k], gym.spaces.Box):
                    mean = tf.layers.dense(
                        pi["main"],
                        self.pdtypes[k].param_shape()[0] // 2,
                        kernel_initializer=normc_initializer(0.01),
                        activation=None)
                    logstd = tf.get_variable(
                        name="logstd",
                        shape=[1, self.pdtypes[k].param_shape()[0] // 2],
                        initializer=tf.zeros_initializer())
                    self.pdparams[k] = tf.concat([mean, mean * 0.0 + logstd], axis=2)
                elif k in pi:
                    # This is just for the case of entity-specific actions
                    if isinstance(self.ac_space.spaces[k], gym.spaces.Discrete):
                        assert pi[k].get_shape()[-1] == 1
                        self.pdparams[k] = pi[k][..., 0]
                    elif isinstance(self.ac_space.spaces[k], gym.spaces.MultiDiscrete):
                        assert np.prod(pi[k].get_shape()[-2:]) == self.pdtypes[k].param_shape()[0],\
                            f"policy had shape {pi[k].get_shape()} for action {k}, but required {self.pdtypes[k].param_shape()}"
                        new_shape = shape_list(pi[k])[:-2] + [np.prod(pi[k].get_shape()[-2:]).value]
                        self.pdparams[k] = tf.reshape(pi[k], shape=new_shape)
                    else:
                        assert False
                else:
                    self.pdparams[k] = tf.layers.dense(
                        pi["main"],
                        self.pdtypes[k].param_shape()[0],
                        kernel_initializer=normc_initializer(0.01),
                        activation=None)

    with tf.variable_scope('pds', reuse=self.reuse):
        self.pds = {k: pdtype.pdfromflat(self.pdparams[k])
                    for k, pdtype in self.pdtypes.items()}

    with tf.variable_scope('sampled_action', reuse=self.reuse):
        self.sampled_action = {k: pd.sample() if self.stochastic else pd.mode()
                               for k, pd in self.pds.items()}

    with tf.variable_scope('sampled_action_neglogp', reuse=self.reuse):
        self.sampled_action_neglogp = sum([self.pds[k].neglogp(self.sampled_action[k])
                                           for k in self.pdtypes.keys()])

    with tf.variable_scope('entropy', reuse=False):
        self.entropy = sum([pd.entropy() for pd in self.pds.values()])

    with tf.variable_scope('taken_action_neglogp', reuse=False):
        self.taken_action_neglogp = sum([self.pds[k].neglogp(taken_actions[k])
                                         for k in self.pdtypes.keys()])
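# Shape note for the MultiDiscrete branch above (illustrative numbers): if
# action k is a MultiDiscrete space whose pdtype expects param_shape
# [NE * n_actions] and the network emitted pi[k] with shape
# (bs, T, NE, n_actions), the tf.reshape flattens the last two dimensions so
# pdfromflat receives a (bs, T, NE * n_actions) logit tensor.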