def __init__(self, num_heads, multi_head_output_size, input_node_size, name=None):
    """Create the sub-modules used by the core network.

    Args:
        num_heads: number of heads passed to every `MultiHeadLinear`.
        multi_head_output_size: output size passed to every `MultiHeadLinear`.
        input_node_size: output size of `output_linear` and of the last
            feed-forward layer.
        name: optional module name forwarded to the base class.
    """
    super(CoreNetwork, self).__init__(name=name)
    self.num_heads = num_heads
    self.multi_head_output_size = multi_head_output_size

    self.output_linear = snt.Linear(output_size=input_node_size)
    # Feed forward network
    self.FFN = snt.nets.MLP([32, input_node_size], activate_final=False)

    def _standardize(x):
        # Subtract the mean and divide by the standard deviation, both taken
        # over all elements of `x` (no axis is given to the reductions).
        return (x - tf.reduce_mean(x)) / tf.math.reduce_std(x)

    self.normalization = _standardize

    # Both layer norms share the same configuration.
    layer_norm_kwargs = dict(axis=1, eps=1e-6, create_scale=True, create_offset=True)
    self.ln1 = snt.LayerNorm(**layer_norm_kwargs)
    self.ln2 = snt.LayerNorm(**layer_norm_kwargs)

    # The value / key / query projections share the same configuration.
    projection_kwargs = dict(output_size=multi_head_output_size, num_heads=num_heads)
    self.v_linear = MultiHeadLinear(**projection_kwargs)  # values
    self.k_linear = MultiHeadLinear(**projection_kwargs)  # keys
    self.q_linear = MultiHeadLinear(**projection_kwargs)  # queries

    self.self_attention = SelfAttention()
def __init__(self, config, name='model'):
    """Set up the constants and sub-modules of the model.

    Args:
        config: mapping that provides at least 'normal_scale' and
            'seg_overlap'; it is also handed to `Initializer`, `Updater`
            and `Decoder`.
        name: module name forwarded to the base class.
    """
    super(Model, self).__init__(name=name)

    normal_scale = config['normal_scale']
    # Inverse variance 1 / scale^2 and the constant log(2 * pi / inverse variance).
    self._normal_invvar = 1 / pow(normal_scale, 2)
    self._normal_const = math.log(2 * math.pi / self._normal_invvar)
    self._seg_overlap = config['seg_overlap']

    def _plain_layer_norm(axis, ln_name):
        # Layer norm created with offset and scale both disabled.
        return snt.LayerNorm(axis=axis, offset=False, scale=False, name=ln_name)

    with self._enter_variable_scope(check_same_graph=False):
        self._init = Initializer(config)
        self._upd = Updater(config)
        self._dec = Decoder(config)

        # Normalisers over the last three axes.
        self._ln_grad_apc = _plain_layer_norm([-3, -2, -1], 'ln_grad_apc')
        self._ln_grad_mask = _plain_layer_norm([-3, -2, -1], 'ln_grad_mask')
        self._ln_pixel_ll = _plain_layer_norm([-3, -2, -1], 'ln_ll')
        self._ln_pixel_ll_excl = _plain_layer_norm([-3, -2, -1], 'ln_ll_exclude')
        # Normaliser over the last axis only.
        self._ln_grad_post_param = _plain_layer_norm([-1], 'ln_grad_post_param')
def __init__(self, num_heads: int = 1, use_edges=False, use_globals=False, name=None):
    """Create the attention core and the two layer norms.

    Args:
        num_heads: number of heads passed to `TransformerLayer`.
        use_edges: flag stored on the instance as `use_edges`.
        use_globals: flag stored on the instance as `use_globals`.
        name: optional module name forwarded to the base class.
    """
    super(SelfAttentionMessagePassing, self).__init__(name=name)

    attention_core = TransformerLayer(num_heads=num_heads)
    self.selfattention_core = attention_core

    # Positional arguments: axis=-1 followed by two boolean flags, both True
    # (presumably scale/offset creation -- confirm against the installed
    # Sonnet version before switching to keyword arguments).
    edge_norm = snt.LayerNorm(-1, True, True, name='layer_norm_edges')
    node_norm = snt.LayerNorm(-1, True, True, name='layer_norm_nodes')
    self.layer_norm1 = edge_norm
    self.layer_norm2 = node_norm

    self.use_globals = use_globals
    self.use_edges = use_edges
def _build(self, x, presence=None):
    """Apply self-attention and an MLP, each with a residual connection.

    Args:
        x: input tensor; the size of its last axis sets the MLP widths.
        presence: optional tensor; when given, it is cast to float, expanded
            on a new last axis and multiplied into the attention branch.

    Returns:
        The output of the second residual branch, layer-normalised over the
        last axis when `self._layer_norm` is set.
    """
    feature_dim = int(x.shape[-1])
    apply_dropout = self._dropout_rate > 0.

    attended = self._self_attention(x, presence)

    # NOTE(review): dropout is applied to the residual input `x`, not to the
    # attention output -- confirm this is intended.
    if apply_dropout:
        x = tf.nn.dropout(x, rate=self._dropout_rate)
    attended = attended + x

    if presence is not None:
        presence_mask = tf.expand_dims(tf.to_float(presence), -1)
        attended = attended * presence_mask

    if self._layer_norm:
        attended = snt.LayerNorm(axis=-1)(attended)

    # MLP with widths [2 * d, d], wrapped in BatchApply.
    feed_forward = snt.BatchApply(snt.nets.MLP([2*feature_dim, feature_dim]))
    hidden = feed_forward(attended)

    if apply_dropout:
        hidden = tf.nn.dropout(hidden, rate=self._dropout_rate)
    hidden = hidden + attended

    if self._layer_norm:
        hidden = snt.LayerNorm(axis=-1)(hidden)

    return hidden
def __init__(self, latent_sizes_edge, latent_sizes_node, latent_sizes_global, name="MLPGraphNetwork"):
    """Build a `modules.GraphNetwork` whose edge/node/global models are MLPs.

    Args:
        latent_sizes_edge: layer sizes of the edge MLP.
        latent_sizes_node: layer sizes of the node MLP.
        latent_sizes_global: layer sizes of the global MLP.
        name: module name forwarded to the base class.
    """
    super(MLPGraphNetwork, self).__init__(name=name)

    def _edge_model():
        # Edge MLP with final activation, followed by LayerNorm.
        return snt.Sequential([
            snt.nets.MLP(latent_sizes_edge, activate_final=True),
            snt.LayerNorm()
        ])

    def _node_model():
        # Node MLP with final activation, followed by LayerNorm.
        return snt.Sequential([
            snt.nets.MLP(latent_sizes_node, activate_final=True),
            snt.LayerNorm()
        ])

    def _global_model():
        # Global MLP without final activation and without LayerNorm.
        return snt.Sequential(
            [snt.nets.MLP(latent_sizes_global, activate_final=False)])

    self.edge_fun = _edge_model
    self.node_fun = _node_model
    self.global_fun = _global_model

    with self._enter_variable_scope():
        self._network = modules.GraphNetwork(
            self.edge_fun,
            self.node_fun,
            self.global_fun,
            edge_block_opt=EDGE_BLOCK_OPT,
            node_block_opt=NODE_BLOCK_OPT,
            global_block_opt=GLOBAL_BLOCK_OPT)
def testInvalidInitializerParameters(self):
    """LayerNorm must reject unknown initializer keys and non-callable values."""
    # An unknown key is expected to raise KeyError.
    unknown_key_initializers = {
        "not_gamma": tf.contrib.layers.l1_regularizer(0.5)}
    with self.assertRaisesRegexp(KeyError, "Invalid initializer keys.*"):
        snt.LayerNorm(initializers=unknown_key_initializers)

    # A tensor is not a callable, so it is expected to raise TypeError.
    err = "Initializer for 'gamma' is not a callable function"
    non_callable_initializers = {"gamma": tf.zeros([1, 2, 3])}
    with self.assertRaisesRegexp(TypeError, err):
        snt.LayerNorm(initializers=non_callable_initializers)
def _build(self, inputs):
    """Apply an MLP + LayerNorm stack, optionally followed by a Linear layer.

    `self.typ` selects the stack:
      * "mlp_transform": MLP -> LayerNorm -> Linear(self.output_size)
      * "mlp_layer_norm": MLP -> LayerNorm

    Args:
        inputs: tensor fed to the stack.

    Returns:
        The output of the selected stack applied to `inputs`.

    Raises:
        ValueError: if `self.typ` is not one of the supported values.
    """
    supported_types = ("mlp_transform", "mlp_layer_norm")
    # Fail early with a clear message instead of reaching the return with an
    # unbound result.
    if self.typ not in supported_types:
        raise ValueError(
            "Unknown network type {!r}; expected one of {}".format(
                self.typ, supported_types))

    net = snt.nets.MLP([self.n_neurons] * self.n_layers,
                       activate_final=self.activation_final)
    layers = [net, snt.LayerNorm()]
    if self.typ == "mlp_transform":
        # Transforms the outputs into the appropriate shape.
        layers.append(snt.Linear(self.output_size))

    return snt.Sequential(layers)(inputs)
def __init__(self, dim, n_heads, conv_hidden, dropout=0.1):
    """Create the attention layer and two parameter-free layer norms.

    Args:
        dim: feature size passed to `MultiHeadAttention`.
        n_heads: number of attention heads.
        conv_hidden: accepted for interface compatibility; not used in this
            constructor.
        dropout: dropout rate stored on the instance and passed to
            `MultiHeadAttention`.
    """
    super().__init__()
    self.dim = dim
    self.n_heads = n_heads
    self.dropout = dropout
    self.l1 = MultiHeadAttention(dim, n_heads=n_heads, dropout=dropout)
    # The first argument of snt.LayerNorm is the axis to normalise over, not
    # the feature size, so `dim` must not be passed there. Normalise over the
    # last axis instead (assumes features of size `dim` live on the last
    # axis -- TODO confirm against the call site).
    self.l2 = snt.LayerNorm(axis=-1, create_scale=False, create_offset=False)
    self.l3 = snt.LayerNorm(axis=-1, create_scale=False, create_offset=False)
def __init__(self, num_heads, name=None):
    """Create the four layer norms and the self-attention module.

    Args:
        num_heads: number of heads, stored on the instance.
        name: optional module name forwarded to the base class.
    """
    super(TransformerLayer, self).__init__(name=name)
    self.num_heads = num_heads

    def _layer_norm(ln_name):
        # Every layer norm here acts on the last axis with a learned scale
        # and offset.
        return snt.LayerNorm(axis=-1, eps=1e-6, create_scale=True,
                             create_offset=True, name=ln_name)

    self.ln1 = _layer_norm('layer_norm1')
    self.ln2 = _layer_norm('layer_norm2')
    self.ln_keys = _layer_norm('layer_norm_keys')
    self.ln_queries = _layer_norm('layer_norm_queries')

    self.self_attention = SelfAttention()
def testConstruct(self):
    """LayerNorm connects to rank-2 inputs and rejects rank-3 inputs."""
    rank2_inputs = tf.placeholder(tf.float32, shape=[None, 64])
    valid_layer_norm = snt.LayerNorm()
    valid_layer_norm(rank2_inputs)

    expected_error = (r"Layer normalization expects inputs of rank 2. "
                      r"Got inputs of rank \d.")
    rank3_inputs = tf.placeholder(tf.float32, shape=[None, 64, 1])
    with self.assertRaisesRegexp(snt.Error, expected_error):
        rejecting_layer_norm = snt.LayerNorm()
        rejecting_layer_norm(rank3_inputs)
def _build(self, inputs, is_training=True, verbose=VERBOSITY):
    """Decode the inputs into visual and non-visual parts and concatenate them.

    Args:
        inputs: 2-D indexable tensor; the last 6 columns are treated as the
            non-visual part.
        is_training: accepted but not used in this method.
        verbose: when truthy, print the shape of the result.

    Returns:
        The visual decoder output concatenated along axis 1 with the decoded
        non-visual part.
    """
    settings = EncodeProcessDecode_v3_172_visual_latent_dim_no_batchnorm
    n_non_visual_elements = 6

    visual_part = self.visual_dec(inputs, name=self._name)

    # get x,y,z-position and x,y,z-velocity
    non_visual_latent = inputs[:, -n_non_visual_elements:]

    # Decode the non-visual slice with an MLP that ends in
    # `n_non_visual_elements` units, followed by LayerNorm.
    non_visual_mlp = snt.nets.MLP(
        [settings.n_neurons_nodes_total_dim, n_non_visual_elements],
        activate_final=True)
    non_visual_decoder = snt.Sequential([non_visual_mlp, snt.LayerNorm()])
    non_visual_part = non_visual_decoder(non_visual_latent)

    outputs = tf.concat([visual_part, non_visual_part], axis=1)

    if verbose:
        print("final decoder output shape after including non-visual data", outputs.get_shape())

    return outputs
def make_mlp_model_edges():
    """Create a fresh edge model: an MLP followed by LayerNorm.

    Each call builds new modules, so parameters are not shared between the
    models returned by separate calls.

    Returns:
        A Sonnet module which contains the MLP and LayerNorm.
    """
    # NOTE: an optional module adding Gaussian noise (stddev taken from
    # EncodeProcessDecode.latent_state_noise) after the LayerNorm was
    # prototyped here and is currently disabled.
    hidden_sizes = ([EncodeProcessDecode_v2.n_neurons_edges]
                    * EncodeProcessDecode_v2.n_layers_edges)
    edge_mlp = snt.nets.MLP(hidden_sizes, activate_final=True)
    return snt.Sequential([edge_mlp, snt.LayerNorm()])
def make_mlp(layersizes):
    """Return a factory for an MLP + LayerNorm model, or None.

    Args:
        layersizes: sized collection of layer widths.

    Returns:
        None when `layersizes` is empty; otherwise a zero-argument callable
        that builds a new `snt.Sequential` of an MLP (final activation on)
        followed by LayerNorm each time it is called.
    """
    if len(layersizes) == 0:
        return None

    def _build_model():
        return snt.Sequential([
            snt.nets.MLP(layersizes, activate_final=True),
            snt.LayerNorm(),
        ])

    return _build_model
def make_mlp():
    """Build an MLP followed by a LayerNorm over the last axis.

    Reads `layer_sizes`, `act` and `cast_activation` from the enclosing
    scope.

    Returns:
        A `snt.Sequential` containing the MLP and the LayerNorm.
    """
    mlp = snt.nets.MLP(layer_sizes,
                       activate_final=True,
                       activation=cast_activation(act))
    layer_norm = snt.LayerNorm(axis=-1, create_offset=True, create_scale=True)
    return snt.Sequential([mlp, layer_norm])
def __init__(self, make_gnn_fn, num_timesteps, weight_sharing=False, use_batch_norm=False, residual=True, test_local_stats=False, use_layer_norm=False, name="TimestepGNN"):
    """Create the per-timestep GNNs and optional normalisation modules.

    Args:
        make_gnn_fn: zero-argument callable returning a GNN module.
        num_timesteps: number of timesteps; also the number of GNNs and
            normalisation modules created when they are per-timestep.
        weight_sharing: if True a single GNN is created, otherwise one per
            timestep.
        use_batch_norm: if True, create one `snt.BatchNorm` per timestep.
        residual: flag stored on the instance.
        test_local_stats: flag stored on the instance.
        use_layer_norm: if True, create one `snt.LayerNorm` per timestep.
        name: module name forwarded to the base class.
    """
    super(TimestepGNN, self).__init__(name=name)

    self._weight_sharing = weight_sharing
    self._num_timesteps = num_timesteps
    self._use_batch_norm = use_batch_norm
    self._residual = residual
    # Both lists stay empty unless the matching flag is enabled below.
    self._bns = []
    self._lns = []
    self._test_local_stats = test_local_stats
    self._use_layer_norm = use_layer_norm

    with self._enter_variable_scope():
        if weight_sharing:
            # One GNN reused for all timesteps.
            self._gnn = make_gnn_fn()
        else:
            # One independent GNN per timestep.
            self._gnn = [make_gnn_fn() for _ in range(num_timesteps)]

        if use_batch_norm:
            self._bns = [
                snt.BatchNorm(scale=True) for _ in range(num_timesteps)
            ]

        if use_layer_norm:
            self._lns = [snt.LayerNorm() for _ in range(num_timesteps)]
def _build(self, inputs, verbose=VERBOSITY):
    """Encode the inputs into visual and non-visual latents and concatenate them.

    Args:
        inputs: 2-D indexable tensor; the last 6 columns are treated as the
            non-visual part.
        verbose: when truthy, print the shape of the result.

    Returns:
        The visual encoder output concatenated along axis 1 with the encoded
        non-visual part.
    """
    settings = EncodeProcessDecode_v3_172_visual_latent_dim_no_batchnorm
    n_non_visual_elements = 6

    visual_latent = self._visual_enc(inputs, name=self._name)

    # get x,y,z-position and x,y,z-velocity
    non_visual_elements = inputs[:, -n_non_visual_elements:]

    # Map velocity and position into a latent space; the result is
    # concatenated with the visual latent vector below.
    non_visual_mlp = snt.nets.MLP(
        [n_non_visual_elements, settings.n_neurons_nodes_non_visual],
        activate_final=True)
    non_visual_encoder = snt.Sequential([non_visual_mlp, snt.LayerNorm()])
    non_visual_latent = non_visual_encoder(non_visual_elements)

    outputs = tf.concat([visual_latent, non_visual_latent], axis=1)

    if verbose:
        print("final encoder output shape", outputs.get_shape())

    # TODO: add noise to latent vector, fix issue with passing is_training flag
    return outputs
def _make_mlp(self, output_size, layer_norm=True):
    """Build an MLP, optionally followed by LayerNorm.

    Args:
        output_size: width of the final layer.
        layer_norm: if True, wrap the MLP and a LayerNorm in a Sequential.

    Returns:
        The MLP itself, or a `snt.Sequential` of the MLP and a LayerNorm.
    """
    # `self._num_layers` hidden layers of `self._latent_size`, then the output.
    hidden_widths = [self._latent_size] * self._num_layers
    mlp = snt.nets.MLP(hidden_widths + [output_size], activate_final=False)
    if not layer_norm:
        return mlp
    return snt.Sequential([mlp, snt.LayerNorm()])
def _build(self, inputs, is_training, k=-1, r=0):
    """Linear -> dropout -> highway blocks -> linear, with optional mixup.

    Args:
        inputs: input tensor.
        is_training: forwarded to the dropout and batch-norm layers.
        k: when it is a Python int (the default -1 is), every mixup branch is
            skipped. Otherwise it is compared with `tf.equal` to pick where
            `mixup_process` is applied: 0 -> on the inputs, 1 -> after the
            first linear + dropout, i + 2 -> after the i-th highway block.
        r: forwarded to `mixup_process`.

    Returns:
        Output of the final linear layer, of size `self.output_size`.
    """
    # Optional mixup on the raw inputs (position 0).
    if not isinstance(k, int):
        inputs = tf.cond(tf.equal(k, 0), true_fn=lambda: mixup_process(inputs=inputs, r=r), false_fn=lambda: inputs)
    h = snt.Linear(output_size=self.hidden_size)(inputs)
    # NOTE(review): this first dropout is applied regardless of
    # `self.use_dropout` -- confirm this is intended.
    h = tf.layers.Dropout(rate=self.drop_rate)(h, is_training)
    # Optional mixup after the first hidden layer (position 1).
    if not isinstance(k, int):
        h = tf.cond(tf.equal(k, 1), true_fn=lambda: mixup_process(inputs=h, r=r), false_fn=lambda: h)
    for i in range(self.num_highways):
        h = Highway()(h)
        # Batch norm takes precedence over layer norm when both are enabled.
        if self.use_batch_norm:
            h = snt.BatchNormV2(data_format='NC')(h, is_training)
        elif self.use_layer_norm:
            h = snt.LayerNorm(axis=1)(h)
        if self.activation != 'linear':
            h = Activation(activation=self.activation)(h)
        if self.use_dropout:
            h = tf.layers.Dropout(rate=self.drop_rate)(h, is_training)
        # Optional mixup after the i-th highway block (position i + 2).
        if not isinstance(k, int):
            h = tf.cond(tf.equal(k, i + 2), true_fn=lambda: mixup_process(inputs=h, r=r), false_fn=lambda: h)
    outputs = snt.Linear(output_size=self.output_size)(h)
    return outputs
def _build(self, inputs, is_training=True):
    """Decode the inputs into visual and non-visual parts and concatenate them.

    Args:
        inputs: 2-D indexable tensor; its last 5 columns (when "global" is in
            the module name) or last 6 columns (otherwise) are treated as the
            non-visual part.
        is_training: accepted but not used in this method.

    Returns:
        The visual decoder output concatenated along axis 1 with the decoded
        non-visual part.
    """
    visual_decoded_output = self.visual_dec(inputs)

    # global: x,y,z-position, time-step and gravity constant (5 values)
    # nodes:  x,y,z-position and x,y,z-velocity (6 values)
    n_non_visual_elements = 5 if "global" in self._name else 6
    non_visual_latent_output = inputs[:, -n_non_visual_elements:]

    # Decode the non-visual slice with an MLP that ends in
    # `n_non_visual_elements` units, followed by LayerNorm.
    non_visual_mlp = snt.nets.MLP(
        [EncodeProcessDecode_v2.n_neurons_mlp_nonvisual, n_non_visual_elements],
        activate_final=True)
    non_visual_decoder = snt.Sequential([non_visual_mlp, snt.LayerNorm()])
    non_visual_decoded_output = non_visual_decoder(non_visual_latent_output)

    return tf.concat([visual_decoded_output, non_visual_decoded_output], axis=1)
def __init__(self,
             layer_sizes: Sequence[int],
             w_init: Optional[
                 snt.initializers.Initializer] = uniform_initializer,
             activation: Callable[[tf.Tensor], tf.Tensor] = tf.nn.elu,
             activate_final: bool = False):
    """Construct the MLP.

    The network is: Linear(layer_sizes[0]) -> LayerNorm -> tanh ->
    MLP(layer_sizes[1:]).

    Args:
        layer_sizes: a sequence of ints specifying the size of each layer.
        w_init: initializer for Linear weights.
        activation: activation function to apply between linear layers.
            Defaults to ELU. Note! This is different from snt.nets.MLP's
            default.
        activate_final: whether or not to use the activation function on the
            final layer of the neural network.
    """
    super().__init__(name='feedforward_mlp_torso')

    first_size, remaining_sizes = layer_sizes[0], layer_sizes[1:]

    input_projection = snt.Linear(first_size, w_init=w_init)
    # Normalise over every axis from index 1 onwards.
    input_norm = snt.LayerNorm(
        axis=slice(1, None), create_scale=True, create_offset=True)
    trunk = snt.nets.MLP(
        remaining_sizes,
        w_init=w_init,
        activation=activation,
        activate_final=activate_final)

    self._network = snt.Sequential(
        [input_projection, input_norm, tf.nn.tanh, trunk])
def make_atom_state_mlp_model():
    """Build the atom-state model: a 4x100 MLP followed by LayerNorm.

    Returns:
        A `snt.Sequential` of an MLP with four 100-unit layers (final
        activation on) and a LayerNorm.
    """
    # NOTE: the network is unregularised. An L1/L2 regularizers dict used to
    # be built here but was never passed to the MLP, so it has been removed
    # as dead code.
    return snt.Sequential([
        snt.nets.MLP([100, 100, 100, 100], activate_final=True),
        snt.LayerNorm()
    ])
def common_embedding(features, num_types, type_embedding_dim):
    """Embed the type column and prepend the first feature column.

    Args:
        features: 2-D indexable tensor; column 0 is cast to float32 and kept,
            column 1 is cast to int32 and used as the embedding index.
        num_types: vocabulary size of the embedding.
        type_embedding_dim: dimensionality of the embedding.

    Returns:
        Column 0 (as a float column) concatenated along axis 1 with the
        layer-normalised type embedding.
    """
    first_column = tf.cast(features[:, 0], dtype=tf.float32)
    preexistance_feat = tf.expand_dims(first_column, axis=1)

    type_embedder = snt.Embed(num_types, type_embedding_dim)
    norm = snt.LayerNorm()

    type_ids = tf.cast(features[:, 1], tf.int32)
    type_embedding = norm(type_embedder(type_ids))
    tf.summary.histogram('type_embedding_histogram', type_embedding)

    return tf.concat([preexistance_feat, type_embedding], axis=1)
def __init__(self, channel, kernel, name='unet_block_up'):
    """Create the conv -> norm -> relu layer list.

    Args:
        channel: first positional argument of `snt.Conv2D`.
        kernel: second positional argument of `snt.Conv2D`.
        name: module name forwarded to the base class.
    """
    super(UNetBlockUp, self).__init__(name=name)
    with self._enter_variable_scope(check_same_graph=False):
        conv = snt.Conv2D(channel, kernel, use_bias=False, name='conv')
        # LayerNorm over axes 1 and 2 with an offset but no scale.
        norm = snt.LayerNorm(axis=[1, 2], offset=True, scale=False, name='instance_norm')
        relu = partial(tf.nn.relu, name='relu')
        self._layers = [conv, norm, relu]
def testDataType(self, dtype):
    """Output, gamma and beta must all carry the dtype of the inputs."""
    placeholder = tf.placeholder(dtype, shape=[None, 64])
    layer_norm = snt.LayerNorm()
    normalized = layer_norm(placeholder)

    self.assertEqual(dtype, normalized.dtype)
    for parameter in (layer_norm.gamma, layer_norm.beta):
        self.assertEqual(dtype, parameter.dtype.base_dtype)
def testFloat16Error(self):
    """Connecting LayerNorm to float16 inputs must raise NotSupportedError."""
    half_precision_inputs = tf.placeholder(tf.float16, shape=[None, 64])
    layer_norm = snt.LayerNorm()

    err = (r"LayerNorm does not support `tf\.float16`, insufficient precision "
           "for calculating sufficient statistics.")
    with self.assertRaisesRegexp(snt.NotSupportedError, err):
        layer_norm(half_precision_inputs)
def transformer_mlp():
    """Build the feed-forward block, wrapped in a `Sequential` named 'mlp'.

    Reads `channels` and `dropout_rate` from the enclosing scope. The layer
    list is produced lazily by a factory passed to `Sequential`.

    Returns:
        The `Sequential` wrapping LayerNorm -> Linear(2 * channels) ->
        Dropout -> relu -> Linear(channels) -> Dropout.
    """
    def _layers():
        return [
            snt.LayerNorm(axis=-1, create_scale=True, create_offset=True),
            snt.Linear(channels * 2),
            snt.Dropout(dropout_rate),
            tf.nn.relu,
            snt.Linear(channels),
            snt.Dropout(dropout_rate),
        ]

    return Sequential(_layers, name='mlp')
def make_mlp_model_edges():
    """Create a fresh edge model: an MLP followed by LayerNorm.

    Each call builds new modules, so parameters are not shared between the
    models returned by separate calls.

    Returns:
        A Sonnet module which contains the MLP and LayerNorm.
    """
    hidden_sizes = ([EncodeProcessDecode_v1.n_neurons_edges]
                    * EncodeProcessDecode_v1.n_layers_edges)
    edge_mlp = snt.nets.MLP(hidden_sizes, activate_final=True)
    return snt.Sequential([edge_mlp, snt.LayerNorm()])
def _build(self, attribute_value):
    """Embed an attribute value with a 3-layer MLP followed by LayerNorm.

    Histogram summaries are recorded for both the raw value and the
    resulting embedding.

    Args:
        attribute_value: tensor to embed; it is cast to float32 first.

    Returns:
        The layer-normalised embedding.
    """
    tf.summary.histogram('cont_attribute_value_histogram', attribute_value)

    layer_widths = [self._attr_embedding_dim] * 3
    embedder = snt.Sequential([
        snt.nets.MLP(layer_widths, activate_final=True, use_dropout=True),
        snt.LayerNorm(),
    ])
    embedding = embedder(tf.cast(attribute_value, dtype=tf.float32))

    tf.summary.histogram('cont_embedding_histogram', embedding)
    return embedding
def _fn(batch):
    """Compute the mean softmax cross-entropy of a layer-normalised MLP.

    Reads `hidden_units` and `activation` from the enclosing scope.

    Args:
        batch: mapping with an "image" entry (flattened and fed to the
            network) and a "label_onehot" entry (used as labels).

    Returns:
        Scalar tensor: the mean of the per-example cross-entropy.
    """
    last_index = len(hidden_units) - 1

    net = snt.BatchFlatten()(batch["image"])
    for layer_index, width in enumerate(hidden_units):
        net = snt.Linear(width)(net)
        if layer_index == last_index:
            # The last linear layer produces the logits; leave it as is.
            continue
        net = snt.LayerNorm()(net)
        net = activation(net)

    loss_vec = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=batch["label_onehot"], logits=net)
    return tf.reduce_mean(loss_vec)
def make_mlp_model(latent_size, num_layers):
    """Build an MLP followed by LayerNorm; each call creates new modules.

    Args:
        latent_size: width of every MLP layer.
        num_layers: number of MLP layers.

    Returns:
        A `snt.Sequential` containing the MLP and the LayerNorm.
    """
    layer_widths = [latent_size] * num_layers
    # The final layer is activated too (activate_final=True).
    mlp = snt.nets.MLP(output_sizes=layer_widths, activate_final=True)
    layer_norm = snt.LayerNorm()
    return snt.Sequential([mlp, layer_norm])