def fcn(self, input):
    # Densely-connected fully-connected stack with periodic bottlenecks.
    block = input
    with tf.variable_scope("fcn"):
        # A transition (normalize / activate / compress) is applied roughly
        # every sqrt(num_fcn_layers) blocks.
        p = int(round(self._num_fcn_layers**0.5))
        for i in range(self._num_fcn_layers):
            with tf.variable_scope("dense_block_{}".format(i + 1)):
                if i % p == 0:
                    block = tf.contrib.layers.batch_norm(
                        inputs=block,
                        is_training=self.training,
                        updates_collections=None)
                    if i > 0:
                        block = tf.nn.selu(block, name="selu")
                block = dense_block(block, self._layer_width)
                if i > 0 and i % p == 0:
                    # Transition: alpha dropout, then shrink the feature size
                    # by the decay factor.
                    block = tf.contrib.nn.alpha_dropout(
                        block, 1.0 - self.dropout)
                    size = int(block.get_shape()[-1])
                    new_size = int(round(size * self._size_decay))
                    block = tf.layers.dense(
                        inputs=block,
                        units=new_size,
                        kernel_initializer=tf.truncated_normal_initializer(
                            stddev=stddev(1.0, size)),
                        bias_initializer=tf.constant_initializer(0.1))
                    print("fcn layer_{} decayed size:{}".format(i, new_size))
                else:
                    print("fcn layer_{} size:{}".format(
                        i, block.get_shape()[-1]))
    return block
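# Note: dense_block() is a helper defined elsewhere in this code base and its
# body is not shown in this section. Below is a minimal, standalone sketch of
# what it might do, assuming it appends `width` new SELU-activated features to
# its input, DenseNet-style. The names are illustrative, not the project's
# actual implementation.
import math

import tensorflow as tf


def dense_block_sketch(inputs, width):
    # Grow the feature axis: compute `width` new features and concatenate them
    # with the untouched inputs so later blocks see everything before them.
    fan_in = int(inputs.get_shape()[-1])
    new_features = tf.compat.v1.layers.dense(
        inputs=inputs,
        units=width,
        activation=tf.nn.selu,
        kernel_initializer=tf.compat.v1.truncated_normal_initializer(
            stddev=math.sqrt(1.0 / fan_in)),
        bias_initializer=tf.compat.v1.constant_initializer(0.1))
    return tf.concat([inputs, new_features], axis=-1)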
def logits(self):
    # Final classification layer on top of the recurrent feature extractor.
    layer = self.rnn(self.data)
    output = tf.compat.v1.layers.dense(
        inputs=layer,
        units=len(self._classes),
        kernel_initializer=tf.compat.v1.truncated_normal_initializer(
            stddev=stddev(1.0, int(layer.get_shape()[-1]))),
        bias_initializer=tf.compat.v1.constant_initializer(0.1),
        activation=tf.nn.relu6,
        name="output")
    return output
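# Note: stddev() used by the initializers above is a small project helper not
# shown in this section. Judging from the call sites, stddev(factor, fan_in),
# it presumably returns a fan-in-scaled standard deviation for the
# truncated-normal weight initializers; a plausible sketch (assumption only):
import math


def stddev_sketch(factor, fan_in):
    # Keep activations roughly unit-variance by shrinking weights as the
    # number of incoming units grows.
    return math.sqrt(factor / float(fan_in))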
def rnn(self, inputs):
    # Deep Recurrent network.
    feat_size = int(inputs.get_shape()[-1])
    p = int(round(self._num_rnn_layers**0.5))
    output_size = feat_size
    block = inputs
    with tf.compat.v1.variable_scope("dense_rnn"):
        for i in range(self._num_rnn_layers):
            with tf.compat.v1.variable_scope("rnn_{}".format(i + 1)):
                if i > 0:
                    block = tf.contrib.layers.batch_norm(
                        inputs=block,
                        is_training=self.training,
                        updates_collections=None)
                    block = tf.compat.v1.Print(
                        block, [block],
                        "{}_batch_norm: ".format(i),
                        summarize=10)
                    # block = tf.nn.selu(block, name="selu")
                if i == 0 or i % p != 0:
                    output_size += self._layer_width
                    c = DenseCellWrapper(
                        LayerNormGRUCell(
                            num_units=self._layer_width,
                            kernel_initializer=tf.compat.v1.
                            truncated_normal_initializer(
                                stddev=stddev(1.0, feat_size)),
                            bias_initializer=tf.compat.v1.constant_initializer(
                                0.1)),
                        output_size=output_size)
                    block, _ = self.rnn_block(block, c, self.seqlen)
                    block = tf.compat.v1.Print(
                        block, [block],
                        "{}_rnn_block: ".format(i),
                        summarize=10)
                    print("rnn layer_{} size:{}".format(i, output_size))
                else:
                    # bottleneck
                    output_size = int(round(output_size * self._size_decay))
                    c = LayerNormNASCell(num_units=output_size,
                                         use_biases=True)
                    block, _ = self.rnn_block(block, c, self.seqlen)
                    block = tf.compat.v1.Print(
                        block, [block],
                        "{}_bottleneck: ".format(i),
                        summarize=10)
                    block = tf.contrib.nn.alpha_dropout(
                        block, 1.0 - self.dropout)
                    block = tf.compat.v1.Print(
                        block, [block],
                        "{}_bottleneck_dropout: ".format(i),
                        summarize=10)
                    print("rnn layer_{} decayed size:{}".format(
                        i, output_size))
    return self.last_relevant(block, self.seqlen)
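# Note: DenseCellWrapper and self.rnn_block() come from the surrounding
# project and are not shown here. Below is a standalone sketch of the wrapper,
# assuming it concatenates the inner cell's output with the cell's input at
# every time step, which matches the output_size bookkeeping above. This is an
# illustration, not the project's actual class.
import tensorflow as tf


class DenseCellWrapperSketch(tf.compat.v1.nn.rnn_cell.RNNCell):
    """Wraps an RNNCell and appends its input to its output (DenseNet-style)."""

    def __init__(self, cell, output_size):
        super(DenseCellWrapperSketch, self).__init__()
        self._cell = cell
        # Expected to equal: input feature size + the inner cell's output size.
        self._output_size = output_size

    @property
    def state_size(self):
        return self._cell.state_size

    @property
    def output_size(self):
        return self._output_size

    def call(self, inputs, state):
        output, new_state = self._cell(inputs, state)
        # Downstream cells see both the new features and the raw inputs.
        return tf.concat([output, inputs], axis=-1), new_state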
def logits(self):
    # Recurrent features -> dense stack -> batch norm -> output projection.
    layer = self.rnn(self.data)
    layer = self.fcn(layer)
    layer = tf.compat.v1.Print(layer, [layer], "fcn: ", summarize=10)
    layer = tf.contrib.layers.batch_norm(
        inputs=layer,
        is_training=self.training,
        updates_collections=None)
    output = tf.compat.v1.layers.dense(
        inputs=layer,
        units=len(self._classes),
        kernel_initializer=tf.compat.v1.truncated_normal_initializer(
            stddev=stddev(1.0, int(layer.get_shape()[-1]))),
        bias_initializer=tf.compat.v1.constant_initializer(0.1),
        activation=tf.nn.selu,
        name="output")
    return output
def rnn(self, input):
    # Deep Recurrent network.
    cells = []
    feat_size = int(input.get_shape()[-1])
    p = int(round(self._num_rnn_layers**0.35))
    output_size = self._layer_width + feat_size
    for i in range(self._num_rnn_layers):
        for j in range(self._rnn_layer_size):
            c = DenseCellWrapper(
                LayerNormGRUCell(
                    num_units=self._layer_width,
                    kernel_initializer=tf.compat.v1.
                    truncated_normal_initializer(
                        stddev=stddev(1.0, feat_size)),
                    bias_initializer=tf.compat.v1.constant_initializer(0.1),
                    input_layer_norm=(not (i == 0 and j == 0))),
                output_size=output_size)
            if not (i == 0 and j == 0):
                c = AlphaDropoutWrapper(c, input_keep_prob=1.0 - self.dropout)
            output_size += self._layer_width
            cells.append(c)
        if i == 0 or i % p != 0:
            c = DenseCellWrapper(
                LayerNormNASCell(
                    num_units=self._layer_width,
                    use_biases=True,
                    input_layer_norm=True),
                output_size=output_size)
            c = AlphaDropoutWrapper(c, input_keep_prob=1.0 - self.dropout)
            output_size += self._layer_width
            print("rnn layer_{} size:{}".format(i, output_size))
            cells.append(c)
        else:
            # bottleneck
            size = int(round(output_size * self._size_decay))
            c = AlphaDropoutWrapper(
                LayerNormNASCell(
                    num_units=size,
                    use_biases=True,
                    input_layer_norm=True),
                input_keep_prob=1.0 - self.dropout)
            output_size = size
            print("rnn layer_{} decayed size:{}".format(i, output_size))
            cells.append(c)
    # Stack layers of cell
    mc = tf.compat.v1.nn.rnn_cell.MultiRNNCell(cells)
    output, _ = tf.compat.v1.nn.dynamic_rnn(
        mc, input, dtype=tf.float32, sequence_length=self.seqlen)
    return self.last_relevant(output, self.seqlen)
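# Note: self.last_relevant() is not shown in this section. It is assumed to
# pick, for each sequence in the batch, the RNN output at that sequence's true
# (un-padded) last step, so variable-length sequences each yield one feature
# vector. A common standalone formulation (sketch, not the project's code):
import tensorflow as tf


def last_relevant_sketch(output, seqlen):
    # output: [batch, max_time, depth]; seqlen: [batch] of valid lengths.
    batch_size = tf.shape(output)[0]
    max_time = tf.shape(output)[1]
    depth = int(output.get_shape()[-1])
    # Row b's last valid step lives at flat index b * max_time + (len_b - 1).
    index = tf.range(batch_size) * max_time + (tf.cast(seqlen, tf.int32) - 1)
    flat = tf.reshape(output, [-1, depth])
    return tf.gather(flat, index)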
def logits(self):
    layer = self.rnn(self.data)
    # layer = self.fcn(layer)
    # layer = tf.Print(layer, [layer], "fcn: ", summarize=10)
    # layer = tf.contrib.layers.batch_norm(
    #     inputs=layer,
    #     is_training=self.training,
    #     updates_collections=None
    # )
    layer = tf.contrib.nn.alpha_dropout(layer, 1.0 - self.dropout)
    output = tf.layers.dense(
        inputs=layer,
        units=len(self._classes),
        kernel_initializer=tf.truncated_normal_initializer(
            stddev=stddev(1.0, int(layer.get_shape()[-1]))),
        bias_initializer=tf.constant_initializer(0.1),
        activation=tf.nn.relu6,
        name="output")
    return output