def fairness(self, vecs):
    r"""Build the fairness-metrics head.

    The tensors in ``vecs`` are concatenated along axis 1 and passed through
    ``self.num_dis_layers`` hidden blocks. Each block applies a linear
    ``FullyConnected`` layer of width ``self.num_dis_hidden``, appends the
    mini-batch diversity features returned by :meth:`batch_diversity`, and
    then applies batch normalization, dropout and a leaky ReLU. A final
    linear layer maps the result to one unit per sample.

    Args:
        vecs(list[tensorflow.Tensor]): Tensors to concatenate along axis 1.

    Returns:
        Output of the final one-unit ``FullyConnected`` layer
        (``fair_fc_top``).

    """
    hidden = tf.concat(vecs, axis=1)

    for layer_idx in range(self.num_dis_layers):
        with tf.variable_scope('fair_fc{}'.format(layer_idx)):
            # Only the first layer overrides the default kernel initializer.
            fc_kwargs = {'nl': tf.identity}
            if layer_idx == 0:
                fc_kwargs['kernel_initializer'] = \
                    tf.truncated_normal_initializer(stddev=0.1)

            projected = FullyConnected(
                'fc', hidden, self.num_dis_hidden, **fc_kwargs)
            widened = tf.concat(
                [projected, self.batch_diversity(projected)], axis=1)
            normed = BatchNorm('bn', widened, center=True, scale=False)
            dropped = Dropout(normed)
            hidden = tf.nn.leaky_relu(dropped)

    return FullyConnected('fair_fc_top', hidden, 1, nl=tf.identity)
def _get_DQN_prediction(self, image):
    """Build the Q-value network for one observation tensor.

    image: [0,255]

    :returns: tensor named ``Qvalue``; the plain head has
        ``self.num_actions`` outputs, the dueling head combines a
        one-unit value stream with a ``self.num_actions``-unit advantage
        stream.
    """
    scaled = image / 255.0

    # (layer name, out_channel, kernel_shape, stride) -- Nature architecture
    conv_specs = (
        ('conv0', 32, [8, 8, 3], [2, 2, 1]),
        ('conv1', 32, [8, 8, 3], [2, 2, 1]),
        ('conv2', 64, [4, 4, 3], [2, 2, 1]),
        ('conv3', 64, [3, 3, 3], [1, 1, 1]),
    )
    # (layer name, units); each is followed by leaky_relu(alpha=0.01)
    fc_specs = (('fc0', 512), ('fc1', 256), ('fc2', 128))

    # The original comments tag tf.nn.relu as the "brain" setting and PReLU
    # as the "cardiac" setting; PReLU is the one in effect.
    with argscope(Conv3D,
                  activation=PReLU.symbolic_function,
                  use_bias=True):
        net = LinearWrap(scaled)
        for layer_name, channels, kernel, stride in conv_specs:
            net = net.Conv3D(layer_name, out_channel=channels,
                             kernel_shape=kernel, stride=stride)
        for layer_name, units in fc_specs:
            net = net.FullyConnected(layer_name, units)
            net = net.tf.nn.leaky_relu(alpha=0.01)
        l = net()  # unwrap the LinearWrap into a plain tensor

    if 'Dueling' in self.method:
        # Dueling DQN: Q = A + (V - mean(A))
        V = FullyConnected('fctV', l, 1, activation=tf.identity)
        As = FullyConnected('fctA', l, self.num_actions,
                            activation=tf.identity)
        advantage_mean = tf.reduce_mean(As, 1, keepdims=True)
        Q = tf.add(As, V - advantage_mean)
    else:
        Q = FullyConnected('fct', l, self.num_actions, nl=tf.identity)

    return tf.identity(Q, name='Qvalue')
def _get_NN_prediction(self, state):
    """Build the policy and value heads on top of a small conv net.

    The rank-5 ``state`` has its last two axes swapped and is reshaped so
    that the last axis has size ``state_shape[2] * frame_history``, then
    cast to float32 and divided by 255.

    :param state: rank-5 tensor (Batch, H, W, Channel, History)
    :returns: ``(logits, value)`` -- outputs of the ``fc-pi`` layer
        (``self.num_actions`` units, unnormalized policy) and the ``fc-v``
        layer (1 unit)
    """
    assert state.shape.rank == 5  # Batch, H, W, Channel, History

    # swap channel & history, to be compatible with old models
    swapped = tf.transpose(state, [0, 1, 2, 4, 3])

    flat_shape = [-1]
    flat_shape.extend(self.state_shape[:2])
    flat_shape.append(self.state_shape[2] * self.frame_history)
    image = tf.cast(tf.reshape(swapped, flat_shape), tf.float32) / 255.0

    # (filters, kernel size); every conv but the last is followed by a
    # 2x max-pool.
    conv_specs = ((32, 5), (32, 5), (64, 4), (64, 3))
    last_conv = len(conv_specs) - 1

    features = image
    with argscope(Conv2D, activation=tf.nn.relu):
        for idx, (filters, kernel) in enumerate(conv_specs):
            features = Conv2D('conv{}'.format(idx), features, filters, kernel)
            if idx != last_conv:
                features = MaxPooling('pool{}'.format(idx), features, 2)

    features = FullyConnected('fc0', features, 512)
    features = PReLU('prelu', features)

    # unnormalized policy
    logits = FullyConnected('fc-pi', features, self.num_actions)
    value = FullyConnected('fc-v', features, 1)
    return logits, value
def discriminator(self, vecs):
    r"""Build discriminator.

    We use a :math:`l`-layer fully connected neural network as the
    discriminator. The tensors in ``vecs`` (:math:`v_{1:n_c}`,
    :math:`u_{1:n_c}` and :math:`d_{1:n_d}`) are concatenated together as
    the input :math:`f^{(D)}_{0}`.

    As implemented here, each internal layer :math:`i = 1:l` computes

    .. math::
        \begin{aligned}
        g_i &= W^{(D)}_{i} f^{(D)}_{i-1}\\
        f^{(D)}_{i} &= \textrm{LeakyReLU}(\textrm{Dropout}(\textrm{LN}(
            g_i \oplus \textrm{diversity}(g_i))))
        \end{aligned}

    where :math:`\oplus` is the concatenation operation,
    :math:`\textrm{LN}(\cdot)` is layer normalization and
    :math:`\textrm{LeakyReLU}(\cdot)` is the leaky rectified linear
    activation function. :math:`\textrm{diversity}(\cdot)` is the
    mini-batch discrimination vector computed by :meth:`batch_diversity`:
    each dimension aggregates a learned similarity between one sample and
    all samples in the mini-batch.

    The output of the discriminator is :math:`W^{(D)} f^{(D)}_{l}`, one
    logit per sample.

    Args:
        vecs(list[tensorflow.Tensor]): List of tensors matching the spec of :meth:`inputs`

    Returns:
        Output of the final one-unit ``FullyConnected`` layer, a (b, 1)
        logits tensor.

    """
    features = tf.concat(vecs, axis=1)

    with tf.variable_scope('discrim'):
        for depth in range(self.num_dis_layers):
            with tf.variable_scope('dis_fc{}'.format(depth)):
                # The first layer is the only one with a custom initializer.
                extra = {}
                if depth == 0:
                    extra['kernel_initializer'] = \
                        tf.truncated_normal_initializer(stddev=0.1)

                linear = FullyConnected(
                    'fc', features, self.num_dis_hidden,
                    nl=tf.identity, **extra)
                with_diversity = tf.concat(
                    [linear, self.batch_diversity(linear)], axis=1)
                normalized = LayerNorm('ln', with_diversity)
                regularized = Dropout(normalized)
                features = tf.nn.leaky_relu(regularized)

        return FullyConnected('dis_fc_top', features, 1, nl=tf.identity)
def _get_DQN_prediction(self, image):
    """Build the Q-value network (3D conv trunk + fully connected head).

    image: [0,255]

    :returns predicted Q values (tensor named ``Qvalue``)
    """
    # FIXME norm not needed
    # normalize image values to [0, 1]
    scaled = image / 255.0

    # (out_channel, cubic kernel size) for conv0..conv3; a 2x MaxPooling3D
    # sits between consecutive convolutions (pool0..pool2), none after conv3.
    # TODO: use observation dimensions?
    conv_specs = ((32, 5), (32, 5), (64, 4), (64, 3))

    with argscope(Conv3D, nl=PReLU.symbolic_function, use_bias=True):
        # core layers of the network
        conv = LinearWrap(scaled)
        for idx, (channels, size) in enumerate(conv_specs):
            if idx > 0:
                conv = conv.MaxPooling3D('pool{}'.format(idx - 1), 2)
            conv = conv.Conv3D('conv{}'.format(idx),
                               out_channel=channels,
                               kernel_shape=[size, size, size],
                               stride=[1, 1, 1])

    def hidden_stack(suffix):
        """Three FC + leaky-ReLU layers on top of the shared conv trunk."""
        net = conv
        for depth, units in enumerate((512, 256, 128)):
            net = net.FullyConnected('fc{}{}'.format(depth, suffix), units)
            net = net.tf.nn.leaky_relu(alpha=0.01)
        return net()

    if 'Dueling' in self.method:
        # Dueling DQN or Double Dueling
        # state value function
        lv = hidden_stack('V')
        V = FullyConnected('fctV', lv, 1, nl=tf.identity)
        # advantage value function
        la = hidden_stack('A')
        As = FullyConnected('fctA', la, self.num_actions, nl=tf.identity)
        Q = tf.add(As, V - tf.reduce_mean(As, 1, keepdims=True))
    else:
        lq = hidden_stack('')
        Q = FullyConnected('fct', lq, self.num_actions, nl=tf.identity)

    return tf.identity(Q, name='Qvalue')
def batch_diversity(l, n_kernel=10, kernel_dim=10):
    r"""Return the minibatch discrimination vector.

    Let :math:`f(x_i) \in \mathbb{R}^A` denote a vector of features for input :math:`x_i`,
    produced by some intermediate layer in the discriminator. The vector :math:`f(x_i)` is
    multiplied by a tensor :math:`T \in \mathbb{R}^{A \times B \times C}`, which results in a
    matrix :math:`M_i \in \mathbb{R}^{B \times C}` (here :math:`B` is ``n_kernel`` and
    :math:`C` is ``kernel_dim``; the product is a linear ``FullyConnected`` layer followed
    by a reshape).

    The :math:`L_1`-distance between the rows of :math:`M_i` across samples
    :math:`i \in \{1, 2, \ldots, n\}` is computed and passed through a negative exponential:

    .. math::

        c_b(x_i, x_j) = \exp(-||M_{i,b} - M_{j,b}||_{L_1}) \in \mathbb{R}.

    The output :math:`o(x_i)` of this *minibatch layer* for a sample :math:`x_i` is the sum
    of the :math:`c_b(x_i, x_j)` over all samples :math:`x_j` in the batch:

    .. math::
        :nowrap:

        \begin{aligned}
        &o(x_i)_b = \sum^{n}_{j=1} c_b(x_i , x_j) \in \mathbb{R}\\
        &o(x_i) = \Big[ o(x_i)_1, o(x_i)_2, \ldots, o(x_i)_B \Big] \in \mathbb{R}^B\\
        &o(X) \in \mathbb{R}^{n \times B}\\
        \end{aligned}

    Note:
        This is extracted from `Improved techniques for training GANs`_ (Section 3.2) by
        Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, and
        Xi Chen.

    .. _Improved techniques for training GANs: https://arxiv.org/pdf/1606.03498.pdf

    Args:
        l(tf.Tensor): features to project
        n_kernel(int): number of kernels :math:`B`
        kernel_dim(int): size :math:`C` of each kernel

    Returns:
        tensorflow.Tensor

    """
    kernel_shape = [n_kernel, kernel_dim]

    projected = FullyConnected(
        'fc_diversity', l, n_kernel * kernel_dim, nl=tf.identity)
    per_sample = tf.reshape(projected, [-1] + kernel_shape)

    # Two broadcastable views of the same tensor so the subtraction below
    # produces every pairwise difference in the batch.
    along_axis0 = tf.reshape(per_sample, [-1, 1] + kernel_shape)
    along_axis1 = tf.reshape(per_sample, [1, -1] + kernel_shape)

    l1_distance = tf.reduce_sum(tf.abs(along_axis0 - along_axis1), axis=3)
    similarity = tf.exp(-l1_distance)
    return tf.reduce_sum(similarity, axis=0)
def build_graph(self, image, label):
    """Build the CIFAR ResNet graph and return the total training cost.

    The image is divided by 128, passed through an input convolution and
    three residual groups, then an ``ActBias`` layer and a linear
    classifier. The returned cost is the mean softmax cross-entropy plus
    an L2 penalty on variables matching ``.*/W`` whose weight follows a
    staircase exponential decay of the global step.

    :param image: input image batch
    :param label: integer class labels
    :returns: scalar tensor named ``cost``
    """
    image = image / 128.0
    assert tf.test.is_gpu_available()

    # (group name, channels, stride); trailing comment = shape after group
    group_specs = (
        ("g1", 16, 1),  # shape = [batchsize, 32, 32, 16]
        ("g2", 32, 2),  # shape = [batchsize, 16, 16, 32]
        ("g3", 64, 2),  # shape = [batchsize, 8, 8, 64]
    )

    with tf.variable_scope(self._name):
        x = ScaleNormConv2D(image, 16, 3, 1, name="conv_input")
        for group_name, channels, stride in group_specs:
            x = CifarResNet.build_group(x, self._n, channels,
                                        stride=stride,
                                        mult_decay=self._mult_decay,
                                        name=group_name)

        # normalise the final output by the accumulated multiplier
        # x = BatchNorm("bn_last", x, epsilon=EPSILON, center=False, scale=True)
        x = ActBias(x, name="act_top")
        # x = GlobalAvgPooling("gap", x)
        logits = FullyConnected("linear", x, self._n_classes)

    # Not used below; creates the named "prob" op in the graph.
    prob = tf.nn.softmax(logits, name="prob")

    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    cost = tf.reduce_mean(per_example_loss, name="cross_entropy_loss")

    is_correct = tf.nn.in_top_k(logits, label, 1)
    wrong = tf.cast(tf.logical_not(is_correct), tf.float32,
                    name="wrong_vector")
    add_moving_summary(tf.reduce_mean(wrong, name="train_error"))

    # Weight-decay coefficient: 0.0002 scaled by 0.2 every 480000 steps.
    wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                      480000, 0.2, True)
    l2_penalty = regularize_cost('.*/W', tf.nn.l2_loss)
    wd_cost = tf.multiply(wd_w, l2_penalty, name='wd_cost')
    add_moving_summary(cost, wd_cost)

    return tf.add_n([cost, wd_cost], name="cost")
def encode(self, x):
    """Encode ``x`` into the two halves of a ``2 * latent_dim`` vector.

    Two stride-2, 'valid'-padded 3x3 ReLU convolutions are followed by a
    flatten and a ``FullyConnected`` layer with ``2 * self._latent_dim``
    units; that output is split in two along axis 1.

    :param x: input tensor fed to the first convolution
    :returns: ``(mean, logvar)`` -- the first and second half of the
        final layer's output
    """
    with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE):
        features = x
        with argscope(Conv2D, activation=tf.nn.relu):
            for idx, filters in enumerate((32, 64), start=1):
                features = Conv2D('conv3x3_{}'.format(idx), features,
                                  filters, 3,
                                  strides=(2, 2), padding='valid')

        flat = tf.layers.Flatten()(features)
        params = FullyConnected('fc', flat, 2 * self._latent_dim)
        mean, logvar = tf.split(params, num_or_size_splits=2, axis=1)
        return mean, logvar
def decode(self, z, apply_sigmoid=False):
    """Decode a latent tensor ``z`` back to image space.

    ``z`` is projected by a ReLU ``FullyConnected`` layer, reshaped to
    ``(-1, H // 4, W // 4, 32)`` (``H``, ``W`` taken from
    ``self._image_shape``), upsampled by two stride-2 ReLU transposed
    convolutions, and mapped to ``self._image_shape[2]`` channels by a
    final stride-1 transposed convolution with no activation.

    The ReLU is passed to each layer explicitly: an ``argscope`` on
    ``Conv2D`` does not reach ``Conv2DTranspose``, so relying on it left
    the upsampling layers without a non-linearity.

    :param z: latent tensor fed to the first ``FullyConnected`` layer
    :param apply_sigmoid: if true, apply ``tf.sigmoid`` to the output
    :returns: output of the last transposed convolution (logits), or its
        sigmoid when ``apply_sigmoid`` is set
    """
    # Two stride-2 upsampling layers follow, so start from a quarter of
    # the target spatial size.
    start_height = int(self._image_shape[0] // 4)
    start_width = int(self._image_shape[1] // 4)
    start_channels = 32
    pre_convT_shape = [-1, start_height, start_width, start_channels]
    pre_convT_unit = start_height * start_width * start_channels

    with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
        h = FullyConnected('fc', z, pre_convT_unit, activation=tf.nn.relu)
        h = tf.reshape(h, pre_convT_shape)
        h = Conv2DTranspose('convT3x3_1', h, 64, 3, strides=(2, 2),
                            activation=tf.nn.relu)
        h = Conv2DTranspose('convT3x3_2', h, 32, 3, strides=(2, 2),
                            activation=tf.nn.relu)
        # Output layer stays linear so it produces logits. The layer name
        # says 1x1 but the kernel is 3x3; the name is kept so existing
        # checkpoints still match.
        h = Conv2DTranspose('convT1x1_1', h, self._image_shape[2], 3,
                            strides=(1, 1))

        if apply_sigmoid:
            h = tf.sigmoid(h)
        return h
def _get_DQN_prediction(self, images):
    """Build one Q-value head per agent on top of a shared 3D conv trunk.

    Every agent's image goes through the same ``conv0..conv3`` variables
    (created for the first agent and reused for the rest); the fully
    connected layers are separate per agent (names carry the agent index).

    image: [0,255]

    :param images: sequence of per-agent image tensors; it is not modified
    :returns predicted Q values -- list with one tensor per agent, named
        ``Qvalue_<i>``
    """
    # (filters, cubic kernel size) for conv0..conv3
    conv_specs = ((32, 5), (32, 5), (64, 4), (64, 3))
    last_conv = len(conv_specs) - 1
    dueling = 'Dueling' in self.method

    def conv_trunk(volume, reuse):
        """conv0-pool0-conv1-pool1-conv2-pool2-conv3 with shared weights."""
        # NOTE(review): tf.layers.conv3d is called without an activation,
        # exactly as in the original, so the conv stack is linear between
        # the max-pools. The original wrapped this code in
        # argscope(Conv3D, nl=PReLU...), which presumably had no effect on
        # tf.layers.conv3d -- confirm whether an activation was intended.
        net = volume
        for idx, (filters, size) in enumerate(conv_specs):
            net = tf.layers.conv3d(
                net,
                name='conv{}'.format(idx),
                reuse=reuse,
                filters=filters,
                kernel_size=[size, size, size],
                strides=[1, 1, 1],
                padding='same',
                kernel_initializer=tf.contrib.layers.
                variance_scaling_initializer(2.0),
                bias_initializer=tf.zeros_initializer())
            if idx != last_conv:
                pool_name = 'max_pool{}'.format(idx)
                # The original names the first pool 'max_pool0' etc.
                net = tf.layers.max_pooling3d(net, 2, 2, name=pool_name)
        return net

    def hidden_stack(features, stream, agent):
        """Three ReLU FC layers named fc<k><stream>_<agent>."""
        net = features
        for depth, units in enumerate((512, 256, 128)):
            layer_name = 'fc{}{}_{}'.format(depth, stream, agent)
            net = FullyConnected(layer_name, net, units,
                                 activation=tf.nn.relu)
        return net

    Q_list = []
    for i, image in enumerate(images):
        # normalize image values to [0, 1] without writing the result
        # back into the caller's list
        scaled = image / 255.0

        # First agent creates the conv variables (reuse=None inherits the
        # enclosing scope's setting); later agents share them.
        conv3 = conv_trunk(scaled, reuse=True if i > 0 else None)

        ### now for the dense layers##
        if not dueling:
            fc2 = hidden_stack(conv3, '', i)
            Q = FullyConnected('fct_{}'.format(i), fc2, self.num_actions,
                               activation=tf.identity)
        else:
            # state value stream
            fcV2 = hidden_stack(conv3, 'V', i)
            V = FullyConnected('fctV_{}'.format(i), fcV2, 1,
                               activation=tf.identity)
            # advantage stream: needs its own layer names, otherwise it
            # collides with the value stream's variables
            fcA2 = hidden_stack(conv3, 'A', i)
            A = FullyConnected('fctA_{}'.format(i), fcA2, self.num_actions,
                               activation=tf.identity)
            Q = tf.add(A, V - tf.reduce_mean(A, 1, keepdims=True))

        Q_list.append(tf.identity(Q, name='Qvalue_{}'.format(i)))

    return Q_list
def _get_DQN_prediction(self, image):
    """Build the Q-value network with per-section variable freezing.

    Three flags are forwarded as ``skip_collection`` to
    ``freeze_variables`` (always with ``stop_gradient=False``):

    * ``self.conv_freeze`` -- the 3D conv trunk (``conv0..conv3``)
    * ``self.fc_freeze`` -- every hidden FC stack (``fc0..fc2``,
      ``fc0V..fc2V`` and ``fc0A..fc2A``)
    * ``self.final_layer_freeze`` -- the output layers (``fct``, ``fctV``,
      ``fctA``)

    Each section gets its own, non-nested scope so that one flag never
    leaks onto another section, and the advantage stream's hidden layers
    follow ``fc_freeze`` just like the value stream's.

    image: [0,255]

    :returns predicted Q values (tensor named ``Qvalue``)
    """
    # normalize image values to [0, 1]
    image = image / 255.0

    with argscope(Conv3D, nl=PReLU.symbolic_function, use_bias=True):
        # core layers of the network
        with freeze_variables(stop_gradient=False,
                              skip_collection=self.conv_freeze):  # conv
            conv = (
                LinearWrap(image)
                .Conv3D('conv0', out_channel=32,
                        kernel_shape=[5, 5, 5], stride=[1, 1, 1])
                .MaxPooling3D('pool0', 2)
                .Conv3D('conv1', out_channel=32,
                        kernel_shape=[5, 5, 5], stride=[1, 1, 1])
                .MaxPooling3D('pool1', 2)
                .Conv3D('conv2', out_channel=64,
                        kernel_shape=[4, 4, 4], stride=[1, 1, 1])
                .MaxPooling3D('pool2', 2)
                .Conv3D('conv3', out_channel=64,
                        kernel_shape=[3, 3, 3], stride=[1, 1, 1])
                # .MaxPooling3D('pool3',2)
            )

    def hidden_stack(suffix):
        """FC(512)-FC(256)-FC(128) with leaky ReLU, under fc_freeze."""
        with freeze_variables(stop_gradient=False,
                              skip_collection=self.fc_freeze):  # fc
            return (
                conv
                .FullyConnected('fc0' + suffix, 512)
                .tf.nn.leaky_relu(alpha=0.01)
                .FullyConnected('fc1' + suffix, 256)
                .tf.nn.leaky_relu(alpha=0.01)
                .FullyConnected('fc2' + suffix, 128)
                .tf.nn.leaky_relu(alpha=0.01)())

    def output_layer(name, inputs, units):
        """Linear output layer, under final_layer_freeze."""
        with freeze_variables(
                stop_gradient=False,
                skip_collection=self.final_layer_freeze):  # fclast
            return FullyConnected(name, inputs, units, nl=tf.identity)

    if 'Dueling' not in self.method:
        lq = hidden_stack('')
        Q = output_layer('fct', lq, self.num_actions)
    else:
        # Dueling DQN or Double Dueling
        # state value function
        lv = hidden_stack('V')
        V = output_layer('fctV', lv, 1)
        # advantage value function
        la = hidden_stack('A')
        As = output_layer('fctA', la, self.num_actions)
        Q = tf.add(As, V - tf.reduce_mean(As, 1, keepdims=True))

    return tf.identity(Q, name='Qvalue')
def generator(self, z):
    r"""Build generator graph.

    We generate a numerical variable in 2 steps. We first generate the value
    scalar :math:`v_i`, then generate the cluster vector :math:`u_i`. We generate
    categorical feature in 1 step as a probability distribution over all possible labels.

    The output and hidden state size of LSTM is :math:`n_h`. The input to the LSTM in each
    step :math:`t` is the random variable :math:`z`, the previous hidden vector :math:`f_{t-1}`
    or an embedding vector :math:`f^{\prime}_{t-1}` depending on the type of previous output,
    and the weighted context vector :math:`a_{t-1}`.

    The random variable :math:`z` has :math:`n_z` dimensions.
    Each dimension is sampled from :math:`\mathcal{N}(0, 1)`.

    The attention-based context vector :math:`a_t` is a weighted average over all the
    previous LSTM outputs :math:`h_{1:t}`. So :math:`a_t` is a :math:`n_h`-dimensional
    vector. We learn an attention weight vector :math:`\alpha_t \in \mathbb{R}^t` and
    compute context as

    .. math::
        a_t = \sum_{k=1}^{t} \frac{\textrm{exp}\, \alpha_{t, k}}
            {\sum_{j} \textrm{exp}\, \alpha_{t,j}} h_k.

    We set :math:`a_0 = 0`.

    The output of LSTM is :math:`h_t` and we project the output to a hidden vector
    :math:`f_t = \textrm{tanh}(W_h h_t)`, where :math:`W_h` is a learned parameter in the
    network. The size of :math:`f_t` is :math:`n_f`.
    We further convert the hidden vector to an output variable.

    * If the output is the value part of a continuous variable, we compute the output as
      :math:`v_i = \textrm{tanh}(W_t f_t)`. The hidden vector for :math:`t + 1` step is
      :math:`f_t`.

    * If the output is the cluster part of a continuous variable, we compute the output as
      :math:`u_i = \textrm{softmax}(W_t f_t)`. The input for :math:`t + 1` step is a
      learned linear projection of :math:`u_i` to :math:`n_f` dimensions.

    * If the output is a discrete variable, we compute the output as
      :math:`d_i = \textrm{softmax}(W_t f_t)`. The hidden vector for :math:`t + 1` step is
      :math:`f^{\prime}_{t} = E_i [arg_k \hspace{0.25em} \textrm{max} \hspace{0.25em} d_i ]`,
      where :math:`E \in R^{|D_i| \times n_f}` is an embedding matrix for discrete
      variable :math:`D_i`.

    * :math:`f_0` is a special vector :math:`\texttt{<GO>}` and we learn it during the
      training.

    Args:
        z: random input tensor; it is concatenated (axis 1) to the LSTM input at
            every step.

    Returns:
        list[tensorflow.Tensor]: Outputs in column order. A ``value`` column contributes
        two tensors (the 1-unit tanh output, then the softmax over its ``n`` components);
        a ``category`` column contributes one softmax tensor.

    Raises:
        ValueError: If any of the elements in self.metadata['details']
            has an unsupported value in the `type` key.

    """
    def advance(step_input, context, lstm_state):
        """Run one LSTM step and record its state[1] for later attention."""
        step_output, lstm_state = cell(
            tf.concat([step_input, context], axis=1), lstm_state)
        states.append(lstm_state[1])
        return step_output, lstm_state

    def attend():
        """Softmax-weighted sum over all recorded states.

        Creates the ``attw`` variable, so it must be called inside the
        current step's variable scope.
        """
        attw = tf.get_variable("attw", shape=(len(states), 1, 1))
        attw = tf.nn.softmax(attw, axis=0)
        return tf.reduce_sum(tf.stack(states, axis=0) * attw, axis=0)

    with tf.variable_scope('LSTM'):
        cell = tf.nn.rnn_cell.LSTMCell(self.num_gen_rnn)

        state = cell.zero_state(self.batch_size, dtype='float32')
        attention = tf.zeros(
            shape=(self.batch_size, self.num_gen_rnn), dtype='float32')

        # `step_input` replaces the original local named `input`, which
        # shadowed the builtin.
        go = tf.get_variable(
            name='go', shape=(1, self.num_gen_feature))  # <GO>
        go = tf.tile(go, [self.batch_size, 1])
        step_input = tf.concat([go, z], axis=1)

        ptr = 0  # index of the LSTM step; names each step's variable scope
        outputs = []
        states = []
        for col_id, col_info in enumerate(self.metadata['details']):
            col_type = col_info['type']

            if col_type == 'value':
                # Step 1: the scalar value part.
                output, state = advance(step_input, attention, state)
                gaussian_components = col_info['n']
                with tf.variable_scope("%02d" % ptr):
                    h = FullyConnected(
                        'FC', output, self.num_gen_feature, nl=tf.tanh)
                    outputs.append(FullyConnected('FC2', h, 1, nl=tf.tanh))
                    step_input = tf.concat([h, z], axis=1)
                    attention = attend()
                ptr += 1

                # Step 2: the cluster (component) part.
                output, state = advance(step_input, attention, state)
                with tf.variable_scope("%02d" % ptr):
                    h = FullyConnected(
                        'FC', output, self.num_gen_feature, nl=tf.tanh)
                    w = FullyConnected(
                        'FC2', h, gaussian_components, nl=tf.nn.softmax)
                    outputs.append(w)
                    projected = FullyConnected(
                        'FC3', w, self.num_gen_feature, nl=tf.identity)
                    step_input = tf.concat([projected, z], axis=1)
                    attention = attend()
                ptr += 1

            elif col_type == 'category':
                output, state = advance(step_input, attention, state)
                with tf.variable_scope("%02d" % ptr):
                    h = FullyConnected(
                        'FC', output, self.num_gen_feature, nl=tf.tanh)
                    w = FullyConnected(
                        'FC2', h, col_info['n'], nl=tf.nn.softmax)
                    outputs.append(w)
                    # Feed back an embedding of the most likely label.
                    one_hot = tf.one_hot(tf.argmax(w, axis=1), col_info['n'])
                    projected = FullyConnected(
                        'FC3', one_hot, self.num_gen_feature, nl=tf.identity)
                    step_input = tf.concat([projected, z], axis=1)
                    attention = attend()
                ptr += 1

            else:
                # The accepted spelling is `value`, matching the check above.
                raise ValueError(
                    "self.metadata['details'][{}]['type'] must be either "
                    "`category` or `value`. Instead it was {}."
                    .format(col_id, col_type))

    return outputs
def build_graph(self, image, label):
    """Build the CIFAR ResNet graph and return the total training cost.

    The image is scaled by 1/128 and the pixel second moment
    ``CIFAR_TRAIN_PIXEL_MOMENT2`` is scaled by the square of that factor
    before being handed to the input convolution. Activation summaries
    (mean, rms, histogram) are recorded after the input convolution and
    after each of the three residual groups. The returned cost is the
    mean softmax cross-entropy plus an L2 penalty on variables matching
    ``.*/W`` whose weight follows a staircase exponential decay of the
    global step.

    :param image: input image batch
    :param label: integer class labels
    :returns: scalar tensor named ``cost``
    """
    scale_image = 1. / 128.0
    image_moment2 = CIFAR_TRAIN_PIXEL_MOMENT2 * scale_image * scale_image
    image = image * scale_image
    assert tf.test.is_gpu_available()

    summary_types = ("mean", "rms", "histogram")
    # (group name, channels, stride); trailing comment = shape after group
    group_specs = (
        ("g1", 16, 1),  # shape = [batchsize, 32, 32, 16]
        ("g2", 32, 2),  # shape = [batchsize, 16, 16, 32]
        ("g3", 64, 2),  # shape = [batchsize, 8, 8, 64]
    )

    with tf.variable_scope(self._name):
        x = NormConv2DScale(image, 16, 3, 1,
                            center=self._center,
                            input_moment2=image_moment2,
                            name="conv_input")
        add_activation_summary(x, types=list(summary_types))

        for group_name, channels, stride in group_specs:
            x = CifarResNet.build_group(x, self._n, channels,
                                        stride=stride,
                                        center=self._center,
                                        theta_init=self._theta_init,
                                        theta_lr_mult=self._theta_lr_mult,
                                        name=group_name)
            add_activation_summary(x, types=list(summary_types))

        x = ActBias(x, name="act_top")
        # x = GlobalAvgPooling("gap", x)
        logits = FullyConnected("linear", x, self._n_classes)

    # Not used below; creates the named "prob" op in the graph.
    prob = tf.nn.softmax(logits, name="prob")

    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    cost = tf.reduce_mean(per_example_loss, name="cross_entropy_loss")

    is_correct = tf.nn.in_top_k(logits, label, 1)
    wrong = tf.cast(tf.logical_not(is_correct), tf.float32,
                    name="wrong_vector")
    add_moving_summary(tf.reduce_mean(wrong, name="train_error"))

    # Weight-decay coefficient: 0.0002 scaled by 0.2 every 480000 steps.
    wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                      480000, 0.2, True)
    l2_penalty = regularize_cost('.*/W', tf.nn.l2_loss)
    wd_cost = tf.multiply(wd_w, l2_penalty, name='wd_cost')
    add_moving_summary(cost, wd_cost)

    for pattern in ('.*/theta', '.*/ma_mu'):
        add_param_summary((pattern, ['histogram']))

    return tf.add_n([cost, wd_cost], name="cost")