def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) cell with low-rank gates and optional label attention."""
    with tf.variable_scope(scope or "lowrank_gru_cell"):
        if self._use_attention:
            # compute a per-label mask from the control part of the state and the inputs,
            # and apply it to the label part of the state only
            attn_input = tf.concat([state[:, self._label_space_size:], inputs], 1)
            logits = util.linear(attn_input, self._label_space_size)
            if self._sigmoid_attn:
                mask = tf.nn.sigmoid(logits)
            else:
                mask = tf.nn.softmax(logits)
            a_state = state * tf.concat([
                mask,
                tf.ones([mask.get_shape()[0].value, self._control_size])
            ], 1)
        else:
            a_state = state
        with tf.variable_scope("r_gate"):
            r = tf.sigmoid(
                self.lowrank_linear(tf.concat([a_state, inputs], 1), 'r_gate'))
        with tf.variable_scope("u_gate"):
            u = tf.sigmoid(
                self.lowrank_linear(tf.concat([a_state, inputs], 1), 'u_gate'))
        with tf.variable_scope("candidate"):
            c = self._activation(
                self.lowrank_linear(tf.concat([r * state, inputs], 1), 'candidate'))
        new_h = u * state + (1 - u) * c
    return new_h, new_h
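# NOTE: `util.linear` is used throughout these snippets but is not shown. The sketch
# below is only an assumption of what it might look like (concatenate the inputs and
# apply a learned affine map, with optional bias and variable reuse), inferred from
# how it is called here; the real helper may differ, e.g. it may also flatten LSTM
# state tuples before concatenation.
def linear(args, output_size, bias=True, reuse=None, scope=None):
    """Minimal sketch: concatenate `args` and apply a fully connected layer."""
    if not isinstance(args, (list, tuple)):
        args = [args]
    inputs = tf.concat(args, 1) if len(args) > 1 else args[0]
    input_size = inputs.get_shape()[1].value
    with tf.variable_scope(scope or 'linear', reuse=reuse):
        weight = tf.get_variable('weight', [input_size, output_size])
        output = tf.matmul(inputs, weight)
        if bias:
            output = tf.nn.bias_add(output, tf.get_variable('bias', [output_size]))
    return output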
def __init__(self, config, vocab, label_space_size):
    super(NeuralBagOfWordsModel, self).__init__(config, vocab, label_space_size)
    self.notes = tf.placeholder(tf.int32, [config.batch_size, None], name='notes')
    self.lengths = tf.placeholder(tf.int32, [config.batch_size], name='lengths')
    self.labels = tf.placeholder(tf.float32, [config.batch_size, label_space_size],
                                 name='labels')
    with tf.device('/cpu:0'):
        init_width = 0.5 / config.word_emb_size
        self.embeddings = tf.get_variable(
            'embeddings', [len(vocab.vocab), config.word_emb_size],
            initializer=tf.random_uniform_initializer(-init_width, init_width),
            trainable=config.train_embs)
        embed = tf.nn.embedding_lookup(self.embeddings, self.notes)
    # zero out the embeddings of padding tokens (index 0)
    embed *= tf.to_float(tf.expand_dims(tf.greater(self.notes, 0), -1))
    data = self.summarize(embed)
    logits = util.linear(data, self.label_space_size)
    self.probs = tf.sigmoid(logits)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.labels))
    self.train_op = self.minimize_loss(self.loss)
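# NOTE: `self.summarize` is not defined in this snippet. Below is a minimal sketch of
# one plausible implementation -- a length-normalized mean of the (already masked)
# word embeddings, using the `lengths` placeholder -- the actual method may differ.
def summarize(self, embed):
    """Sketch: average the word embeddings over time to get one vector per note."""
    summed = tf.reduce_sum(embed, 1)                      # [batch, emb_size]
    lengths = tf.maximum(tf.to_float(self.lengths), 1.0)  # avoid division by zero
    return summed / tf.expand_dims(lengths, -1)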
def linear_propagate(self, inputs):
    from util import linear
    outputs = []
    # forward-propagate the inputs through this layer, one neuron at a time,
    # using a linear activation
    for neuron, bias in zip(self.neurons, self.bias_weights):
        activation = activate(neuron['weights'], inputs, bias)
        neuron['output'] = linear(activation)
        outputs.append(neuron['output'])
    return outputs
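# NOTE: `activate` (and the `linear` activation imported above) are not shown in this
# snippet. The sketches below assume the conventional definitions -- a weighted sum of
# the inputs plus the bias, and an identity activation -- the real helpers may differ.
def activate(weights, inputs, bias):
    """Sketch: pre-activation value of a single neuron."""
    return sum(w * x for w, x in zip(weights, inputs)) + bias

def linear(activation):
    """Sketch: identity (linear) activation."""
    return activation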
def __init__(self, config, vocab, label_space_size):
    super(NormalizedLSTMModel, self).__init__(config, vocab, label_space_size)
    self.notes = tf.placeholder(tf.int32, [config.batch_size, None], name='notes')
    self.lengths = tf.placeholder(tf.int32, [config.batch_size], name='lengths')
    self.labels = tf.placeholder(tf.float32, [config.batch_size, label_space_size],
                                 name='labels')
    with tf.device('/cpu:0'):
        init_width = 0.5 / config.word_emb_size
        self.embeddings = tf.get_variable(
            'embeddings', [len(vocab.vocab), config.word_emb_size],
            initializer=tf.random_uniform_initializer(-init_width, init_width),
            trainable=config.train_embs)
        embed = tf.nn.embedding_lookup(self.embeddings, self.notes)
    with tf.variable_scope('lstm', initializer=tf.contrib.layers.xavier_initializer()):
        if config.normlstm_mem:
            cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                label_space_size + config.hidden_size)
        else:
            cell = tf.contrib.rnn.LayerNormBasicLSTMCell(config.hidden_size)
        # recurrence
        _, last_state = tf.nn.dynamic_rnn(cell, embed, sequence_length=self.lengths,
                                          swap_memory=True, dtype=tf.float32)
        if config.lstm_hidden == 'c':
            last_state = last_state.c
        elif config.lstm_hidden == 'h':
            last_state = last_state.h
        if config.normlstm_mem:
            state = last_state[:, :label_space_size]
            multipliers = tf.get_variable('mult', [1, label_space_size],
                                          initializer=tf.ones_initializer())
            bias = tf.get_variable('bias', [label_space_size],
                                   initializer=tf.zeros_initializer())
            logits = tf.nn.bias_add(state * multipliers, bias)
        else:
            logits = util.linear(last_state, label_space_size)
    self.probs = tf.sigmoid(logits)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.labels))
    self.train_op = self.minimize_loss(self.loss)
def background_attention(decoder_state):
    with tf.variable_scope("background_attention"):
        # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
        decoder_features = util.linear(decoder_state, attention_vec_size, True)  # shape (batch_size, attention_vec_size)
        decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1)  # reshape to (batch_size, 1, 1, attention_vec_size)

        def masked_background_attention(e):
            """Take softmax of e then apply enc_padding_mask"""
            attn_dist = tf.nn.softmax(util.mask_softmax(enc_padding_mask, e))  # take softmax. shape (batch_size, attn_length)
            return attn_dist

        # Calculate v^T tanh(W_h h_i + W_s s_t + b_attn)
        e = tf.reduce_sum(v * tf.tanh(encoder_features + decoder_features), [2, 3])
        # Calculate attention distribution
        attn_dist = masked_background_attention(e)  # shape (batch_size, attn_length)
        # Calculate the context vector from attn_dist and encoder_states
        context_vector = tf.reduce_sum(
            tf.reshape(attn_dist, [batch_size, -1, 1, 1]) * encoder_states, [1, 2])
        context_vector = tf.reshape(context_vector, [-1, attn_size])
    return context_vector, attn_dist
def context_attention(decoder_state):
    with tf.variable_scope("context_attention"):
        # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
        decoder_features = util.linear(decoder_state, q_attention_vec_size, True)  # shape (batch_size, q_attention_vec_size)
        decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1)  # reshape to (batch_size, 1, 1, q_attention_vec_size)

        def masked_context_attention(e):
            """Take softmax of e then apply que_padding_mask"""
            attn_dist = tf.nn.softmax(util.mask_softmax(que_padding_mask, e))  # take softmax. shape (batch_size, attn_length)
            return attn_dist

        # Calculate v^T tanh(W_q q_i + W_s s_t + b_attn)
        f = tf.reduce_sum(v_q * tf.tanh(query_features + decoder_features), [2, 3])
        # Calculate attention distribution
        q_attn_dist = masked_context_attention(f)
        # Calculate the context vector from q_attn_dist and query_states
        q_context_vector = tf.reduce_sum(
            tf.reshape(q_attn_dist, [batch_size, -1, 1, 1]) * query_states, [1, 2])  # shape (batch_size, q_attn_size)
        q_context_vector = tf.reshape(q_context_vector, [-1, q_attn_size])
    return q_context_vector, q_attn_dist
def __init__(self, config, vocab, label_space_size, l1_regs=None, scope=None):
    super(BagOfWordsModel, self).__init__(config, vocab, label_space_size, scope=scope)
    self.l1_regs = l1_regs
    if config.bow_stopwords:
        stop_words = None
    else:
        stop_words = 'english'
    if config.bow_norm:
        norm = config.bow_norm
    else:
        norm = None
    self.vectorizer = TfidfVectorizer(vocabulary=self.vocab.vocab_lookup,
                                      use_idf=False,
                                      sublinear_tf=config.bow_log_tf,
                                      stop_words=stop_words,
                                      norm=norm)
    self.data = tf.placeholder(tf.float32, [None, len(vocab.vocab)], name='data')
    self.labels = tf.placeholder(tf.float32, [None, label_space_size], name='labels')
    data_size = tf.to_float(tf.shape(self.data)[0])
    self.logits = util.linear(self.data, self.label_space_size)
    self.probs = tf.sigmoid(self.logits)
    self.loss = tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                labels=self.labels))
    self.loss += (data_size / config.batch_size) * tf.reduce_sum(
        self.l1_regularization())
    self.train_op = self.minimize_loss(self.loss)
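# NOTE: an assumed usage sketch of the bag-of-words model above. Raw notes are turned
# into count vectors with the vectorizer and fed through the `data`/`labels`
# placeholders. `session`, `model`, `raw_notes` and `batch_labels` are hypothetical
# names not defined in these snippets.
# features = model.vectorizer.transform(raw_notes).toarray()
# _, loss = session.run([model.train_op, model.loss],
#                       feed_dict={model.data: features, model.labels: batch_labels})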
def hybrid_decoder(decoder_inputs, initial_state, encoder_states, enc_padding_mask,
                   query_states, que_padding_mask, cell, initial_state_attention=False):
    with tf.variable_scope("attention_decoder"):
        batch_size = encoder_states.get_shape()[0].value  # if this line fails, it's because the batch size isn't defined
        attn_size = encoder_states.get_shape()[2].value  # 2*hz; if this line fails, it's because the attention length isn't defined
        q_attn_size = query_states.get_shape()[2].value  # 2*hz

        # Reshape encoder_states (need to insert a dim)
        encoder_states = tf.expand_dims(encoder_states, 2)  # now is shape (batch_size, attn_len, 1, attn_size)
        query_states = tf.expand_dims(query_states, 2)

        # To calculate attention, we calculate
        #   v^T tanh(W_h h_i + W_s s_t + b_attn)
        # where h_i is an encoder state, and s_t a decoder state.
        # attn_vec_size is the length of the vectors v, b_attn, (W_h h_i) and (W_s s_t).
        # We set it to be equal to the size of the encoder states.
        attention_vec_size = attn_size
        q_attention_vec_size = q_attn_size

        # Get the weight matrix W_h and apply it to each encoder state to get (W_h h_i), the encoder features
        W_h = tf.get_variable("W_h", [1, 1, attn_size, attention_vec_size])
        encoder_features = tf.nn.conv2d(encoder_states, W_h, [1, 1, 1, 1], "SAME")  # shape (batch_size, attn_length, 1, attention_vec_size)
        # Get the weight vectors v
        v = tf.get_variable("v", [attention_vec_size])

        # Get the weight matrix W_q and apply it to each query state to get (W_q q_i), the query features
        W_q = tf.get_variable("W_q", [1, 1, q_attn_size, q_attention_vec_size])
        query_features = tf.nn.conv2d(query_states, W_q, [1, 1, 1, 1], "SAME")  # shape (batch_size, q_attn_length, 1, q_attention_vec_size)
        # Get the weight vectors v_q
        v_q = tf.get_variable("v_q", [q_attention_vec_size])

        def background_attention(decoder_state):
            with tf.variable_scope("background_attention"):
                # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
                decoder_features = util.linear(decoder_state, attention_vec_size, True)  # shape (batch_size, attention_vec_size)
                decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1)  # reshape to (batch_size, 1, 1, attention_vec_size)

                def masked_background_attention(e):
                    """Take softmax of e then apply enc_padding_mask"""
                    attn_dist = tf.nn.softmax(util.mask_softmax(enc_padding_mask, e))  # take softmax. shape (batch_size, attn_length)
                    return attn_dist

                # Calculate v^T tanh(W_h h_i + W_s s_t + b_attn)
                e = tf.reduce_sum(v * tf.tanh(encoder_features + decoder_features), [2, 3])
                # Calculate attention distribution
                attn_dist = masked_background_attention(e)  # shape (batch_size, attn_length)
                # Calculate the context vector from attn_dist and encoder_states
                context_vector = tf.reduce_sum(
                    tf.reshape(attn_dist, [batch_size, -1, 1, 1]) * encoder_states, [1, 2])
                context_vector = tf.reshape(context_vector, [-1, attn_size])
            return context_vector, attn_dist

        def context_attention(decoder_state):
            with tf.variable_scope("context_attention"):
                # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
                decoder_features = util.linear(decoder_state, q_attention_vec_size, True)  # shape (batch_size, q_attention_vec_size)
                decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1)  # reshape to (batch_size, 1, 1, q_attention_vec_size)

                def masked_context_attention(e):
                    """Take softmax of e then apply que_padding_mask"""
                    attn_dist = tf.nn.softmax(util.mask_softmax(que_padding_mask, e))  # take softmax. shape (batch_size, attn_length)
                    return attn_dist

                # Calculate v^T tanh(W_q q_i + W_s s_t + b_attn)
                f = tf.reduce_sum(v_q * tf.tanh(query_features + decoder_features), [2, 3])
                # Calculate attention distribution
                q_attn_dist = masked_context_attention(f)
                # Calculate the context vector from q_attn_dist and query_states
                q_context_vector = tf.reduce_sum(
                    tf.reshape(q_attn_dist, [batch_size, -1, 1, 1]) * query_states, [1, 2])  # shape (batch_size, q_attn_size)
                q_context_vector = tf.reshape(q_context_vector, [-1, q_attn_size])
            return q_context_vector, q_attn_dist

        outputs = []
        background_attn_dists = []
        switcher_gen_pred_time_step = []
        switcher_gen_copy_time_step = []
        switcher_ref_time_step = []
        switcher_gen_time_step = []
        state = initial_state
        context_vector = tf.zeros([batch_size, attn_size])
        context_vector.set_shape([None, attn_size])
        q_context_vector = tf.zeros([batch_size, q_attn_size])
        q_context_vector.set_shape([None, q_attn_size])
        if initial_state_attention:  # true in decode mode
            context_vector, _ = background_attention(initial_state)
            q_context_vector, _ = context_attention(initial_state)

        for i, inp in enumerate(decoder_inputs):
            tf.logging.info("Adding hybrid_decoder timestep %i of %i", i + 1, len(decoder_inputs))
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            # Merge input and previous attentions into one vector x of the same size as inp
            input_size = inp.get_shape().with_rank(2)[1]
            if input_size.value is None:
                raise ValueError("Could not infer input size from input: %s" % inp.name)
            x = util.linear([inp] + [context_vector] + [q_context_vector], input_size, True)

            # Run the decoder RNN cell. cell_output = decoder state
            cell_output, state = cell(x, state)

            # Run the attention mechanism.
            if i == 0 and initial_state_attention:  # always true in decode mode
                # reuse is needed because the initial attention call above already created the variables
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    context_vector, attn_dist = background_attention(state)
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    q_context_vector, q_attn_dist = context_attention(state)
            else:
                context_vector, attn_dist = background_attention(state)
                q_context_vector, q_attn_dist = context_attention(state)
            background_attn_dists.append(attn_dist)

            # Calculate switcher
            with tf.variable_scope('calculate_switcher'):
                switcher_matrix = util.linear(
                    [context_vector, q_context_vector, state.c, state.h, x], 3, True)
                switcher_matrix = tf.nn.softmax(switcher_matrix)
                switcher_gen_pred_prob = tf.expand_dims(switcher_matrix[:, 0], 1)  # batch*1
                switcher_gen_copy_prob = tf.expand_dims(switcher_matrix[:, 1], 1)  # batch*1
                switcher_gen_prob = switcher_gen_pred_prob + switcher_gen_copy_prob  # batch*1
                switcher_ref_prob = tf.expand_dims(switcher_matrix[:, 2], 1)  # batch*1
            switcher_gen_pred_time_step.append(switcher_gen_pred_prob)
            switcher_gen_copy_time_step.append(switcher_gen_copy_prob)
            switcher_gen_time_step.append(switcher_gen_prob)
            switcher_ref_time_step.append(switcher_ref_prob)

            with tf.variable_scope("AttnOutputProjection"):
                output = util.linear([cell_output] + [context_vector] + [q_context_vector],
                                     cell.output_size, True)
            outputs.append(output)

    return (outputs, state, background_attn_dists, switcher_ref_time_step,
            switcher_gen_time_step, switcher_gen_pred_time_step, switcher_gen_copy_time_step)
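# NOTE: `util.mask_softmax` is not included in these snippets. Both attention helpers
# call it on the raw scores before tf.nn.softmax, so the sketch below assumes it pushes
# the scores of padded positions to a large negative value so they receive ~0 attention;
# the real implementation may instead renormalize after the softmax.
def mask_softmax(padding_mask, scores):
    """Sketch: make padded positions contribute ~0 probability after softmax."""
    # padding_mask is 1.0 for real tokens and 0.0 for padding, shape (batch_size, attn_length)
    return scores * padding_mask + (1.0 - padding_mask) * -1e9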
def __init__(self, config, vocab, label_space_size, verbose=True):
    super(RecurrentNetworkModel, self).__init__(config, vocab, label_space_size)
    self.lengths = tf.placeholder(tf.int32, [config.batch_size], name='lengths')
    self.labels = tf.placeholder(tf.float32, [config.batch_size, label_space_size],
                                 name='labels')
    with tf.device('/cpu:0'):
        self.notes = tf.placeholder(tf.int32, [config.batch_size, None], name='notes')
        if config.multilayer and config.bidirectional:
            rev_notes = tf.reverse_sequence(self.notes[:, 1:],
                                            tf.maximum(self.lengths - 1, 0),
                                            seq_axis=1, batch_axis=0)
            rev_notes = tf.concat(
                [tf.constant(vocab.eos_index, dtype=tf.int32,
                             shape=[config.batch_size, 1]), rev_notes], 1)
        init_width = 0.5 / config.word_emb_size
        self.embeddings = tf.get_variable(
            'embeddings', [len(vocab.vocab), config.word_emb_size],
            initializer=tf.random_uniform_initializer(-init_width, init_width),
            trainable=config.train_embs)
        embed = tf.nn.embedding_lookup(self.embeddings, self.notes)
        if config.multilayer and config.bidirectional:
            rev_embed = tf.nn.embedding_lookup(self.embeddings, rev_notes)

    if config.rnn_grnn_size:
        C = config.hidden_size
        G = label_space_size
        E = config.word_emb_size
        N = np.square(C) + C + (2*C*G) + (C*E) + (G*E) + (2*G)
        hidden_size = int((np.sqrt(np.square(E+1+(G/3)) - (4*((G/3) - N)))
                           - (E+1+(G/3))) / 2)
        if verbose:
            print('Computed RNN hidden size:', hidden_size)
    else:
        hidden_size = config.hidden_size

    if config.rnn_type == 'entnet':
        keys = [tf.get_variable('key_{}'.format(j), [config.word_emb_size])
                for j in range(config.num_blocks)]
        cell = DynamicMemoryCell(config.num_blocks, config.word_emb_size, keys,
                                 initializer=tf.contrib.layers.xavier_initializer(),
                                 activation=util.prelu)
    elif config.rnn_type == 'gru':
        cell = tf.contrib.rnn.GRUCell(hidden_size)
    elif config.rnn_type == 'lstm':
        cell = tf.contrib.rnn.BasicLSTMCell(hidden_size)

    if config.multilayer or config.bidirectional:
        with tf.variable_scope('gru_rev', initializer=tf.contrib.layers.xavier_initializer()):
            rev_cell = tf.contrib.rnn.GRUCell(hidden_size)

    inputs = embed
    if config.multilayer or not config.bidirectional:
        if config.multilayer:
            with tf.variable_scope('gru_rev'):
                if config.bidirectional:
                    embed_ = rev_embed
                else:
                    embed_ = embed
                rev_out, _ = tf.nn.dynamic_rnn(rev_cell, embed_, sequence_length=self.lengths,
                                               swap_memory=True, dtype=tf.float32)
                if config.bidirectional:
                    rev_out = tf.reverse_sequence(rev_out, self.lengths, seq_axis=1,
                                                  batch_axis=0)
            if config.reconcat_input:
                inputs = tf.concat([inputs, rev_out], 2)
            else:
                inputs = rev_out
        # recurrence
        outs, last_state = tf.nn.dynamic_rnn(cell, inputs, sequence_length=self.lengths,
                                             swap_memory=True, dtype=tf.float32)
    else:  # not multilayer and bidirectional
        _, last_state = tf.nn.bidirectional_dynamic_rnn(cell, rev_cell, inputs,
                                                        sequence_length=self.lengths,
                                                        swap_memory=True, dtype=tf.float32)
        last_state = tf.concat(last_state, 1)
        if config.rnn_type == 'lstm':
            last_state = tf.concat(last_state, 1)

    if config.rnn_type == 'entnet' and config.use_attention:
        # start with uniform attention
        attention = tf.get_variable('attention', [label_space_size, config.num_blocks],
                                    initializer=tf.zeros_initializer())
        self.attention = tf.nn.softmax(attention)
        # replicate each column of the attention matrix emb_size times (11112222...)
        attention = tf.tile(self.attention, [1, config.word_emb_size])
        attention = tf.reshape(attention,
                               [label_space_size, config.word_emb_size, config.num_blocks])
        attention = tf.transpose(attention, [0, 2, 1])
        attention = tf.reshape(attention, [label_space_size, -1])
        # weight matrix from emb_size to label_space_size. this is the weight matrix that acts
        # on the post-attention embeddings from last_state.
        weight = tf.get_variable('weight', [label_space_size, config.word_emb_size],
                                 initializer=tf.contrib.layers.xavier_initializer())
        # tile the weight matrix num_blocks times in the second dimension and multiply the
        # attention to it. this is equivalent to doing attention + sum over all the blocks for
        # each label.
        weight = tf.tile(weight, [1, config.num_blocks])
        attended_weight = weight * attention
        # label bias
        bias = tf.get_variable("bias", [label_space_size],
                               initializer=tf.zeros_initializer())
        logits = tf.nn.bias_add(tf.matmul(last_state, tf.transpose(attended_weight)), bias)
    else:
        logits = util.linear(last_state, self.label_space_size)
        if config.rnn_type == 'gru' and (config.multilayer or not config.bidirectional):
            flat_outs = tf.reshape(outs, [-1, hidden_size])
            flat_logits = util.linear(flat_outs, self.label_space_size, reuse=True)
            step_logits = tf.reshape(flat_logits,
                                     [config.batch_size, -1, self.label_space_size])
            self.step_probs = tf.sigmoid(step_logits)

    self.probs = tf.sigmoid(logits)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.labels))
    self.train_op = self.minimize_loss(self.loss)
def linear_color(value, m, x, c1, c2) -> tuple:
    # interpolate each colour channel between c1 and c2 independently
    return (linear(value, m, x, c1[0], c2[0]),
            linear(value, m, x, c1[1], c2[1]),
            linear(value, m, x, c1[2], c2[2]))
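# NOTE: the scalar `linear` helper used above is not shown. A minimal sketch follows,
# assuming `m`/`x` are the input range and the last two arguments are the per-channel
# output range (plain linear interpolation); the real helper may clamp or differ.
def linear(value, m, x, out_lo, out_hi):
    """Sketch: map `value` from the range [m, x] onto [out_lo, out_hi] linearly."""
    t = (value - m) / float(x - m)
    return out_lo + t * (out_hi - out_lo)

# Example: halfway between black and white gives mid grey.
# linear_color(0.5, 0.0, 1.0, (0, 0, 0), (255, 255, 255)) -> (127.5, 127.5, 127.5)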
def discriminator(x_image, y_label, batch_size=10, dim_con=64, dim_fc=1024, reuse=False):
    """
    Returns the discriminator network. It takes an image and a label and returns a real/fake
    classification. The discriminator is structured as a Convolutional Neural Net with two
    layers of convolution and pooling, followed by two fully-connected layers.

    Args:
        x_image: input image tensor, float - [batch_size, DIM_IMAGE=784]
        y_label: input label tensor, float - [batch_size, DIM_Y=10]
        batch_size: number of examples per batch
        dim_con: number of filters in the first convolution layer
        dim_fc: dimension of the first fully-connected layer
        reuse: whether to reuse the discriminator variable scope

    Returns:
        The discriminator's sigmoid output and its un-normalized logits.
    """
    with tf.variable_scope("discriminator") as scope:
        if reuse:
            scope.reuse_variables()

        # create x as the joint 4-D feature representation of the image and the label
        y_4d = tf.reshape(y_label, [batch_size, 1, 1, DIM_Y])
        x_4d = tf.reshape(x_image, [batch_size, 28, 28, 1])
        x = concat(x_4d, y_4d)
        tf.summary.histogram('act_d0', x)

        # first convolution-activation-pooling layer
        d1 = cnn_block(x, 1 + DIM_Y, 'd1')
        # join the output of the previous layer with the labels vector
        d1 = concat(d1, y_4d)
        tf.summary.histogram('act_d1', d1)

        # second convolution-activation-pooling layer
        d2 = cnn_block(d1, dim_con + DIM_Y, 'd2')
        # flatten the output of the second layer to a 2-D matrix with shape - [batch, ?]
        d2 = tf.reshape(d2, [batch_size, -1])
        # join the flattened output with the labels vector and apply this as input to
        # a series of fully connected layers.
        d2 = tf.concat([d2, y_label], 1)
        tf.summary.histogram('act_d2', d2)

        # first fully connected layer
        d3 = tf.nn.dropout(lrelu(linear(
            x_input=d2,
            dim_in=d2.get_shape().as_list()[-1],
            dim_out=dim_fc,
            name='d3')), KEEP_PROB)
        # join the output of the previous layer with the labels vector
        d3 = tf.concat([d3, y_label], 1)
        tf.summary.histogram('act_d3', d3)

        # second and last fully connected layer
        # calculate the un-normalized log probability of each example being real
        d4_logits = linear(d3, dim_fc + DIM_Y, 1, 'd4')
        # calculate the activation values, dimension - [batch, 1]
        d4 = tf.nn.sigmoid(d4_logits)
        tf.summary.histogram('act_d4', d4)

        return d4, d4_logits
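# NOTE: the `concat(features, y_4d)` helper used by the discriminator and generator is
# not shown. A minimal sketch follows, assuming the usual conditional-GAN trick of
# tiling the label map to the feature map's spatial size and concatenating it on the
# channel axis; the real helper may differ.
def concat(features, y_4d):
    """Sketch: append the (tiled) one-hot label map as extra channels."""
    shape = features.get_shape().as_list()  # [batch, height, width, channels]
    y_tiled = tf.tile(y_4d, [1, shape[1], shape[2], 1])
    return tf.concat([features, y_tiled], 3)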
def generator(z_input, y_label, batch_size=10, dim_con=64, dim_fc=1024, reuse=False):
    """
    Returns the generator network. It takes a noise vector and a label and produces an image
    conditioned on that label.

    Args:
        z_input: input noise tensor, float - [batch_size, DIM_Z=100]
        y_label: input label tensor, float - [batch_size, DIM_Y=10]
        batch_size: number of examples per batch
        dim_con: number of filters in the de-convolution layers
        dim_fc: dimension of the first fully-connected layer
        reuse: whether to reuse the generator variable scope

    Returns:
        x': the generated image tensor, float - [batch_size, DIM_IMAGE=784]
    """
    with tf.variable_scope("generator") as scope:
        if reuse:
            scope.reuse_variables()

        # create z as the joint representation of the input noise and the label
        z = tf.concat([z_input, y_label], 1)
        tf.summary.histogram('act_g0', z)

        # first fully-connected layer
        g1 = tf.nn.relu(tf.contrib.layers.batch_norm(linear(
            x_input=z,
            dim_in=DIM_Z + DIM_Y,
            dim_out=dim_fc,
            name='g1'), epsilon=1e-5, scope='g1_bn'))
        # join the output of the previous layer with the labels vector
        g1 = tf.concat([g1, y_label], 1)
        tf.summary.histogram('act_g1', g1)

        # second fully-connected layer (integer division keeps the reshape dimensions integral)
        g2 = tf.nn.relu(tf.contrib.layers.batch_norm(linear(
            x_input=g1,
            dim_in=g1.get_shape().as_list()[-1],
            dim_out=dim_con * 2 * (IMAGE_SIZE // 4) * (IMAGE_SIZE // 4),
            name='g2'), epsilon=1e-5, scope='g2_bn'))

        # create a joint 4-D feature representation of the output of the previous layer and the
        # label to serve as a 7x7 input image for the next de-convolution layer
        y_ = tf.reshape(y_label, [batch_size, 1, 1, DIM_Y])
        g2 = tf.reshape(g2, [batch_size, IMAGE_SIZE // 4, IMAGE_SIZE // 4, dim_con * 2])
        g2 = concat(g2, y_)
        tf.summary.histogram('act_g2', g2)

        # first layer of de-convolution produces a larger 14x14 image
        g3 = deconv2d(g2, [batch_size, IMAGE_SIZE // 2, IMAGE_SIZE // 2, dim_con * 2], 'g3')
        # apply batch normalization and a ReLU to stabilize the output of this layer
        g3 = tf.nn.relu(tf.contrib.layers.batch_norm(g3, epsilon=1e-5, scope='g3_bn'))
        # join the output of the previous layer with the labels vector
        g3 = concat(g3, y_)
        tf.summary.histogram('act_g3', g3)

        # second layer of de-convolution produces the final sized 28x28 image
        g4 = deconv2d(g3, [batch_size, IMAGE_SIZE, IMAGE_SIZE, 1], 'x')
        # no batch normalization in the final layer but a sigmoid activation function is used to
        # generate a sharp and crisp image vector; dimension - [28, 28, 1]
        g4 = tf.nn.sigmoid(g4)
        tf.summary.histogram('act_g4', g4)

        return g4
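# NOTE: `deconv2d` is not shown in these snippets. A minimal sketch follows, assuming a
# stride-2 transposed convolution with a 5x5 kernel (a common DCGAN choice); the real
# helper's kernel size, stride, initialization and bias handling may differ.
def deconv2d(inputs, output_shape, name, k=5, stride=2):
    """Sketch: transposed convolution that upsamples `inputs` to `output_shape`."""
    with tf.variable_scope(name):
        in_channels = int(inputs.get_shape()[-1])
        out_channels = output_shape[-1]
        w = tf.get_variable('w', [k, k, out_channels, in_channels],
                            initializer=tf.truncated_normal_initializer(stddev=0.02))
        b = tf.get_variable('b', [out_channels], initializer=tf.zeros_initializer())
        deconv = tf.nn.conv2d_transpose(inputs, w, output_shape=output_shape,
                                        strides=[1, stride, stride, 1])
        return tf.nn.bias_add(deconv, b)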