Example #1
 def __call__(self, inputs, state, scope=None):
     """Gated recurrent unit (GRU) with nunits cells."""
     with tf.variable_scope(scope or "lowrank_gru_cell"):
         if self._use_attention:
             attn_input = tf.concat(
                 [state[:, self._label_space_size:], inputs], 1)
             logits = util.linear(attn_input, self._label_space_size)
             if self._sigmoid_attn:
                 mask = tf.nn.sigmoid(logits)
             else:
                 mask = tf.nn.softmax(logits)
             a_state = state * tf.concat([
                 mask,
                 tf.ones([mask.get_shape()[0].value, self._control_size])
             ], 1)
         else:
             a_state = state
         with tf.variable_scope("r_gate"):
             r = tf.sigmoid(
                 self.lowrank_linear(tf.concat([a_state, inputs], 1),
                                     'r_gate'))
         with tf.variable_scope("u_gate"):
             u = tf.sigmoid(
                 self.lowrank_linear(tf.concat([a_state, inputs], 1),
                                     'u_gate'))
         with tf.variable_scope("candidate"):
             c = self._activation(
                 self.lowrank_linear(tf.concat([r * state, inputs], 1),
                                     'candidate'))
         new_h = u * state + (1 - u) * c
     return new_h, new_h
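The cell above delegates its gate projections to a lowrank_linear helper that is not part of the snippet. A minimal sketch of such a method, assuming the weight matrix is factorized into two low-rank factors and the cell exposes a _rank attribute (that attribute and the exact output size are assumptions):

def lowrank_linear(self, inputs, scope_name):
    # Hypothetical low-rank projection: the full [in_size, out_size] weight
    # matrix is factorized as U @ V with a small shared rank.
    with tf.variable_scope(scope_name):
        in_size = inputs.get_shape()[1].value
        out_size = self._label_space_size + self._control_size  # assumed state size
        u = tf.get_variable('u', [in_size, self._rank])
        v = tf.get_variable('v', [self._rank, out_size])
        bias = tf.get_variable('bias', [out_size], initializer=tf.zeros_initializer())
        return tf.nn.bias_add(tf.matmul(tf.matmul(inputs, u), v), bias)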
Example #2
 def __init__(self, config, vocab, label_space_size):
     super(NeuralBagOfWordsModel, self).__init__(config, vocab,
                                                 label_space_size)
     self.notes = tf.placeholder(tf.int32, [config.batch_size, None],
                                 name='notes')
     self.lengths = tf.placeholder(tf.int32, [config.batch_size],
                                   name='lengths')
     self.labels = tf.placeholder(tf.float32,
                                  [config.batch_size, label_space_size],
                                  name='labels')
     with tf.device('/cpu:0'):
         init_width = 0.5 / config.word_emb_size
         self.embeddings = tf.get_variable(
             'embeddings', [len(vocab.vocab), config.word_emb_size],
             initializer=tf.random_uniform_initializer(
                 -init_width, init_width),
             trainable=config.train_embs)
         embed = tf.nn.embedding_lookup(self.embeddings, self.notes)
     embed *= tf.to_float(tf.expand_dims(tf.greater(self.notes, 0), -1))
     data = self.summarize(embed)
     logits = util.linear(data, self.label_space_size)
     self.probs = tf.sigmoid(logits)
     self.loss = tf.reduce_mean(
         tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                 labels=self.labels))
     self.train_op = self.minimize_loss(self.loss)
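The summarize method called above is defined elsewhere on the model. A minimal sketch, assuming it averages the (already zero-masked) word embeddings over the time dimension using the true sequence lengths:

def summarize(self, embed):
    # Hypothetical neural-bag-of-words summary: mean of the word embeddings,
    # normalized by the number of real (non-padding) tokens per note.
    totals = tf.reduce_sum(embed, 1)
    lengths = tf.maximum(tf.to_float(tf.expand_dims(self.lengths, -1)), 1.0)
    return totals / lengths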
Example #3
 def linear_propagate(self, inputs):
     from util import linear
     outputs = []
     for neuron, bias in zip(self.neurons, self.bias_weights):
         activation = activate(neuron['weights'], inputs, bias)
         neuron['output'] = linear(activation)
         outputs.append(neuron['output'])
     return outputs
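The activate helper used above is not shown in the snippet. A minimal sketch, assuming it computes a neuron's weighted sum of the inputs plus its bias term:

def activate(weights, inputs, bias):
    # Hypothetical helper: weighted sum of the inputs plus the bias.
    return sum(w * i for w, i in zip(weights, inputs)) + bias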
Example #4
    def __init__(self, config, vocab, label_space_size):
        super(NormalizedLSTMModel, self).__init__(config, vocab,
                                                  label_space_size)
        self.notes = tf.placeholder(tf.int32, [config.batch_size, None],
                                    name='notes')
        self.lengths = tf.placeholder(tf.int32, [config.batch_size],
                                      name='lengths')
        self.labels = tf.placeholder(tf.float32,
                                     [config.batch_size, label_space_size],
                                     name='labels')
        with tf.device('/cpu:0'):
            init_width = 0.5 / config.word_emb_size
            self.embeddings = tf.get_variable(
                'embeddings', [len(vocab.vocab), config.word_emb_size],
                initializer=tf.random_uniform_initializer(
                    -init_width, init_width),
                trainable=config.train_embs)
            embed = tf.nn.embedding_lookup(self.embeddings, self.notes)

        with tf.variable_scope(
                'lstm', initializer=tf.contrib.layers.xavier_initializer()):
            if config.normlstm_mem:
                cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    label_space_size + config.hidden_size)
            else:
                cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    config.hidden_size)

        # recurrence
        _, last_state = tf.nn.dynamic_rnn(cell,
                                          embed,
                                          sequence_length=self.lengths,
                                          swap_memory=True,
                                          dtype=tf.float32)
        if config.lstm_hidden == 'c':
            last_state = last_state.c
        elif config.lstm_hidden == 'h':
            last_state = last_state.h
        if config.normlstm_mem:
            state = last_state[:, :label_space_size]
            multipliers = tf.get_variable('mult', [1, label_space_size],
                                          initializer=tf.ones_initializer())
            bias = tf.get_variable('bias', [label_space_size],
                                   initializer=tf.zeros_initializer())
            logits = tf.nn.bias_add(state * multipliers, bias)
        else:
            logits = util.linear(last_state, label_space_size)
        self.probs = tf.sigmoid(logits)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                    labels=self.labels))
        self.train_op = self.minimize_loss(self.loss)
Example #5
        def background_attention(decoder_state):
            with tf.variable_scope("background_attention"):
                # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
                decoder_features = util.linear(decoder_state, attention_vec_size, True)  # shape (batch_size, attention_vec_size)
                decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1)  # reshape to (batch_size, 1, 1, attention_vec_size)

                def masked_background_attention(e):
                    """Take softmax of e then apply enc_padding_mask"""
                    attn_dist = tf.nn.softmax(util.mask_softmax(enc_padding_mask, e))  # take softmax. shape (batch_size, attn_length)
                    return attn_dist

                # Calculate v^T tanh(W_h h_i + W_s s_t + b_attn)
                e = tf.reduce_sum(v * tf.tanh(encoder_features + decoder_features), [2, 3])  # calculate e
                # Calculate attention distribution
                attn_dist = masked_background_attention(e) # batch_size,attn_length

                # Calculate the context vector from attn_dist and encoder_states
                context_vector = tf.reduce_sum(tf.reshape(attn_dist, [batch_size, -1, 1, 1]) * encoder_states, [1, 2])
                context_vector = tf.reshape(context_vector, [-1, attn_size])
            return context_vector, attn_dist
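In equation form, background_attention implements the additive attention already described in the code comments:

    e_i^t = v^\top \tanh(W_h h_i + W_s s_t + b_\mathrm{attn})
    a^t   = \mathrm{softmax}(e^t)    (padded positions masked out via enc_padding_mask)
    c_t   = \sum_i a_i^t h_i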
Example #6
        def context_attention(decoder_state):
            with tf.variable_scope("context_attention"):
                # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
                decoder_features = util.linear(decoder_state, q_attention_vec_size, True) # shape (batch_size, q_attention_vec_size)
                decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1)  # reshape to (batch_size, 1, 1, q_attention_vec_size)

                def masked_context_attention(e):
                    """Take softmax of e then apply enc_padding_mask"""
                    attn_dist = tf.nn.softmax(util.mask_softmax(que_padding_mask, e))  # take softmax. shape (batch_size, attn_length)
                    return attn_dist

                # Calculate v^T tanh(W_q q_i + W_s s_t + b_attn)
                f = tf.reduce_sum(v_q * tf.tanh(query_features + decoder_features), [2, 3])

                # Calculate attention distribution
                q_attn_dist = masked_context_attention(f)

                # Calculate the context vector from q_attn_dist and query_states
                q_context_vector = tf.reduce_sum(tf.reshape(q_attn_dist, [batch_size, -1, 1, 1]) * query_states, [1, 2])  # shape (batch_size, q_attn_size).
                q_context_vector = tf.reshape(q_context_vector, [-1, q_attn_size])

            return q_context_vector, q_attn_dist
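Both attention functions rely on util.mask_softmax, which is not shown. A minimal sketch, assuming the padding mask is 1.0 for real tokens and 0.0 for padding, and that masking works by pushing padded scores towards minus infinity before the softmax:

def mask_softmax(padding_mask, scores):
    # Hypothetical helper: padded positions receive a large negative score,
    # so the following tf.nn.softmax assigns them (near-)zero probability.
    return scores + (1.0 - padding_mask) * -1e9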
Example #7
 def __init__(self,
              config,
              vocab,
              label_space_size,
              l1_regs=None,
              scope=None):
     super(BagOfWordsModel, self).__init__(config,
                                           vocab,
                                           label_space_size,
                                           scope=scope)
     self.l1_regs = l1_regs
     if config.bow_stopwords:
         stop_words = None
     else:
         stop_words = 'english'
     if config.bow_norm:
         norm = config.bow_norm
     else:
         norm = None
     self.vectorizer = TfidfVectorizer(vocabulary=self.vocab.vocab_lookup,
                                       use_idf=False,
                                       sublinear_tf=config.bow_log_tf,
                                       stop_words=stop_words,
                                       norm=norm)
     self.data = tf.placeholder(tf.float32, [None, len(vocab.vocab)],
                                name='data')
     self.labels = tf.placeholder(tf.float32, [None, label_space_size],
                                  name='labels')
     data_size = tf.to_float(tf.shape(self.data)[0])
     self.logits = util.linear(self.data, self.label_space_size)
     self.probs = tf.sigmoid(self.logits)
     self.loss = tf.reduce_sum(
         tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                 labels=self.labels))
     self.loss += (data_size / config.batch_size) * tf.reduce_sum(
         self.l1_regularization())
     self.train_op = self.minimize_loss(self.loss)
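The l1_regularization method is not part of this snippet either; a rough sketch under the assumption that it returns L1 penalties on the trainable weight matrices, scaled by the coefficients passed in as l1_regs (every detail here is an assumption):

def l1_regularization(self):
    # Hypothetical sketch: L1 norm of each trainable weight matrix (biases
    # excluded), weighted by the corresponding coefficient in self.l1_regs.
    weights = [v for v in tf.trainable_variables() if 'bias' not in v.name.lower()]
    return [reg * tf.reduce_sum(tf.abs(w)) for reg, w in zip(self.l1_regs, weights)]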
Example #8
def hybrid_decoder(decoder_inputs, initial_state, encoder_states, enc_padding_mask, query_states, que_padding_mask, cell, initial_state_attention=False):
    with tf.variable_scope("attention_decoder"):
        batch_size = encoder_states.get_shape()[0].value  # if this line fails, it's because the batch size isn't defined
        attn_size = encoder_states.get_shape()[2].value  # 2*hidden_size; if this line fails, it's because the attention size isn't defined
        q_attn_size = query_states.get_shape()[2].value  # 2*hidden_size
        # Reshape encoder_states (need to insert a dim)
        encoder_states = tf.expand_dims(encoder_states, 2)  # now is shape (batch_size, attn_len, 1, attn_size)
        query_states = tf.expand_dims(query_states, 2)
        # To calculate attention, we calculate
        #   v^T tanh(W_h h_i + W_s s_t + b_attn)
        # where h_i is an encoder state, and s_t a decoder state.
        # attn_vec_size is the length of the vectors v, b_attn, (W_h h_i) and (W_s s_t).
        # We set it to be equal to the size of the encoder states.
        attention_vec_size = attn_size
        q_attention_vec_size = q_attn_size

        # Get the weight matrix W_h and apply it to each encoder state to get (W_h h_i), the encoder features
        W_h = tf.get_variable("W_h", [1, 1, attn_size, attention_vec_size])
        encoder_features = tf.nn.conv2d(encoder_states, W_h, [1, 1, 1, 1], "SAME")  # shape (batch_size, attn_length, 1, attention_vec_size)

        # Get the weight vectors v
        v = tf.get_variable("v", [attention_vec_size])

        # Get the weight matrix W_q and apply it to each encoder state to get (W_q q_i), the query features
        W_q = tf.get_variable("W_q", [1, 1, q_attn_size, q_attention_vec_size])
        query_features = tf.nn.conv2d(query_states, W_q, [1, 1, 1, 1], "SAME")  # shape (batch_size, q_attn_length, 1, q_attention_vec_size)

        # Get the weight vectors v_q
        v_q = tf.get_variable("v_q", [q_attention_vec_size])

        def background_attention(decoder_state):
            with tf.variable_scope("background_attention"):
                # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
                decoder_features = util.linear(decoder_state, attention_vec_size, True)  # shape (batch_size, attention_vec_size)
                decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1)  # reshape to (batch_size, 1, 1, attention_vec_size)

                def masked_background_attention(e):
                    """Take softmax of e then apply enc_padding_mask"""
                    attn_dist = tf.nn.softmax(util.mask_softmax(enc_padding_mask, e))  # take softmax. shape (batch_size, attn_length)
                    return attn_dist

                # Calculate v^T tanh(W_h h_i + W_s s_t + b_attn)
                e = tf.reduce_sum(v * tf.tanh(encoder_features + decoder_features), [2, 3])  # calculate e
                # Calculate attention distribution
                attn_dist = masked_background_attention(e) # batch_size,attn_length

                # Calculate the context vector from attn_dist and encoder_states
                context_vector = tf.reduce_sum(tf.reshape(attn_dist, [batch_size, -1, 1, 1]) * encoder_states, [1, 2])
                context_vector = tf.reshape(context_vector, [-1, attn_size])
            return context_vector, attn_dist

        def context_attention(decoder_state):
            with tf.variable_scope("context_attention"):
                # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
                decoder_features = util.linear(decoder_state, q_attention_vec_size, True) # shape (batch_size, q_attention_vec_size)
                decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1)  # reshape to (batch_size, 1, 1, q_attention_vec_size)

                def masked_context_attention(e):
                    """Take softmax of e then apply enc_padding_mask"""
                    attn_dist = tf.nn.softmax(util.mask_softmax(que_padding_mask, e))  # take softmax. shape (batch_size, attn_length)
                    return attn_dist

                # Calculate v^T tanh(W_q q_i + W_s s_t + b_attn)
                f = tf.reduce_sum(v_q * tf.tanh(query_features + decoder_features), [2, 3])

                # Calculate attention distribution
                q_attn_dist = masked_context_attention(f)

                # Calculate the context vector from q_attn_dist and query_states
                q_context_vector = tf.reduce_sum(tf.reshape(q_attn_dist, [batch_size, -1, 1, 1]) * query_states, [1, 2])  # shape (batch_size, q_attn_size).
                q_context_vector = tf.reshape(q_context_vector, [-1, q_attn_size])

            return q_context_vector, q_attn_dist

        outputs = []
        background_attn_dists = []

        switcher_gen_pred_time_step = []
        switcher_gen_copy_time_step = []
        switcher_ref_time_step = []
        switcher_gen_time_step = []

        state = initial_state
        context_vector = tf.zeros([batch_size, attn_size])
        context_vector.set_shape([None, attn_size])
        q_context_vector = tf.zeros([batch_size, q_attn_size])
        q_context_vector.set_shape([None, q_attn_size])

        if initial_state_attention:  # true in decode mode
            context_vector, _ = background_attention(initial_state)
            q_context_vector, _ = context_attention(initial_state)

        for i, inp in enumerate(decoder_inputs):
            tf.logging.info("Adding hybrid_decoder timestep %i of %i", i + 1, len(decoder_inputs))
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            # Merge input and previous attentions into one vector x of the same size as inp
            input_size = inp.get_shape().with_rank(2)[1]
            if input_size.value is None:
                raise ValueError("Could not infer input size from input: %s" % inp.name)
            x = util.linear([inp] + [context_vector] + [q_context_vector], input_size, True)

            # Run the decoder RNN cell. cell_output = decoder state
            cell_output, state = cell(x, state)

            # Run the attention mechanism.
            if i == 0 and initial_state_attention:  # always true in decode mode
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):  # you need this because you've already run the initial attention(...) call
                    context_vector, attn_dist = background_attention(state)
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):  # you need this because you've already run the initial attention(...) call
                    q_context_vector, q_attn_dist = context_attention(state)
            else:
                context_vector, attn_dist = background_attention(state)
                q_context_vector, q_attn_dist = context_attention(state)

            background_attn_dists.append(attn_dist)

            # Calculate the switcher distribution
            with tf.variable_scope('calculate_switcher'):
                switcher_matrix = util.linear([context_vector, q_context_vector, state.c, state.h, x], 3, True)
                switcher_matrix = tf.nn.softmax(switcher_matrix)

                switcher_gen_pred_prob = tf.expand_dims(switcher_matrix[:, 0], 1)  # batch*1
                switcher_gen_copy_prob = tf.expand_dims(switcher_matrix[:, 1], 1)  # batch*1
                switcher_gen_prob = switcher_gen_pred_prob + switcher_gen_copy_prob  # batch*1
                switcher_ref_prob = tf.expand_dims(switcher_matrix[:, 2], 1)  # batch*1

                switcher_gen_pred_time_step.append(switcher_gen_pred_prob)
                switcher_gen_copy_time_step.append(switcher_gen_copy_prob)
                switcher_gen_time_step.append(switcher_gen_prob)
                switcher_ref_time_step.append(switcher_ref_prob)

            with tf.variable_scope("AttnOutputProjection"):
                output = util.linear([cell_output] + [context_vector] + [q_context_vector], cell.output_size, True)
            outputs.append(output)

        return outputs, state, background_attn_dists, switcher_ref_time_step, switcher_gen_time_step, switcher_gen_pred_time_step, switcher_gen_copy_time_step
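For orientation, a rough usage sketch of hybrid_decoder; every tensor name and size below is an assumption rather than something taken from the source. Because the switcher computation reads state.c and state.h, the cell must be an LSTM cell with a tuple state:

cell = tf.contrib.rnn.LSTMCell(hidden_size, state_is_tuple=True)
decoder_inputs = tf.unstack(decoder_emb, axis=1)  # list of [batch_size, emb_size] tensors
(outputs, final_state, background_attn_dists, switch_ref, switch_gen,
 switch_gen_pred, switch_gen_copy) = hybrid_decoder(
    decoder_inputs, encoder_final_state, encoder_outputs, enc_padding_mask,
    query_outputs, que_padding_mask, cell, initial_state_attention=False)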
Example #9
    def __init__(self, config, vocab, label_space_size, verbose=True):
        super(RecurrentNetworkModel, self).__init__(config, vocab, label_space_size)
        self.lengths = tf.placeholder(tf.int32, [config.batch_size], name='lengths')
        self.labels = tf.placeholder(tf.float32, [config.batch_size, label_space_size],
                                     name='labels')
        with tf.device('/cpu:0'):
            self.notes = tf.placeholder(tf.int32, [config.batch_size, None], name='notes')
            if config.multilayer and config.bidirectional:
                rev_notes = tf.reverse_sequence(self.notes[:, 1:], tf.maximum(self.lengths - 1, 0),
                                                seq_axis=1, batch_axis=0)
                rev_notes = tf.concat([tf.constant(vocab.eos_index,
                                       dtype=tf.int32, shape=[config.batch_size, 1]), rev_notes], 1)

            init_width = 0.5 / config.word_emb_size
            self.embeddings = tf.get_variable('embeddings', [len(vocab.vocab),
                                                             config.word_emb_size],
                                              initializer=tf.random_uniform_initializer(-init_width,
                                                                                        init_width),
                                              trainable=config.train_embs)
            embed = tf.nn.embedding_lookup(self.embeddings, self.notes)
            if config.multilayer and config.bidirectional:
                rev_embed = tf.nn.embedding_lookup(self.embeddings, rev_notes)

        if config.rnn_grnn_size:
            C = config.hidden_size
            G = label_space_size
            E = config.word_emb_size
            N = np.square(C) + C + (2*C*G) + (C*E) + (G*E) + (2*G)
            hidden_size = int((np.sqrt(np.square(E+1+(G/3)) - (4*((G/3) - N))) - (E+1+(G/3))) / 2)
            if verbose:
                print('Computed RNN hidden size:', hidden_size)
        else:
            hidden_size = config.hidden_size

        if config.rnn_type == 'entnet':
            keys = [tf.get_variable('key_{}'.format(j), [config.word_emb_size])
                    for j in range(config.num_blocks)]
            cell = DynamicMemoryCell(config.num_blocks, config.word_emb_size, keys,
                                     initializer=tf.contrib.layers.xavier_initializer(),
                                     activation=util.prelu)
        elif config.rnn_type == 'gru':
            cell = tf.contrib.rnn.GRUCell(hidden_size)
        elif config.rnn_type == 'lstm':
            cell = tf.contrib.rnn.BasicLSTMCell(hidden_size)

        if config.multilayer or config.bidirectional:
            with tf.variable_scope('gru_rev', initializer=tf.contrib.layers.xavier_initializer()):
                rev_cell = tf.contrib.rnn.GRUCell(hidden_size)
        inputs = embed
        if config.multilayer or not config.bidirectional:
            if config.multilayer:
                with tf.variable_scope('gru_rev'):
                    if config.bidirectional:
                        embed_ = rev_embed
                    else:
                        embed_ = embed
                    rev_out, _ = tf.nn.dynamic_rnn(rev_cell, embed_, sequence_length=self.lengths,
                                                   swap_memory=True, dtype=tf.float32)
                    if config.bidirectional:
                        rev_out = tf.reverse_sequence(rev_out, self.lengths, seq_axis=1,
                                                      batch_axis=0)
                    if config.reconcat_input:
                        inputs = tf.concat([inputs, rev_out], 2)
                    else:
                        inputs = rev_out
            # recurrence
            outs, last_state = tf.nn.dynamic_rnn(cell, inputs, sequence_length=self.lengths,
                                                 swap_memory=True, dtype=tf.float32)
        else:  # not multilayer and bidirectional
            _, last_state = tf.nn.bidirectional_dynamic_rnn(cell, rev_cell, inputs,
                                                            sequence_length=self.lengths,
                                                            swap_memory=True, dtype=tf.float32)
            last_state = tf.concat(last_state, 1)

        if config.rnn_type == 'lstm':
            last_state = tf.concat(last_state, 1)

        if config.rnn_type == 'entnet' and config.use_attention:
            # start with uniform attention
            attention = tf.get_variable('attention', [label_space_size, config.num_blocks],
                                        initializer=tf.zeros_initializer())
            self.attention = tf.nn.softmax(attention)

            # replicate each column of the attention matrix emb_size times (11112222...)
            attention = tf.tile(self.attention, [1, config.word_emb_size])
            attention = tf.reshape(attention, [label_space_size, config.word_emb_size,
                                               config.num_blocks])
            attention = tf.transpose(attention, [0, 2, 1])
            attention = tf.reshape(attention, [label_space_size, -1])

            # weight matrix from emb_size to label_space_size. this is the weight matrix that acts
            # on the post-attention embeddings from last_state.
            weight = tf.get_variable('weight', [label_space_size, config.word_emb_size],
                                     initializer=tf.contrib.layers.xavier_initializer())

            # tile the weight matrix num_blocks times in the second dimension and multiply the
            # attention to it. this is equivalent to doing attention + sum over all the blocks for
            # each label.
            weight = tf.tile(weight, [1, config.num_blocks])
            attended_weight = weight * attention

            # label bias
            bias = tf.get_variable("bias", [label_space_size], initializer=tf.zeros_initializer())

            logits = tf.nn.bias_add(tf.matmul(last_state, tf.transpose(attended_weight)), bias)
        else:
            logits = util.linear(last_state, self.label_space_size)
            if config.rnn_type == 'gru' and (config.multilayer or not config.bidirectional):
                flat_outs = tf.reshape(outs, [-1, hidden_size])
                flat_logits = util.linear(flat_outs, self.label_space_size, reuse=True)
                step_logits = tf.reshape(flat_logits, [config.batch_size, -1,
                                                       self.label_space_size])
                self.step_probs = tf.sigmoid(step_logits)

        self.probs = tf.sigmoid(logits)
        self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                                           labels=self.labels))
        self.train_op = self.minimize_loss(self.loss)
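The 'entnet' branch passes util.prelu as the DynamicMemoryCell activation; that helper is not shown. A minimal TF1-style sketch of a parametric ReLU with a learned negative slope (the variable name and initial value are assumptions):

def prelu(features):
    # Hypothetical parametric ReLU: max(0, x) + alpha * min(0, x), where alpha
    # is a learned per-feature slope for negative inputs.
    alpha = tf.get_variable('prelu_alpha', [features.get_shape()[-1].value],
                            initializer=tf.constant_initializer(0.25))
    return tf.maximum(0.0, features) + alpha * tf.minimum(0.0, features)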
Example #10
def linear_color(value, m, x, c1, c2) -> tuple:
    return (linear(value, m, x, c1[0], c2[0]),
            linear(value, m, x, c1[1], c2[1]),
            linear(value, m, x, c1[2], c2[2]))
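Assuming linear(value, m, x, a, b) maps value linearly from the range [m, x] onto [a, b], linear_color simply interpolates the three RGB channels independently. A hypothetical usage:

red, blue = (255, 0, 0), (0, 0, 255)
midpoint = linear_color(5, 0, 10, red, blue)  # roughly (127.5, 0.0, 127.5) under that assumption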
Example #11
def discriminator(x_image, y_label,
                  batch_size=10,
                  dim_con=64,
                  dim_fc=1024,
                  reuse=False):
    """
    Returns the discriminator network. It takes an image and returns a real/fake classification across each label.
    The discriminator network is structured as a Convolution Neural Net with two layers of convolution and pooling,
    followed by two fully-connected layers.

    Args:
        x_image:
        y_label:
        batch_size:
        dim_con:
        dim_fc:
        reuse:

    Returns:
        The discriminator network.
    """
    with tf.variable_scope("discriminator") as scope:
        if reuse:
            scope.reuse_variables()

        # create x as the joint 4-D feature representation of the image and the label
        y_4d = tf.reshape(y_label, [batch_size, 1, 1, DIM_Y])
        x_4d = tf.reshape(x_image, [batch_size, 28, 28, 1])
        x = concat(x_4d, y_4d)

        tf.summary.histogram('act_d0', x)

        # first convolution-activation-pooling layer
        d1 = cnn_block(x, 1 + DIM_Y, 'd1')

        # join the output of the previous layer with the labels vector
        d1 = concat(d1, y_4d)

        tf.summary.histogram('act_d1', d1)

        # second convolution-activation-pooling layer
        d2 = cnn_block(d1, dim_con + DIM_Y, 'd2')

        # flatten the output of the second layer to a 2-D matrix with shape - [batch, ?]
        d2 = tf.reshape(d2, [batch_size, -1])

        # join the flattened output with the labels vector and apply this as input to
        # a series of fully connected layers.
        d2 = tf.concat([d2, y_label], 1)

        tf.summary.histogram('act_d2', d2)

        # first fully connected layer
        d3 = tf.nn.dropout(lrelu(linear(
            x_input=d2,
            dim_in=d2.get_shape().as_list()[-1],
            dim_out=dim_fc,
            name='d3')), KEEP_PROB)

        # join the output of the previous layer with the labels vector
        d3 = tf.concat([d3, y_label], 1)

        tf.summary.histogram('act_d3', d3)

        # second and last fully connected layer
        # calculate the un-normalized log probability of each label
        d4_logits = linear(d3, dim_fc + DIM_Y, 1, 'd4')

        # calculate the activation values, dimension - [batch, 1]
        d4 = tf.nn.sigmoid(d4_logits)

        tf.summary.histogram('act_d4', d4)

        return d4, d4_logits
Example #12
def generator(z_input, y_label,
              batch_size=10,
              dim_con=64,
              dim_fc=1024,
              reuse=False):
    """
    Args:
        z_input: input noise tensor, float - [batch_size, DIM_Z=100]
        y_label: input label tensor, float - [batch_size, DIM_Y=10]
        batch_size: number of examples per batch
        dim_con: base number of convolution filters
        dim_fc: number of units in the fully-connected layer
        reuse: whether to reuse the generator variables
    Returns:
        x': the generated image tensor, float - [batch_size, DIM_IMAGE=784]
    """
    with tf.variable_scope("generator") as scope:
        if reuse:
            scope.reuse_variables()

        # create z as the joint representation of the input noise and the label
        z = tf.concat([z_input, y_label], 1)

        tf.summary.histogram('act_g0', z)

        # first fully-connected layer
        g1 = tf.nn.relu(tf.contrib.layers.batch_norm(linear(
            x_input=z,
            dim_in=DIM_Z + DIM_Y,
            dim_out=dim_fc,
            name='g1'), epsilon=1e-5, scope='g1_bn'))

        # join the output of the previous layer with the labels vector
        g1 = tf.concat([g1, y_label], 1)

        tf.summary.histogram('act_g1', g1)

        # second fully-connected layer
        g2 = tf.nn.relu(tf.contrib.layers.batch_norm(linear(
            x_input=g1,
            dim_in=g1.get_shape().as_list()[-1],
            dim_out=dim_con * 2 * (IMAGE_SIZE // 4) * (IMAGE_SIZE // 4),
            name='g2'), epsilon=1e-5, scope='g2_bn'))

        # create a joint 4-D feature representation of the output of the previous layer and the label
        # to serve as a 7x7 input image for the next de-convolution layer
        y_ = tf.reshape(y_label, [batch_size, 1, 1, DIM_Y])
        g2 = tf.reshape(g2, [batch_size, IMAGE_SIZE // 4, IMAGE_SIZE // 4, dim_con * 2])
        g2 = concat(g2, y_)

        tf.summary.histogram('act_g2', g2)

        # first layer of deconvolution produces a larger 14x14 image
        g3 = deconv2d(g2, [batch_size, IMAGE_SIZE // 2, IMAGE_SIZE // 2, dim_con * 2], 'g3')

        # apply batch normalization to the deconvolution output, followed by a
        # ReLU to stabilize the output of this layer
        g3 = tf.nn.relu(tf.contrib.layers.batch_norm(g3, epsilon=1e-5, scope='g3_bn'))

        # join the output of the previous layer with the labels vector
        g3 = concat(g3, y_)

        tf.summary.histogram('act_g3', g3)

        # second layer of deconvolution produces the final sized 28x28 image
        g4 = deconv2d(g3, [batch_size, IMAGE_SIZE, IMAGE_SIZE, 1], 'x')

        # no batch normalization in the final layer but a sigmoid activation function is used to
        # generate a sharp and crisp image vector; dimension - [28, 28, 1]
        g4 = tf.nn.sigmoid(g4)

        tf.summary.histogram('act_g4', g4)

        return g4
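A rough sketch of how generator and discriminator are typically wired together into a conditional GAN; the placeholders and losses below are assumptions, while DIM_Z, DIM_Y, IMAGE_SIZE and the default batch size of 10 come from the examples above:

z = tf.placeholder(tf.float32, [10, DIM_Z], name='z')                    # noise
y = tf.placeholder(tf.float32, [10, DIM_Y], name='y')                    # labels
x = tf.placeholder(tf.float32, [10, IMAGE_SIZE * IMAGE_SIZE], name='x')  # real images

g = generator(z, y)                                      # fake images, [batch, 28, 28, 1]
d_real, d_logits_real = discriminator(x, y)              # discriminator on real images
d_fake, d_logits_fake = discriminator(g, y, reuse=True)  # same weights on generated images

# A common choice of conditional-GAN losses (not taken from the source):
d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_real, labels=tf.ones_like(d_real)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.zeros_like(d_fake)))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.ones_like(d_fake)))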