Exemple #1
0
	def write(self, value_matrix, correlation_weight, qa_embedded, reuse=False):
		'''
			Erase-then-add update of the value memory.

			value_matrix : memory to update; it is combined elementwise with a
				[batch size, memory size, memory state dim] tensor
			correlation_weight : reshaped here to [batch size, memory size, 1]
			qa_embedded : input of the two linear layers that produce the erase
				and add vectors (each of width self.memory_state_dim)
			reuse : forwarded to operations.linear for both layers

			Returns value_matrix * (1 - w * e) + w * a, where e is the sigmoid
			erase signal, a is the tanh add signal and w is the correlation weight.
		'''
		state_dim = self.memory_state_dim
		# Both signals are reshaped to [batch size, 1, memory state dim] so
		# they broadcast over the memory-slot axis.
		row_shape = [-1, 1, state_dim]

		# e_t : erase signal squashed into (0, 1)
		erase_gate = tf.sigmoid(operations.linear(
			qa_embedded, state_dim, name=self.name + '/Erase_Vector', reuse=reuse))
		# a_t : add signal squashed into (-1, 1)
		add_content = tf.tanh(operations.linear(
			qa_embedded, state_dim, name=self.name + '/Add_Vector', reuse=reuse))

		erase_row = tf.reshape(erase_gate, row_shape)
		# [batch size, memory size, 1] so it broadcasts over the state axis
		slot_weight = tf.reshape(correlation_weight, [-1, self.memory_size, 1])

		# Keep (1 - w_t(i) * e_t) of every memory cell ...
		retained = value_matrix * (1 - tf.multiply(erase_row, slot_weight))
		# ... then add w_t(i) * a_t on top.
		added = tf.multiply(tf.reshape(add_content, row_shape), slot_weight)

		updated_memory = retained + added
		print('Memory shape : %s' % (updated_memory.get_shape()))
		return updated_memory
 def g_rinv(layer, x1_target, x0_activation):
   """Chain three inverse steps for `layer` and return the resulting tensor.

   The steps are, in order: ops.relu().f_inv on (x1_target, x0_activation),
   subtraction of b[layer], and ops.linear().f_inv on the result together
   with x0_activation and W[layer]. Both f_inv callables are run through
   tf.py_func and declared to return a single float32 tensor.
   """
   with tf.variable_scope(vscope[layer], reuse=True):
     # Neither variable is used below. With reuse=True the lookups are
     # expected to fail when 'V' or 'c' do not exist in this scope, so they
     # are kept as-is.
     # NOTE(review): confirm whether these fetches are still wanted.
     unused_v = tf.get_variable('V')
     unused_c = tf.get_variable('c')

   relu_outputs = tf.py_func(
       ops.relu().f_inv,
       [x1_target, x0_activation],
       [tf.float32],
       name='x3_')
   pre_relu = relu_outputs[0]

   # NOTE(review): tf.sub is the pre-1.0 spelling (later tf.subtract);
   # confirm which TensorFlow version this file targets.
   pre_bias = tf.sub(pre_relu, b[layer], name='x2_')

   linear_outputs = tf.py_func(
       ops.linear().f_inv,
       [pre_bias, x0_activation, W[layer]],
       [tf.float32],
       name='x1_')
   return linear_outputs[0]
Exemple #3
0
 def decoder(self, embedding, use_bn=False, is_training=True):
     """Build the decoder sub-graph inside the 'decoder' variable scope.

     The embedding goes through a linear layer and is reshaped to a
     [-1, target_size // 8, target_size // 8, filter_depth] feature map.
     Four stages follow, each made of two 3x3, stride-1 conv + ELU layers;
     the first three stages end with nearest-neighbour upsampling to
     target_size // 4, target_size // 2 and target_size. A last conv + ELU
     maps the result to self.args.num_channels channels.

     Args:
         embedding: tensor fed to the first linear layer.
         use_bn: forwarded to every operations.conv2d call.
         is_training: forwarded to every operations.elu call.

     Returns:
         The output tensor of the final ELU.
     """
     # Side length of the first feature map. It is computed once because
     # `t // 8 * t // 8 * d` evaluates left to right as
     # ((t // 8) * t // 8) * d, which only equals (t // 8) ** 2 * d when t is
     # a multiple of 8; for any other target_size the linear layer width
     # would disagree with the reshape below.
     base_size = self.args.target_size // 8
     filter_depth = self.args.filter_depth

     with tf.variable_scope('decoder'):
         print('Embedding shape %s' % embedding.get_shape())
         embedding_fc = operations.linear(embedding,
                                          base_size * base_size * filter_depth,
                                          name='dec_linear')
         x = tf.reshape(embedding_fc,
                        [-1, base_size, base_size, filter_depth])
         print('Shape %s' % x.get_shape())
         for i in range(4):
             # The ELU names keep their historical 'enc_' prefix so graph
             # op names stay unchanged.
             enc_conv1 = operations.conv2d(x,
                                           filter_depth,
                                           filter_height=3,
                                           filter_width=3,
                                           stride_h=1,
                                           stride_v=1,
                                           use_bn=use_bn,
                                           name='dec_conv_%d1' % (i + 1))
             enc_conv1_elu = operations.elu(enc_conv1,
                                            name='enc_conv_%d1_elu' %
                                            (i + 1),
                                            is_training=is_training)
             enc_conv2 = operations.conv2d(enc_conv1_elu,
                                           filter_depth,
                                           filter_height=3,
                                           filter_width=3,
                                           stride_h=1,
                                           stride_v=1,
                                           use_bn=use_bn,
                                           name='dec_conv_%d2' % (i + 1))
             enc_conv2_elu = operations.elu(enc_conv2,
                                            name='enc_conv_%d2_elu' %
                                            (i + 1),
                                            is_training=is_training)
             if i != 3:
                 # Upsampling via nearest neighbor: the divisor goes
                 # 4, 2, 1 for i = 0, 1, 2.
                 upsampled_size = int(self.args.target_size // (2**(2 - i)))
                 x = tf.image.resize_nearest_neighbor(
                     enc_conv2_elu,
                     size=(upsampled_size, upsampled_size))
             else:
                 # The last stage keeps its resolution.
                 x = enc_conv2_elu
         decoder_result = operations.conv2d(x,
                                            self.args.num_channels,
                                            filter_height=3,
                                            filter_width=3,
                                            stride_h=1,
                                            stride_v=1,
                                            use_bn=use_bn,
                                            name='dec_conv_last')
         decoder_result = operations.elu(decoder_result,
                                         name='dec_conv_last_elu',
                                         is_training=is_training)
         return decoder_result
Exemple #4
0
    def build_network(self, reuse_flag):
        """Build a single-step read / write / predict graph.

        Creates the `q_data`, `qa_data` and `target` placeholders (each of
        shape [batch_size]), looks both ids up in `self.q_embed_mtx` and
        `self.qa_embed_mtx`, and runs one attention -> read -> write pass
        through `self.memory`. The read content is concatenated with the
        question embedding and fed through the 'Summary_Vector' (tanh) and
        'Prediction' linear layers.

        Side effects: sets self.q_data, self.qa_data, self.target,
        self.correlation_weight, self.read_content and self.new_memory_value.

        Args:
            reuse_flag: forwarded as `reuse` to the memory write and to both
                linear layers.

        Returns:
            The output of the 'Prediction' linear layer (width 1).
        """
        print('Building network')

        # One id / label per batch element: no sequence axis in this variant.
        batch_shape = [self.args.batch_size]
        self.q_data = tf.placeholder(tf.int32, batch_shape, name='q_data')
        self.qa_data = tf.placeholder(tf.int32, batch_shape, name='qa_data')
        self.target = tf.placeholder(tf.float32, batch_shape, name='target')

        # Question ids feed the attention; (question, answer) ids feed the
        # memory write.
        q_embed_data = tf.nn.embedding_lookup(self.q_embed_mtx, self.q_data)
        qa_embed_data = tf.nn.embedding_lookup(self.qa_embed_mtx,
                                               self.qa_data)

        # Attention over memory slots, then read with those weights.
        self.correlation_weight = self.memory.attention(q_embed_data)
        self.read_content = self.memory.read(self.correlation_weight)

        # Write using the same weights that were used for the read.
        self.new_memory_value = self.memory.write(self.correlation_weight,
                                                  qa_embed_data,
                                                  reuse=reuse_flag)

        # f_t : summary of what was read plus the question itself
        summary_input = tf.concat([self.read_content, q_embed_data], 1)
        summary_vector = tf.tanh(
            operations.linear(summary_input,
                              self.args.final_fc_dim,
                              name='Summary_Vector',
                              reuse=reuse_flag))

        # p_t : returned as logits, no sigmoid applied here
        return operations.linear(summary_vector,
                                 1,
                                 name='Prediction',
                                 reuse=reuse_flag)
Exemple #5
0
    def inference(self, q_embed, correlation_weight, value_matrix, reuse_flag):
        """Read from the value memory and predict from the read content.

        Args:
            q_embed: question embedding, fed to the 'input_embed_content'
                linear layer (width 50) followed by tanh.
            correlation_weight: weights passed to self.memory.value.read.
            value_matrix: memory passed to self.memory.value.read.
            reuse_flag: forwarded as `reuse` to every operations.linear call.

        Returns:
            Tuple (read_content, summary_vector, pred_logits, pred_prob),
            where pred_prob is the sigmoid of pred_logits.

        Raises:
            ValueError: if self.args.summary_activation is not one of
                'tanh', 'sigmoid' or 'relu'.
        """
        # Validate before any graph op is created. Previously an unknown
        # value fell through the if/elif chain and surfaced later as an
        # UnboundLocalError on `summary_vector`, after part of the graph had
        # already been built.
        activation_name = self.args.summary_activation
        if activation_name not in ('tanh', 'sigmoid', 'relu'):
            raise ValueError(
                "Unsupported summary_activation %r; expected 'tanh', "
                "'sigmoid' or 'relu'" % (activation_name,))

        read_content = self.memory.value.read(value_matrix, correlation_weight)

        # Extra FC layer on the question embedding (the original author notes
        # that the MXNet implementation has such a layer).
        q_embed_content_logit = operations.linear(q_embed,
                                                  50,
                                                  name='input_embed_content',
                                                  reuse=reuse_flag)
        q_embed_content = tf.tanh(q_embed_content_logit)

        mastery_level_prior_difficulty = tf.concat(
            [read_content, q_embed_content], 1)

        # f_t
        summary_logit = operations.linear(mastery_level_prior_difficulty,
                                          self.args.final_fc_dim,
                                          name='Summary_Vector',
                                          reuse=reuse_flag)
        if activation_name == 'tanh':
            summary_vector = tf.tanh(summary_logit)
        elif activation_name == 'sigmoid':
            summary_vector = tf.sigmoid(summary_logit)
        else:
            # Only 'relu' is left after the validation above.
            summary_vector = tf.nn.relu(summary_logit)

        # p_t
        pred_logits = operations.linear(summary_vector,
                                        1,
                                        name='Prediction',
                                        reuse=reuse_flag)

        pred_prob = tf.sigmoid(pred_logits)

        return read_content, summary_vector, pred_logits, pred_prob
Exemple #6
0
    def add(self, value_matrix, correlation_weight, knowledge_growth, reuse=False):
        """Compute the per-slot add term of the memory update.

        Args:
            value_matrix: accepted for signature symmetry; not read here.
            correlation_weight: reshaped to [-1, memory_size, 1].
            knowledge_growth: input of the 'Add_Vector' linear layer.
            reuse: forwarded to operations.linear.

        Returns:
            The broadcast product of the activated add signal (reshaped to
            [-1, 1, memory_state_dim]) and the reshaped correlation weight.
        """
        add_signal = self.activate_add_signal(
            operations.linear(knowledge_growth,
                              self.memory_state_dim,
                              name=self.name + '/Add_Vector',
                              reuse=reuse))

        # One weight per memory slot, broadcast over the state axis.
        slot_weights = tf.reshape(correlation_weight,
                                  [-1, self.memory_size, 1])
        # One add vector per batch element, broadcast over the slot axis.
        signal_row = tf.reshape(add_signal, [-1, 1, self.memory_state_dim])

        return tf.multiply(signal_row, slot_weights)
Exemple #7
0
 def encoder(self, imgs, use_bn=False, is_training=True):
     """Build the encoder sub-graph inside the 'encoder' variable scope.

     Four stages of two 3x3, stride-1 conv + ELU layers; stage k (1-based)
     uses filter_depth * k filters. Stages 1-3 end with a 3x3, stride-2
     conv + ELU with filter_depth * (k + 1) filters. The final feature map
     is flattened and mapped by a linear layer to self.args.embedding_size.

     Args:
         imgs: input image tensor (original author's note:
             [batch, 64, 64, 3]).
         use_bn: accepted but not forwarded to any layer in this method.
             NOTE(review): confirm whether that is intended.
         is_training: forwarded to every operations.elu call.

     Returns:
         The embedding tensor; no non-linearity follows the linear layer.
     """
     base_depth = self.args.filter_depth
     kernel = dict(filter_height=3, filter_width=3)

     with tf.variable_scope('encoder'):
         x = imgs
         for stage in range(1, 5):
             stage_depth = base_depth * stage

             first = operations.conv2d(x,
                                       stage_depth,
                                       stride_h=1,
                                       stride_v=1,
                                       name='enc_conv_%d1' % stage,
                                       **kernel)
             first = operations.elu(first,
                                    name='enc_conv_%d1_elu' % stage,
                                    is_training=is_training)

             second = operations.conv2d(first,
                                        stage_depth,
                                        stride_h=1,
                                        stride_v=1,
                                        name='enc_conv_%d2' % stage,
                                        **kernel)
             second = operations.elu(second,
                                     name='enc_conv_%d2_elu' % stage,
                                     is_training=is_training)

             if stage == 4:
                 # The last stage is not downsampled.
                 x = second
                 continue

             # Downsample with a stride-2 conv that already widens to the
             # next stage's depth.
             x = operations.conv2d(second,
                                   base_depth * (stage + 1),
                                   stride_h=2,
                                   stride_v=2,
                                   name='enc_downsample_%d' % stage,
                                   **kernel)
             x = operations.elu(x,
                                name='enc_downsample_%d_elu' % stage,
                                is_training=is_training)

         # Flatten using the statically known shape.
         static_dims = x.get_shape().as_list()
         flat_width = static_dims[1] * static_dims[2] * static_dims[3]
         flattened = tf.reshape(x, [-1, flat_width])
         return operations.linear(flattened,
                                  self.args.embedding_size,
                                  name='enc_fc_layer')
Exemple #8
0
    def erase(self, value_matrix, correlation_weight, knowledge_growth, reuse=False):
        """Apply the erase step of the memory update.

        Args:
            value_matrix: memory to be attenuated.
            correlation_weight: reshaped to [-1, memory_size, 1].
            knowledge_growth: input of the 'Erase_Vector' linear layer.
            reuse: forwarded to operations.linear.

        Returns:
            value_matrix * (1 - w * e), where e is the activated erase
            signal reshaped to [-1, 1, memory_state_dim] and w the reshaped
            correlation weight.
        """
        erase_signal = self.activate_erase_signal(
            operations.linear(knowledge_growth,
                              self.memory_state_dim,
                              name=self.name + '/Erase_Vector',
                              reuse=reuse))

        # One erase vector per batch element, broadcast over the slot axis.
        signal_row = tf.reshape(erase_signal, [-1, 1, self.memory_state_dim])
        # One weight per memory slot, broadcast over the state axis.
        slot_weights = tf.reshape(correlation_weight,
                                  [-1, self.memory_size, 1])

        keep_fraction = 1 - tf.multiply(signal_row, slot_weights)
        return tf.multiply(value_matrix, keep_fraction)
Exemple #9
0
    def create_model(self):
        """Build the full training graph and store its handles on `self`.

        Steps, in order:
          1. Create the input placeholders (question ids, question-answer
             ids, targets, knowledge tags, time bins, diff and guan ids).
          2. Create the memory key/value variables and three side-feature
             embedding tables, tile the value memory over the batch and
             instantiate the DKVMN memory.
          3. Create the question and question-answer embedding tables and
             look up / split every input along the sequence axis.
          4. Unroll seq_len steps of attention -> read -> predict -> write.
          5. Define the masked sigmoid cross-entropy loss (targets equal to
             -1 are dropped), the momentum optimizer with global-norm
             gradient clipping, the parameter dictionary and the saver.

        Sets, among others: self.memory, self.pred_logits, self.loss,
        self.pred, self.global_step, self.lr, self.train_op, self.tr_vrbs,
        self.params and self.saver.
        """
        # 'seq_len' means question sequences
        self.q_data = tf.placeholder(tf.int32,
                                     [self.args.batch_size, self.args.seq_len],
                                     name='q_data')
        self.qa_data = tf.placeholder(
            tf.int32, [self.args.batch_size, self.args.seq_len],
            name='qa_data')
        self.target = tf.placeholder(tf.float32,
                                     [self.args.batch_size, self.args.seq_len],
                                     name='target')
        # NOTE(review): the trailing 3 and 188 below are hard-coded; presumably
        # the number of tags per question and the tag vocabulary size - confirm.
        self.kg = tf.placeholder(tf.int32,
                                 [self.args.batch_size, self.args.seq_len, 3],
                                 name='knowledge_tag')
        self.kg_hot = tf.placeholder(
            tf.float32, [self.args.batch_size, self.args.seq_len, 188],
            name='knowledge_hot')
        self.timebin = tf.placeholder(
            tf.int32, [self.args.batch_size, self.args.seq_len])
        self.diff = tf.placeholder(tf.int32,
                                   [self.args.batch_size, self.args.seq_len])
        # NOTE(review): self.guan is created but never read in this method.
        self.guan = tf.placeholder(tf.int32,
                                   [self.args.batch_size, self.args.seq_len])

        with tf.variable_scope('Memory'):
            init_memory_key = tf.get_variable('key', [self.args.memory_size, self.args.memory_key_state_dim], \
             initializer=tf.truncated_normal_initializer(stddev=0.1))
            init_memory_value = tf.get_variable('value', [self.args.memory_size,self.args.memory_value_state_dim], \
             initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Side-feature embedding tables; each has 12 rows (hard-coded).
        with tf.variable_scope('time'):
            time_embed_mtx = tf.get_variable('timebin', [12, self.args.memory_value_state_dim],\
             initializer=tf.truncated_normal_initializer(stddev=0.1))
        # NOTE(review): scope/variable names and Python names are crossed here:
        # the 'diff/diff' variable is bound to guan_embed_mtx and the
        # 'gate/gate' variable is bound to diff_embed_mtx. Confirm the intent
        # before renaming anything (graph variable names affect checkpoints).
        with tf.variable_scope('diff'):
            guan_embed_mtx = tf.get_variable('diff', [12, self.args.memory_value_state_dim],\
             initializer=tf.truncated_normal_initializer(stddev=0.1))

        with tf.variable_scope('gate'):
            diff_embed_mtx = tf.get_variable('gate', [12, self.args.memory_value_state_dim],\
             initializer=tf.truncated_normal_initializer(stddev=0.1))

        # Add a leading axis to the value memory and tile it batch_size times.
        init_memory_value = tf.tile(tf.expand_dims(init_memory_value, 0),
                                    tf.stack([self.args.batch_size, 1, 1]))
        print(init_memory_value.get_shape())

        self.memory = DKVMN(self.args.memory_size, self.args.memory_key_state_dim, \
          self.args.memory_value_state_dim, init_memory_key=init_memory_key, init_memory_value=init_memory_value, batch_size=self.args.batch_size, name='DKVMN')

        with tf.variable_scope('Embedding'):
            # A
            q_embed_mtx = tf.get_variable('q_embed', [self.args.n_questions+1, self.args.memory_key_state_dim],\
             initializer=tf.truncated_normal_initializer(stddev=0.1))
            # B
            qa_embed_mtx = tf.get_variable(
                'qa_embed', [
                    2 * self.args.n_questions + 1,
                    self.args.memory_value_state_dim
                ],
                initializer=tf.truncated_normal_initializer(stddev=0.1))

        # Each lookup is split into seq_len pieces along axis 1, one per step.
        q_embed_data = tf.nn.embedding_lookup(q_embed_mtx, self.q_data)
        slice_q_embed_data = tf.split(q_embed_data, self.args.seq_len, 1)

        qa_embed_data = tf.nn.embedding_lookup(qa_embed_mtx, self.qa_data)
        slice_qa_embed_data = tf.split(qa_embed_data, self.args.seq_len, 1)

        time_embedding = tf.nn.embedding_lookup(time_embed_mtx, self.timebin)
        slice_time_embedding = tf.split(time_embedding, self.args.seq_len, 1)

        # NOTE(review): this lookup uses diff_embed_mtx and self.diff, exactly
        # like diff_embedding below, so guan_embedding duplicates
        # diff_embedding while guan_embed_mtx and self.guan stay unused.
        # Looks like a copy/paste slip - confirm the intended table and
        # placeholder before changing, since it alters what the model sees.
        guan_embedding = tf.nn.embedding_lookup(diff_embed_mtx, self.diff)
        slice_guan_embedding = tf.split(guan_embedding, self.args.seq_len, 1)

        diff_embedding = tf.nn.embedding_lookup(diff_embed_mtx, self.diff)
        slice_diff_embedding = tf.split(diff_embedding, self.args.seq_len, 1)

        slice_kg = tf.split(self.kg, self.args.seq_len, 1)

        slice_kg_hot = tf.split(self.kg_hot, self.args.seq_len, 1)

        reuse_flag = False

        prediction = list()

        # Logics
        for i in range(self.args.seq_len):
            # To reuse linear vectors
            if i != 0:
                reuse_flag = True

            # Drop the length-1 sequence axis left over from tf.split.
            q = tf.squeeze(slice_q_embed_data[i], 1)
            qa = tf.squeeze(slice_qa_embed_data[i], 1)
            kg = tf.squeeze(slice_kg[i], 1)
            kg_hot = tf.squeeze(slice_kg_hot[i], 1)
            dotime = tf.squeeze(slice_time_embedding[i], 1)
            # NOTE(review): dodiff is computed but never used below.
            dodiff = tf.squeeze(slice_diff_embedding[i], 1)
            doguan = tf.squeeze(slice_guan_embedding[i], 1)

            self.correlation_weight = self.memory.attention(q, kg, kg_hot)

            # # Read process, [batch size, memory value state dim]
            self.read_content = self.memory.read(self.correlation_weight)

            # The prediction input is the read content, the question
            # embedding and the "guan" side feature.
            mastery_level_prior_difficulty = tf.concat(
                [self.read_content, q, doguan], 1)

            # f_t
            summary_vector = tf.tanh(
                operations.linear(mastery_level_prior_difficulty,
                                  self.args.final_fc_dim,
                                  name='Summary_Vector',
                                  reuse=reuse_flag))
            # p_t
            pred_logits = operations.linear(summary_vector,
                                            1,
                                            name='Prediction',
                                            reuse=reuse_flag)

            prediction.append(pred_logits)

            # The write input is the (question, answer) embedding joined with
            # the time-bin embedding along axis 1.
            qa_time = tf.concat([qa, dotime], axis=1)

            # The write happens after this step's prediction has been made.
            self.new_memory_value = self.memory.write(self.correlation_weight,
                                                      qa_time,
                                                      reuse=reuse_flag)

        # 'prediction' : seq_len length list of [batch size ,1], make it [batch size, seq_len] tensor
        # tf.stack convert to [batch size, seq_len, 1]
        self.pred_logits = tf.reshape(tf.stack(
            prediction, axis=1), [self.args.batch_size, self.args.seq_len])

        # Define loss : standard cross entropy loss, need to ignore '-1' label example
        # Make target/label 1-d array
        target_1d = tf.reshape(self.target, [-1])
        pred_logits_1d = tf.reshape(self.pred_logits, [-1])
        # Positions whose target differs from -1.
        index = tf.where(
            tf.not_equal(target_1d, tf.constant(-1., dtype=tf.float32)))
        # tf.gather(params, indices) : Gather slices from params according to indices
        filtered_target = tf.gather(target_1d, index)
        filtered_logits = tf.gather(pred_logits_1d, index)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=filtered_logits,
                                                    labels=filtered_target))
        # Probabilities are computed for every position, masked or not.
        self.pred = tf.sigmoid(self.pred_logits)

        # Optimizer : momentum optimizer; the learning rate is fed through the
        # self.lr placeholder.
        self.global_step = tf.Variable(0, trainable=False)
        self.lr = tf.placeholder(tf.float32, [], name='learning_rate')

        optimizer = tf.train.MomentumOptimizer(self.lr, self.args.momentum)
        # Clip by global norm across all gradients before applying them.
        grads, vrbs = zip(*optimizer.compute_gradients(self.loss))
        grad, _ = tf.clip_by_global_norm(grads, self.args.maxgradnorm)
        self.train_op = optimizer.apply_gradients(zip(grad, vrbs),
                                                  global_step=self.global_step)
        self.tr_vrbs = tf.trainable_variables()
        # Map each trainable variable's name to the graph tensor of that name.
        self.params = {}
        for i in self.tr_vrbs:
            print(i.name)
            self.params[i.name] = tf.get_default_graph().get_tensor_by_name(
                i.name)
        self.saver = tf.train.Saver()
Exemple #10
0
    def create_model(self):
        """Build the sequence training graph and store its handles on `self`.

        Reads self.q_embed_mtx, self.qa_embed_mtx and self.memory, which are
        not created here. Creates the `q_data_seq`, `qa_data` and `target`
        placeholders (each [batch_size, seq_len]), unrolls seq_len steps of
        attention -> read -> write -> predict, and defines the masked sigmoid
        cross-entropy loss (targets equal to -1 are dropped), the momentum
        optimizer with global-norm gradient clipping and the saver.

        Sets self.q_data_seq, self.qa_data_seq, self.target_seq,
        self.correlation_weight, self.read_content, self.new_memory_value,
        self.pred_logits, self.loss, self.pred, self.global_step, self.lr,
        self.train_op, self.tr_vrbs and self.saver.
        """
        # 'seq_len' means question sequences
        seq_len = self.args.seq_len
        seq_shape = [self.args.batch_size, seq_len]
        self.q_data_seq = tf.placeholder(tf.int32, seq_shape,
                                         name='q_data_seq')
        self.qa_data_seq = tf.placeholder(tf.int32, seq_shape,
                                          name='qa_data')
        self.target_seq = tf.placeholder(tf.float32, seq_shape,
                                         name='target')

        # Look the ids up, then cut each result into seq_len pieces along
        # axis 1 so every time step gets its own slice.
        q_embed_data = tf.nn.embedding_lookup(self.q_embed_mtx,
                                              self.q_data_seq)
        q_steps = tf.split(q_embed_data, seq_len, 1)
        qa_embed_data = tf.nn.embedding_lookup(self.qa_embed_mtx,
                                               self.qa_data_seq)
        qa_steps = tf.split(qa_embed_data, seq_len, 1)

        step_logits = []
        for step in range(seq_len):
            # Variables are created on the first step and reused afterwards.
            share_weights = step > 0

            # k_t : drop the length-1 sequence axis left over from tf.split
            q = tf.squeeze(q_steps[step], 1)
            self.correlation_weight = self.memory.attention(q)

            # Read with the attention weights of this step.
            self.read_content = self.memory.read(self.correlation_weight)

            # Write this step's (question, answer) embedding; only the value
            # from the last step stays in self.new_memory_value.
            qa = tf.squeeze(qa_steps[step], 1)
            self.new_memory_value = self.memory.write(
                self.correlation_weight, qa, reuse=share_weights)

            # f_t : built from the content read before the write above
            summary_input = tf.concat([self.read_content, q], 1)
            summary_vector = tf.tanh(
                operations.linear(summary_input,
                                  self.args.final_fc_dim,
                                  name='Summary_Vector',
                                  reuse=share_weights))
            # p_t
            step_logits.append(
                operations.linear(summary_vector,
                                  1,
                                  name='Prediction',
                                  reuse=share_weights))

        # Stack the per-step logits along axis 1, then drop the trailing
        # unit axis by reshaping to [batch size, seq_len].
        self.pred_logits = tf.reshape(tf.stack(step_logits, axis=1),
                                      seq_shape)

        # Standard cross-entropy loss over the positions whose label is
        # not -1.
        flat_target = tf.reshape(self.target_seq, [-1])
        flat_logits = tf.reshape(self.pred_logits, [-1])
        valid_positions = tf.where(
            tf.not_equal(flat_target, tf.constant(-1., dtype=tf.float32)))
        kept_target = tf.gather(flat_target, valid_positions)
        kept_logits = tf.gather(flat_logits, valid_positions)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=kept_logits,
                                                    labels=kept_target))
        # Probabilities are computed for every position, masked or not.
        self.pred = tf.sigmoid(self.pred_logits)

        # Momentum optimizer; the learning rate is fed through self.lr.
        self.global_step = tf.Variable(0, trainable=False)
        self.lr = tf.placeholder(tf.float32, [], name='learning_rate')
        optimizer = tf.train.MomentumOptimizer(self.lr, self.args.momentum)
        grads, vrbs = zip(*optimizer.compute_gradients(self.loss))
        # Clip by global norm across all gradients before applying them.
        clipped_grads, _ = tf.clip_by_global_norm(grads,
                                                  self.args.maxgradnorm)
        self.train_op = optimizer.apply_gradients(
            zip(clipped_grads, vrbs), global_step=self.global_step)

        self.tr_vrbs = tf.trainable_variables()
        for variable in self.tr_vrbs:
            print(variable.name)

        self.saver = tf.train.Saver()
Exemple #11
0
    def generator(self,
                  z,
                  is_training=True,
                  name='generator',
                  reuse=True,
                  use_bn=False):
        """Build the generator sub-graph inside the `name` variable scope.

        `z` goes through a linear layer and is reshaped to a
        [-1, target_size // 8, target_size // 8, filter_depth] feature map.
        Four stages follow, each made of two 3x3, stride-1 conv + ELU
        layers; the first three stages end with nearest-neighbour upsampling
        to target_size // 4, target_size // 2 and target_size. A last
        conv + ELU maps the result to self.args.num_channels channels.

        Args:
            z: tensor fed to the first linear layer.
            is_training: forwarded to every operations.elu call.
            name: variable scope name.
            reuse: when truthy, scope.reuse_variables() is called before any
                layer is built. Note that the default is True.
            use_bn: forwarded to every operations.conv2d call.

        Returns:
            The output tensor of the final ELU.
        """
        # Side length of the first feature map. It is computed once because
        # `t // 8 * t // 8 * d` evaluates left to right as
        # ((t // 8) * t // 8) * d, which only equals (t // 8) ** 2 * d when t
        # is a multiple of 8; for any other target_size the linear layer
        # width would disagree with the reshape below.
        base_size = self.args.target_size // 8
        filter_depth = self.args.filter_depth

        with tf.variable_scope(name) as scope:
            if reuse:
                scope.reuse_variables()

            generator_fc = operations.linear(
                z,
                base_size * base_size * filter_depth,
                name='gen_linear')
            x = tf.reshape(generator_fc,
                           [-1, base_size, base_size, filter_depth])
            for i in range(4):
                gen_conv1 = operations.conv2d(x,
                                              filter_depth,
                                              filter_height=3,
                                              filter_width=3,
                                              stride_h=1,
                                              stride_v=1,
                                              use_bn=use_bn,
                                              name='gen_conv_%d1' % (i + 1))
                gen_conv1_elu = operations.elu(gen_conv1,
                                               name='gen_conv_%d1_elu' %
                                               (i + 1),
                                               is_training=is_training)
                gen_conv2 = operations.conv2d(gen_conv1_elu,
                                              filter_depth,
                                              filter_height=3,
                                              filter_width=3,
                                              stride_h=1,
                                              stride_v=1,
                                              use_bn=use_bn,
                                              name='gen_conv_%d2' % (i + 1))
                gen_conv2_elu = operations.elu(gen_conv2,
                                               name='gen_conv_%d2_elu' %
                                               (i + 1),
                                               is_training=is_training)
                if i < 3:
                    # Upsampling via nearest neighbor: the divisor goes
                    # 4, 2, 1 for i = 0, 1, 2.
                    upsampled_size = int(self.args.target_size //
                                         (2**(2 - i)))
                    x = tf.image.resize_nearest_neighbor(
                        gen_conv2_elu,
                        size=(upsampled_size, upsampled_size))
                else:
                    # The last stage keeps its resolution.
                    x = gen_conv2_elu
            generator_result = operations.conv2d(x,
                                                 self.args.num_channels,
                                                 filter_height=3,
                                                 filter_width=3,
                                                 stride_h=1,
                                                 stride_v=1,
                                                 use_bn=use_bn,
                                                 name='gen_conv_last')
            generator_result = operations.elu(generator_result,
                                              name='gen_conv_last_elu',
                                              is_training=is_training)
            return generator_result