def write(self, value_matrix, correlation_weight, qa_embedded, reuse=False):
    """Erase-then-add update of the value memory (DKVMN write step).

    Args:
        value_matrix: [batch, memory_size, memory_state_dim] current value memory.
        correlation_weight: [batch, memory_size] attention weights w_t.
        qa_embedded: [batch, memory_state_dim] embedded (q, a) response pair.
        reuse: whether to reuse the erase/add projection variables.

    Returns:
        Updated value memory, [batch, memory_size, memory_state_dim].
    """
    # e_t = sigmoid(E v_t) and a_t = tanh(A v_t), both [batch, d_v].
    erase_signal = tf.sigmoid(
        operations.linear(qa_embedded, self.memory_state_dim,
                          name=self.name+'/Erase_Vector', reuse=reuse))
    add_signal = tf.tanh(
        operations.linear(qa_embedded, self.memory_state_dim,
                          name=self.name+'/Add_Vector', reuse=reuse))

    # Reshape for broadcasting: signals -> [batch, 1, d_v],
    # attention weights -> [batch, memory_size, 1].
    erase_3d = tf.reshape(erase_signal, [-1, 1, self.memory_state_dim])
    add_3d = tf.reshape(add_signal, [-1, 1, self.memory_state_dim])
    weight_3d = tf.reshape(correlation_weight, [-1, self.memory_size, 1])

    # M~_t(i) = M_{t-1}(i) * (1 - w_t(i) e_t), then M_t(i) = M~_t(i) + w_t(i) a_t.
    erased_memory = value_matrix * (1 - tf.multiply(erase_3d, weight_3d))
    new_memory = erased_memory + tf.multiply(add_3d, weight_3d)

    print('Memory shape : %s' % (new_memory.get_shape()))
    return new_memory
def g_rinv(layer, x1_target, x0_activation):
    """Right-inverse of layer `layer`'s forward map (linear -> +bias -> relu).

    Inverts each stage in reverse order via numpy-side `f_inv` callbacks:
    relu^-1 first, then bias subtraction, then the linear inverse.

    Args:
        layer: index into the module-level `vscope`, `b`, `W` collections.
        x1_target: post-relu target activation to invert.
        x0_activation: the layer's recorded forward activation, used by the
            py_func inverses to resolve ambiguity (e.g. relu's zero region).

    Returns:
        Tensor approximating the layer input that would produce x1_target.
    """
    with tf.variable_scope(vscope[layer], reuse=True):
        # NOTE(review): V_ and c_ are fetched but unused here; kept so the
        # reuse scope still validates that these variables exist.
        V_ = tf.get_variable('V')
        c_ = tf.get_variable('c')
        relu_inv = tf.py_func(ops.relu().f_inv, [x1_target, x0_activation],
                              [tf.float32], name='x3_')[0]
        # BUG FIX: tf.sub was removed in TensorFlow 1.0 (renamed tf.subtract);
        # the rest of this file already uses post-1.0 APIs (tf.stack, tf.split).
        add_inv = tf.subtract(relu_inv, b[layer], name='x2_')
        return tf.py_func(ops.linear().f_inv,
                          [add_inv, x0_activation, W[layer]],
                          [tf.float32], name='x1_')[0]
def decoder(self, embedding, use_bn=False, is_training=True):
    """Decode an embedding vector back into an image.

    Projects the embedding to a (target_size/8, target_size/8, filter_depth)
    feature map, runs four double-conv blocks with nearest-neighbor
    upsampling between them, and finishes with a conv to `num_channels`.
    """
    with tf.variable_scope('decoder'):
        print('Embedding shape %s' % embedding.get_shape())
        embedding_fc = operations.linear(
            embedding,
            self.args.target_size // 8 * self.args.target_size // 8 * self.args.filter_depth,
            name='dec_linear')
        feat = tf.reshape(embedding_fc, [
            -1, self.args.target_size // 8, self.args.target_size // 8,
            self.args.filter_depth
        ])
        print('Shape %s' % feat.get_shape())

        for i in range(4):
            block_no = i + 1
            conv_a = operations.conv2d(feat, self.args.filter_depth,
                                       filter_height=3, filter_width=3,
                                       stride_h=1, stride_v=1,
                                       use_bn=use_bn,
                                       name='dec_conv_%d1' % block_no)
            # NOTE(review): the 'enc_conv_*' names below look like a
            # copy-paste from the encoder; kept verbatim since renaming
            # could break existing checkpoints.
            act_a = operations.elu(conv_a,
                                   name='enc_conv_%d1_elu' % block_no,
                                   is_training=is_training)
            conv_b = operations.conv2d(act_a, self.args.filter_depth,
                                       filter_height=3, filter_width=3,
                                       stride_h=1, stride_v=1,
                                       use_bn=use_bn,
                                       name='dec_conv_%d2' % block_no)
            act_b = operations.elu(conv_b,
                                   name='enc_conv_%d2_elu' % block_no,
                                   is_training=is_training)
            if i < 3:
                # Double spatial size via nearest-neighbor interpolation:
                # target/4 -> target/2 -> target.
                side = int(self.args.target_size // (2**(2 - i)))
                feat = tf.image.resize_nearest_neighbor(act_b, size=(side, side))
            else:
                feat = act_b

        out = operations.conv2d(feat, self.args.num_channels,
                                filter_height=3, filter_width=3,
                                stride_h=1, stride_v=1,
                                use_bn=use_bn, name='dec_conv_last')
        out = operations.elu(out, name='dec_conv_last_elu',
                             is_training=is_training)
        return out
def build_network(self, reuse_flag):
    """Build the single-step DKVMN inference graph.

    Defines the q/qa/target placeholders, runs one attention/read/write
    cycle on the external memory, and returns the prediction logits.

    Args:
        reuse_flag: passed to the shared linear layers so repeated graph
            builds reuse the same variables.

    Returns:
        [batch_size, 1] prediction logits (pre-sigmoid).
    """
    print('Building network')
    self.q_data = tf.placeholder(tf.int32, [self.args.batch_size], name='q_data')
    self.qa_data = tf.placeholder(tf.int32, [self.args.batch_size], name='qa_data')
    self.target = tf.placeholder(tf.float32, [self.args.batch_size], name='target')

    # Question / (question, answer) embeddings:
    # [batch, memory key state dim] and [batch, memory value state dim].
    q_embed = tf.nn.embedding_lookup(self.q_embed_mtx, self.q_data)
    qa_embed = tf.nn.embedding_lookup(self.qa_embed_mtx, self.qa_data)

    # Attention over memory keys -> [batch, memory size].
    self.correlation_weight = self.memory.attention(q_embed)
    # Read mastery from value memory -> [batch, memory value state dim].
    self.read_content = self.memory.read(self.correlation_weight)
    # Write the response back -> [batch, memory size, memory value state dim].
    self.new_memory_value = self.memory.write(self.correlation_weight,
                                              qa_embed, reuse=reuse_flag)

    # f_t: summary of read content conditioned on the question difficulty.
    features = tf.concat([self.read_content, q_embed], 1)
    summary_vector = tf.tanh(
        operations.linear(features, self.args.final_fc_dim,
                          name='Summary_Vector', reuse=reuse_flag))
    # p_t
    return operations.linear(summary_vector, 1, name='Prediction',
                             reuse=reuse_flag)
def inference(self, q_embed, correlation_weight, value_matrix, reuse_flag):
    """Compute the prediction head from a read of the value memory.

    Args:
        q_embed: [batch, key dim] question embedding.
        correlation_weight: [batch, memory size] attention weights.
        value_matrix: [batch, memory size, value dim] value memory to read.
        reuse_flag: reuse the shared linear-layer variables.

    Returns:
        (read_content, summary_vector, pred_logits, pred_prob).

    Raises:
        ValueError: if args.summary_activation is not one of
            'tanh' / 'sigmoid' / 'relu'.
    """
    read_content = self.memory.value.read(value_matrix, correlation_weight)

    # Extra FC on the question embedding — mirrors a layer present in the
    # reference MXNet implementation. 50 is the hidden width used there.
    q_embed_content_logit = operations.linear(q_embed, 50,
                                              name='input_embed_content',
                                              reuse=reuse_flag)
    q_embed_content = tf.tanh(q_embed_content_logit)

    mastery_level_prior_difficulty = tf.concat(
        [read_content, q_embed_content], 1)

    # f_t
    summary_logit = operations.linear(mastery_level_prior_difficulty,
                                      self.args.final_fc_dim,
                                      name='Summary_Vector', reuse=reuse_flag)
    if self.args.summary_activation == 'tanh':
        summary_vector = tf.tanh(summary_logit)
    elif self.args.summary_activation == 'sigmoid':
        summary_vector = tf.sigmoid(summary_logit)
    elif self.args.summary_activation == 'relu':
        summary_vector = tf.nn.relu(summary_logit)
    else:
        # BUG FIX: previously an unrecognized option fell through the
        # if/elif chain and left summary_vector unbound -> NameError at
        # graph-construction time. Fail fast with a clear message instead.
        raise ValueError('Unknown summary_activation: %r'
                         % self.args.summary_activation)

    # p_t
    pred_logits = operations.linear(summary_vector, 1, name='Prediction',
                                    reuse=reuse_flag)
    pred_prob = tf.sigmoid(pred_logits)
    return read_content, summary_vector, pred_logits, pred_prob
def add(self, value_matrix, correlation_weight, knowledge_growth, reuse=False):
    """Build the attention-weighted add signal for the memory update.

    Args:
        value_matrix: current value memory (unused here; kept for a
            signature parallel to erase()).
        correlation_weight: [batch, memory_size] attention weights.
        knowledge_growth: [batch, *] knowledge-growth vector to project.
        reuse: reuse the Add_Vector projection variables.

    Returns:
        [batch, memory_size, memory_state_dim] weighted add tensor.
    """
    projected = operations.linear(knowledge_growth, self.memory_state_dim,
                                  name=self.name + '/Add_Vector', reuse=reuse)
    add_signal = self.activate_add_signal(projected)
    # Broadcast [batch, 1, d_v] against [batch, memory_size, 1].
    signal_3d = tf.reshape(add_signal, [-1, 1, self.memory_state_dim])
    weight_3d = tf.reshape(correlation_weight, [-1, self.memory_size, 1])
    return tf.multiply(signal_3d, weight_3d)
def encoder(self, imgs, use_bn=False, is_training=True):
    """Encode images into a flat embedding vector.

    Four double-conv blocks with stride-2 downsampling between them
    (filter depth grows linearly per block), then a flatten and a final
    fully-connected projection with no non-linearity.
    """
    with tf.variable_scope('encoder'):
        feat = imgs  # e.g. [batch, 64, 64, 3]
        for i in range(4):
            block_no = i + 1
            depth = self.args.filter_depth * block_no
            conv_a = operations.conv2d(feat, depth,
                                       filter_height=3, filter_width=3,
                                       stride_h=1, stride_v=1,
                                       name='enc_conv_%d1' % block_no)
            act_a = operations.elu(conv_a,
                                   name='enc_conv_%d1_elu' % block_no,
                                   is_training=is_training)
            conv_b = operations.conv2d(act_a, depth,
                                       filter_height=3, filter_width=3,
                                       stride_h=1, stride_v=1,
                                       name='enc_conv_%d2' % block_no)
            act_b = operations.elu(conv_b,
                                   name='enc_conv_%d2_elu' % block_no,
                                   is_training=is_training)
            if i == 3:
                feat = act_b
            else:
                # Halve spatial resolution with a stride-2 conv and bump depth.
                feat = operations.conv2d(act_b,
                                         self.args.filter_depth * (i + 2),
                                         filter_height=3, filter_width=3,
                                         stride_h=2, stride_v=2,
                                         name='enc_downsample_%d' % block_no)
                feat = operations.elu(feat,
                                      name='enc_downsample_%d_elu' % block_no,
                                      is_training=is_training)

        dims = feat.get_shape().as_list()
        flattened = tf.reshape(feat, [-1, dims[1] * dims[2] * dims[3]])
        # Plain linear map — deliberately no activation on the embedding.
        return operations.linear(flattened, self.args.embedding_size,
                                 name='enc_fc_layer')
def erase(self, value_matrix, correlation_weight, knowledge_growth, reuse=False):
    """Apply the attention-weighted erase step to the value memory.

    Args:
        value_matrix: [batch, memory_size, memory_state_dim] value memory.
        correlation_weight: [batch, memory_size] attention weights.
        knowledge_growth: [batch, *] knowledge-growth vector to project.
        reuse: reuse the Erase_Vector projection variables.

    Returns:
        Partially-erased memory: M * (1 - w(i) * e), same shape as input.
    """
    projected = operations.linear(knowledge_growth, self.memory_state_dim,
                                  name=self.name + '/Erase_Vector', reuse=reuse)
    erase_signal = self.activate_erase_signal(projected)
    # Broadcast [batch, 1, d_v] against [batch, memory_size, 1].
    signal_3d = tf.reshape(erase_signal, [-1, 1, self.memory_state_dim])
    weight_3d = tf.reshape(correlation_weight, [-1, self.memory_size, 1])
    return value_matrix * (1 - signal_3d * weight_3d)
def create_model(self):
    """Build the full DKVMN training graph with auxiliary features.

    Constructs placeholders (questions, answers, targets, knowledge tags,
    time bins, difficulty, 'guan'), the key/value memory, the embedding
    tables, an unrolled per-timestep attention/read/predict/write loop,
    a masked sigmoid cross-entropy loss (labels of -1 are ignored), and a
    momentum optimizer with global-norm gradient clipping.

    Side effects: defines self.q_data, self.qa_data, self.target, self.kg,
    self.kg_hot, self.timebin, self.diff, self.guan, self.memory,
    self.pred_logits, self.pred, self.loss, self.train_op, self.params,
    self.saver, and others.
    """
    # 'seq_len' means question sequences
    self.q_data = tf.placeholder(tf.int32,
                                 [self.args.batch_size, self.args.seq_len],
                                 name='q_data')
    self.qa_data = tf.placeholder(
        tf.int32, [self.args.batch_size, self.args.seq_len], name='qa_data')
    self.target = tf.placeholder(tf.float32,
                                 [self.args.batch_size, self.args.seq_len],
                                 name='target')
    # Up to 3 knowledge tags per question, plus a 188-way one-hot encoding.
    self.kg = tf.placeholder(tf.int32,
                             [self.args.batch_size, self.args.seq_len, 3],
                             name='knowledge_tag')
    self.kg_hot = tf.placeholder(
        tf.float32, [self.args.batch_size, self.args.seq_len, 188],
        name='knowledge_hot')
    self.timebin = tf.placeholder(
        tf.int32, [self.args.batch_size, self.args.seq_len])
    self.diff = tf.placeholder(tf.int32,
                               [self.args.batch_size, self.args.seq_len])
    self.guan = tf.placeholder(tf.int32,
                               [self.args.batch_size, self.args.seq_len])

    with tf.variable_scope('Memory'):
        init_memory_key = tf.get_variable('key', [self.args.memory_size, self.args.memory_key_state_dim], \
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        init_memory_value = tf.get_variable('value', [self.args.memory_size,self.args.memory_value_state_dim], \
            initializer=tf.truncated_normal_initializer(stddev=0.1))
    # NOTE(review): the scope/variable names below are crossed — the 'diff'
    # scope holds the guan table and the 'gate' scope holds the diff table.
    # Kept as-is so existing checkpoints still load.
    with tf.variable_scope('time'):
        time_embed_mtx = tf.get_variable('timebin', [12, self.args.memory_value_state_dim],\
            initializer=tf.truncated_normal_initializer(stddev=0.1))
    with tf.variable_scope('diff'):
        guan_embed_mtx = tf.get_variable('diff', [12, self.args.memory_value_state_dim],\
            initializer=tf.truncated_normal_initializer(stddev=0.1))
    with tf.variable_scope('gate'):
        diff_embed_mtx = tf.get_variable('gate', [12, self.args.memory_value_state_dim],\
            initializer=tf.truncated_normal_initializer(stddev=0.1))

    # Broadcast the value memory across the batch:
    # [memory_size, d_v] -> [batch, memory_size, d_v].
    init_memory_value = tf.tile(tf.expand_dims(init_memory_value, 0),
                                tf.stack([self.args.batch_size, 1, 1]))
    print(init_memory_value.get_shape())

    self.memory = DKVMN(self.args.memory_size, self.args.memory_key_state_dim, \
        self.args.memory_value_state_dim, init_memory_key=init_memory_key,
        init_memory_value=init_memory_value,
        batch_size=self.args.batch_size, name='DKVMN')

    with tf.variable_scope('Embedding'):
        # A: question embedding table.
        q_embed_mtx = tf.get_variable('q_embed', [self.args.n_questions+1, self.args.memory_key_state_dim],\
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # B: (question, answer) embedding table.
        qa_embed_mtx = tf.get_variable(
            'qa_embed',
            [2 * self.args.n_questions + 1, self.args.memory_value_state_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

    # Embed and split every sequence into per-timestep slices of shape
    # [batch, 1, dim].
    q_embed_data = tf.nn.embedding_lookup(q_embed_mtx, self.q_data)
    slice_q_embed_data = tf.split(q_embed_data, self.args.seq_len, 1)
    qa_embed_data = tf.nn.embedding_lookup(qa_embed_mtx, self.qa_data)
    slice_qa_embed_data = tf.split(qa_embed_data, self.args.seq_len, 1)
    time_embedding = tf.nn.embedding_lookup(time_embed_mtx, self.timebin)
    slice_time_embedding = tf.split(time_embedding, self.args.seq_len, 1)
    # BUG FIX: this previously read tf.nn.embedding_lookup(diff_embed_mtx,
    # self.diff), which duplicated diff_embedding exactly and left both
    # guan_embed_mtx and the self.guan placeholder unused.
    guan_embedding = tf.nn.embedding_lookup(guan_embed_mtx, self.guan)
    slice_guan_embedding = tf.split(guan_embedding, self.args.seq_len, 1)
    diff_embedding = tf.nn.embedding_lookup(diff_embed_mtx, self.diff)
    slice_diff_embedding = tf.split(diff_embedding, self.args.seq_len, 1)
    slice_kg = tf.split(self.kg, self.args.seq_len, 1)
    slice_kg_hot = tf.split(self.kg_hot, self.args.seq_len, 1)

    reuse_flag = False
    prediction = list()

    # Unroll over time: attend / read / predict / write each step.
    for i in range(self.args.seq_len):
        # Reuse the shared linear-layer variables after the first step.
        if i != 0:
            reuse_flag = True
        q = tf.squeeze(slice_q_embed_data[i], 1)
        qa = tf.squeeze(slice_qa_embed_data[i], 1)
        kg = tf.squeeze(slice_kg[i], 1)
        kg_hot = tf.squeeze(slice_kg_hot[i], 1)
        dotime = tf.squeeze(slice_time_embedding[i], 1)
        # NOTE(review): dodiff is computed but never used below.
        dodiff = tf.squeeze(slice_diff_embedding[i], 1)
        doguan = tf.squeeze(slice_guan_embedding[i], 1)

        self.correlation_weight = self.memory.attention(q, kg, kg_hot)
        # Read process, [batch size, memory value state dim]
        self.read_content = self.memory.read(self.correlation_weight)

        mastery_level_prior_difficulty = tf.concat(
            [self.read_content, q, doguan], 1)
        # f_t
        summary_vector = tf.tanh(
            operations.linear(mastery_level_prior_difficulty,
                              self.args.final_fc_dim,
                              name='Summary_Vector',
                              reuse=reuse_flag))
        # p_t
        pred_logits = operations.linear(summary_vector, 1,
                                        name='Prediction', reuse=reuse_flag)
        prediction.append(pred_logits)

        # Write the response (augmented with the time embedding) back.
        qa_time = tf.concat([qa, dotime], axis=1)
        self.new_memory_value = self.memory.write(self.correlation_weight,
                                                  qa_time, reuse=reuse_flag)

    # 'prediction' is a seq_len-long list of [batch, 1]; stack to
    # [batch, seq_len, 1] and flatten the last axis.
    self.pred_logits = tf.reshape(tf.stack(prediction, axis=1),
                                  [self.args.batch_size, self.args.seq_len])

    # Masked loss: entries labeled -1 are padding and are excluded.
    target_1d = tf.reshape(self.target, [-1])
    pred_logits_1d = tf.reshape(self.pred_logits, [-1])
    index = tf.where(
        tf.not_equal(target_1d, tf.constant(-1., dtype=tf.float32)))
    filtered_target = tf.gather(target_1d, index)
    filtered_logits = tf.gather(pred_logits_1d, index)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=filtered_logits,
                                                labels=filtered_target))
    self.pred = tf.sigmoid(self.pred_logits)

    # SGD with momentum; learning rate fed at run time; gradients clipped
    # by global norm.
    self.global_step = tf.Variable(0, trainable=False)
    self.lr = tf.placeholder(tf.float32, [], name='learning_rate')
    optimizer = tf.train.MomentumOptimizer(self.lr, self.args.momentum)
    grads, vrbs = zip(*optimizer.compute_gradients(self.loss))
    grad, _ = tf.clip_by_global_norm(grads, self.args.maxgradnorm)
    self.train_op = optimizer.apply_gradients(zip(grad, vrbs),
                                              global_step=self.global_step)

    # Expose every trainable tensor by name for later inspection.
    self.tr_vrbs = tf.trainable_variables()
    self.params = {}
    for i in self.tr_vrbs:
        print(i.name)
        self.params[i.name] = tf.get_default_graph().get_tensor_by_name(
            i.name)

    self.saver = tf.train.Saver()
def create_model(self):
    """Build the plain DKVMN training graph.

    Constructs the q/qa/target placeholders, the per-timestep unrolled
    attention/read/write loop over self.memory, a masked sigmoid
    cross-entropy loss (labels of -1 are ignored as padding), and a
    momentum optimizer with global-norm gradient clipping.

    Side effects: defines self.q_data_seq, self.qa_data_seq,
    self.target_seq, self.pred_logits, self.pred, self.loss,
    self.train_op, self.tr_vrbs, self.saver, and per-step
    self.correlation_weight / self.read_content / self.new_memory_value.
    """
    # 'seq_len' means question sequences
    self.q_data_seq = tf.placeholder(tf.int32,
                                     [self.args.batch_size, self.args.seq_len],
                                     name='q_data_seq')
    self.qa_data_seq = tf.placeholder(tf.int32,
                                      [self.args.batch_size, self.args.seq_len],
                                      name='qa_data')
    self.target_seq = tf.placeholder(tf.float32,
                                     [self.args.batch_size, self.args.seq_len],
                                     name='target')
    # NOTE(review): the string literal below is dead code the original
    # author disabled; memory/embedding setup presumably happens elsewhere
    # (self.memory, self.q_embed_mtx, self.qa_embed_mtx are used already
    # initialized further down) — kept verbatim.
    ''' # Initialize Memory
    with tf.variable_scope('Memory'):
        init_memory_key = tf.get_variable('key', [self.args.memory_size, self.args.memory_key_state_dim], \
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        init_memory_value = tf.get_variable('value', [self.args.memory_size,self.args.memory_value_state_dim], \
            initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Broadcast memory value tensor to match [batch size, memory size, memory state dim]
    # First expand dim at axis 0 so that makes 'batch size' axis and tile it along 'batch size' axis
    # tf.tile(inputs, multiples) : multiples length must be thes saame as the number of dimensions in input
    # tf.stack takes a list and convert each element to a tensor
    init_memory_value = tf.tile(tf.expand_dims(init_memory_value, 0), tf.stack([self.args.batch_size, 1, 1]))
    print(init_memory_value.get_shape())
    self.memory = DKVMN(self.args.memory_size, self.args.memory_key_state_dim, \
        self.args.memory_value_state_dim, init_memory_key=init_memory_key, init_memory_value=init_memory_value, name='DKVMN')
    # Embedding to [batch size, seq_len, memory_state_dim(d_k or d_v)]
    with tf.variable_scope('Embedding'):
        # A
        q_embed_mtx = tf.get_variable('q_embed', [self.args.n_questions+1, self.args.memory_key_state_dim],\
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # B
        qa_embed_mtx = tf.get_variable('qa_embed', [2*self.args.n_questions+1, self.args.memory_value_state_dim], initializer=tf.truncated_normal_initializer(stddev=0.1))
    '''
    # Embedding to [batch size, seq_len, memory key state dim]
    q_embed_data = tf.nn.embedding_lookup(self.q_embed_mtx,
                                          self.q_data_seq)
    # List of [batch size, 1, memory key state dim] with 'seq_len' elements
    slice_q_embed_data = tf.split(q_embed_data, self.args.seq_len, 1)
    # Embedding to [batch size, seq_len, memory value state dim]
    qa_embed_data = tf.nn.embedding_lookup(self.qa_embed_mtx,
                                           self.qa_data_seq)
    # List of [batch size, 1, memory value state dim] with 'seq_len' elements
    slice_qa_embed_data = tf.split(qa_embed_data, self.args.seq_len, 1)

    prediction = list()
    reuse_flag = False

    # Unroll over time: attend with the question key, read mastery,
    # write the (question, answer) update back into value memory.
    for i in range(self.args.seq_len):
        # Reuse the shared linear-layer variables after the first step.
        if i != 0:
            reuse_flag = True
        # k_t : [batch size, memory key state dim]
        q = tf.squeeze(slice_q_embed_data[i], 1)
        # Attention, [batch size, memory size]
        self.correlation_weight = self.memory.attention(q)
        # Read process, [batch size, memory value state dim]
        self.read_content = self.memory.read(self.correlation_weight)
        # Write process, [batch size, memory size, memory value state dim]
        # qa : [batch size, memory value state dim]
        qa = tf.squeeze(slice_qa_embed_data[i], 1)
        # Only the last timestep's memory value survives the loop.
        self.new_memory_value = self.memory.write(self.correlation_weight,
                                                  qa, reuse=reuse_flag)

        mastery_level_prior_difficulty = tf.concat([self.read_content, q], 1)
        # f_t
        summary_vector = tf.tanh(
            operations.linear(mastery_level_prior_difficulty,
                              self.args.final_fc_dim,
                              name='Summary_Vector', reuse=reuse_flag))
        # p_t
        pred_logits = operations.linear(summary_vector, 1,
                                        name='Prediction', reuse=reuse_flag)
        prediction.append(pred_logits)

    # 'prediction' : seq_len-long list of [batch size, 1]; tf.stack gives
    # [batch size, seq_len, 1], reshaped to [batch size, seq_len].
    self.pred_logits = tf.reshape(tf.stack(prediction, axis=1),
                                  [self.args.batch_size, self.args.seq_len])

    # Define loss: standard cross entropy; '-1' labels are padding and
    # must be ignored. Flatten target/logits to 1-d first.
    target_1d = tf.reshape(self.target_seq, [-1])
    pred_logits_1d = tf.reshape(self.pred_logits, [-1])
    index = tf.where(tf.not_equal(target_1d,
                                  tf.constant(-1., dtype=tf.float32)))
    # tf.gather(params, indices) : Gather slices from params according to indices
    filtered_target = tf.gather(target_1d, index)
    filtered_logits = tf.gather(pred_logits_1d, index)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=filtered_logits,
                                                labels=filtered_target))
    self.pred = tf.sigmoid(self.pred_logits)

    # Optimizer : SGD + momentum; learning rate fed at run time.
    self.global_step = tf.Variable(0, trainable=False)
    self.lr = tf.placeholder(tf.float32, [], name='learning_rate')
    # self.lr_decay = tf.train.exponential_decay(self.args.initial_lr, global_step=global_step, decay_steps=10000, decay_rate=0.667, staircase=True)
    # self.learning_rate = tf.maximum(lr, self.args.lr_lowerbound)
    optimizer = tf.train.MomentumOptimizer(self.lr, self.args.momentum)
    grads, vrbs = zip(*optimizer.compute_gradients(self.loss))
    # Clip by global norm to stabilize the unrolled-RNN-style gradients.
    grad, _ = tf.clip_by_global_norm(grads, self.args.maxgradnorm)
    self.train_op = optimizer.apply_gradients(zip(grad, vrbs),
                                              global_step=self.global_step)
    # grad_clip = [(tf.clip_by_value(grad, -self.args.maxgradnorm, self.args.maxgradnorm), var) for grad, var in grads]

    self.tr_vrbs = tf.trainable_variables()
    for i in self.tr_vrbs:
        print(i.name)

    self.saver = tf.train.Saver()
def generator(self, z, is_training=True, name='generator', reuse=True, use_bn=False):
    """Generate an image from a latent vector z.

    Projects z to a (target_size/8, target_size/8, filter_depth) feature
    map, applies four double-conv blocks with nearest-neighbor upsampling
    between them, and ends with a conv producing `num_channels` planes.
    """
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()

        fc = operations.linear(
            z,
            self.args.target_size // 8 * self.args.target_size // 8 * self.args.filter_depth,
            name='gen_linear')
        feat = tf.reshape(fc, [
            -1, self.args.target_size // 8, self.args.target_size // 8,
            self.args.filter_depth
        ])

        for i in range(4):
            block_no = i + 1
            conv_a = operations.conv2d(feat, self.args.filter_depth,
                                       filter_height=3, filter_width=3,
                                       stride_h=1, stride_v=1,
                                       use_bn=use_bn,
                                       name='gen_conv_%d1' % block_no)
            act_a = operations.elu(conv_a,
                                   name='gen_conv_%d1_elu' % block_no,
                                   is_training=is_training)
            conv_b = operations.conv2d(act_a, self.args.filter_depth,
                                       filter_height=3, filter_width=3,
                                       stride_h=1, stride_v=1,
                                       use_bn=use_bn,
                                       name='gen_conv_%d2' % block_no)
            act_b = operations.elu(conv_b,
                                   name='gen_conv_%d2_elu' % block_no,
                                   is_training=is_training)
            if i == 3:
                feat = act_b
            else:
                # Double spatial size via nearest-neighbor interpolation:
                # target/4 -> target/2 -> target.
                side = int(self.args.target_size // (2**(2 - i)))
                feat = tf.image.resize_nearest_neighbor(act_b, size=(side, side))

        out = operations.conv2d(feat, self.args.num_channels,
                                filter_height=3, filter_width=3,
                                stride_h=1, stride_v=1,
                                use_bn=use_bn, name='gen_conv_last')
        out = operations.elu(out, name='gen_conv_last_elu',
                             is_training=is_training)
        return out