import os

import numpy as np
import tensorflow as tf

# Helpers referenced throughout this file but defined elsewhere in the repo:
# PTRCell (the pointer-based memory cell) and `progress` (a console progress
# indicator), plus `weight`, `sequence_loss`, `l2_loss`, and `D`, for which
# hypothetical sketches are given below.


# Variant __init__ for a PTRModel with three memories (the third sized N*N);
# the rest of this variant's class body is not present in this file.
def __init__(self, I, O, N, W, K, G, sess,
             min_length=1, max_length=10,
             controller_layer_size=1,
             min_grad=-10, max_grad=+10,
             lr=1e-3, epsilon=1e-8, momentum=0.9, decay=0.0,
             weight_decay=0, scope="PTRModel", forward_only=False):
    self.controller = PTRCell([(N, W), (N, W), (N * N, W)], [1, 1, 0], [0, 0, 1], (1, W),
                              controller_layer_size=controller_layer_size,
                              addr_mode=0)
    self.sess = sess
    self.scope = scope
    self.input_dim = I
    self.output_dim = O
    self.W = W
    self.min_length = min_length
    self.max_length = max_length
    self.min_grad = min_grad
    self.max_grad = max_grad
    self.weight_decay = weight_decay
    self.inputs_1 = []
    self.inputs_2 = []
    self.true_outputs = []
    self.outputs = {}
    self.prev_states = {}
    self.losses = {}
    self.optims = {}
    self.grads = {}
    self.collect_states = {0: []}
    self.debug = {0: []}
    self.saver = None
    self.params = None
    self.global_step = tf.Variable(0, trainable=False)
    #self.opt = tf.train.RMSPropOptimizer(lr, decay=decay, epsilon=epsilon, momentum=momentum)
    #self.opt = tf.train.AdagradOptimizer(lr)
    self.opt = tf.train.AdamOptimizer(lr)
    #self.opt = tf.train.AdamOptimizer(lr, epsilon=epsilon)
    self.build_model(forward_only)
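# ---------------------------------------------------------------------------
# The classes in this file build all their variables through a `weight(...)`
# helper that is not reproduced here. The sketch below is a hypothetical
# reconstruction inferred only from the call sites (init='xavier',
# init='constant' with an optional `value`); the repo's real helper may differ.
# ---------------------------------------------------------------------------
def weight(name, shape, init='xavier', value=0.0):
    """Hypothetical `weight` helper: create a variable under the current scope."""
    if init == 'xavier':
        initializer = tf.contrib.layers.xavier_initializer()
    elif init == 'constant':
        initializer = tf.constant_initializer(value)
    else:
        initializer = tf.truncated_normal_initializer(stddev=0.1)
    return tf.get_variable(name, shape, dtype=tf.float32, initializer=initializer)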
class PTRModel(object):
    def __init__(self, I, O, N, W, sess,
                 min_length=1, max_length=10,
                 controller_layer_size=1,
                 min_grad=-10, max_grad=+10,
                 lr=1e-3, epsilon=1e-8, momentum=0.9, decay=0.0,
                 weight_decay=0, MAX_STEP=20,
                 scope="PTRModel", forward_only=False):
        self.controller = PTRCell([(N, W), (N, W)], [1, 1], [0, 1], (1, W),
                                  controller_layer_size=controller_layer_size)
        self.sess = sess
        self.scope = scope
        self.input_dim = I
        self.output_dim = O
        self.W = W
        self.min_length = min_length
        self.max_length = max_length
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay
        self.MAX_STEP = MAX_STEP
        self.inputs_1 = []
        self.true_outputs = []
        self.stops = {}
        self.outputs = {}
        self.prev_states = {}
        self.losses = {}
        self.optims = {}
        self.grads = {}
        self.collect_states = {0: []}
        self.saver = None
        self.params = None
        self.global_step = tf.Variable(0, trainable=False)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=decay, epsilon=epsilon, momentum=momentum)
        #self.opt = tf.train.AdagradOptimizer(lr)
        self.opt = tf.train.AdamOptimizer(lr)
        self.build_model(forward_only)

    def build_model(self, forward_only):
        print("[*] Building a PTRModel math model")
        with tf.variable_scope(self.scope):
            #embedding_matrix = tf.eye(self.input_dim, self.W)
            #embedding_matrix = weight('embedding', [self.input_dim, self.W], init='xavier')
            # Parameters of the learned stop position: the model runs for
            # MAX_STEP controller steps and learns where to read the answer,
            # modelled as exp_weight * seq_length**exp + exp_bias.
            self.exp = weight('exp', [1, 1], init='constant', value=1.6)
            self.exp_weight = tf.constant([[1.0]], dtype=tf.float32)  #weight('exp_weight', [1, 1])
            self.exp_bias = weight('exp_bias', [1, 1], init='constant', value=0.0)
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

            prev_state = self.controller.init_state()
            tf.get_variable_scope().reuse_variables()
            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' % seq_length)
                self.inputs_1.append(input_1)
                self.true_outputs.append(true_output)

                # present inputs
                prev_state = self.controller.update_memory(
                    prev_state,
                    [tf.reshape(input_1, [1, -1]),
                     tf.reshape(input_1, [1, -1])])
                self.collect_states[seq_length] = \
                    self.collect_states[seq_length - 1][0:(seq_length - 1)] + \
                    [self.copy_state(prev_state)]

                state = prev_state
                self.prev_states[seq_length] = state

                stops = []
                candidate_outputs = []
                for j in range(self.MAX_STEP):
                    state, _ = self.controller(state, j)
                    self.collect_states[seq_length].append(self.copy_state(state))
                    # candidate answer after j + 1 controller steps
                    candidate_outputs.append(tf.unstack(state['M'][-1][0:seq_length]))
                    # stops.append(state['stop'])
                self.outputs[seq_length] = candidate_outputs
                self.stops[seq_length] = stops

        if not forward_only:
            for seq_length in range(self.min_length, self.max_length + 1):
                print(" [*] Building a loss model for seq_length %s" % seq_length)
                all_losses = []
                for index in range(self.MAX_STEP):
                    loss = sequence_loss(logits=self.outputs[seq_length][index],
                                         targets=self.true_outputs[0:seq_length],
                                         weights=[1] * seq_length,
                                         average_across_timesteps=False,
                                         average_across_batch=False,
                                         softmax_loss_function=l2_loss)
                    all_losses.append(loss)
                all_losses = tf.stack(all_losses)

                #step_dist = tf.nn.softmax(tf.concat(self.stops[seq_length], 1))
                #step_dist = tf.nn.softmax(tf.nn.embedding_lookup(self.length2stepdist, [seq_length]))
                #max_pos = tf.to_float(tf.argmax(step_dist))
                max_pos = tf.clip_by_value(
                    self.exp_weight * tf.pow(tf.to_float(seq_length), self.exp) + self.exp_bias,
                    0, self.MAX_STEP - 1)
                # soft distribution over stop steps, peaked at max_pos
                stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)
                # expected loss under the stop distribution, plus a small
                # penalty encouraging earlier stopping
                loss1 = tf.reduce_sum(tf.expand_dims(all_losses, 0) * stop_pos) + \
                    0.001 * tf.reduce_sum(max_pos)
                self.losses[seq_length] = loss1

                if not self.params:
                    self.params = tf.trainable_variables()
                grads = []
                for grad in tf.gradients(loss1, self.params):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                    if grad is not None:
                        grads.append(tf.clip_by_value(grad, self.min_grad, self.max_grad))
                    else:
                        grads.append(grad)
                self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)
        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")

    def get_outputs(self, seq_length, index):
        return self.outputs[seq_length][index]

    def get_stop_pos(self, seq_length):
        #stop_pos = tf.nn.softmax(tf.nn.embedding_lookup(self.length2stepdist, [seq_length]) * 50)
        max_pos = tf.clip_by_value(
            self.exp_weight * tf.pow(tf.to_float(seq_length), self.exp) + self.exp_bias,
            0, self.MAX_STEP - 1)
        stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)
        return (tf.argmax(stop_pos[0]), max_pos,
                self.exp, self.exp_weight, self.exp_bias, self.beta)

    def get_loss(self, seq_length):
        if seq_length not in self.losses:
            index, _, _, _, _, _ = self.get_stop_pos(seq_length)
            # argmax is a tensor; evaluate it to a Python int before indexing
            # the list of candidate outputs (assumes variables are initialized).
            index = int(self.sess.run(index))
            loss = sequence_loss(logits=self.outputs[seq_length][index],
                                 targets=self.true_outputs[0:seq_length],
                                 weights=[1] * seq_length,
                                 average_across_timesteps=False,
                                 average_across_batch=False,
                                 softmax_loss_function=l2_loss)
            self.losses[seq_length] = loss
        return self.losses[seq_length]

    def copy_state(self, state):
        new_state = {}
        for k, v in state.items():
            if k != 'seq_length':
                new_state[k] = v
        return new_state

    def get_collect_state(self, seq_length):
        return self.collect_states[seq_length]

    def save(self, checkpoint_dir, task_name, step):
        task_dir = os.path.join(checkpoint_dir, "%s" % task_name)
        file_name = "PTRModel_%s.model" % task_name
        if not os.path.exists(task_dir):
            os.makedirs(task_dir)
        self.saver.save(self.sess,
                        os.path.join(task_dir, file_name),
                        global_step=step.astype(int))

    def load(self, checkpoint_dir, task_name):
        print(" [*] Reading checkpoints...")
        checkpoint_dir = os.path.join(checkpoint_dir, "%s" % task_name)
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
        else:
            raise Exception(" [!] Testing, but %s not found" % checkpoint_dir)
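# Hypothetical usage sketch for the class above (names `xs`, `ys`, and the
# constructor dimensions are illustrative, not taken from this repo): feed the
# per-length placeholders one timestep at a time, then run the optimizer and
# loss for that sequence length.
#
#     sess = tf.Session()
#     model = PTRModel(I=10, O=10, N=32, W=10, sess=sess, forward_only=False)
#     sess.run(tf.global_variables_initializer())
#     feed = {}
#     for t in range(seq_length):                 # min_length <= seq_length <= max_length
#         feed[model.inputs_1[t]] = xs[t]         # numpy array of shape [I]
#         feed[model.true_outputs[t]] = ys[t]     # numpy array of shape [O]
#     _, loss = sess.run([model.optims[seq_length],
#                         model.losses[seq_length]], feed)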
class PTRModel(object):
    def __init__(self, I, O, N, W, K, G, sess,
                 min_length=1, max_length=12,
                 controller_layer_size=1,
                 min_grad=-10, max_grad=+10,
                 lr=1e-3, epsilon=1e-8, momentum=0.9, decay=0.0,
                 weight_decay=0, scope="PTRModel", forward_only=False):
        self.controller = PTRCell([(N, W), (N, W)], [3, 0], [0, 1], (1, W),
                                  controller_layer_size=controller_layer_size,
                                  addr_mode=0)
        self.sess = sess
        self.scope = scope
        self.input_dim = I
        self.output_dim = O
        self.W = W
        self.min_length = min_length
        self.max_length = max_length
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay
        self.MAX_STEP = max_length
        self.inputs_1 = []
        self.inputs_2 = []
        self.true_outputs = []
        self.outputs = {}
        self.prev_states = {}
        self.losses = {}
        self.optims = {}
        self.grads = {}
        self.collect_states = {0: []}
        self.debug = {0: []}
        self.saver = None
        self.params = None
        self.global_step = tf.Variable(0, trainable=False)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=decay, epsilon=epsilon, momentum=momentum)
        #self.opt = tf.train.AdagradOptimizer(lr)
        self.opt = tf.train.AdamOptimizer(lr)
        #self.opt = tf.train.AdamOptimizer(lr, epsilon=epsilon)
        self.build_model(forward_only)

    def build_model(self, forward_only):
        print("[*] Building a PTRModel math model")
        with tf.variable_scope(self.scope):
            self.a = weight('a', [1, 1])
            # self.c = weight('c', [1,1])
            # self.d = weight('d', [1,1])
            self.b = weight('b', [1, 1], init='constant')
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

            prev_state = self.controller.init_state()
            tf.get_variable_scope().reuse_variables()
            for seq_length in range(1, self.max_length + 1):
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' % seq_length)
                self.true_outputs.append(true_output)
            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)
                self.inputs_1.append(input_1)

                # present inputs
                prev_state = self.controller.update_memory(
                    prev_state,
                    [tf.reshape(input_1, [1, -1]), tf.zeros((1, self.W))])
                self.collect_states[seq_length] = \
                    self.collect_states[seq_length - 1][0:(seq_length - 1)] + \
                    [self.copy_state(prev_state)]
                self.debug[seq_length] = []

                state = prev_state
                self.prev_states[seq_length] = state
                # run exactly seq_length controller steps
                for j in range(seq_length):
                    state, _ = self.controller(state, j)
                    new_state = self.copy_state(state)
                    self.collect_states[seq_length].append(new_state)
                    self.debug[seq_length].append((new_state['ptr'], new_state['dptr']))
                self.outputs[seq_length] = tf.unstack(state['M'][-1][0:seq_length])

        if not forward_only:
            # note: max_length itself is excluded from the training losses
            for seq_length in range(self.min_length, self.max_length):
                print(" [*] Building a loss model for seq_length %s" % seq_length)
                print(len(self.outputs[seq_length]),
                      len(self.true_outputs[0:seq_length]),
                      len([1] * seq_length))
                loss = sequence_loss(logits=self.outputs[seq_length],
                                     targets=self.true_outputs[0:seq_length],
                                     weights=[1] * seq_length,
                                     average_across_timesteps=False,
                                     average_across_batch=False,
                                     softmax_loss_function=l2_loss)
                self.losses[seq_length] = loss
                if not self.params:
                    self.params = tf.trainable_variables()
                grads = []
                for grad in tf.gradients(loss, self.params):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                    if grad is not None:
                        grads.append(tf.clip_by_value(grad, self.min_grad, self.max_grad))
                    else:
                        grads.append(grad)
                self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)
        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")

    def get_outputs(self, seq_length):
        return self.outputs[seq_length]

    def get_loss(self, seq_length):
        if seq_length not in self.losses:
            loss = sequence_loss(logits=self.outputs[seq_length],
                                 targets=self.true_outputs[0:seq_length],
                                 weights=[1] * seq_length,
                                 average_across_timesteps=False,
                                 average_across_batch=False,
                                 softmax_loss_function=l2_loss)
            self.losses[seq_length] = loss
        return self.losses[seq_length]

    def copy_state(self, state):
        new_state = {}
        for k, v in state.items():
            if k != 'seq_length':
                new_state[k] = v
        return new_state

    def get_collect_state(self, seq_length):
        return self.collect_states[seq_length]

    def save(self, checkpoint_dir, task_name, step):
        task_dir = os.path.join(checkpoint_dir, "%s" % task_name)
        file_name = "PTRModel_%s.model" % task_name
        if not os.path.exists(task_dir):
            os.makedirs(task_dir)
        self.saver.save(self.sess,
                        os.path.join(task_dir, file_name),
                        global_step=step.astype(int))

    def load(self, checkpoint_dir, task_name):
        print(" [*] Reading checkpoints...")
        checkpoint_dir = os.path.join(checkpoint_dir, "%s" % task_name)
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
        else:
            raise Exception(" [!] Testing, but %s not found" % checkpoint_dir)
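# The `sequence_loss(...)` calls in these models match the signature of
# TF 1.x's `tf.contrib.legacy_seq2seq.sequence_loss`, whose
# `softmax_loss_function` is called with the target and the logit for each
# timestep (as `labels`/`logits` keywords in later 1.x releases). `l2_loss`
# itself is not shown in this file; the stand-in below is a hypothetical
# reconstruction that swaps the usual softmax cross-entropy for a squared
# error, matching the regression-style targets used here. Squared error is
# symmetric in its arguments, so it works under either calling convention.
from tensorflow.contrib.legacy_seq2seq import sequence_loss  # TF 1.x only

def l2_loss(labels, logits):
    """Hypothetical per-timestep L2 loss for sequence_loss's softmax slot."""
    return tf.reduce_sum(tf.square(labels - logits), axis=-1)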
class PTRModel(object):
    def __init__(self, I, O, N, W, K, G, sess,
                 min_length=1, max_length=10,
                 controller_layer_size=1,
                 min_grad=-10, max_grad=+10,
                 lr=1e-3, epsilon=1e-8, momentum=0.9, decay=0.0,
                 weight_decay=0, scope="PTRModel", forward_only=False):
        self.controller = PTRCell([(N, W), (N, W), (N, W)], [1, 1, 0], [0, 0, 1], (1, W),
                                  controller_layer_size=controller_layer_size,
                                  addr_mode=0)
        self.sess = sess
        self.scope = scope
        self.input_dim = I
        self.output_dim = O
        self.W = W
        self.min_length = min_length
        self.max_length = max_length
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay
        self.inputs_1 = []
        self.inputs_2 = []
        self.true_outputs = []
        self.outputs = {}
        self.prev_states = {}
        self.losses = {}
        self.optims = {}
        self.grads = {}
        self.collect_states = {0: []}
        self.saver = None
        self.params = None
        self.global_step = tf.Variable(0, trainable=False)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=decay, epsilon=epsilon, momentum=momentum)
        #self.opt = tf.train.AdagradOptimizer(lr)
        self.opt = tf.train.AdamOptimizer(lr)
        #self.opt = tf.train.AdamOptimizer(lr, epsilon=epsilon)
        self.build_model(forward_only)

    def step_dist_for_length(self, seq_length):
        """Sharpened softmax over computation steps for one sequence length."""
        dist_logits = tf.nn.embedding_lookup(self.length2stepdist, [seq_length])
        dist = tf.nn.softmax(dist_logits)
        k = 50  #self.beta * self.beta
        fx = tf.exp(k * dist) - 1 + 0.00001
        return fx / tf.reduce_sum(fx), dist_logits

    def build_model(self, forward_only):
        print("[*] Building a PTRModel math model")
        with tf.variable_scope(self.scope):
            # one row per sequence length; each row holds the logits of a
            # distribution over how many controller steps to run
            self.length2stepdist = tf.get_variable(
                'embedding', [self.max_length + 1, self.max_length + 1])
            # self.length2stepdist = tf.zeros([self.max_length+1, 1], dtype=tf.int32, name='embedding')
            self.beta = tf.get_variable('beta', [1], dtype=tf.float32)
            # self.step_dist = tf.nn.softmax(self.step_dist1[:self.input_seqlen+1])
            # k = 100
            # fx = tf.exp(k * self.step_dist) - 1 + 0.00001
            # sm = fx / tf.reduce_sum(fx)
            # self.step_dist = sm
            #embedding_matrix = tf.eye(self.input_dim, self.W)
            #embedding_matrix = weight('embedding', [self.input_dim, self.W], init='xavier')
            #tf.gather(embedding_matrix, tf.to_int32(input_1))
            #tf.gather(embedding_matrix, tf.to_int32(input_2))

            prev_state = self.controller.init_state()
            tf.get_variable_scope().reuse_variables()
            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)
                input_2 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_2_%s' % seq_length)
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' % seq_length)

                # self.step_dist = D(self.max_length+1, tf.to_float(self.step_dist1[0][0]), 1, self.beta[0] * self.beta[0])
                # per-length distribution over how many controller steps to run
                self.step_dist, self.step_dist1 = self.step_dist_for_length(seq_length)

                self.inputs_1.append(input_1)
                self.inputs_2.append(input_2)
                self.true_outputs.append(true_output)

                # present inputs
                prev_state = self.controller.update_memory(
                    prev_state,
                    [tf.reshape(input_1, [1, -1]),
                     tf.reshape(input_2, [1, -1]),
                     tf.zeros((1, self.W))])
                self.collect_states[seq_length] = \
                    self.collect_states[seq_length - 1][0:(seq_length - 1)] + \
                    [self.copy_state(prev_state)]

                state = prev_state
                self.prev_states[seq_length] = state

                earlystop = False
                if earlystop:
                    self.stops = []
                    stop_time = seq_length
                self.collect_base = len(self.collect_states[seq_length])
                # print('collect_base:', self.collect_base)

                candidate_outputs = []
                for j in range(0, self.max_length + 1):
                    state, stop = self.controller(state, j)
                    self.collect_states[seq_length].append(self.copy_state(state))
                    candidate_outputs.append(tf.unstack(state['M'][-1][0:seq_length]))
                    if earlystop:
                        self.stops.append(stop)
                self.outputs[seq_length] = candidate_outputs  #tf.unstack(state['M'][-1][0:seq_length])

                if earlystop:
                    for j in range(0, seq_length + 1)[::-1]:
                        # stop_time = tf.cond(self.stops[j][1] > self.stops[j][0], lambda: j, lambda: stop_time)
                        cur_state = self.collect_states[seq_length][self.collect_base + j]
                        self.outputs[seq_length] = tf.cond(
                            self.stops[j][1] > self.stops[j][0],
                            lambda: tf.unstack(cur_state['M'][-1][0:seq_length]),
                            lambda: self.outputs[seq_length])
                    if type(self.outputs[seq_length]) != type([]):
                        self.outputs[seq_length] = [self.outputs[seq_length]]

        if not forward_only:
            for seq_length in range(self.min_length, self.max_length + 1):
                print(" [*] Building a loss model for seq_length %s" % seq_length)
                all_losses = []
                for index in range(self.max_length + 1):
                    loss = sequence_loss(logits=self.outputs[seq_length][index],
                                         targets=self.true_outputs[0:seq_length],
                                         weights=[1] * seq_length,
                                         average_across_timesteps=False,
                                         average_across_batch=False,
                                         softmax_loss_function=l2_loss)
                    all_losses.append(loss)
                all_losses = tf.stack(all_losses)
                # all_loss_diff = tf.abs(all_losses - tf.concat([[0], all_losses[:-1]], axis=0))*1000

                # this length's own step distribution
                step_dist, _ = self.step_dist_for_length(seq_length)
                # mode of the step distribution; argmax is non-differentiable,
                # so this term only penalizes the reported step count
                maxpos = tf.argmax(step_dist, axis=1)
                loss1 = tf.reduce_sum(tf.expand_dims(all_losses, 0) * step_dist, axis=1) + \
                    0.002 * tf.to_float(maxpos[0])
                self.losses[seq_length] = loss1

                if not self.params:
                    self.params = tf.trainable_variables()
                    print(self.params)
                grads = []
                for grad in tf.gradients(loss1, self.params):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                    if grad is not None:
                        grads.append(tf.clip_by_value(grad, self.min_grad, self.max_grad))
                    else:
                        grads.append(grad)
                self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)
        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")

    def get_outputs(self, seq_length, index):
        return self.outputs[seq_length][index]

    def get_stop_pos(self, seq_length):
        # stop_pos = tf.nn.embedding_lookup(self.length2stepdist, [seq_length])
        step_dist, dist_logits = self.step_dist_for_length(seq_length)
        return step_dist, tf.argmax(step_dist[0]), dist_logits

    def get_loss(self, seq_length):
        if seq_length not in self.losses:
            _, index, _ = self.get_stop_pos(seq_length)
            # argmax is a tensor; evaluate it to a Python int before indexing
            # the list of candidate outputs (assumes variables are initialized).
            index = int(self.sess.run(index))
            loss = sequence_loss(logits=self.outputs[seq_length][index],
                                 targets=self.true_outputs[0:seq_length],
                                 weights=[1] * seq_length,
                                 average_across_timesteps=False,
                                 average_across_batch=False,
                                 softmax_loss_function=l2_loss)
            self.losses[seq_length] = loss
        return self.losses[seq_length]

    def copy_state(self, state):
        new_state = {}
        for k, v in state.items():
            if k != 'seq_length':
                new_state[k] = v
        return new_state

    def get_collect_state(self, seq_length):
        return self.collect_states[seq_length]

    def save(self, checkpoint_dir, task_name, step):
        task_dir = os.path.join(checkpoint_dir, "%s" % task_name)
        file_name = "PTRModel_%s.model" % task_name
        if not os.path.exists(task_dir):
            os.makedirs(task_dir)
        self.saver.save(self.sess,
                        os.path.join(task_dir, file_name),
                        global_step=step.astype(int))

    def load(self, checkpoint_dir, task_name):
        print(" [*] Reading checkpoints...")
        checkpoint_dir = os.path.join(checkpoint_dir, "%s" % task_name)
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
        else:
            raise Exception(" [!] Testing, but %s not found" % checkpoint_dir)
class PTRModel(object):
    def __init__(self, I, O, N, W, G, sess,
                 batch_size=1024, story_size=1000, max_hops=3,
                 controller_layer_size=1, controller_hidden_size=256,
                 min_grad=-10, max_grad=+10,
                 lr=1e-4, epsilon=0.1, weight_decay=0,
                 scope="PTRModel", forward_only=False):
        self.controller = PTRCell([(N, W), (N, W)], [1, 1], [0, 0], (1, W),
                                  controller_layer_size=controller_layer_size,
                                  controller_hidden_size=controller_hidden_size,
                                  addr_mode=1)
        self.sess = sess
        self.scope = scope
        self.input_dim = I
        self.output_dim = O
        self.W = W
        self.batch_size = batch_size
        self.story_size = story_size
        self.max_hops = max_hops
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay
        self.outputs = []
        self.saver = None
        self.params = None
        self.global_step = tf.Variable(0, trainable=False)
        self.opt = tf.train.AdamOptimizer(lr, epsilon=epsilon)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=0.0, epsilon=epsilon, momentum=0.9)
        #self.opt = tf.train.AdagradOptimizer(lr)
        self.build_model(forward_only)

    def build_model(self, forward_only):
        print("[*] Building a PTRModel QA model")
        self.storys = tf.placeholder(tf.int32, [self.batch_size, self.story_size, self.input_dim], name='story')
        self.querys = tf.placeholder(tf.int32, [self.batch_size, self.input_dim], name='query')
        self.labels = tf.placeholder(tf.int32, [self.batch_size], name='label')
        self.embedding_matrix = weight('embedding', [self.output_dim, self.W], init='xavier')
        self.mask = tf.ones([self.input_dim, self.W])  #weight('mask', [self.input_dim, self.W], init='xavier')
        self.decoding_weight = weight('decoding_weight', [self.W, self.output_dim], init='xavier')
        self.decoding_bias = weight('decoding_bias', [self.output_dim], init='constant')
        zeros = np.zeros(self.W, dtype=np.float32)  # unused

        with tf.variable_scope(self.scope):
            init_state = self.controller.init_state()
            ss, qs = self.embedding(self.storys, self.querys)
            tf.get_variable_scope().reuse_variables()
            for i in range(self.batch_size):
                progress(i / float(self.batch_size))
                state = init_state
                # write every story sentence into memory
                for sid in range(self.story_size):
                    input_ = ss[i, sid:sid + 1, :]
                    state = self.controller.update_memory(state, [input_, input_])
                # present inputs
                state['R'] = qs[i:i + 1, :]
                outputs = []
                # present targets
                for _ in range(self.max_hops):
                    state = self.controller(state)
                    outputs.append(self.decode(state['R']))
                #out = tf.reduce_sum(tf.concat(outputs, 0), 0, keep_dims=True)
                out = outputs[-1]
                self.outputs.append(out)

        if not forward_only:
            logits = tf.concat(self.outputs, 0)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.labels)
            self.loss = tf.reduce_mean(cross_entropy)
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, self.labels)
            self.num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))

            if not self.params:
                self.params = tf.trainable_variables()
            self.grads = []
            for grad in tf.gradients(self.loss, self.params):
                if grad is not None:
                    self.grads.append(tf.clip_by_value(grad, self.min_grad, self.max_grad))
                else:
                    self.grads.append(grad)

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                self.optim = self.opt.apply_gradients(
                    zip(self.grads, self.params), global_step=self.global_step)
        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel QA model finished")

    def get_outputs(self):
        return self.outputs

    def decode(self, ans):
        return tf.nn.relu(tf.nn.bias_add(tf.matmul(ans, self.decoding_weight), self.decoding_bias))

    def embedding(self, storys, querys):
        story_list = tf.unstack(storys)  # self.batch_size * [self.story_size, self.input_dim]
        embed_list = []
        for facts in story_list:
            facts = tf.unstack(facts)  # self.story_size * self.input_dim
            embed = tf.stack([tf.nn.embedding_lookup(self.embedding_matrix, w) * self.mask
                              for w in facts])  # [self.story_size, self.input_dim, self.W]
            embed_list.append(tf.reduce_sum(embed, 1))  # self.batch_size * [self.story_size, self.W]
        storys_embed = tf.stack(embed_list)  # [self.batch_size, self.story_size, self.W]

        qs = tf.unstack(querys)  # self.batch_size * self.input_dim
        embed = tf.stack([tf.nn.embedding_lookup(self.embedding_matrix, w) * self.mask
                          for w in qs])  # [self.batch_size, self.input_dim, self.W]
        querys_embed = tf.reduce_sum(embed, 1)  # [self.batch_size, self.W]
        return storys_embed, querys_embed

    def save(self, checkpoint_dir, task_name, step):
        task_dir = os.path.join(checkpoint_dir, "%s" % task_name)
        file_name = "PTRModel_%s.model" % task_name
        if not os.path.exists(task_dir):
            os.makedirs(task_dir)
        self.saver.save(self.sess,
                        os.path.join(task_dir, file_name),
                        global_step=step.astype(int))

    def load(self, checkpoint_dir, task_name):
        print(" [*] Reading checkpoints...")
        checkpoint_dir = os.path.join(checkpoint_dir, "%s" % task_name)
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
        else:
            raise Exception(" [!] Testing, but %s not found" % checkpoint_dir)
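# Hypothetical training loop for the QA model above. `load_babi_batch` is a
# stand-in data loader (not part of the shown code); shapes follow the
# placeholders defined in build_model, and the constructor dimensions are
# illustrative only.
#
#     sess = tf.Session()
#     model = PTRModel(I=20, O=160, N=64, W=50, G=None, sess=sess,
#                      batch_size=32, story_size=20, max_hops=3)
#     sess.run(tf.global_variables_initializer())
#     for step in range(1000):
#         storys, querys, labels = load_babi_batch(batch_size=32)
#         feed = {model.storys: storys,   # [batch_size, story_size, input_dim] int32
#                 model.querys: querys,   # [batch_size, input_dim] int32
#                 model.labels: labels}   # [batch_size] int32
#         _, loss, hits = sess.run([model.optim, model.loss, model.num_corrects], feed)
#         if step % 100 == 0:
#             print("step %d  loss %.4f  acc %.3f" % (step, loss, hits / 32.0))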