Example #1
    def __init__(self, I, O, N, W, G, sess, batch_size=1024, story_size=1000, max_hops=3, 
                 controller_layer_size=1, controller_hidden_size=256,  min_grad=-10, max_grad=+10,
                 lr=1e-4, epsilon=0.1, weight_decay=0, scope="PTRModel", forward_only=False):
        self.controller = PTRCell([(N, W), (N, W)], [1, 1], [0, 0], (1, W), controller_layer_size=controller_layer_size, controller_hidden_size=controller_hidden_size, addr_mode=1)
     
        self.sess = sess
        self.scope = scope

        self.input_dim = I
        self.output_dim = O
        self.W = W

        self.batch_size = batch_size
        self.story_size = story_size
        self.max_hops = max_hops
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay

        self.outputs = []

        self.saver = None
        self.params = None

        self.global_step = tf.Variable(0, trainable=False)
        self.opt = tf.train.AdamOptimizer(lr, epsilon=epsilon)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=0.0, epsilon=epsilon, momentum=0.9)
        #self.opt = tf.train.AdagradOptimizer(lr)
 
        self.build_model(forward_only)
Example #2
    def __init__(self,
                 I,
                 O,
                 N,
                 W,
                 K,
                 G,
                 sess,
                 min_length=1,
                 max_length=10,
                 controller_layer_size=1,
                 min_grad=-10,
                 max_grad=+10,
                 lr=1e-3,
                 epsilon=1e-8,
                 momentum=0.9,
                 decay=0.0,
                 weight_decay=0,
                 scope="PTRModel",
                 forward_only=False):
        self.controller = PTRCell([(N, W), (N, W), (N * N, W)], [1, 1, 0],
                                  [0, 0, 1], (1, W),
                                  controller_layer_size=controller_layer_size,
                                  addr_mode=0)
        self.sess = sess
        self.scope = scope

        self.input_dim = I
        self.output_dim = O
        self.W = W

        self.min_length = min_length
        self.max_length = max_length
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay

        self.inputs_1 = []
        self.inputs_2 = []
        self.true_outputs = []

        self.outputs = {}
        self.prev_states = {}

        self.losses = {}
        self.optims = {}
        self.grads = {}

        self.collect_states = {0: []}
        self.debug = {0: []}

        self.saver = None
        self.params = None

        self.global_step = tf.Variable(0, trainable=False)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=decay, epsilon=epsilon, momentum=momentum)
        #self.opt = tf.train.AdagradOptimizer(lr)
        self.opt = tf.train.AdamOptimizer(lr)
        #self.opt = tf.train.AdamOptimizer(lr, epsilon=epsilon)
        self.build_model(forward_only)
Example #3
class PTRModel(object):
    def __init__(self,
                 I,
                 O,
                 N,
                 W,
                 sess,
                 min_length=1,
                 max_length=10,
                 controller_layer_size=1,
                 min_grad=-10,
                 max_grad=+10,
                 lr=1e-3,
                 epsilon=1e-8,
                 momentum=0.9,
                 decay=0.0,
                 weight_decay=0,
                 MAX_STEP=20,
                 scope="PTRModel",
                 forward_only=False):
        self.controller = PTRCell([(N, W), (N, W)], [1, 1], [0, 1], (1, W),
                                  controller_layer_size=controller_layer_size)
        self.sess = sess
        self.scope = scope

        self.input_dim = I
        self.output_dim = O
        self.W = W

        self.min_length = min_length
        self.max_length = max_length
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay

        self.MAX_STEP = MAX_STEP

        self.inputs_1 = []
        self.true_outputs = []

        self.stops = {}
        self.outputs = {}
        self.prev_states = {}

        self.losses = {}
        self.optims = {}
        self.grads = {}

        self.collect_states = {0: []}

        self.saver = None
        self.params = None

        self.global_step = tf.Variable(0, trainable=False)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=decay, epsilon=epsilon, momentum=momentum)
        #self.opt = tf.train.AdagradOptimizer(lr)
        self.opt = tf.train.AdamOptimizer(lr)
        self.build_model(forward_only)

    def build_model(self, forward_only):
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            #embedding_matrix = tf.eye(self.input_dim, self.W)
            #embedding_matrix = weight('embedding', [self.input_dim, self.W], init='xavier')
            self.exp = weight('exp', [1, 1], init='constant', value=1.6)
            self.exp_weight = tf.constant(
                [[1.0]], dtype=tf.float32)  #weight('exp_weight', [1, 1])
            self.exp_bias = weight('exp_bias', [1, 1],
                                   init='constant',
                                   value=0.0)
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

            prev_state = self.controller.init_state()

            tf.get_variable_scope().reuse_variables()
            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' %
                                             seq_length)

                self.inputs_1.append(input_1)
                self.true_outputs.append(true_output)

                # present inputs
                prev_state = self.controller.update_memory(
                    prev_state, [
                        tf.reshape(input_1, [1, -1]),
                        tf.reshape(input_1, [1, -1])
                    ])
                self.collect_states[seq_length] = (
                    self.collect_states[seq_length - 1][0:(seq_length - 1)] +
                    [self.copy_state(prev_state)])

                state = prev_state
                self.prev_states[seq_length] = state

                stops = []
                candidate_outputs = []
                for j in range(self.MAX_STEP):
                    state, _ = self.controller(state, j)
                    self.collect_states[seq_length].append(
                        self.copy_state(state))
                    candidate_outputs.append(
                        tf.unstack(state['M'][-1][0:seq_length]))
                    # stops.append(state['stop'])

                self.outputs[seq_length] = candidate_outputs
                self.stops[seq_length] = stops

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)
                    all_losses = []
                    for index in range(self.MAX_STEP):
                        loss = sequence_loss(
                            logits=self.outputs[seq_length][index],
                            targets=self.true_outputs[0:seq_length],
                            weights=[1] * seq_length,
                            average_across_timesteps=False,
                            average_across_batch=False,
                            softmax_loss_function=l2_loss)
                        all_losses.append(loss)

                    all_losses = tf.stack(all_losses)

                    #step_dist = tf.nn.softmax(tf.concat(self.stops[seq_length], 1))
                    #step_dist = tf.nn.softmax(tf.nn.embedding_lookup(self.length2stepdist, [seq_length]))
                    #max_pos = tf.to_float(tf.argmax(step_dist))
                    max_pos = tf.clip_by_value(
                        self.exp_weight *
                        tf.pow(tf.to_float(seq_length), self.exp) +
                        self.exp_bias, 0, self.MAX_STEP - 1)
                    stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)

                    loss1 = tf.reduce_sum(
                        tf.expand_dims(all_losses, 0) *
                        stop_pos) + 0.001 * tf.reduce_sum(max_pos)

                    self.losses[seq_length] = loss1

                    if not self.params:
                        self.params = tf.trainable_variables()

                    grads = []
                    for grad in tf.gradients(
                            loss1, self.params
                    ):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")

    def get_outputs(self, seq_length, index):
        return self.outputs[seq_length][index]

    def get_stop_pos(self, seq_length):
        #stop_pos = tf.nn.softmax(tf.nn.embedding_lookup(self.length2stepdist, [seq_length]) * 50)
        max_pos = tf.clip_by_value(
            self.exp_weight * tf.pow(tf.to_float(seq_length), self.exp) +
            self.exp_bias, 0, self.MAX_STEP - 1)
        stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)
        return (tf.argmax(stop_pos[0]), max_pos, self.exp, self.exp_weight,
                self.exp_bias, self.beta)

    def get_loss(self, seq_length):
        if seq_length not in self.losses:
            index, _, _, _, _, _ = self.get_stop_pos(seq_length)
            loss = sequence_loss(logits=self.outputs[seq_length][index],
                                 targets=self.true_outputs[0:seq_length],
                                 weights=[1] * seq_length,
                                 average_across_timesteps=False,
                                 average_across_batch=False,
                                 softmax_loss_function=l2_loss)

            self.losses[seq_length] = loss
        return self.losses[seq_length]

    def copy_state(self, state):
        new_state = {}
        for k, v in state.items():
            if k != 'seq_length':
                new_state[k] = v
        return new_state

    def get_collect_state(self, seq_length):
        return self.collect_states[seq_length]

    def save(self, checkpoint_dir, task_name, step):
        task_dir = os.path.join(checkpoint_dir, "%s" % (task_name))
        file_name = "PTRModel_%s.model" % task_name

        if not os.path.exists(task_dir):
            os.makedirs(task_dir)

        self.saver.save(self.sess,
                        os.path.join(task_dir, file_name),
                        global_step=step.astype(int))

    def load(self, checkpoint_dir, task_name):
        print(" [*] Reading checkpoints...")

        checkpoint_dir = os.path.join(checkpoint_dir, "%s" % (task_name))

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess,
                               os.path.join(checkpoint_dir, ckpt_name))
        else:
            raise Exception(" [!] Testing, but %s not found" % checkpoint_dir)
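
A minimal usage sketch for the class above (assuming a TensorFlow 1.x graph session and that PTRCell and the weight/sequence_loss/l2_loss/D helpers are importable from the same repo); the sizes and the random feed data are illustrative placeholders, not values taken from the source:

import numpy as np
import tensorflow as tf

# Illustrative dimensions only; the real task defines I, O, N, W.
I, O, N, W = 8, 8, 16, 8

with tf.Session() as sess:
    model = PTRModel(I, O, N, W, sess, max_length=5, MAX_STEP=10)
    sess.run(tf.global_variables_initializer())

    seq_length = 3
    feed = {}
    for t in range(seq_length):
        feed[model.inputs_1[t]] = np.random.rand(I).astype(np.float32)
        feed[model.true_outputs[t]] = np.random.rand(O).astype(np.float32)

    # One Adam step for this sequence length plus the soft-stop mixed loss.
    loss, _ = sess.run([model.losses[seq_length], model.optims[seq_length]],
                       feed_dict=feed)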
Example #4
class PTRModel(object):
    def __init__(self, I, O, N, W, K, G, sess, min_length=1, max_length=12,
                 controller_layer_size=1, min_grad=-10, max_grad=+10,
                 lr=1e-3, epsilon=1e-8, momentum=0.9, decay=0.0, weight_decay=0, scope="PTRModel", forward_only=False):
        self.controller = PTRCell([(N, W), (N, W)], [3, 0], [0, 1], (1, W), controller_layer_size=controller_layer_size, addr_mode=0)
        self.sess = sess
        self.scope = scope

        self.input_dim = I
        self.output_dim = O
        self.W = W
        
        self.min_length = min_length
        self.max_length = max_length
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay

        self.MAX_STEP = max_length

        self.inputs_1 = []
        self.inputs_2 = []
        self.true_outputs = []

        self.outputs = {}
        self.prev_states = {}

        self.losses = {}
        self.optims = {}
        self.grads = {}

        self.collect_states = {0:[]}
        self.debug = {0:[]}

        self.saver = None
        self.params = None

        self.global_step = tf.Variable(0, trainable=False)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=decay, epsilon=epsilon, momentum=momentum)
        #self.opt = tf.train.AdagradOptimizer(lr)
        self.opt = tf.train.AdamOptimizer(lr)
        #self.opt = tf.train.AdamOptimizer(lr, epsilon=epsilon)
        self.build_model(forward_only)

    def build_model(self, forward_only):
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            self.a = weight('a', [1, 1])
            # self.c = weight('c', [1, 1])
            # self.d = weight('d', [1, 1])
            self.b = weight('b', [1, 1], init='constant')
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))
            prev_state = self.controller.init_state()

            tf.get_variable_scope().reuse_variables()

            for seq_length in range(1, self.max_length + 1):
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' % seq_length)
                self.true_outputs.append(true_output)

            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)

                self.inputs_1.append(input_1)

                # present inputs
                prev_state = self.controller.update_memory(prev_state, [tf.reshape(input_1, [1, -1]),   tf.zeros((1, self.W))])
                self.collect_states[seq_length] = self.collect_states[seq_length-1][0:(seq_length-1)] + [self.copy_state(prev_state)]

                self.debug[seq_length] = []


                state = prev_state
                self.prev_states[seq_length] = state

                for j in range(seq_length):
                    state, _ = self.controller(state, j)
                    new_state = self.copy_state(state)
                    self.collect_states[seq_length].append(new_state)
                    self.debug[seq_length].append((new_state['ptr'], new_state['dptr']))


                self.outputs[seq_length] = tf.unstack(state['M'][-1][0:seq_length])

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length):
                    print(" [*] Building a loss model for seq_length %s" % seq_length)
                    print(len(self.outputs[seq_length]), len(self.true_outputs[0:seq_length]), len([1] * seq_length))
                    loss = sequence_loss(logits=self.outputs[seq_length],
                                         targets=self.true_outputs[0:seq_length],
                                         weights=[1] * seq_length,
                                         average_across_timesteps=False,
                                         average_across_batch=False,
                                         softmax_loss_function=l2_loss)
                    self.losses[seq_length] = loss

                    if not self.params:
                        self.params = tf.trainable_variables()

                    grads = []
                    for grad in tf.gradients(loss, self.params): # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(tf.clip_by_value(grad,
                                                          self.min_grad,
                                                          self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length):
                    self.optims[seq_length] = self.opt.apply_gradients(
                                                           zip(self.grads[seq_length], self.params),
                                                           global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")

    def get_outputs(self, seq_length):
        return self.outputs[seq_length]

    def get_loss(self, seq_length):
        if seq_length not in self.losses:
            loss = sequence_loss(logits=self.outputs[seq_length],
                                 targets=self.true_outputs[0:seq_length],
                                 weights=[1] * seq_length,
                                 average_across_timesteps=False,
                                 average_across_batch=False,
                                 softmax_loss_function=l2_loss)

            self.losses[seq_length] = loss
        return self.losses[seq_length]

    def copy_state(self, state):
        new_state = {}
        for k, v in state.items():
            if k != 'seq_length':
                new_state[k] = v
        return new_state
 
    def get_collect_state(self, seq_length):
        return self.collect_states[seq_length]

    def save(self, checkpoint_dir, task_name, step):
        task_dir = os.path.join(checkpoint_dir, "%s" % (task_name))
        file_name = "PTRModel_%s.model" % task_name

        if not os.path.exists(task_dir):
            os.makedirs(task_dir)

        self.saver.save(self.sess,
                        os.path.join(task_dir, file_name),
                        global_step=step.astype(int))

    def load(self, checkpoint_dir, task_name):
        print(" [*] Reading checkpoints...")

        checkpoint_dir = os.path.join(checkpoint_dir, "%s" % (task_name))

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
        else:
            raise Exception(" [!] Testing, but %s not found" % checkpoint_dir)
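
A hedged sketch of inspecting the pointer trace from Example #4 (assuming `model` is an already-constructed instance of the class above, with a live TF1 session and initialized variables); only the inputs are fed, because the output and debug tensors do not depend on the target placeholders:

import numpy as np

seq_length = 4  # any value between min_length and max_length
feed = {model.inputs_1[t]: np.random.rand(model.input_dim).astype(np.float32)
        for t in range(seq_length)}

# debug[seq_length] holds one (ptr, dptr) tensor pair per controller step,
# i.e. where the pointer sat while the sequence was being processed.
ptr_trace = model.sess.run(model.debug[seq_length], feed_dict=feed)
outputs = model.sess.run(model.get_outputs(seq_length), feed_dict=feed)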
Example #5
class PTRModel(object):
    def __init__(self,
                 I,
                 O,
                 N,
                 W,
                 K,
                 G,
                 sess,
                 min_length=1,
                 max_length=10,
                 controller_layer_size=1,
                 min_grad=-10,
                 max_grad=+10,
                 lr=1e-3,
                 epsilon=1e-8,
                 momentum=0.9,
                 decay=0.0,
                 weight_decay=0,
                 scope="PTRModel",
                 forward_only=False):
        self.controller = PTRCell([(N, W), (N, W), (N, W)], [1, 1, 0],
                                  [0, 0, 1], (1, W),
                                  controller_layer_size=controller_layer_size,
                                  addr_mode=0)
        self.sess = sess
        self.scope = scope

        self.input_dim = I
        self.output_dim = O
        self.W = W

        self.min_length = min_length
        self.max_length = max_length
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay

        self.inputs_1 = []
        self.inputs_2 = []
        self.true_outputs = []

        self.outputs = {}
        self.prev_states = {}

        self.losses = {}
        self.optims = {}
        self.grads = {}

        self.collect_states = {0: []}

        self.saver = None
        self.params = None

        self.global_step = tf.Variable(0, trainable=False)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=decay, epsilon=epsilon, momentum=momentum)
        #self.opt = tf.train.AdagradOptimizer(lr)
        self.opt = tf.train.AdamOptimizer(lr)
        #self.opt = tf.train.AdamOptimizer(lr, epsilon=epsilon)
        self.build_model(forward_only)

    def build_model(self, forward_only):
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            self.length2stepdist = tf.get_variable(
                'embedding', [self.max_length + 1, self.max_length + 1]
            )  # for each length we have a vector with max_length representing the step dist
            # self.length2stepdist = tf.zeros( [self.max_length+1, 1], dtype = tf.int32, name = 'embedding')   # for each length we have a vector with max_length representing the step dist
            self.beta = tf.get_variable('beta', [1], dtype=tf.float32)
            # self.step_dist = tf.nn.softmax(self.step_dist1[:self.input_seqlen+1])
            # k=100
            # fx = tf.exp(k * self.step_dist ) - 1 + 0.00001
            # sm = fx / tf.reduce_sum(fx)
            # self.step_dist = sm
            #embedding_matrix = tf.eye(self.input_dim, self.W)
            #embedding_matrix = weight('embedding', [self.input_dim, self.W], init='xavier')
            #tf.gather(embedding_matrix, tf.to_int32(input_1))
            #tf.gather(embedding_matrix, tf.to_int32(input_2))
            prev_state = self.controller.init_state()

            tf.get_variable_scope().reuse_variables()
            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)
                input_2 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_2_%s' % seq_length)
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' %
                                             seq_length)

                self.step_dist1 = tf.nn.embedding_lookup(
                    self.length2stepdist, [seq_length])
                # self.step_dist = D(self.max_length+1, tf.to_float(self.step_dist1[0][0]), 1, self.beta[0] * self.beta[0] )

                self.step_dist = tf.nn.softmax(self.step_dist1)
                k = 50  #self.beta * self.beta
                fx = tf.exp(k * self.step_dist) - 1 + 0.00001
                sm = fx / tf.reduce_sum(fx)
                self.step_dist = sm

                self.inputs_1.append(input_1)
                self.inputs_2.append(input_2)
                self.true_outputs.append(true_output)

                # present inputs
                prev_state = self.controller.update_memory(
                    prev_state, [
                        tf.reshape(input_1, [1, -1]),
                        tf.reshape(input_2, [1, -1]),
                        tf.zeros((1, self.W))
                    ])
                self.collect_states[seq_length] = (
                    self.collect_states[seq_length - 1][0:(seq_length - 1)] +
                    [self.copy_state(prev_state)])

                state = prev_state
                self.prev_states[seq_length] = state

                earlystop = False
                if earlystop:
                    self.stops = []
                    stop_time = seq_length
                    self.collect_base = len(self.collect_states[seq_length])
                    # print ('collect_base:', collect_base)

                candidate_outputs = []
                for j in range(0, self.max_length + 1):
                    state, stop = self.controller(state, j)
                    self.collect_states[seq_length].append(
                        self.copy_state(state))
                    candidate_outputs.append(
                        tf.unstack(state['M'][-1][0:seq_length]))
                    if earlystop:
                        self.stops.append(stop)

                self.outputs[seq_length] = candidate_outputs  # tf.unstack(state['M'][-1][0:seq_length])

                if earlystop:

                    for j in range(0, seq_length + 1)[::-1]:
                        # print(self.stops[j].get_shape())
                        # print(j)
                        # stop_time = tf.cond(self.stops[j][1]>self.stops[j][0], lambda: j, lambda: stop_time)
                        cur_state = self.collect_states[seq_length][
                            self.collect_base + j]
                        self.outputs[seq_length] = tf.cond(
                            self.stops[j][1] > self.stops[j][0],
                            lambda: tf.unstack(cur_state['M'][-1][0:seq_length]),
                            lambda: self.outputs[seq_length])

                    # print(type(self.outputs[seq_length]))
                    if not isinstance(self.outputs[seq_length], list):
                        self.outputs[seq_length] = [self.outputs[seq_length]]

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)
                    all_losses = []
                    for index in range(self.max_length + 1):
                        loss = sequence_loss(
                            logits=self.outputs[seq_length][index],
                            targets=self.true_outputs[0:seq_length],
                            weights=[1] * seq_length,
                            average_across_timesteps=False,
                            average_across_batch=False,
                            softmax_loss_function=l2_loss)
                        all_losses.append(loss)

                    all_losses = tf.stack(all_losses)

                    # all_loss_diff = tf.abs(all_losses - tf.concat([[0], all_losses[:-1]], axis = 0))*1000

                    maxpos = tf.argmax(self.step_dist)

                    loss1 = tf.reduce_sum(
                        tf.expand_dims(all_losses, 0) * self.step_dist,
                        axis=1) + 0.002 * tf.to_float(maxpos[1])

                    self.losses[seq_length] = loss1

                    if not self.params:
                        self.params = tf.trainable_variables()
                        print(self.params)

                    grads = []
                    for grad in tf.gradients(
                            loss1, self.params
                    ):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")

    def get_outputs(self, seq_length, index):
        return self.outputs[seq_length][index]

    def get_stop_pos(self, seq_length):
        # stop_pos = tf.nn.embedding_lookup(self.length2stepdist, [seq_length])
        # stop_pos = tf.nn.embedding_lookup(self.length2stepdist, [seq_length])

        return self.step_dist, tf.argmax(self.step_dist[0]), self.step_dist1

    def get_loss(self, seq_length):
        if seq_length not in self.losses:
            _, index, _ = self.get_stop_pos(seq_length)
            loss = sequence_loss(logits=self.outputs[seq_length][index],
                                 targets=self.true_outputs[0:seq_length],
                                 weights=[1] * seq_length,
                                 average_across_timesteps=False,
                                 average_across_batch=False,
                                 softmax_loss_function=l2_loss)

            self.losses[seq_length] = loss
        return self.losses[seq_length]

    def copy_state(self, state):
        new_state = {}
        for k, v in state.items():
            if k != 'seq_length':
                new_state[k] = v
        return new_state

    def get_collect_state(self, seq_length):
        return self.collect_states[seq_length]

    def save(self, checkpoint_dir, task_name, step):
        task_dir = os.path.join(checkpoint_dir, "%s" % (task_name))
        file_name = "PTRModel_%s.model" % task_name

        if not os.path.exists(task_dir):
            os.makedirs(task_dir)

        self.saver.save(self.sess,
                        os.path.join(task_dir, file_name),
                        global_step=step.astype(int))

    def load(self, checkpoint_dir, task_name):
        print(" [*] Reading checkpoints...")

        checkpoint_dir = os.path.join(checkpoint_dir, "%s" % (task_name))

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess,
                               os.path.join(checkpoint_dir, ckpt_name))
        else:
            raise Exception(" [!] Testing, but %s not found" % checkpoint_dir)
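
Example #5 consumes two input streams per position, so a training step feeds inputs_1, inputs_2, and true_outputs together; a sketch under the same assumptions as before (an already-built `model` with a live TF1 session, random placeholder data in place of a real task batch):

import numpy as np

seq_length = 3  # any value between min_length and max_length
feed = {}
for t in range(seq_length):
    feed[model.inputs_1[t]] = np.random.rand(model.input_dim).astype(np.float32)
    feed[model.inputs_2[t]] = np.random.rand(model.input_dim).astype(np.float32)
    feed[model.true_outputs[t]] = np.random.rand(model.output_dim).astype(np.float32)

# The loss mixes every candidate step's output with the learned step
# distribution, so one run both reports the loss and applies the gradients.
loss, _ = model.sess.run([model.losses[seq_length], model.optims[seq_length]],
                         feed_dict=feed)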
Example #6
class PTRModel(object):
    def __init__(self, I, O, N, W, G, sess, batch_size=1024, story_size=1000, max_hops=3, 
                 controller_layer_size=1, controller_hidden_size=256,  min_grad=-10, max_grad=+10,
                 lr=1e-4, epsilon=0.1, weight_decay=0, scope="PTRModel", forward_only=False):
        self.controller = PTRCell([(N, W), (N, W)], [1, 1], [0, 0], (1, W), controller_layer_size=controller_layer_size, controller_hidden_size=controller_hidden_size, addr_mode=1)
     
        self.sess = sess
        self.scope = scope

        self.input_dim = I
        self.output_dim = O
        self.W = W

        self.batch_size = batch_size
        self.story_size = story_size
        self.max_hops = max_hops
        self.min_grad = min_grad
        self.max_grad = max_grad
        self.weight_decay = weight_decay

        self.outputs = []

        self.saver = None
        self.params = None

        self.global_step = tf.Variable(0, trainable=False)
        self.opt = tf.train.AdamOptimizer(lr, epsilon=epsilon)
        #self.opt = tf.train.RMSPropOptimizer(lr, decay=0.0, epsilon=epsilon, momentum=0.9)
        #self.opt = tf.train.AdagradOptimizer(lr)
 
        self.build_model(forward_only)

    def build_model(self, forward_only):
        print("[*] Building a PTRModel QA model")

        self.storys = tf.placeholder(tf.int32, [self.batch_size, self.story_size, self.input_dim], name='story')
        self.querys = tf.placeholder(tf.int32, [self.batch_size, self.input_dim], name='query')
        self.labels = tf.placeholder(tf.int32, [self.batch_size], name='label')

        self.embedding_matrix = weight('embedding', [self.output_dim, self.W], init='xavier')
        self.mask = tf.ones([self.input_dim, self.W]) #weight('mask', [self.input_dim, self.W], init='xavier')
        self.decoding_weight = weight('decoding_weight', [self.W, self.output_dim], init='xavier')
        self.decoding_bias = weight('decoding_bias', [self.output_dim], init='constant')

        zeros = np.zeros(self.W, dtype=np.float32)
        with tf.variable_scope(self.scope):
            init_state = self.controller.init_state()

            ss, qs = self.embedding(self.storys, self.querys)

            tf.get_variable_scope().reuse_variables()
            for i in range(self.batch_size):
                progress(i/float(self.batch_size))

                state = init_state
                for sid in range(self.story_size):
                    input_ = ss[i, sid:sid+1, :]
                    state = self.controller.update_memory(state, [input_, input_])

                # present inputs
                state['R'] = qs[i:i+1, :]

                outputs = []
                # present targets
                for _ in range(self.max_hops):
                    state = self.controller(state)
                    outputs.append(self.decode(state['R']))
                #out = tf.reduce_sum(tf.concat(outputs, 0), 0, keep_dims=True)
                out = outputs[-1]
                self.outputs.append(out)

            if not forward_only:
                logits = tf.concat(self.outputs, 0)
                cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.labels)
                self.loss = tf.reduce_mean(cross_entropy)

                predicts = tf.cast(tf.argmax(logits, 1), 'int32')
                corrects = tf.equal(predicts, self.labels)
                self.num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))

                if not self.params:
                    self.params = tf.trainable_variables()

                self.grads = []
                for grad in tf.gradients(self.loss, self.params):
                    if grad is not None:
                        self.grads.append(tf.clip_by_value(grad,
                                                           self.min_grad,
                                                           self.max_grad))
                    else:
                        self.grads.append(grad)

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                self.optim = self.opt.apply_gradients(
                                  zip(self.grads, self.params),
                                  global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel QA model finished")
 
    def get_outputs(self):
        return self.outputs

    def decode(self, ans):
        return tf.nn.relu(tf.nn.bias_add(tf.matmul(ans, self.decoding_weight), self.decoding_bias))

    def embedding(self, storys, querys):
        story_list = tf.unstack(storys)  # self.batch_size * [self.story_size, self.input_dim]
        embed_list = []
        for facts in story_list:
            facts = tf.unstack(facts)  # self.story_size * self.input_dim
            embed = tf.stack([tf.nn.embedding_lookup(self.embedding_matrix, w) * self.mask for w in facts])  # [self.story_size, self.input_dim, self.W]
            embed_list.append(tf.reduce_sum(embed, 1))  # self.batch_size * [self.story_size, self.W]
        storys_embed = tf.stack(embed_list)  # [self.batch_size, self.story_size, self.W]

        qs = tf.unstack(querys)  # self.batch_size * self.input_dim
        embed = tf.stack([tf.nn.embedding_lookup(self.embedding_matrix, w) * self.mask for w in qs])  # [self.batch_size, self.input_dim, self.W]
        querys_embed = tf.reduce_sum(embed, 1)  # [self.batch_size, self.W]

        return storys_embed, querys_embed

    def save(self, checkpoint_dir, task_name, step):
        task_dir = os.path.join(checkpoint_dir, "%s" % (task_name))
        file_name = "PTRModel_%s.model" % task_name

        if not os.path.exists(task_dir):
            os.makedirs(task_dir)

        self.saver.save(self.sess,
                        os.path.join(task_dir, file_name),
                        global_step=step.astype(int))

    def load(self, checkpoint_dir, task_name):
        print(" [*] Reading checkpoints...")

        checkpoint_dir = os.path.join(checkpoint_dir, "%s" % (task_name))

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
        else:
            raise Exception(" [!] Testing, but %s not found" % checkpoint_dir)
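
Example #6 works on whole batches, so one training step feeds integer-encoded stories, queries, and answer labels for the full batch; a sketch assuming the model was built with forward_only=False on a live TF1 session, with random integers standing in for a real bAbI-style batch (token ids must stay below output_dim, the embedding vocabulary size):

import numpy as np

batch = {
    model.storys: np.random.randint(
        0, model.output_dim,
        size=(model.batch_size, model.story_size, model.input_dim)).astype(np.int32),
    model.querys: np.random.randint(
        0, model.output_dim,
        size=(model.batch_size, model.input_dim)).astype(np.int32),
    model.labels: np.random.randint(
        0, model.output_dim, size=(model.batch_size,)).astype(np.int32),
}

# One optimizer step; num_corrects counts the correct answers in the batch.
loss, correct, _ = model.sess.run(
    [model.loss, model.num_corrects, model.optim], feed_dict=batch)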