Beispiel #1
0
 def get_stop_pos(self, seq_length):
     """Build the ops that pick a stopping step for ``seq_length``.

     The candidate position is ``c * seq_length ** a + b``, clipped to
     ``[0, MAX_STEP - 1]``, and handed to ``D`` together with
     ``MAX_STEP``, ``1`` and ``self.beta``.

     NOTE(review): ``D`` is defined outside this block; presumably it
     returns a weighting over the ``MAX_STEP`` steps -- confirm.

     Returns:
         A 5-tuple ``(argmax of D(...)[0], max_pos, self.a, self.b,
         self.c)``.
     """
     length_as_float = tf.to_float(seq_length)
     unclipped_pos = self.c * tf.pow(length_as_float, self.a) + self.b
     max_pos = tf.clip_by_value(unclipped_pos, 0, self.MAX_STEP - 1)

     step_weights = D(self.MAX_STEP, max_pos, 1, self.beta)
     chosen_step = tf.argmax(step_weights[0])
     return chosen_step, max_pos, self.a, self.b, self.c
Beispiel #2
0
 def get_stop_pos(self, seq_length):
     """Build the ops that pick a stopping step for ``seq_length``.

     The candidate position is the quadratic
     ``a * seq_length ** 2 + b * seq_length + c``, clipped to
     ``[0, MAX_STEP - 1]``, and handed to ``D`` together with
     ``MAX_STEP``, ``1`` and ``self.beta``.

     NOTE(review): ``D`` is defined outside this block; presumably it
     returns a weighting over the ``MAX_STEP`` steps -- confirm.

     Returns:
         A 6-tuple ``(argmax of D(...)[0], max_pos, self.a, self.b,
         self.c, self.beta)``.
     """
     # The length is cast twice, exactly as before, so the graph holds
     # the same ops in the same order.
     quadratic_term = self.a * (tf.to_float(seq_length) ** 2)
     linear_term = self.b * tf.to_float(seq_length)
     unclipped_pos = quadratic_term + linear_term + self.c
     max_pos = tf.clip_by_value(unclipped_pos, 0, self.MAX_STEP - 1)

     step_weights = D(self.MAX_STEP, max_pos, 1, self.beta)
     chosen_step = tf.argmax(step_weights[0])
     return chosen_step, max_pos, self.a, self.b, self.c, self.beta
Beispiel #3
0
    def build_model(self, forward_only):
        """Build the graph for every sequence length up to ``max_length``.

        For each ``seq_length`` in ``1..max_length`` this creates an input
        and a target placeholder, feeds the input to the controller's
        memory, then runs the controller ``MAX_STEP`` times and records one
        candidate output per step.  Unless ``forward_only`` is truthy it
        also builds, for each ``seq_length`` in ``min_length..max_length``,
        a loss, clipped gradients and an optimizer op.  Finally a
        ``tf.train.Saver`` is created.

        Results are stored on ``self``: ``inputs_1``, ``true_outputs``,
        ``collect_states``, ``prev_states``, ``outputs``, ``stops``,
        ``losses``, ``params``, ``grads``, ``optims`` and ``saver``.

        Args:
            forward_only: When truthy, skip the loss, gradient and
                optimizer construction.
        """
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            #embedding_matrix = tf.eye(self.input_dim, self.W)
            #embedding_matrix = weight('embedding', [self.input_dim, self.W], init='xavier')
            # Parameters of the stop-position formula used in the loss:
            # exp_weight * seq_length ** exp + exp_bias.
            self.exp = weight('exp', [1, 1], init='constant', value=1.6)
            # exp_weight is a fixed constant here, not a `weight(...)`.
            self.exp_weight = tf.constant(
                [[1.0]], dtype=tf.float32)  #weight('exp_weight', [1, 1])
            self.exp_bias = weight('exp_bias', [1, 1],
                                   init='constant',
                                   value=0.0)
            # softplus is positive, so beta is always greater than 1.
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

            prev_state = self.controller.init_state()

            # From here on, variables in this scope are reused rather than
            # created.
            tf.get_variable_scope().reuse_variables()
            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' %
                                             seq_length)

                self.inputs_1.append(input_1)
                self.true_outputs.append(true_output)

                # present inputs
                # The same reshaped input is passed as both list elements.
                prev_state = self.controller.update_memory(
                    prev_state, [
                        tf.reshape(input_1, [1, -1]),
                        tf.reshape(input_1, [1, -1])
                    ])
                # Keep the first (seq_length - 1) states recorded for the
                # previous length and append a copy of the new state.
                # Requires self.collect_states[0] to exist already.
                self.collect_states[seq_length] = self.collect_states[
                    seq_length - 1][0:(seq_length -
                                       1)] + [self.copy_state(prev_state)]

                state = prev_state
                self.prev_states[seq_length] = state

                # `stops` is never filled (the append below is commented
                # out), so self.stops[seq_length] ends up as an empty list.
                stops = []
                candidate_outputs = []
                # Run the controller MAX_STEP times from the post-input
                # state; each step contributes one candidate output.
                for j in range(self.MAX_STEP):
                    state, _ = self.controller(state, j)
                    self.collect_states[seq_length].append(
                        self.copy_state(state))
                    # First seq_length slices of the last entry of
                    # state['M'], unstacked into a Python list.
                    candidate_outputs.append(
                        tf.unstack(state['M'][-1][0:seq_length]))
                    # stops.append(state['stop'])

                self.outputs[seq_length] = candidate_outputs
                self.stops[seq_length] = stops

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)
                    # One loss per controller step, each comparing that
                    # step's candidate output with the first seq_length
                    # targets.
                    all_losses = []
                    for index in range(self.MAX_STEP):
                        loss = sequence_loss(
                            logits=self.outputs[seq_length][index],
                            targets=self.true_outputs[0:seq_length],
                            weights=[1] * seq_length,
                            average_across_timesteps=False,
                            average_across_batch=False,
                            softmax_loss_function=l2_loss)
                        all_losses.append(loss)

                    all_losses = tf.stack(all_losses)

                    #step_dist = tf.nn.softmax(tf.concat(self.stops[seq_length], 1))
                    #step_dist = tf.nn.softmax(tf.nn.embedding_lookup(self.length2stepdist, [seq_length]))
                    #max_pos = tf.to_float(tf.argmax(step_dist))
                    # Stop position as a function of the length, clipped to
                    # the valid step range [0, MAX_STEP - 1].
                    max_pos = tf.clip_by_value(
                        self.exp_weight *
                        tf.pow(tf.to_float(seq_length), self.exp) +
                        self.exp_bias, 0, self.MAX_STEP - 1)
                    # NOTE(review): D is defined elsewhere; presumably a
                    # weighting over the MAX_STEP steps around max_pos --
                    # confirm.
                    stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)

                    # Per-step losses weighted by stop_pos and summed, plus
                    # 0.001 times the (clipped) stop position.
                    loss1 = tf.reduce_sum(
                        tf.expand_dims(all_losses, 0) *
                        stop_pos) + 0.001 * tf.reduce_sum(max_pos)

                    self.losses[seq_length] = loss1

                    # Filled once, on the first length that gets here while
                    # self.params is still empty/falsy.
                    if not self.params:
                        self.params = tf.trainable_variables()

                    # Clip each gradient element-wise to
                    # [min_grad, max_grad].  None gradients are kept so the
                    # list stays aligned with self.params for the zip below.
                    grads = []
                    for grad in tf.gradients(
                            loss1, self.params
                    ):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        # One apply_gradients op per sequence length, built in a separate
        # "opt" variable scope.
        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")
Beispiel #4
0
    def build_model(self, forward_only):
        """Build the graph for every sequence length up to ``max_length``.

        All target placeholders are created first.  Then, for each
        ``seq_length`` in ``1..max_length``, an input placeholder is
        created and fed to the controller's memory, the controller is run
        ``MAX_STEP`` times, and one candidate output per step is recorded.
        Unless ``forward_only`` is truthy, a loss, clipped gradients and an
        optimizer op are built for each ``seq_length`` in
        ``range(min_length, max_length)``.  Finally a ``tf.train.Saver`` is
        created.

        Results are stored on ``self``: ``true_outputs``, ``inputs_1``,
        ``collect_states``, ``debug``, ``prev_states``, ``outputs``,
        ``losses``, ``params``, ``grads``, ``optims`` and ``saver``.

        Args:
            forward_only: When truthy, skip the loss, gradient and
                optimizer construction.
        """
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            # Parameters of the stop-position formula used in the loss:
            # c * seq_length ** a + b.
            self.a = weight('a', [1, 1], init='constant', value=1)
            self.c = weight('c', [1, 1], init='constant', value=2)
            # self.d = weight('d', [1,1])
            # No explicit value is given for 'b'; its initial value is
            # whatever `weight` defaults to for init='constant'.
            self.b = weight('b', [1, 1], init='constant')
            # softplus is positive, so beta is always greater than 1.
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))
            prev_state = self.controller.init_state()

            # From here on, variables in this scope are reused rather than
            # created.
            tf.get_variable_scope().reuse_variables()

            # Target placeholders, one per position, created before any
            # input placeholder.
            for seq_length in range(1, self.max_length + 1):
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' %
                                             seq_length)
                self.true_outputs.append(true_output)

            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)

                self.inputs_1.append(input_1)

                # present inputs
                # The second list element is an all-zero (1, self.W) tensor.
                prev_state = self.controller.update_memory(
                    prev_state,
                    [tf.reshape(input_1, [1, -1]),
                     tf.zeros((1, self.W))])
                # Keep the first (seq_length - 1) states recorded for the
                # previous length and append a copy of the new state.
                # Requires self.collect_states[0] to exist already.
                self.collect_states[seq_length] = self.collect_states[
                    seq_length - 1][0:(seq_length -
                                       1)] + [self.copy_state(prev_state)]

                self.debug[seq_length] = []

                state = prev_state
                self.prev_states[seq_length] = state

                candidate_outputs = []
                # Run the controller MAX_STEP times from the post-input
                # state; each step contributes one candidate output.
                for j in range(self.MAX_STEP):
                    state, _ = self.controller(state, j)
                    new_state = self.copy_state(state)
                    self.collect_states[seq_length].append(new_state)
                    # First seq_length slices of the last entry of
                    # state['M'], unstacked into a Python list.
                    candidate_outputs.append(
                        tf.unstack(state['M'][-1][0:seq_length]))
                    # Record the copied state's 'ptr' and 'dptr' entries
                    # for later inspection.
                    self.debug[seq_length].append(
                        (new_state['ptr'], new_state['dptr']))

                self.outputs[seq_length] = candidate_outputs

            if not forward_only:
                # NOTE(review): this range stops at max_length - 1, although
                # outputs were built for lengths up to max_length.  The
                # optimizer loop below uses the same range.  Confirm that
                # excluding max_length is intended.
                for seq_length in range(self.min_length, self.max_length):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)
                    # NOTE(review): prints three list lengths; looks like a
                    # leftover debugging aid.
                    print(len(self.outputs[seq_length]),
                          len(self.true_outputs[0:seq_length]),
                          len([1] * (seq_length)))
                    # print(self.outputs[seq_length][0].shape,self.true_outputs[0:2*seq_length][0].shape,len([1] * (2*seq_length)))
                    # One loss per controller step, each comparing that
                    # step's candidate output with the first seq_length
                    # targets.
                    all_losses = []
                    for index in range(self.MAX_STEP):
                        loss = sequence_loss(
                            logits=self.outputs[seq_length][index],
                            targets=self.true_outputs[0:seq_length],
                            weights=[1] * (seq_length),
                            average_across_timesteps=False,
                            average_across_batch=False,
                            softmax_loss_function=l2_loss)
                        all_losses.append(loss)
                    all_losses = tf.stack(all_losses)
                    # Unclipped stop position as a function of the length.
                    cn = self.c * tf.pow(tf.to_float(seq_length),
                                         self.a) + self.b
                    # Clipped to the valid step range [0, MAX_STEP - 1].
                    max_pos = tf.clip_by_value(cn, 0, self.MAX_STEP - 1)
                    # NOTE(review): D is defined elsewhere; presumably a
                    # weighting over the MAX_STEP steps around max_pos --
                    # confirm.
                    stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)

                    # Per-step losses weighted by stop_pos and summed, plus
                    # 0.001 times the *unclipped* position cn (not max_pos).
                    loss1 = tf.reduce_sum(
                        tf.expand_dims(all_losses, 0) *
                        stop_pos) + 0.001 * tf.reduce_sum(cn)

                    self.losses[seq_length] = loss1

                    # Filled once, on the first length that gets here while
                    # self.params is still empty/falsy.
                    if not self.params:
                        self.params = tf.trainable_variables()

                    # Clip each gradient element-wise to
                    # [min_grad, max_grad].  None gradients are kept so the
                    # list stays aligned with self.params for the zip below.
                    grads = []
                    for grad in tf.gradients(
                            loss1, self.params
                    ):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        # One apply_gradients op per sequence length, built in a separate
        # "opt" variable scope.
        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")