Example 1
import tensorflow as tf  # TensorFlow 1.x API

import ops  # project-local helper module (weight, bias, conv3d, batch_norm, relu)
def conv_block(input,
               channels,
               dropout_flag,
               dropout_rate,
               layer_idx,
               stride_input=1,
               k_size=3,
               padding_type='SAME'):
    # Traditional 3D conv layer followed by batch norm and relu activation

    # Output spatial size; integer division keeps it a valid shape dimension.
    i_size = input.get_shape().as_list()[-2] // stride_input

    weights = ops.weight([k_size, k_size, k_size, channels[0], channels[1]],
                         layer_name='wcnn' + str(layer_idx + 1),
                         reuse=tf.get_variable_scope().reuse)

    bias = ops.bias([i_size, i_size, i_size, channels[1]],
                    layer_name='bcnn' + str(layer_idx + 1),
                    reuse=tf.get_variable_scope().reuse)

    conv_output = tf.add(
        ops.conv3d(input,
                   weights,
                   stride=[stride_input, stride_input, stride_input],
                   padding=padding_type), bias)
    conv_output = ops.batch_norm(conv_output)
    conv_output = ops.relu(conv_output)

    if dropout_flag:
        # Note: dropout_rate is passed through as the keep probability.
        conv_output = tf.nn.dropout(conv_output, keep_prob=dropout_rate)

    return conv_output
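
A minimal usage sketch (the 5-D input layout [batch, depth, height, width, channels] and the channel pair below are illustrative assumptions, not part of the original):

    x = tf.placeholder(tf.float32, [None, 32, 32, 32, 1])
    y = conv_block(x,
                   channels=[1, 16],
                   dropout_flag=True,
                   dropout_rate=0.7,  # used as keep probability (see above)
                   layer_idx=0)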
Example 2
def out_block(input_anc,
              input_pos,
              channels,
              layer_idx,
              stride_input=1,
              k_size=8,
              padding_type='VALID'):

    # Last conv layer; weights and bias are created once and shared by the
    # anchor and positive branches (siamese setup), then the output is flattened.
    weights = ops.weight([k_size, k_size, k_size, channels[0], channels[1]],
                         layer_name='wcnn' + str(layer_idx + 1))

    bias = ops.bias([1, 1, 1, channels[1]],
                    layer_name='bcnn' + str(layer_idx + 1))

    conv_output_anc = tf.add(
        ops.conv3d(input_anc,
                   weights,
                   stride=[stride_input, stride_input, stride_input],
                   padding=padding_type), bias)
    conv_output_pos = tf.add(
        ops.conv3d(input_pos,
                   weights,
                   stride=[stride_input, stride_input, stride_input],
                   padding=padding_type), bias)

    conv_output_anc = ops.batch_norm(conv_output_anc)
    conv_output_pos = ops.batch_norm(conv_output_pos)

    conv_output_anc = tf.contrib.layers.flatten(conv_output_anc)
    conv_output_pos = tf.contrib.layers.flatten(conv_output_pos)

    return conv_output_anc, conv_output_pos
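
A hedged usage sketch of the siamese pairing (the 8^3 patch size and channel counts are illustrative assumptions):

    anc = tf.placeholder(tf.float32, [None, 8, 8, 8, 64])
    pos = tf.placeholder(tf.float32, [None, 8, 8, 8, 64])
    # k_size=8 with 'VALID' padding collapses the 8^3 volume to 1x1x1, so the
    # flatten yields one descriptor vector per input patch.
    desc_anc, desc_pos = out_block(anc, pos, channels=[64, 32], layer_idx=0)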
Example 3
    def build_model(self, forward_only):
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            #embedding_matrix = tf.eye(self.input_dim, self.W)
            #embedding_matrix = weight('embedding', [self.input_dim, self.W], init='xavier')
            self.exp = weight('exp', [1, 1], init='constant', value=1.6)
            self.exp_weight = tf.constant(
                [[1.0]], dtype=tf.float32)  #weight('exp_weight', [1, 1])
            self.exp_bias = weight('exp_bias', [1, 1],
                                   init='constant',
                                   value=0.0)
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

            prev_state = self.controller.init_state()

            tf.get_variable_scope().reuse_variables()
            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' %
                                             seq_length)

                self.inputs_1.append(input_1)
                self.true_outputs.append(true_output)

                # present inputs
                prev_state = self.controller.update_memory(
                    prev_state, [
                        tf.reshape(input_1, [1, -1]),
                        tf.reshape(input_1, [1, -1])
                    ])
                self.collect_states[seq_length] = self.collect_states[
                    seq_length - 1][0:(seq_length -
                                       1)] + [self.copy_state(prev_state)]

                state = prev_state
                self.prev_states[seq_length] = state

                stops = []
                candidate_outputs = []
                for j in range(self.MAX_STEP):
                    state, _ = self.controller(state, j)
                    self.collect_states[seq_length].append(
                        self.copy_state(state))
                    candidate_outputs.append(
                        tf.unstack(state['M'][-1][0:seq_length]))
                    # stops.append(state['stop'])

                self.outputs[seq_length] = candidate_outputs
                self.stops[seq_length] = stops

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)
                    all_losses = []
                    for index in range(self.MAX_STEP):
                        loss = sequence_loss(
                            logits=self.outputs[seq_length][index],
                            targets=self.true_outputs[0:seq_length],
                            weights=[1] * seq_length,
                            average_across_timesteps=False,
                            average_across_batch=False,
                            softmax_loss_function=l2_loss)
                        all_losses.append(loss)

                    all_losses = tf.stack(all_losses)

                    #step_dist = tf.nn.softmax(tf.concat(self.stops[seq_length], 1))
                    #step_dist = tf.nn.softmax(tf.nn.embedding_lookup(self.length2stepdist, [seq_length]))
                    #max_pos = tf.to_float(tf.argmax(step_dist))
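                    # The lines below model the expected stopping step as a
                    # learned power of the sequence length, clipped to the valid
                    # step range; D(...) (defined elsewhere in the project)
                    # spreads it into a soft distribution over the MAX_STEP
                    # per-step losses.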
                    max_pos = tf.clip_by_value(
                        self.exp_weight *
                        tf.pow(tf.to_float(seq_length), self.exp) +
                        self.exp_bias, 0, self.MAX_STEP - 1)
                    stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)

                    loss1 = tf.reduce_sum(
                        tf.expand_dims(all_losses, 0) *
                        stop_pos) + 0.001 * tf.reduce_sum(max_pos)

                    self.losses[seq_length] = loss1

                    if not self.params:
                        self.params = tf.trainable_variables()

                    grads = []
                    for grad in tf.gradients(
                            loss1, self.params
                    ):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")
    def build_model(self, forward_only):
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            self.a = weight('a', [1, 1])
            # self.c = weight('c', [1, 1])
            # self.d = weight('d', [1, 1])
            self.b = weight('b', [1, 1], init='constant')
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

            for seq_length in range(1, self.max_length * self.max_length + 1):
                true_output = tf.placeholder(
                    tf.float32, [batch_size, self.output_dim],
                    name='true_output_%s' % seq_length)
                self.true_outputs.append(true_output)

            prev_state = self.controller.init_state(
                self.true_outputs[0].get_shape()[0])

            tf.get_variable_scope().reuse_variables()

            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32,
                                         [batch_size, self.input_dim],
                                         name='input_1_%s' % seq_length)
                input_2 = tf.placeholder(tf.float32,
                                         [batch_size, self.input_dim],
                                         name='input_2_%s' % seq_length)

                self.inputs_1.append(input_1)
                self.inputs_2.append(input_2)

                # present inputs
                prev_state = self.controller.update_memory(
                    prev_state, [
                        tf.reshape(input_1, [batch_size, 1, -1]),
                        tf.reshape(input_2, [batch_size, 1, -1]),
                        tf.zeros((batch_size, 1, self.W))
                    ])
                self.collect_states[seq_length] = self.collect_states[
                    seq_length - 1][0:(seq_length -
                                       1)] + [self.copy_state(prev_state)]

                self.debug[seq_length] = []

                state = prev_state
                self.prev_states[seq_length] = state

                candidate_outputs = []
                for j in range(0, self.MAX_STEP):
                    state = self.controller(state, j)
                    new_state = self.copy_state(state)
                    self.collect_states[seq_length].append(new_state)
                    # print(state['M'][-1][:,0:(seq_length*seq_length)].get_shape())
                    candidate_outputs.append(
                        tf.unstack(
                            tf.transpose(
                                state['M'][-1][:, 0:(seq_length * seq_length)],
                                [1, 0, 2])))
                    # self.debug[seq_length].append((new_state['ptr'],new_state['dptr']))

                self.outputs[seq_length] = candidate_outputs

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)
                    print(len(self.outputs[seq_length]),
                          len(self.true_outputs[0:seq_length * seq_length]),
                          len([1] * (seq_length * seq_length)))
                    # print(self.outputs[seq_length][0].shape,self.true_outputs[0:2*seq_length][0].shape,len([1] * (2*seq_length)))
                    all_losses = []
                    for index in range(self.MAX_STEP):
                        # print(len(self.outputs[seq_length][index]), self.outputs[seq_length][index][0].get_shape())
                        # print(len(tf.unstack(self.true_outputs[0:seq_length*seq_length])), tf.unstack(self.true_outputs[0:seq_length*seq_length])[0].get_shape())

                        loss = sequence_loss(
                            logits=self.outputs[seq_length][index],
                            targets=tf.unstack(self.true_outputs[0:seq_length *
                                                                 seq_length]),
                            weights=[[1] * batch_size] * seq_length *
                            seq_length,
                            average_across_timesteps=False,
                            average_across_batch=False,
                            softmax_loss_function=l2_loss)
                        all_losses.append(loss)

                    all_losses = tf.stack(all_losses)

                    # As in the previous variant: a learned power-law estimate
                    # of the expected stopping step, clipped to the valid range.
                    cn = tf.pow(tf.to_float(seq_length), self.a) + self.b
                    max_pos = tf.clip_by_value(cn, 0, self.MAX_STEP - 1)
                    stop_pos = D(self.MAX_STEP,
                                 tf.tile(max_pos, [batch_size, 1]), 1,
                                 self.beta)

                    loss1 = tf.reduce_sum(
                        tf.expand_dims(all_losses, 0) *
                        stop_pos) + 0.0001 * tf.reduce_sum(cn)

                    self.losses[seq_length] = loss1

                    if not self.params:
                        self.params = tf.trainable_variables()

                    grads = []
                    for grad in tf.gradients(
                            loss1, self.params
                    ):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")
Example 5
    def build_model(self, forward_only):
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            self.a = weight('a', [1, 1])
            # self.c = weight('c', [1, 1])
            # self.d = weight('d', [1, 1])
            self.b = weight('b', [1, 1], init='constant')
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))
            prev_state = self.controller.init_state()

            tf.get_variable_scope().reuse_variables()

            for seq_length in range(1, self.max_length + 1):
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' % seq_length)
                self.true_outputs.append(true_output)

            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)

                self.inputs_1.append(input_1)

                # present inputs
                prev_state = self.controller.update_memory(
                    prev_state,
                    [tf.reshape(input_1, [1, -1]),
                     tf.zeros((1, self.W))])
                self.collect_states[seq_length] = self.collect_states[
                    seq_length - 1][0:(seq_length -
                                       1)] + [self.copy_state(prev_state)]

                self.debug[seq_length] = []

                state = prev_state
                self.prev_states[seq_length] = state

                for j in range(seq_length):
                    state, _ = self.controller(state, j)
                    new_state = self.copy_state(state)
                    self.collect_states[seq_length].append(new_state)
                    self.debug[seq_length].append(
                        (new_state['ptr'], new_state['dptr']))

                self.outputs[seq_length] = tf.unstack(
                    state['M'][-1][0:seq_length])

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)
                    print(len(self.outputs[seq_length]),
                          len(self.true_outputs[0:seq_length]),
                          len([1] * seq_length))
                    loss = sequence_loss(
                        logits=self.outputs[seq_length],
                        targets=self.true_outputs[0:seq_length],
                        weights=[1] * seq_length,
                        average_across_timesteps=False,
                        average_across_batch=False,
                        softmax_loss_function=l2_loss)
                    self.losses[seq_length] = loss

                    if not self.params:
                        self.params = tf.trainable_variables()

                    grads = []
                    for grad in tf.gradients(loss, self.params): # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(tf.clip_by_value(grad,
                                                          self.min_grad,
                                                          self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")
Example 6
    def build_model(self, forward_only):
        print("[*] Building a PTRModel QA model")

        self.storys = tf.placeholder(
            tf.int32, [self.batch_size, self.story_size, self.input_dim],
            name='story')
        self.querys = tf.placeholder(tf.int32,
                                     [self.batch_size, self.input_dim],
                                     name='query')
        self.labels = tf.placeholder(tf.int32, [self.batch_size], name='label')

        self.embedding_matrix = weight('embedding', [self.output_dim, self.W],
                                       init='xavier')
        self.mask = tf.ones([self.input_dim, self.W])
        # Alternative: weight('mask', [self.input_dim, self.W], init='xavier')
        self.decoding_weight = weight('decoding_weight',
                                      [self.W, self.output_dim],
                                      init='xavier')
        self.decoding_bias = weight('decoding_bias', [self.output_dim],
                                    init='constant')

        zeros = np.zeros(self.W, dtype=np.float32)
        with tf.variable_scope(self.scope):
            init_state = self.controller.init_state()

            ss, qs = self.embedding(self.storys, self.querys)

            tf.get_variable_scope().reuse_variables()
            for i in range(self.batch_size):
                progress(i/float(self.batch_size))

                state = init_state
                for sid in range(self.story_size):
                    input_ = ss[i, sid:sid+1, :]
                    state = self.controller.update_memory(state, [input_, input_])

                # present inputs
                state['R'] = qs[i:i+1, :]

                outputs = []
                # present targets
                for _ in range(self.max_hops):
                    state = self.controller(state)
                    outputs.append(self.decode(state['R']))
                #out = tf.reduce_sum(tf.concat(outputs, 0), 0, keep_dims=True)
                out = outputs[-1]
                self.outputs.append(out)

            if not forward_only:
                logits = tf.concat(self.outputs, 0)
                cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=self.labels)
                self.loss = tf.reduce_mean(cross_entropy)

                predicts = tf.cast(tf.argmax(logits, 1), 'int32')
                corrects = tf.equal(predicts, self.labels)
                self.num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))

                if not self.params:
                    self.params = tf.trainable_variables()

                self.grads = []
                for grad in tf.gradients(self.loss, self.params):
                    if grad is not None:
                        self.grads.append(
                            tf.clip_by_value(grad, self.min_grad,
                                             self.max_grad))
                    else:
                        self.grads.append(grad)

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                self.optim = self.opt.apply_gradients(
                    zip(self.grads, self.params),
                    global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel QA model finished")