Esempio n. 1
0
    def __init__(self, flags, inputs):
        """Store hyper-parameters and build the normalized input tensors.

        Args:
            flags: parsed command-line FLAGS object supplying the learning
                rate, dimensions, regularization weight, and the write/read
                CNN architecture specs (format: 'kernel-stride-channel/...').
            inputs: dict of tensors with keys 'rgb', 'sub', 'query',
                'answer', and 'cor_idx' (index of the correct answer).
        """
        self.init_lr = flags.init_lr
        self.batch_size = flags.batch_size
        self.num_hop = flags.num_hop
        self.num_answer = flags.num_answer
        self.dim_text = flags.dim_text
        self.dim_memory = flags.dim_memory
        self.dim_mcb_output = flags.dim_mcb_output
        self.pretrain = flags.pretrain
        self.initializer = layers.xavier_initializer()
        self.initializer_conv = layers.xavier_initializer_conv2d()
        self.reg = flags.reg
        self.sharp = flags.sharp
        self.data_source = flags.data_source

        # Parse a 'k-s-c/k-s-c/...' spec string into per-layer lists.
        # Shared by the write and read CNN specs (was duplicated inline).
        def parse_conv_spec(spec):
            conv = {'kernel': [], 'stride': [], 'channel': []}
            for layer_spec in spec.split('/'):
                k, s, c = layer_spec.split('-')
                conv['kernel'].append(int(k))
                conv['stride'].append(int(s))
                conv['channel'].append(int(c))
            return conv

        # An empty spec string disables the corresponding CNN.
        self.write_mode = bool(flags.write)
        self.read_mode = bool(flags.read)
        # Baseline model: neither a write CNN nor a read CNN.
        self.baseline = not (self.write_mode or self.read_mode)

        self.write = (parse_conv_spec(flags.write) if self.write_mode
                      else {'kernel': [], 'stride': [], 'channel': []})
        self.read = (parse_conv_spec(flags.read) if self.read_mode
                     else {'kernel': [], 'stride': [], 'channel': []})

        with tf.variable_scope('inputs'):
            # L2-normalize every input feature along the feature axis.
            self.rgb = layers.unit_norm(inputs=inputs['rgb'], dim=1, epsilon=1e-12)
            self.sub = layers.unit_norm(inputs=inputs['sub'], dim=1, epsilon=1e-12)
            self.query = layers.unit_norm(inputs=inputs['query'], dim=1, epsilon=1e-12)
            self.answer = layers.unit_norm(inputs=inputs['answer'], dim=1, epsilon=1e-12)
            self.answer_index = inputs['cor_idx']
            # Only subtitle features feed the memory in this configuration.
            self.model_inputs = self.sub

        with tf.variable_scope('memory_write'):
            # Shared affine projection from text space into memory space,
            # reused for the query, the answers, and the memory itself.
            self.query_w = tf.get_variable(
                    name='query_w',
                    shape=[self.dim_text, self.dim_memory],
                    initializer=self.initializer)

            self.query_b = tf.get_variable(
                    name='query_b',
                    shape=[self.dim_memory],
                    initializer=self.initializer)
Esempio n. 2
0
File: ltm.py Progetto: thientu/RWMN
    def build_model(self):
        """Assemble the QA graph: query/answer embeddings, one memory
        write/read pass, answer scoring, loss, accuracy, and the train op.
        """
        with tf.variable_scope('query'):
            # u.shape = (batch, dim_memory)
            self.u = tf.matmul(self.query, self.query_w) + self.query_b
            self.u = tf.reshape(self.u, shape=[-1, self.dim_memory])
            self.u = layers.unit_norm(inputs=self.u, dim=1, epsilon=1e-12)

        with tf.variable_scope('answer'):
            # g.shape = (batch, num_answer, dim_memory); answer candidates
            # share the query projection so they live in memory space.
            self.g = tf.matmul(tf.reshape(self.answer, shape=[-1, self.dim_text]),
                        self.query_w) + self.query_b
            self.g = tf.reshape(self.g, shape=[-1, self.num_answer, self.dim_memory])
            self.g = layers.unit_norm(inputs=self.g, dim=2, epsilon=1e-12)

        with tf.variable_scope('memory'):
            # self.long_cbp is built elsewhere in this class — presumably
            # the long-term memory features (TODO confirm against caller).
            self.memory = tf.matmul(self.long_cbp, self.query_w) + self.query_b
            self.memory = layers.unit_norm(inputs=self.memory, dim=1, epsilon=1e-12)

            if self.write_mode:
                # The write CNN consumes an (N,H,W,C)-style 4-D view.
                self.memory = self.write_network(
                        tf.reshape(self.memory, shape=[1, -1, self.dim_memory, 1]))

            if self.read_mode:
                self.o = self.read_network(self.memory, self.u)
            else:
                # ----- baseline: plain soft attention over memory rows -----
                self.att = tf.nn.softmax(
                        self.sharp * tf.matmul(self.u, self.memory, transpose_b=True))
                self.o = tf.matmul(self.att, self.memory)

            # Residual update of the query with the read vector.
            self.o = layers.unit_norm(self.o, dim=1, epsilon=1e-12)
            self.u = self.o + self.u
            self.u = layers.unit_norm(self.u, dim=1, epsilon=1e-12)
            self.u = tf.reshape(self.u, shape=[-1, self.dim_memory, 1])

        # a.shape = (batch, num_answer): inner product of u with each answer.
        self.a = tf.reshape(
            tf.matmul(self.u, self.g, transpose_a=True, transpose_b=True),
            shape=[-1, self.num_answer])

        self.prob = tf.nn.softmax(self.a)
        self.answer_prediction = tf.argmax(self.prob, dimension=1)
        correct_prediction = tf.equal(self.answer_prediction, self.answer_index)
        self.acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        self.correct_examples = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))

        # L2 weight regularization over every trainable variable.
        self.reg_loss = 0.0
        for var in tf.trainable_variables():
            self.reg_loss += tf.nn.l2_loss(var)

        # BUG FIX: sparse_softmax_cross_entropy_with_logits expects raw
        # (unnormalized) logits and applies softmax internally. Feeding the
        # already-softmaxed self.prob applied softmax twice, flattening the
        # loss surface and the gradients. Feed the raw scores self.a.
        self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.a,
                labels=self.answer_index) + self.reg * self.reg_loss

        optimizer = tf.train.AdagradOptimizer(
                learning_rate=self.init_lr,
                initial_accumulator_value=FLAGS.init_accumulator_value)
        self.train_op = optimizer.minimize(self.loss)
Esempio n. 3
0
File: ltm.py Progetto: thientu/RWMN
    def read_network(self, memory, query):
        # memory.shape = (m, dim_memory)
        # query.shape = (batch, dim_memory)
        print 'Read-CNN', '-'*70
        print self.read
        memory = tf.reshape(memory, shape=[1, -1, self.dim_memory, 1])

        num_layer = len(self.read['kernel'])
        for i in range(num_layer):
            with tf.variable_scope('Read-CNN-%d' % i):
                memory = layers.convolution2d(
                        inputs=memory, num_outputs=self.read['channel'][i],
                        kernel_size=[self.read['kernel'][i], self.dim_memory],
                        stride=[self.read['stride'][i], 1],
                        weights_initializer=self.initializer_conv,
                        biases_initializer=self.initializer,
                        activation_fn=tf.nn.relu)

        # output_memory = (l, dim_memory)
        # query_shape = (batch, dim_memory)
        output_memory = tf.reshape(memory, shape=[-1, self.dim_memory])
        output_memory = layers.unit_norm(inputs=output_memory, dim=1, epsilon=1e-12)
        
        # att.shape = (batch, n')
        att = tf.matmul(query, output_memory, transpose_b=True)
        att = tf.nn.softmax(self.sharp * att)
        self.att = att
        # output.shape = (batch, dim_memory)
        output = tf.matmul(att, output_memory)
        
        # output.shape = (batch, dim_memory)
        return output
Esempio n. 4
0
def unit_norm(x, dim=2):
    """L2-normalize `x` along axis `dim`; epsilon guards zero vectors."""
    normalized = layers.unit_norm(x, dim=dim, epsilon=1e-12)
    return normalized
Esempio n. 5
0
    def build_model(self):
        """Assemble the QA graph: query/answer embeddings, `num_hop` memory
        write/read passes, answer scoring, loss, accuracy, and a train op
        with exponentially decaying Adagrad learning rate.
        """
        with tf.variable_scope('query'):
            # u.shape = (batch, dim_memory)
            self.u = tf.matmul(self.query, self.query_w) + self.query_b
            self.u = tf.reshape(self.u, shape=[-1, self.dim_memory])
            self.u = layers.unit_norm(inputs=self.u, dim=1, epsilon=1e-12)

        with tf.variable_scope('answer'):
            # g.shape = (batch, num_answer, dim_memory); answer candidates
            # share the query projection so they live in memory space.
            self.g = tf.matmul(tf.reshape(self.answer, shape=[-1, self.dim_text]),
                        self.query_w) + self.query_b
            self.g = tf.reshape(self.g, shape=[-1, self.num_answer, self.dim_memory])
            self.g = layers.unit_norm(inputs=self.g, dim=2, epsilon=1e-12)

        with tf.variable_scope('memory'):
            # Project the inputs into memory space with a learned FC layer.
            self.memory = self.fc(inputs=self.model_inputs,
                                  num_outputs=self.dim_memory, name='E')
            self.memory = layers.unit_norm(inputs=self.memory, dim=1, epsilon=1e-12)

            for _ in range(self.num_hop):
                if self.write_mode:
                    # The write CNN consumes an (N,H,W,C)-style 4-D view.
                    self.memory = self.write_network(
                            tf.reshape(self.memory, shape=[1, -1, self.dim_memory, 1]))

                if self.read_mode:
                    self.o = self.read_network(self.memory, self.u)
                else:
                    # ----- baseline: soft attention over memory rows -----
                    self.att = tf.nn.softmax(
                            self.sharp * tf.matmul(self.u, self.memory, transpose_b=True))
                    self.o = tf.matmul(self.att, self.memory)

            # Residual update of the query with the final read vector.
            self.o = layers.unit_norm(self.o, dim=1, epsilon=1e-12)
            self.u = self.o + self.u
            self.u = layers.unit_norm(self.u, dim=1, epsilon=1e-12)
            self.u = tf.reshape(self.u, shape=[-1, self.dim_memory, 1])

        # a.shape = (batch, num_answer): inner product of u with each answer.
        self.a = tf.reshape(
            tf.matmul(self.u, self.g, transpose_a=True, transpose_b=True),
            shape=[-1, self.num_answer])

        self.prob = tf.nn.softmax(self.a)
        self.answer_prediction = tf.argmax(self.prob, dimension=1)
        correct_prediction = tf.equal(self.answer_prediction, self.answer_index)
        self.acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        self.correct_examples = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))

        # L2 weight regularization over every trainable variable.
        self.reg_loss = 0.0
        for var in tf.trainable_variables():
            self.reg_loss += tf.nn.l2_loss(var)

        # BUG FIX: sparse_softmax_cross_entropy_with_logits expects raw
        # (unnormalized) logits and applies softmax internally. Feeding the
        # already-softmaxed self.prob applied softmax twice, flattening the
        # loss surface and the gradients. Feed the raw scores self.a.
        self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.a,
                labels=self.answer_index) + self.reg * self.reg_loss

        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(
            self.init_lr,
            global_step,
            FLAGS.learning_rate_decay_examples,
            FLAGS.learning_rate_decay_rate,
            staircase=True)

        optimizer = tf.train.AdagradOptimizer(learning_rate)
        # BUG FIX: minimize() must be given global_step so it increments it
        # each training step; without it exponential_decay saw a constant
        # step of 0 and the learning rate never actually decayed.
        self.train_op = optimizer.minimize(self.loss, global_step=global_step)