Beispiel #1
0
 def __init__(self):
     """
     Create the dictionary helper, load the word vectors and run the
     YukiVisual base initialiser.

     Raises:
         SystemExit: when LSTM_MODEL_PATH does not exist. The exit carries
             a message (and therefore a non-zero status) instead of
             terminating silently.
     """
     # Dictionary helper built from ANSWERS_DICT_PATH.
     self.dealer = DataDealer(ANSWERS_DICT_PATH)
     if not os.path.exists(LSTM_MODEL_PATH):
         # The bare exit() helper comes from the `site` module and is meant
         # for interactive use; raise SystemExit directly and say why.
         raise SystemExit(
             'LSTM model path not found: {}'.format(LSTM_MODEL_PATH))
     self.model_word2vec = gensim.models.KeyedVectors.load_word2vec_format(
         GLOVE_WIKI_GENSIM_DATA_PATH)
     # No image opened yet; browsing starts in the current directory.
     self.img = None
     self.path = '.\\'
     YukiVisual.__init__(self)
Beispiel #2
0
 def __init__(self):
     """Prepare the dictionary helper, the sample reader and the word vectors."""
     # VGG parameters start out unset.
     self.weight_vgg = self.biase_vgg = None
     # Dictionary helper built from ANSWERS_DICT_PATH.
     self.dealer = DataDealer(ANSWERS_DICT_PATH)
     # Reader over the TRAIN_DATA_TYPE sample set; set_pos() is called
     # with its default arguments before the reader is stored.
     sample_reader = DataReader(TRAIN_DATA_TYPE)
     sample_reader.set_pos()
     self.reader = sample_reader
     load_vectors = gensim.models.KeyedVectors.load_word2vec_format
     self.model_word2vec = load_vectors(GLOVE_WIKI_GENSIM_DATA_PATH)
Beispiel #3
0
def createQuestionsDict():
    """
    Create the question dictionary (it also contains the answer dictionary).

    A DataDealer constructed with ANSWERS_DICT_PATH is fed every question
    string the DataReader provides, walking picture ids until the reader
    hands back the first id again. The result is saved to
    QUESTIONS_DICT_PATH.
    """
    reader = DataReader()
    reader.set_pos()
    dealer = DataDealer(ANSWERS_DICT_PATH)

    def feed_questions(pic_id):
        # Pass each question text attached to this picture to the dealer.
        for entry in reader.get_pic_qa(pic_id):
            dealer.deal(entry['question'])

    first_id = reader.get_next_pic_id()
    feed_questions(first_id)

    handled = 0
    current_id = reader.get_next_pic_id()
    while current_id != first_id:
        feed_questions(current_id)
        current_id = reader.get_next_pic_id()
        handled += 1
        # Progress marker every 1000 pictures.
        if handled % 1000 == 0:
            print('*', end='')

    dealer.saveData(QUESTIONS_DICT_PATH)
    print('over!')
Beispiel #4
0
def createAnswersDict():
    """
    Create the answer dictionary.

    For every question of every picture the answers are scored by
    confidence ('yes' adds 1, 'maybe' adds 0.5, anything else adds 0).
    Answers whose total reaches 3 are passed to the dealer. Pictures are
    walked until the reader hands back the first id again, then the dealer
    saves its data.
    """
    reader = DataReader()
    reader.set_pos()
    dealer = DataDealer(ANSWERS_DICT_PATH)

    def register_answers(pic_id):
        # Score and register the answers of each question of one picture.
        for entry in reader.get_pic_qa(pic_id):
            scores = {}
            for vote in entry['answers']:
                text = vote['answer']
                confidence = vote['answer_confidence']
                if confidence == 'yes':
                    gain = 1
                elif confidence == 'maybe':
                    gain = 0.5
                else:
                    gain = 0
                scores[text] = scores.get(text, 0) + gain
            for text, total in scores.items():
                if total >= 3:
                    dealer.deal(text)

    first_id = reader.get_next_pic_id()
    register_answers(first_id)

    handled = 0
    current_id = reader.get_next_pic_id()
    while current_id != first_id:
        register_answers(current_id)
        current_id = reader.get_next_pic_id()
        handled += 1
        # Progress marker every 1000 pictures.
        if handled % 1000 == 0:
            print('*', end='')

    dealer.saveData()
    print('over!')
Beispiel #5
0
 def __init__(self):
     """Create the dictionary helper used by this object."""
     # Get the dictionary: a DataDealer built from ANSWERS_DICT_PATH.
     self.dealer = DataDealer(ANSWERS_DICT_PATH)
Beispiel #6
0
class TrainNetForVQA:
    """
    Trainer for the question/image LSTM network.

    Samples are read from TFRecord files holding a multi-hot answer vector,
    QUESTION_MAX_LEN * 300 question floats, 7 * 7 * 512 image floats and the
    question length. The network outputs one sigmoid score per dictionary
    word.
    """

    def __init__(self):
        # Dictionary helper; getWordNum() sizes the labels and output layer.
        self.dealer = DataDealer(ANSWERS_DICT_PATH)

    def __loadBatch(self, capacity, batch_size, file_name):
        """
        Build the input pipeline over one randomly chosen TFRecord file.

        capacity:   buffer size handed to Dataset.shuffle
        batch_size: number of samples taken per batch
        file_name:  path whose directory holds the TFRecord files

        Returns the (label, question, img, question_len) tensors produced by
        a one-shot iterator.

        Raises FileNotFoundError when the directory does not exist or does
        not contain any file.
        """
        def parse(example):
            features = tf.parse_single_example(example, features={
                'answer': tf.FixedLenFeature([self.dealer.getWordNum()], tf.int64),
                'question': tf.FixedLenFeature([QUESTION_MAX_LEN * 300], tf.float32),
                'img': tf.FixedLenFeature([7 * 7 * 512], tf.float32),
                'question_len': tf.FixedLenFeature([1], tf.int64)})
            answer = tf.cast(features['answer'], dtype=tf.float32)
            question = tf.cast(features['question'], dtype=tf.float32)
            img = tf.cast(features['img'], dtype=tf.float32)
            question_len = tf.cast(features['question_len'], dtype=tf.int64)
            return answer, question, img, question_len

        record_dir = os.path.dirname(file_name) or '.'
        # Only regular files directly inside the directory are candidates.
        # The previous os.walk() loop kept the names from the last visited
        # sub-directory and left `files` unbound for a missing directory.
        files = [
            os.path.join(record_dir, entry)
            for entry in sorted(os.listdir(record_dir))
            if os.path.isfile(os.path.join(record_dir, entry))
        ]
        if not files:
            raise FileNotFoundError(
                'no TFRecord file found in: {}'.format(record_dir))
        dataset = tf.data.TFRecordDataset([random.choice(files)])
        # NOTE(review): shuffle runs after batch, so whole batches are
        # shuffled rather than single samples - confirm this is intended.
        dataset = dataset.map(parse).repeat().batch(batch_size).shuffle(buffer_size=capacity)
        iterator = dataset.make_one_shot_iterator()
        label_batch, question_batch, img_batch, question_len_batch = iterator.get_next()
        return label_batch, question_batch, img_batch, question_len_batch

    def __get_variables_to_restore(self, name_part):
        """Return the trainable variables whose name contains name_part."""
        return [v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                if name_part in v.name]

    def train(self, batch_size, iterate_time, learning_rate):
        """
        Train the network and save it to LSTM_MODEL_PATH + LSTM_MODEL_NAME.

        batch_size:    samples per step
        iterate_time:  number of training steps (also used as shuffle buffer size)
        learning_rate: step size of the gradient descent optimizer

        An existing checkpoint at the model path is restored before training.
        """
        model_path = LSTM_MODEL_PATH + LSTM_MODEL_NAME
        word_num = self.dealer.getWordNum()
        # Randomly initialised weights.
        weights = {
            'w_pic': tf.get_variable('weight_of_pic', initializer=tf.random_normal([512, 512])),
            'w_q': tf.get_variable('weight_of_question', initializer=tf.random_normal([300, 300])),
            'w_q_out': tf.get_variable('weight_of_question_output', initializer=tf.random_normal([300, 512])),
            'w_pos': tf.get_variable('pos_weight_of_question_output', initializer=tf.random_normal([300, 49])),
            'w_pic_in': tf.get_variable('weight_of_pic_in_lstm', initializer=tf.random_normal([512, 300])),
            'w_q_in': tf.get_variable('weight_of_question_in_lstm', initializer=tf.random_normal([300, 300])),
            'out': tf.get_variable('wo', initializer=tf.random_normal([300, word_num]))
        }
        biases = {
            'b_pic': tf.get_variable('biase_of_pic', initializer=tf.random_normal([512, ])),
            'b_q': tf.get_variable('biase_of_question', initializer=tf.random_normal([300, ])),
            'b_q_out': tf.get_variable('biase_of_question_output', initializer=tf.random_normal([512, ])),
            'b_pos': tf.get_variable('pos_biase_of_question_output', initializer=tf.random_normal([49, ])),
            'b_pic_in': tf.get_variable('biase_of_pic_in_lstm', initializer=tf.random_normal([300, ])),
            'b_q_in': tf.get_variable('biase_of_question_in_lstm', initializer=tf.random_normal([300, ])),
            'out': tf.get_variable('bo', initializer=tf.random_normal([word_num, ]))
        }

        # Load the data.
        label_batch, question_batch, img_batch, question_len_batch = self.__loadBatch(
            iterate_time, batch_size, TRAIN_BATCH_PATH)
        # Project every spatial cell of the image features.
        img_batch = tf.reshape(img_batch, [batch_size * 7 * 7, 512])
        img_batch = tf.add(tf.matmul(img_batch, weights['w_pic']), biases['b_pic'])
        img_batch = tf.nn.leaky_relu(img_batch)
        # Spatial sequence: [batch_size, 49, 512].
        img_batch = tf.reshape(img_batch, [batch_size, 7 * 7, 512])

        # Question lengths.
        question_len_batch = tf.reshape(question_len_batch, [batch_size])
        label_batch = tf.reshape(label_batch, [batch_size, word_num])
        question_batch = tf.reshape(question_batch, [batch_size * QUESTION_MAX_LEN, 300])
        data = tf.add(tf.matmul(question_batch, weights['w_q']), biases['b_q'])
        data_batch = tf.reshape(data, [batch_size, -1, 300])
        # Build the question sequence.
        data_batch = tf.nn.leaky_relu(data_batch)

        # LSTM that extracts the question feature.
        question_lstm_cell = tf.nn.rnn_cell.LSTMCell(300, name='QuestionLSTMCell')
        # Dropout.
        question_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            question_lstm_cell, input_keep_prob=0.7, output_keep_prob=0.7)
        # Zero initial state.
        q_init_state = question_lstm_cell.zero_state(batch_size, dtype=tf.float32)
        question_outputs, question_states = tf.nn.dynamic_rnn(
            question_lstm_cell, data_batch, initial_state=q_init_state,
            dtype=tf.float32, sequence_length=question_len_batch)
        question_output = question_states.h
        # Feature extracted from the question sequence.
        question_output = tf.reshape(question_output, (batch_size, 300))

        # Spatial weights derived from the question feature: [batch_size, 49].
        pos_weight = tf.add(tf.matmul(question_output, weights['w_pos']), biases['b_pos'])
        pos_weight = tf.reshape(pos_weight, [1, batch_size * 49])
        # Sigmoid gate, intended to forget spatial cells that are not needed.
        pos_weight = tf.sigmoid(pos_weight)
        img_batch = tf.transpose(img_batch, (2, 0, 1))
        img_batch = tf.reshape(img_batch, (512, batch_size * 7 * 7))
        img_batch = img_batch * pos_weight
        img_batch = tf.reshape(img_batch, (512, batch_size, 7 * 7))
        img_batch = tf.transpose(img_batch, (1, 2, 0))

        # Channel weights derived from the question feature: [batch_size, 512].
        question_weight = tf.add(tf.matmul(question_output, weights['w_q_out']), biases['b_q_out'])
        question_weight = tf.reshape(question_weight, [1, batch_size * 512])
        # Sigmoid gate, intended to forget object features that are not needed.
        question_weight = tf.sigmoid(question_weight)
        # Forget part of the object features.
        img_batch = tf.transpose(img_batch, (1, 0, 2))
        img_batch = tf.reshape(img_batch, (7 * 7, batch_size * 512))
        img_batch = img_batch * question_weight
        img_batch = tf.reshape(img_batch, [7 * 7, batch_size, 512])
        img_batch = tf.transpose(img_batch, (1, 0, 2))

        img_lstm_cell = tf.nn.rnn_cell.LSTMCell(512, name='PicLSTMCell')
        # Dropout.
        img_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            img_lstm_cell, input_keep_prob=0.7, output_keep_prob=0.7)
        # Zero initial state.
        img_init_state = img_lstm_cell.zero_state(batch_size, dtype=tf.float32)
        img_outputs, img_states = tf.nn.dynamic_rnn(
            img_lstm_cell, img_batch, initial_state=img_init_state, dtype=tf.float32)
        img_output = img_states.h

        # The image feature becomes the first step of the final sequence,
        # followed by the projected question words.
        img_in = tf.add(tf.matmul(img_output, weights['w_pic_in']), biases['b_pic_in'])
        img_in = tf.reshape(img_in, [batch_size, 1, 300])
        q_in = tf.add(tf.matmul(question_batch, weights['w_q_in']), biases['b_q_in'])
        q_in = tf.reshape(q_in, [batch_size, QUESTION_MAX_LEN, 300])
        data_add = tf.concat(axis=1, values=[img_in, q_in])
        # Bound the magnitude.
        data_add = tf.sigmoid(data_add)
        lstm_cell = tf.nn.rnn_cell.LSTMCell(300, name='LSTMCell')
        init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
        # One extra step for the prepended image feature.
        question_len_batch = question_len_batch + 1
        outputs, states = tf.nn.dynamic_rnn(
            lstm_cell, data_add, initial_state=init_state,
            dtype=tf.float32, sequence_length=question_len_batch)

        pred = tf.add(tf.matmul(states.h, weights['out']), biases['out'])
        pred = tf.sigmoid(pred)

        # Keep the logarithms below finite.
        pred = tf.clip_by_value(pred, 1e-7, 1.0 - 1e-7)

        # Cross entropy. Positives are far rarer than negatives, so the
        # positive term is boosted by a constant factor.
        up = 5
        loss = -(tf.log(pred) * label_batch * up + (1 - label_batch) * tf.log(1 - pred))
        loss = tf.reduce_mean(loss, name='loss')

        # Accuracy reported as 1 - mean absolute error.
        accuracy = tf.abs(pred - label_batch)
        accuracy = 1 - tf.reduce_mean(accuracy, name='accuracy')

        # Plain gradient descent optimizer.
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

        # Training step.
        train = optimizer.minimize(loss)

        saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))

        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
            init = tf.group(tf.global_variables_initializer())
            sess.run(init)
            # Restore only when a checkpoint really exists at model_path; an
            # existing but empty model directory no longer aborts training.
            if tf.train.checkpoint_exists(model_path):
                saver.restore(sess, model_path)
            for m in range(iterate_time):
                sess.run(train)
                # NOTE(review): this second run pulls a fresh batch, so the
                # reported numbers are not from the batch just trained on.
                ac, lo = sess.run([accuracy, loss])
                # Report only on steps whose 1-based index divides iterate_time.
                if not iterate_time % (m + 1) == 0:
                    continue
                print('loss:', end='')
                print(lo)
                print('accuracy:', end='')
                print(ac)
            os.makedirs(LSTM_MODEL_PATH, exist_ok=True)
            save_path = saver.save(sess, model_path)
            print("Model saved in file: %s" % save_path)
Beispiel #7
0
class Tester(YukiVisual):
    """
    Interactive front end for the trained network.

    Messages are interpreted as small commands (`cd <dir>`, `op <image>`,
    `ls`); anything else is treated as a question about the opened image.
    """

    def __init__(self):
        """
        Load the dictionary and the word vectors, then run the YukiVisual
        initialiser.

        Raises SystemExit (with a message) when LSTM_MODEL_PATH is missing.
        """
        # Dictionary helper built from ANSWERS_DICT_PATH.
        self.dealer = DataDealer(ANSWERS_DICT_PATH)
        if not os.path.exists(LSTM_MODEL_PATH):
            # Say why we stop instead of exiting silently with status 0.
            raise SystemExit(
                'LSTM model path not found: {}'.format(LSTM_MODEL_PATH))
        self.model_word2vec = gensim.models.KeyedVectors.load_word2vec_format(
            GLOVE_WIKI_GENSIM_DATA_PATH)
        self.img = None
        # Current browsing directory, always ending with a separator.
        self.path = '.' + os.sep
        YukiVisual.__init__(self)

    def __cd(self, path_):
        """
        Enter the directory path_ relative to the current one.

        Returns True on success, False when the target is not a directory.
        """
        target = os.path.join(self.path, path_)
        if not os.path.isdir(target):
            return False
        self.path = target + os.sep
        return True

    def __op(self, image_name):
        """
        Open and show an image from the current directory, then store it as
        a 224x224 RGB array in self.img.

        Returns True on success, False when the file is missing or cannot be
        read as an image.
        """
        image_path = os.path.join(self.path, image_name)
        if not os.path.exists(image_path):
            return False
        try:
            picture = Image.open(image_path)
            picture.show()
            # Force three channels: grayscale or RGBA input would not fit the
            # [1, 224, 224, 3] reshape done before the VGG19 pass.
            picture = picture.convert('RGB').resize((224, 224))
        except OSError:
            # Unreadable or non-image file.
            return False
        self.img = numpy.array(picture)
        return True

    def __getAnswer(self, question):
        """
        Run the network on the opened image and the given question.

        Returns the array of scores (one per dictionary word), or an error
        string when no image has been opened.
        """
        if getattr(self.img, 'shape', None) is None:
            return "未打开有效图片!"

        # Fresh graph for the VGG19 pass.
        tf.reset_default_graph()

        img_batch = tf.Variable(self.img, dtype=tf.float32)
        img_batch = tf.reshape(img_batch, [1, 224, 224, 3])
        # Result of the VGG19 network.
        img_batch = self.__getVGG19Result(img_batch)

        # Fresh graph for the question answering network.
        tf.reset_default_graph()

        img_batch = tf.cast(img_batch, tf.float32)

        question = question.replace('\n', '')
        question = question.replace('?', '')
        question = question.replace(',', ' ,')
        question = question.replace('.', ' .')
        question = question.split(' ')
        data = []
        for word in question:
            # data ends up with shape (len(question), 300).
            try:
                data.append(list(self.model_word2vec[word]))
            except KeyError:
                # Words missing from the vocabulary become zero vectors.
                data.append([0] * 300)
        question_batch = tf.cast(data, tf.float32)

        model_path = LSTM_MODEL_PATH + LSTM_MODEL_NAME
        word_num = self.dealer.getWordNum()
        # Variables are created with random values and overwritten by the
        # restored checkpoint below; names must match the trained graph.
        weight_specs = (
            ('w_pic', 'weight_of_pic', [512, 512]),
            ('w_q', 'weight_of_question', [300, 300]),
            ('w_q_out', 'weight_of_question_output', [300, 512]),
            ('w_pos', 'pos_weight_of_question_output', [300, 49]),
            ('w_pic_in', 'weight_of_pic_in_lstm', [512, 300]),
            ('w_q_in', 'weight_of_question_in_lstm', [300, 300]),
            ('out', 'wo', [300, word_num]),
        )
        bias_specs = (
            ('b_pic', 'biase_of_pic', [512]),
            ('b_q', 'biase_of_question', [300]),
            ('b_q_out', 'biase_of_question_output', [512]),
            ('b_pos', 'pos_biase_of_question_output', [49]),
            ('b_pic_in', 'biase_of_pic_in_lstm', [300]),
            ('b_q_in', 'biase_of_question_in_lstm', [300]),
            ('out', 'bo', [word_num]),
        )
        weights = {
            key: tf.get_variable(name, initializer=tf.random_normal(shape))
            for key, name, shape in weight_specs
        }
        biases = {
            key: tf.get_variable(name, initializer=tf.random_normal(shape))
            for key, name, shape in bias_specs
        }

        # Project every spatial cell of the image features.
        img_batch = tf.reshape(img_batch, [7 * 7, 512])
        img_batch = tf.add(tf.matmul(img_batch, weights['w_pic']),
                           biases['b_pic'])
        img_batch = tf.nn.leaky_relu(img_batch)
        # Spatial sequence: [1, 49, 512].
        img_batch = tf.reshape(img_batch, [1, 7 * 7, 512])

        question_batch = tf.reshape(question_batch, [1 * len(question), 300])
        data = tf.add(tf.matmul(question_batch, weights['w_q']), biases['b_q'])
        data_batch = tf.reshape(data, [1, -1, 300])
        # Build the question sequence.
        data_batch = tf.nn.leaky_relu(data_batch)

        # LSTM that extracts the question feature.
        question_lstm_cell = tf.nn.rnn_cell.LSTMCell(300,
                                                     name='QuestionLSTMCell')
        # NOTE(review): dropout stays active here although this is the
        # prediction path - confirm whether that is intended.
        question_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            question_lstm_cell, input_keep_prob=0.7, output_keep_prob=0.7)
        # Zero initial state.
        q_init_state = question_lstm_cell.zero_state(1, dtype=tf.float32)
        question_outputs, question_states = tf.nn.dynamic_rnn(
            question_lstm_cell,
            data_batch,
            initial_state=q_init_state,
            dtype=tf.float32)
        question_output = question_states.h
        # Feature extracted from the question sequence.
        question_output = tf.reshape(question_output, (1, 300))

        # Spatial weights derived from the question feature: [1, 49].
        pos_weight = tf.add(tf.matmul(question_output, weights['w_pos']),
                            biases['b_pos'])
        pos_weight = tf.reshape(pos_weight, [1, 1 * 49])
        # Sigmoid gate, intended to forget spatial cells that are not needed.
        pos_weight = tf.sigmoid(pos_weight)
        img_batch = tf.transpose(img_batch, (2, 0, 1))
        img_batch = tf.reshape(img_batch, (512, 1 * 7 * 7))
        img_batch = img_batch * pos_weight
        img_batch = tf.reshape(img_batch, (512, 1, 7 * 7))
        img_batch = tf.transpose(img_batch, (1, 2, 0))

        # Channel weights derived from the question feature: [1, 512].
        question_weight = tf.add(
            tf.matmul(question_output, weights['w_q_out']), biases['b_q_out'])
        question_weight = tf.reshape(question_weight, [1, 1 * 512])
        # Sigmoid gate, intended to forget object features that are not needed.
        question_weight = tf.sigmoid(question_weight)
        # Forget part of the object features.
        img_batch = tf.transpose(img_batch, (1, 0, 2))
        img_batch = tf.reshape(img_batch, (7 * 7, 1 * 512))
        img_batch = img_batch * question_weight
        img_batch = tf.reshape(img_batch, [7 * 7, 1, 512])
        img_batch = tf.transpose(img_batch, (1, 0, 2))

        img_lstm_cell = tf.nn.rnn_cell.LSTMCell(512, name='PicLSTMCell')
        img_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(img_lstm_cell,
                                                      input_keep_prob=0.7,
                                                      output_keep_prob=0.7)
        # Zero initial state.
        img_init_state = img_lstm_cell.zero_state(1, dtype=tf.float32)
        img_outputs, img_states = tf.nn.dynamic_rnn(
            img_lstm_cell,
            img_batch,
            initial_state=img_init_state,
            dtype=tf.float32)
        img_output = img_states.h

        # The image feature becomes the first step of the final sequence,
        # followed by the projected question words.
        img_in = tf.add(tf.matmul(img_output, weights['w_pic_in']),
                        biases['b_pic_in'])
        img_in = tf.reshape(img_in, [1, 1, 300])
        q_in = tf.add(tf.matmul(question_batch, weights['w_q_in']),
                      biases['b_q_in'])
        q_in = tf.reshape(q_in, [1, len(question), 300])
        data_add = tf.concat(axis=1, values=[img_in, q_in])
        # Bound the magnitude.
        data_add = tf.sigmoid(data_add)
        lstm_cell = tf.nn.rnn_cell.LSTMCell(300, name='LSTMCell')
        init_state = lstm_cell.zero_state(1, dtype=tf.float32)
        outputs, states = tf.nn.dynamic_rnn(lstm_cell,
                                            data_add,
                                            initial_state=init_state,
                                            dtype=tf.float32)

        pred = tf.add(tf.matmul(states.h, weights['out']), biases['out'])
        pred = tf.sigmoid(pred)

        saver = tf.train.Saver(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
                allow_growth=True))) as sess:
            init = tf.group(tf.global_variables_initializer())
            sess.run(init)
            # Replace the random values with the trained ones.
            saver.restore(sess, model_path)
            scores = sess.run(pred)
        # The batch holds a single sample; return its row.
        return list(scores)[0]

    def __getVGG19Result(self, img_batch):
        """
        Run the VGG19 network.

        img_batch: tf.Variable -> [1, 224, 224, 3]
        Returns the first (only) entry of the evaluated network output.
        """
        # Load the VGG19 model.
        model = VGG19model()
        weight_vgg, biase_vgg = model.loadWeightsAndBiases(
            VGG19_WEIGHTS_AND_BIASE_PATH, False)
        out = model.getNet(img_batch, weight_vgg, biase_vgg, 0.2, True)
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)
            data = sess.run(out)
            data = data[0]
        return data

    def send_message(self, text):
        """
        Interpret self.received_text and reply through YukiVisual.send_message.

        `cd <dir>` changes directory, `op <image>` opens an image, `ls` lists
        the current directory, anything else is answered as a question with
        the five best scoring dictionary words.
        NOTE(review): the `text` argument is not used; the command is read
        from self.received_text - confirm against the YukiVisual contract.
        """
        text_list = self.received_text.split()
        if not text_list:
            # Blank input: nothing to execute or to ask.
            YukiVisual.send_message(self, 'wrong!\n')
            return
        command = text_list[0]
        # Commands typed without their argument are reported as wrong
        # instead of raising IndexError.
        argument = text_list[1] if len(text_list) > 1 else None

        if command == 'cd':
            if argument is not None and self.__cd(argument):
                YukiVisual.send_message(self, 'now in ' + self.path + '\n')
            else:
                YukiVisual.send_message(self, 'wrong!\n')
        elif command == 'op':
            if argument is not None and self.__op(argument):
                YukiVisual.send_message(
                    self, 'success!\n Please ask me questions.\n')
            else:
                YukiVisual.send_message(self, 'wrong!\n')
        elif command == 'ls':
            listing = ''.join(name + '\n ' for name in os.listdir(self.path))
            YukiVisual.send_message(self, listing)
        else:
            data = self.__getAnswer(self.received_text)
            if not isinstance(data, numpy.ndarray):
                YukiVisual.send_message(self, 'wrong!\n')
                return
            data = list(data)
            # Indices of the five highest scores; selecting by index keeps
            # the output at five lines even when scores tie.
            top_indices = sorted(range(len(data)),
                                 key=data.__getitem__)[-5:]
            YukiVisual.send_message(self, 'Top Five Answer:\n')
            # Reported in dictionary order.
            for i in sorted(top_indices):
                YukiVisual.send_message(
                    self, '*' * int(1 + data[i] * 10) +
                    ' ' * int(11 - data[i] * 10) + self.dealer.getWord(i) +
                    '      ' + str(data[i]) + '\n')
Beispiel #8
0
    def trainOne(self, batch_size, learning_rate, rounds=10):
        """
        Train on one random batch of pictures and save the model.

        batch_size:    number of pictures requested from the dealer
        learning_rate: step size of the gradient descent optimizer
        rounds:        number of optimisation steps

        Returns the evaluated [rpn_accuracy, bbox_accuracy] taken before the
        first step, or None when rounds <= 0.
        NOTE(review): both accuracy tensors come from the last picture of
        the batch only - confirm that this is the intended metric.
        """
        img_batch, label_batch, bbox_batch = self.dealer.getRandomTrainBatch(
            batch_size)

        model_dir = os.path.dirname(self.model_path)
        # Resume only when a checkpoint really exists; an existing but empty
        # model directory falls back to the initial checkpoint.
        RESTORE = tf.compat.v1.train.checkpoint_exists(self.model_path)

        weights = {
            # Channel reduction.
            'down':
            tf.compat.v1.get_variable(name='w_down',
                                      shape=[1, 1, 2048, 1024]),
            'feature':
            tf.compat.v1.get_variable(name='w_feature',
                                      shape=[1, 1, 1024, K * K * 2])
        }
        biases = {
            # Channel reduction.
            'down':
            tf.compat.v1.get_variable(name='b_down', shape=[
                1024,
            ]),
            'feature':
            tf.compat.v1.get_variable(name='b_feature', shape=[
                K * K * 2,
            ])
        }

        # Anchor sizes in K-cell units on the feature map; they depend only
        # on constants, so they are computed once for all pictures.
        anchor_type = [[
            int(x[0] / NET_SCALE / K),
            int(x[1] / NET_SCALE / K)
        ] for x in ANTHORS_TYPIES]

        for index in range(len(img_batch)):
            img = img_batch[index]
            label = label_batch[index]
            bbox = bbox_batch[index]

            # One constant per anchor type (nine in total).
            rpn_view = [
                tf.constant(label[i], dtype=tf.float32) for i in range(9)
            ]

            h, w, mod = numpy.shape(img)
            # Raw picture data as a batch of one.
            img = tf.constant(img, shape=(1, h, w, mod), dtype=tf.float32)

            # Backbone; variables are shared between pictures (AUTO_REUSE).
            net, endpoints = my_resnet(img,
                                       global_pool=False,
                                       num_classes=None,
                                       is_training=True,
                                       reuse=tf.compat.v1.AUTO_REUSE)

            net = tf.nn.conv2d(input=net,
                               filter=weights['down'],
                               strides=[1, 1, 1, 1],
                               padding='VALID')
            net = tf.add(net, biases['down'])

            # RPN branch.
            rpn_accuracy, rpn_result = trainRPN(net, rpn_view)

            # Feature map for the selected anchors.
            feature_map = tf.nn.conv2d(input=net,
                                       filter=weights['feature'],
                                       strides=[1, 1, 1, 1],
                                       padding='VALID')
            feature_map = tf.add(feature_map, biases['feature'])

            # Entries of the chosen anchors, indexed as (type, row, column).
            select = DataDealer.chooseClassficationData(label)

            select_img = []
            select_label = []
            select_bbox = []
            for s in select:
                cell_w, cell_h = anchor_type[s[0]]
                img_ = feature_map[0, s[1]:(s[1] + cell_h * K),
                                   s[2]:(s[2] + cell_w * K)]
                img_ = tf.expand_dims(img_, 0)
                # Pool the crop with a window equal to the anchor cell size.
                select_img.append(
                    tf.nn.avg_pool2d(img_, [1, cell_h, cell_w, 1],
                                     [1, cell_h, cell_w, 1],
                                     padding='VALID'))
                select_label.append(label[s[0]][s[1]][s[2]])
                select_bbox.append(bbox[s[0]][s[1]][s[2]])

            # Bounding box branch.
            bbox_accuracy = trainBbox(select, select_img, select_bbox,
                                      select_label)

        # Plain gradient descent over the accumulated total loss.
        optimizer = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        train = optimizer.minimize(tf.compat.v1.losses.get_total_loss())

        saver = tf.compat.v1.train.Saver(
            tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES))

        # Stays None when no round is executed.
        ac = None
        with tf.compat.v1.Session() as sess:
            init = tf.compat.v1.global_variables_initializer()
            sess.run(init)

            if RESTORE:
                saver.restore(sess, self.model_path)
            else:
                # assign_from_checkpoint_fn only builds the restore function;
                # it has to be called with the session, otherwise nothing is
                # loaded. Variables absent from the initial checkpoint (the
                # layers added on top of the backbone) keep their initial
                # values, hence ignore_missing_vars=True.
                init_fn = slim.assign_from_checkpoint_fn(
                    self.init_model_path,
                    slim.get_variables_to_restore(),
                    ignore_missing_vars=True,
                    reshape_variables=False)
                # None is returned when no variable matches the checkpoint.
                if init_fn is not None:
                    init_fn(sess)
            for i in range(rounds):
                if i == 0:
                    ac = sess.run([rpn_accuracy, bbox_accuracy])
                sess.run(train)
            if model_dir:
                os.makedirs(model_dir, exist_ok=True)
            saver.save(sess, self.model_path)

        return ac
Beispiel #9
0
 def __init__(self, model_path=RPN_BATCH_PATH, init_model_path=RES_NET_101):
     """
     Remember the two model paths and create the data helper.

     model_path:      stored as self.model_path
     init_model_path: stored as self.init_model_path
     """
     self.model_path, self.init_model_path = model_path, init_model_path
     self.dealer = DataDealer()
Beispiel #10
0
class BatchMaker:
    """Writes training samples (answers, question vectors, image features) to TFRecord files."""

    def __init__(self):
        # Dictionary helper built from ANSWERS_DICT_PATH.
        self.dealer = DataDealer(ANSWERS_DICT_PATH)
        # Reader over the TRAIN_DATA_TYPE sample set.
        self.reader = DataReader(TRAIN_DATA_TYPE)
        self.reader.set_pos()
        # VGG parameters are loaded on first use by __getVGG19Result.
        self.weight_vgg = None
        self.biase_vgg = None
        self.model_word2vec = gensim.models.KeyedVectors.load_word2vec_format(
            GLOVE_WIKI_GENSIM_DATA_PATH)

    def __getOnehot(self, pos, value, len_):
        """
        Build a label list of length len_.

        Every index contained in pos (a list, or a single index) is set to
        value; all other entries are 0.
        """
        if not isinstance(pos, list):
            pos = [pos]
        # Set membership keeps the scan linear in len_.
        marked = set(pos)
        return [value if i in marked else 0 for i in range(len_)]

    def __getVGG19Result(self, img_batch):
        """
        Run the VGG19 network.

        img_batch: tf.Variable -> [1, 224, 224, 3]
        Returns the first (only) entry of the evaluated network output.
        """
        # Load the VGG19 model.
        model = VGG19model()
        # Identity checks: `== None` on array-like parameters would compare
        # element-wise instead of testing for "not loaded yet".
        if self.weight_vgg is None or self.biase_vgg is None:
            self.weight_vgg, self.biase_vgg = model.loadWeightsAndBiases(
                VGG19_WEIGHTS_AND_BIASE_PATH, False)
        out = model.getNet(img_batch, self.weight_vgg, self.biase_vgg, 0.2,
                           True)
        init = tf.global_variables_initializer()
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
                allow_growth=True))) as sess:
            sess.run(init)
            data = sess.run(out)
        return data[0]

    def __encodeQuestion(self, tokens):
        """
        Return the flat list of QUESTION_MAX_LEN * 300 floats for tokens.

        Words missing from the word2vec vocabulary contribute 300 zeros and
        the tail is zero padded.
        """
        vector = []
        for word in tokens:
            try:
                vector.extend(self.model_word2vec[word])
            except KeyError:
                # Unknown token (for example a stray punctuation mark).
                vector.extend([0] * 300)
        vector.extend([0] * ((QUESTION_MAX_LEN - len(tokens)) * 300))
        return vector

    def makeTrainBatch(self, start_pos=0, end_pos=0):
        """
        Write the samples between reader positions start_pos and end_pos to
        a TFRecord file derived from TRAIN_BATCH_PATH.

        The file name is TRAIN_BATCH_PATH with "<start_pos>_<end_pos>"
        inserted before the extension. Grayscale pictures and questions
        without a confident answer are skipped. This is slow.
        """
        out_dir = os.path.dirname(TRAIN_BATCH_PATH)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        # splitext only splits off the final extension, so dots elsewhere in
        # the path (for example a leading "./") no longer break the name.
        root, ext = os.path.splitext(TRAIN_BATCH_PATH)
        record_path = root + str(start_pos) + '_' + str(end_pos) + ext
        self.reader.set_pos(start_pos)

        # Load the VGG19 model.
        model = VGG19model()
        # Placeholder fed with one picture at a time.
        img_batch = tf.placeholder(dtype=tf.float32,
                                   shape=[224, 224, 3],
                                   name='IMG')
        img_batch_1 = tf.reshape(img_batch, [1, 224, 224, 3])
        weight_vgg, biase_vgg = model.loadWeightsAndBiases(
            VGG19_WEIGHTS_AND_BIASE_PATH, False)
        # Network output flattened to 7 * 7 * 512 values.
        out = model.getNet(img_batch_1, weight_vgg, biase_vgg, 0, True)
        out = tf.reshape(out, [1, 7 * 7 * 512])
        init = tf.global_variables_initializer()

        writer = tf.python_io.TFRecordWriter(record_path)
        try:
            with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
                    allow_growth=True))) as sess:
                sess.run(init)
                print('running...')
                while self.reader.get_pos() < end_pos:
                    now_id = self.reader.get_next_pic_id()
                    img = self.reader.get_pic_data(now_id)
                    # Skip grayscale pictures (no channel axis).
                    if len(img.shape) == 2:
                        continue
                    img_data = sess.run(out, feed_dict={img_batch: img})
                    img_data = img_data.tolist()[0]
                    for q in self.reader.get_pic_qa(now_id):
                        question = q['question']
                        question = question.replace('?', '')
                        question = question.replace(',', ' ,')
                        question = question.replace('.', ' .')
                        # Records hold exactly QUESTION_MAX_LEN word slots;
                        # longer questions are cut so they still parse.
                        question = question.split(' ')[:QUESTION_MAX_LEN]

                        # Confidence score per answer: 'yes' adds 1,
                        # 'maybe' adds 0.5; a total of 3 or more counts.
                        answers = dict()
                        for a in q['answers']:
                            answer = a['answer']
                            weight = 0
                            if a['answer_confidence'] == 'yes':
                                weight = 1
                            elif a['answer_confidence'] == 'maybe':
                                weight = 0.5
                            answers[answer] = answers.get(answer, 0) + weight
                        answers_list = [
                            self.dealer.deal(key)[1]
                            for key, total in answers.items() if total >= 3
                        ]
                        # Skip questions without a confident answer.
                        if not answers_list:
                            continue
                        label = self.__getOnehot(answers_list, 1,
                                                 self.dealer.getWordNum())
                        data = self.__encodeQuestion(question)
                        example = tf.train.Example(features=tf.train.Features(
                            feature={
                                # len = self.dealer.getWordNum()
                                "answer":
                                tf.train.Feature(
                                    int64_list=tf.train.Int64List(value=label)),
                                # len = QUESTION_MAX_LEN * 300
                                "question":
                                tf.train.Feature(float_list=tf.train.FloatList(
                                    value=data)),
                                # len = 7 * 7 * 512
                                'img':
                                tf.train.Feature(float_list=tf.train.FloatList(
                                    value=img_data)),
                                # len = 1
                                'question_len':
                                tf.train.Feature(int64_list=tf.train.Int64List(
                                    value=[len(question)]))
                            }))
                        writer.write(example.SerializeToString())
        finally:
            # Flush and close the file even when a picture fails.
            writer.close()
        print('over!')