예제 #1
0
파일: vqa1.py 프로젝트: chen0040/mxnet-vqa
 def predict_answer_class(self, img_path, question):
     """Predict the answer class index for one image/question pair.

     The question is lower-cased, tokenized and encoded word by word with
     ``self.glove_model.encode_word``. How the word vectors are combined
     depends on ``self.meta['questions_matrix_shape']``:

     * length 2: the first entry is used as the maximum sequence length;
       the word vectors are written into a zero-initialised
       ``(1, max_seq_length, 300)`` array (extra words are dropped, unused
       trailing slots stay zero) which is then flattened to
       ``(1, max_seq_length * 300)``.
     * otherwise: the word vectors are summed into a single 300-element
       vector, reshaped to ``(1, 300)``.

     :param img_path: path handed to ``self.fe.extract_image_features``.
     :param question: question text (a string).
     :return: index of the highest-scoring class as a numpy ``int32``
         scalar.
     """
     embedding_dim = 300  # width of the word vectors used throughout
     image_features = self.fe.extract_image_features(img_path)
     questions_matrix_shape = self.meta['questions_matrix_shape']
     words = word_tokenize(question.lower())

     if len(questions_matrix_shape) == 2:
         # Sequence mode: one slot per word position, zero-padded at the end.
         max_seq_length = questions_matrix_shape[0]
         question_matrix = np.zeros(
             shape=(1, max_seq_length, embedding_dim))
         for i, word in enumerate(words[:max_seq_length]):
             question_matrix[0, i, :] = self.glove_model.encode_word(word)
         flat_size = max_seq_length * embedding_dim
     else:
         # Bag-of-words mode: one column per word, summed across words.
         E = np.zeros(shape=(embedding_dim, len(words)))
         for j, word in enumerate(words):
             E[:, j] = self.glove_model.encode_word(word)
         question_matrix = np.sum(E, axis=1)
         flat_size = embedding_dim

     input_data = [
         image_features.as_in_context(self.model_ctx),
         nd.array(question_matrix,
                  ctx=self.model_ctx).reshape(1, flat_size)
     ]
     output = self.model(input_data)
     # Cast to int32, not uint8: uint8 cannot represent class indices >= 256,
     # so a model with more than 256 answer classes would return a corrupted
     # index.
     return nd.argmax(output, axis=1).astype(np.int32).asscalar()
예제 #2
0
def get_questions_matrix_sum(data_dir_path,
                             max_lines_retrieved=-1,
                             split='val'):
    """Encode each question as the sum of its words' 300-d GloVe vectors.

    Questions are fetched with ``get_questions`` and the word-to-vector
    lookup with ``glove_word2emb_300``. The first element of each question
    record is lower-cased and tokenized; words missing from the lookup
    contribute a zero vector.

    :param data_dir_path: forwarded to ``get_questions`` and
        ``glove_word2emb_300``.
    :param max_lines_retrieved: forwarded to ``get_questions``.
    :param split: forwarded to ``get_questions``.
    :return: numpy array built from one 300-element vector per question
        (one row per question when at least one question is loaded).
    """
    questions = get_questions(data_dir_path, max_lines_retrieved, split)
    embeddings = glove_word2emb_300(data_dir_path)
    logging.debug('glove: %d words loaded', len(embeddings))

    summed_vectors = []
    for count, question in enumerate(questions, start=1):
        tokens = word_tokenize(question[0].lower())
        # One column per token; columns of unknown tokens remain zero.
        columns = np.zeros(shape=(300, len(tokens)))
        for col, token in enumerate(tokens):
            if token in embeddings:
                columns[:, col] = embeddings[token]
        summed_vectors.append(columns.sum(axis=1))
        # Progress report every 10000 questions.
        if count % 10000 == 0:
            logging.debug('loaded %d questions', count)

    return np.array(summed_vectors)
예제 #3
0
def get_questions_matrix_concat(data_dir_path,
                                max_lines_retrieved=-1,
                                split='val',
                                max_sequence_length=-1):
    """Encode each question as a padded sequence of 300-d GloVe vectors.

    Questions are fetched with ``get_questions`` and the word-to-vector
    lookup with ``glove_word2emb_300``. The first element of each question
    record is lower-cased and tokenized; every token becomes its looked-up
    vector, or a 300-element zero vector when it is missing from the
    lookup. The per-question sequences are then handed to
    ``pad_sequences``.

    :param data_dir_path: forwarded to ``get_questions`` and
        ``glove_word2emb_300``.
    :param max_lines_retrieved: forwarded to ``get_questions``.
    :param split: forwarded to ``get_questions``.
    :param max_sequence_length: forwarded to ``pad_sequences``.
    :return: whatever ``pad_sequences`` returns for the encoded sequences.
    """
    questions = get_questions(data_dir_path, max_lines_retrieved, split)
    embeddings = glove_word2emb_300(data_dir_path)
    logging.debug('glove: %d words loaded', len(embeddings))

    sequences = []
    for count, question in enumerate(questions, start=1):
        tokens = word_tokenize(question[0].lower())
        sequences.append([
            embeddings[token] if token in embeddings else np.zeros(shape=300)
            for token in tokens
        ])
        # Progress report every 10000 questions.
        if count % 10000 == 0:
            logging.debug('loaded %d questions', count)

    return pad_sequences(sequences, max_sequence_length=max_sequence_length)