Example #1
class preprocessing:
	def __init__(self, annotation_file=annFile, question_file=quesFile):
		self.vqar = VQA(annotation_file, question_file)
		self.annIds = self.vqar.getQuesIds()
		self.anns = self.vqar.loadQA(self.annIds)  # every question loaded as an annotation dict

		self.l=[a['multiple_choice_answer'] for a in self.anns]
		self.c=collections.Counter(self.l)
		self.Selected_key=[]
		self.Selected_keys={}
		self.i=0
		for a in self.c.most_common(1000):
			self.Selected_key.append(a[0])
			self.Selected_keys[a[0]] = self.i
			self.i += 1

		self.Question_element=[]
		for ele in self.anns:
			if ele['multiple_choice_answer'] in self.Selected_keys:
				self.Question_element.append(ele)
		self.qqa = {ann['question_id']: [] for ann in self.Question_element}
		print 'assigning questions '
		y=0
		for ques in self.vqar.questions['questions']:
			print 'done',y
			y+=1
			if ques['question_id'] in self.qqa:
				self.qqa[ques['question_id']] = ques
		print 'assigning questions finished'
		ques_words = []
		for ann in self.Question_element:
			quesId = ann['question_id']
			for word in self.qqa[quesId]['question'].split():  # split the question string into words, not characters
				ques_words.append(word)
		s=collections.Counter(ques_words)
		self.Selected_ques={}
		j=0
		for a in s.most_common(5000):
			self.Selected_ques[a[0]] = j
			j+=1

		print 'elements list completed'

	def load_class_dict(self):
		return self.Selected_keys

	def load_Q_final(self):
		return self.Selected_ques
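
A brief usage sketch (an assumption, not part of the original example) of how the preprocessing helper above might be called to obtain the answer-class and question-word dictionaries; annFile and quesFile are assumed to point at VQA annotation and question JSON files:

# Hedged sketch: annFile and quesFile are placeholder paths to the VQA annotation/question JSON files.
prep = preprocessing(annotation_file=annFile, question_file=quesFile)
answer_to_class = prep.load_class_dict()   # 1000 most common answers -> class index
word_to_index = prep.load_Q_final()        # 5000 most common question words -> index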
Example #2
print("\n")
print("Overall Accuracy is: %.02f\n" %(vqaEval.accuracy['overall']))
print("Per Question Type Accuracy is the following:")
for quesType in vqaEval.accuracy['perQuestionType']:
	print("%s : %.02f" %(quesType, vqaEval.accuracy['perQuestionType'][quesType]))
print("\n")
print("Per Answer Type Accuracy is the following:")
for ansType in vqaEval.accuracy['perAnswerType']:
	print("%s : %.02f" %(ansType, vqaEval.accuracy['perAnswerType'][ansType]))
print("\n")
# demo how to use evalQA to retrieve low score result
evals = [quesId for quesId in vqaEval.evalQA if vqaEval.evalQA[quesId]<35]   # 35 is the per-question accuracy threshold, in percent
if len(evals) > 0:
	print('ground truth answers')
	randomEval = random.choice(evals)
	randomAnn = vqa.loadQA(randomEval)
	vqa.showQA(randomAnn)

	print('\n')
	print('generated answer (accuracy %.02f)'%(vqaEval.evalQA[randomEval]))
	ann = vqaRes.loadQA(randomEval)[0]
	print("Answer:   %s\n" %(ann['answer']))

	imgId = randomAnn[0]['image_id']
	imgFilename = 'COCO_' + dataSubType + '_'+ str(imgId).zfill(12) + '.jpg'
	if os.path.isfile(imgDir + imgFilename):
		I = io.imread(imgDir + imgFilename)
		plt.imshow(I)
		plt.axis('off')
		plt.show()
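
For context, a minimal sketch (an assumption, not part of the original example) of how vqa, vqaRes, and vqaEval are typically built with the VQA evaluation tools before the report above; the import paths and resFile are placeholders:

# Hedged sketch: import paths and resFile are assumptions; n=2 reports accuracies to two decimals.
from vqaTools.vqa import VQA
from vqaEvaluation.vqaEval import VQAEval

vqa = VQA(annFile, quesFile)               # ground-truth annotations and questions
vqaRes = vqa.loadRes(resFile, quesFile)    # model results in the VQA result format
vqaEval = VQAEval(vqa, vqaRes, n=2)
vqaEval.evaluate()                         # fills vqaEval.accuracy and vqaEval.evalQA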
Example #3
print('Enter the quesTypes (\'what color\', \'is this\', ..., \'all\')')
# quesTypes = input()
quesTypes = 'what is'

if quesTypes == 'all':
    annIdsA = vqa.getQuesIds()
    tannIdsA = tvqa.getQuesIds()
    imgIdsA = vqa.getImgIds()
    timgIdsA = tvqa.getImgIds()
else:
    annIdsA = vqa.getQuesIds(quesTypes=quesTypes)
    tannIdsA = tvqa.getQuesIds(quesTypes=quesTypes)
    imgIdsA = vqa.getImgIds(quesTypes=quesTypes)
    timgIdsA = tvqa.getImgIds(quesTypes=quesTypes)

annsA = vqa.loadQA(annIdsA)
tannsA = tvqa.loadQA(tannIdsA)

if len(annsA) > TR_LIMIT_SIZE:
    annsA[TR_LIMIT_SIZE:] = []
    imgIdsA[TR_LIMIT_SIZE:] = []

if len(tannsA) > VAL_LIMIT_SIZE:
    tannsA[VAL_LIMIT_SIZE:] = []
    timgIdsA[VAL_LIMIT_SIZE:] = []

train = get_inputList(vqa, annsA)
test = get_inputList(tvqa, tannsA)
vocab = sorted(list(set(train + test)))
# Reserve 0 for masking via pad_sequences
vocab_size = len(vocab) + 1
json_read = modelReader.read()
model = model_from_json(json_read)
model.load_weights('./model_weights_100iter.h5py')

vqaVal = VQA(annFile2, quesFile2)

newdataSubType = 'analysis1'
outputQuestionFile = '%s/Questions/%s_%s_%s_questions.json' % (
    dataDir, taskType, dataType, newdataSubType)
outputAnnotationFile = '%s/Annotations/%s_%s_annotations.json' % (
    dataDir, dataType, newdataSubType)
# vqaAnalysis = vqaVal
newQuestion = 'yes'
questionIndex = 0
ids = vqaVal.getQuesIds()
anns = vqaVal.loadQA(ids)

if not os.path.exists(outputAnnotationFile) or os.stat(
        outputAnnotationFile).st_size == 0:
    outputQuestionWriter = open(outputQuestionFile, 'w')
    outputAnnotationWriter = open(outputAnnotationFile, 'w')

    outputQuestions = {}
    outputAnnotations = {}

    outputAnnotations['info'] = {}
    outputAnnotations['info'][
        'description'] = 'This is the dataset created for further analysis of the VQA task.'
    outputAnnotations['info']['url'] = ' '
    outputAnnotations['info']['version'] = '1.0'
    outputAnnotations['info']['year'] = 2015
Example #5
dataSubType = 'train2014'
annFile = '%s/Annotations/%s_%s_annotations.json' % (dataDir, dataType,
                                                     dataSubType)
quesFile = '%s/Questions/%s_%s_%s_questions.json' % (dataDir, taskType,
                                                     dataType, dataSubType)
imgDir = '%s/%s/' % (dataDir, dataSubType)

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco have been provided in the respective text files in the ../QuestionTypes/ folder.
"""
annIds = vqa.getQuesIds(quesTypes='how many')
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn['image_id']
imgFilename = 'COCO_' + dataSubType + '_' + str(imgId).zfill(12) + '.jpg'
if os.path.isfile(imgDir + imgFilename):
    I = io.imread(imgDir + imgFilename)
    plt.imshow(I)
    plt.axis('off')
    plt.show()

# load and display QA annotations for given answer types
"""
ansTypes can be one of the following
yes/no
number
Example #6
            break
    return pad_sequences(rX, maxlen=ques_maxlen), pad_sequences(rY, maxlen=ans_maxlen)


print("Enter the quesTypes ('what color', 'is this', ..., 'all')")
# quesTypes = input()
quesTypes = "is this"

if quesTypes == "all":
    annIdsA = vqa.getQuesIds()
    tannIdsA = tvqa.getQuesIds()
else:
    annIdsA = vqa.getQuesIds(quesTypes=quesTypes)
    tannIdsA = tvqa.getQuesIds(quesTypes=quesTypes)

annsA = vqa.loadQA(annIdsA)
tannsA = tvqa.loadQA(tannIdsA)

train = get_inputList(vqa, annsA)
test = get_inputList(tvqa, tannsA)
vocab = sorted(list(set(train + test)))
# Reserve 0 for masking via pad_sequences
vocab_size = len(vocab) + 1
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

train = get_inputVec(vqa, annsA)
test = get_inputVec(tvqa, tannsA)

ques_maxlen = max(map(len, (x for x, _ in train + test)))
ans_maxlen = max(map(len, (x for _, x in train + test)))
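
An illustrative sketch (an assumption, not the project's actual get_inputVec/get_inputList helpers) of how word_idx and pad_sequences would typically turn the token sequences into the padded index arrays implied above:

# Hedged sketch: 'train' is assumed to be a list of (question_tokens, answer_tokens) pairs.
from keras.preprocessing.sequence import pad_sequences

def vectorize(pairs, word_idx, ques_maxlen, ans_maxlen):
    X = [[word_idx[w] for w in q] for q, _ in pairs]   # map tokens to ids; 0 stays reserved for padding
    Y = [[word_idx[w] for w in a] for _, a in pairs]
    return pad_sequences(X, maxlen=ques_maxlen), pad_sequences(Y, maxlen=ans_maxlen)

# X_train, Y_train = vectorize(train, word_idx, ques_maxlen, ans_maxlen)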
Example #7
dataSubType = 'train2014'
qtype = ['what color','what is on the','what sport is']
# path to images 
data_dir = '/srv/share/data/mscoco/coco/images/train2014/'
model = '/home/ashwin/caffe/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
prototxt = '/home/ashwin/caffe/models/bvlc_reference_caffenet/deploy.prototxt'
# load QAs
vqa = VQA(annFile, quesFile) 
# add question type

annIds = []
anns = []
ids = []
for qitem in qtype:
  annIds = vqa.getQuesIds(quesTypes=qitem)   # use the loop item, not the whole qtype list
  anns.extend(vqa.loadQA(annIds))
  ids.extend(vqa.getImgIds(quesTypes=qitem))

UIDs = list(np.unique(np.array(ids)))

# extract fc7 features
caffe.set_mode_gpu()
caffe.set_device(1)
net = caffe.Net(prototxt,model,caffe.TEST)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # mean pixel
transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
transformer.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB
net.blobs['data'].reshape(1,3,227,227)
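
# A minimal sketch (an assumption, not part of the original script) of pulling an fc7 feature
# for one of the selected images with the net/transformer configured above; the COCO file
# naming convention follows the rest of this script.
img_id = UIDs[0]
img_path = data_dir + 'COCO_' + dataSubType + '_' + str(img_id).zfill(12) + '.jpg'
image = caffe.io.load_image(img_path)                               # HxWx3 float image in [0, 1]
net.blobs['data'].data[...] = transformer.preprocess('data', image)
net.forward()
fc7_feature = net.blobs['fc7'].data[0].copy()                       # 4096-d fc7 activation
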
def main():
    glove_word_vec_file = './../glove/glove.6B.300d.txt'
    word_vec_dict = readGloveData(glove_word_vec_file)
    vqaTrain = VQA(annFile, quesFile)
    annotations = vqaTrain.dataset['annotations']
    questions = vqaTrain.questions['questions']
    answerFeatures = createAnswerFeatures(annotations)

    # Dumping answer features
    answer_features_list = open('answer_feature_list.json', 'w')
    answer_features_list.write(json.dumps(answerFeatures))

    # For getting image vectors
    imageDict = pramod.generateDictionary(tfile)
    feats = sio.loadmat('./../features/coco/vgg_feats.mat')['feats']

    data = []
    X_train = []
    Y_train = []
    X_test = []
    Y_test = []
    FILE_INDEX = 0
    for question in questions:
        # quesItem = {}
        # print question
        questionVector = getBOWVector(
            question['question'].strip().replace('?', ' ?').split(),
            word_vec_dict)
        imgID = question['image_id']
        imageVector = np.asarray(feats[:, imageDict[imgID]])
        # quesItem['image_id'] = imgID
        # quesItem['question'] = question['question'].replace('?', ' ?').split(' ')
        annotations = vqaTrain.loadQA(ids=[question['question_id']])
        for annotation in annotations:
            ansString = annotation['multiple_choice_answer']
            answerVector = getAnswerVector(ansString, answerFeatures)
            temp_X_train = np.append(imageVector, questionVector)
            temp_Y_train = answerVector
            X_train.append(temp_X_train)
            Y_train.append(temp_Y_train)
            if len(X_train) >= FILE_LIMIT:
                # open in binary mode so np.save writes a valid .npy file
                train_x_file = open(
                    FILE_PATH + X_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy',
                    'wb')
                train_y_file = open(
                    FILE_PATH + Y_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy',
                    'wb')
                np.save(train_x_file, X_train)
                np.save(train_y_file, Y_train)
                X_train = []
                Y_train = []
                FILE_INDEX = FILE_INDEX + 1
            # print len(X_train)
        # if len(annotations) != 1:
        # print imgID, " has annotations ", len(annotations)

        # for ann in annotations:
        # quesItemCopy = dict(quesItem)
        # ansString = ann['multiple_choice_answer']
        # quesItemCopy['answer'] = ansString
        # data.append(quesItemCopy)
    if len(X_train) > 0:
        train_x_file = open(
            FILE_PATH + X_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'wb')
        train_y_file = open(
            FILE_PATH + Y_TRAIN_FILE_NAME + str(FILE_INDEX) + '.npy', 'wb')
        np.save(train_x_file, X_train)
        np.save(train_y_file, Y_train)
        X_train = []
        Y_train = []
Example #9
File: vqaDemo.py  Project: caomw/VQA
taskType = "OpenEnded"
dataType = "mscoco"  # 'mscoco' for real and 'abstract_v002' for abstract
dataSubType = "train2014"
annFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, dataSubType)
quesFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, dataSubType)
imgDir = "%s/Images/%s/%s/" % (dataDir, dataType, dataSubType)

# initialize VQA api for QA annotations
vqa = VQA(annFile, quesFile)

# load and display QA annotations for given question types
"""
All possible quesTypes for abstract and mscoco have been provided in the respective text files in the ../QuestionTypes/ folder.
"""
annIds = vqa.getQuesIds(quesTypes="how many")
anns = vqa.loadQA(annIds)
randomAnn = random.choice(anns)
vqa.showQA([randomAnn])
imgId = randomAnn["image_id"]
imgFilename = "COCO_" + dataSubType + "_" + str(imgId).zfill(12) + ".jpg"
if os.path.isfile(imgDir + imgFilename):
    I = io.imread(imgDir + imgFilename)
    plt.imshow(I)
    plt.axis("off")
    plt.show()

# load and display QA annotations for given answer types
"""
ansTypes can be one of the following
yes/no
number
Example #10
class COCODataset(Dataset):
    
    def __init__(self, vocab, answers, rootDir='../../data2', dataSubType='train2014'):
        
        annFile ='{}/v2_mscoco_{}_annotations.json'.format(rootDir, dataSubType)
        quesFile ='{}/v2_OpenEnded_mscoco_{}_questions.json'.format(rootDir, dataSubType)
        self.vqa = VQA(annFile, quesFile)
        self.imgDir = '{}/{}'.format(rootDir, dataSubType)
        self.vocab = vocab
        self.answers = answers
        self.quesIds = self.vqa.getQuesIds()
        self.dataSubType = dataSubType
        self.transform = transforms.Compose([
                         transforms.Resize(299),
                         transforms.CenterCrop(299),
                         transforms.ToTensor(),
                         transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225]),
                         ])
        
    def __len__(self):
        return len(self.quesIds)
        
    def __getitem__(self, index):
        
        """
        returns:
            question: tensor of word-indices
            transformed image: tensor of shape (3, 299, 299)
            answers: tensor of indices mapped to 3000 most frequently occurring answers
            answers not found among 300 most frequently occurring answers are eliminated
        """
        
        quesId = self.quesIds[index]
        
        img_id = self.vqa.qqa[quesId]['image_id'] 
        img_id = str(img_id).zfill(12)
        path = 'COCO_{}_{}.jpg'.format(self.dataSubType, img_id)
#         print(os.path.join(self.imgDir, path))
        image = Image.open(os.path.join(self.imgDir, path)).convert('RGB')

        image = self.transform(image)
            
            
        # Convert question to word ids
        vocab = self.vocab
        question = self.vqa.qqa[quesId]['question']
#         print(question)
        
        tokens = nltk.tokenize.word_tokenize(question.lower())
        question_list = []
        question_list.append(vocab('<start>'))
        question_list.extend([vocab(token) for token in tokens])
        question_list.append(vocab('<end>'))
        question_tensor = torch.Tensor(question_list)
        
        qa = self.vqa.loadQA(quesId)
        
        ans_list = [a['answer'] for a in qa[0]['answers']]
#         print(ans_list)
        
        ans_index_list = [self.answers(ans) for ans in ans_list] #if ans in self.answers.ans2idx.keys()]
        answer_tensor = torch.Tensor(ans_index_list)
        
        return question_tensor, image, answer_tensor     

    def subset(self, fraction=0.5, count=None, shuffle=True):
        '''
        give subset of certain fraction/count
        prioritizes count
        '''
        if not count:
            count = int(len(self.quesIds) * fraction)
        print('Getting subset of length', count, 'out of', len(self))
        subset = copy.deepcopy(self)
        if shuffle: random.shuffle(subset.quesIds)
        subset.quesIds = subset.quesIds[:count]
        return subset
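
A minimal usage sketch (an assumption, not part of the original class) of wrapping COCODataset in a PyTorch DataLoader; vocab and answers are assumed to be callables mapping tokens/answers to integer indices, and a custom collate_fn is needed because the question and answer tensors have variable length:

# Hedged sketch: vocab and answers are placeholder callables; paths follow the constructor defaults.
import torch
from torch.utils.data import DataLoader

dataset = COCODataset(vocab, answers, rootDir='../../data2', dataSubType='train2014')
small = dataset.subset(count=1000)                        # quick subset for experimentation

def collate(batch):
    questions, images, answer_lists = zip(*batch)         # keep variable-length tensors as lists
    return list(questions), torch.stack(images), list(answer_lists)

loader = DataLoader(small, batch_size=32, shuffle=True, collate_fn=collate)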
Example #11
class DataReader:
    
    def __init__(self, data_type = TRAIN_DATA_TYPE, shape = (224,224)):
        """
        shape为输出图像数据的shape
        data_type为需导入的数据集的类型
        """
        self.data_type = data_type
        annFile='{}\\annotations\\{}{}_{}_annotations.json'.format(DATA_PATH,VERSION_TYPE,DATA_TYPE,self.data_type)
        quesFile ='{}\\Questions\\{}{}_{}_{}_questions.json'.format(DATA_PATH,VERSION_TYPE,TASK_TYPE,DATA_TYPE,self.data_type)
        self.vqa = VQA(annFile, quesFile)
        self.img_ids = list(self.vqa.imgToQA.keys())
        self.pos = 0
        self.shape = shape
        questions = self.vqa.getQuestionsFile()
        questions = questions['questions']
        # qf is a dict for looking up a question by its question_id
        self.qf = dict()
        for q in questions:
            self.qf[q["question_id"]] = q["question"]

    def get_pic_data(self,pic_id):
        """
        获取图像数据
        pic_id:图像的id
        return numpy三维数组
        """
        imgFilename = 'COCO_' + self.data_type + '_'+ str(pic_id).zfill(12) + '.jpg'
        path = '{}\\images\\{}\\'.format(DATA_PATH,self.data_type)
        img = Image.open(path+imgFilename)
        img = img.resize(self.shape)
        return numpy.array(img)

    def get_pic_qa(self,pic_id):
        """
        获取图像的问题和回答
        return ['question_type':str,'question_id':num,'answers':[{'answer':str,'answer_confidence':'yes'|'maybe'|'no','answer_id':num}...]]
        """
        q = self.vqa.getQuesIds(imgIds=[pic_id])
        qas = self.vqa.loadQA(q)
        result = []
        for qa in qas:
            q = self.get_question(qa['question_id'])
            result.append({"question":q,"answers":qa["answers"]})
        return result

    def set_pos(self,pos = 0):
        """
        设置当前读取游标
        """
        self.pos = pos if pos < len(self.img_ids) else len(self.img_ids) + 1

    def get_pos(self):
        """
        获取当前pos
        """
        return self.pos

    def get_next_pic_id(self):
        """
        获取下一张图片的id(即当前游标所在图像的id)
        并且使索引+1
        """
        img_id = self.img_ids[self.pos]
        self.pos = self.pos + 1 if not self.pos + 1 == len(self.img_ids) else 0
        return img_id

    def get_question(self,question_id):
        return self.qf[question_id]
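
A brief usage sketch (an assumption, not part of the original class) of stepping through images with DataReader; the DATA_PATH, VERSION_TYPE, DATA_TYPE, TASK_TYPE and TRAIN_DATA_TYPE constants are assumed to be configured elsewhere:

# Hedged sketch: constants and paths are assumed to be set up as the class expects.
reader = DataReader(data_type=TRAIN_DATA_TYPE, shape=(224, 224))
pic_id = reader.get_next_pic_id()        # id at the current cursor; the cursor advances by 1
pixels = reader.get_pic_data(pic_id)     # (224, 224, 3) numpy array
qa_pairs = reader.get_pic_qa(pic_id)     # [{'question': ..., 'answers': [...]}, ...]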
Example #12
modelReader = open("./model_definition_100iter.json")
json_read = modelReader.read()
model = model_from_json(json_read)
model.load_weights("./model_weights_100iter.h5py")

vqaVal = VQA(annFile2, quesFile2)


newdataSubType = "analysis1"
outputQuestionFile = "%s/Questions/%s_%s_%s_questions.json" % (dataDir, taskType, dataType, newdataSubType)
outputAnnotationFile = "%s/Annotations/%s_%s_annotations.json" % (dataDir, dataType, newdataSubType)
# vqaAnalysis = vqaVal
newQuestion = "yes"
questionIndex = 0
ids = vqaVal.getQuesIds()
anns = vqaVal.loadQA(ids)


if not os.path.exists(outputAnnotationFile) or os.stat(outputAnnotationFile).st_size == 0:
    outputQuestionWriter = open(outputQuestionFile, "w")
    outputAnnotationWriter = open(outputAnnotationFile, "w")

    outputQuestions = {}
    outputAnnotations = {}

    outputAnnotations["info"] = {}
    outputAnnotations["info"]["description"] = "This is the dataset created for further analysis of the VQA task."
    outputAnnotations["info"]["url"] = " "
    outputAnnotations["info"]["version"] = "1.0"
    outputAnnotations["info"]["year"] = 2015
    outputAnnotations["info"]["contributor"] = "vishaal"