def make_vocab_files():
    """
    Produce the question and answer vocabularies.

    The question vocabulary is built from the question dictionary loaded for
    ``config.QUESTION_VOCAB_SPACE``; the answer vocabulary is built from the
    answer dictionary loaded for ``config.ANSWER_VOCAB_SPACE`` and is sized
    by ``config.NUM_OUTPUT_UNITS``.

    Note: this function itself does not write anything to disk; it returns
    the vocabularies and leaves persisting them to the caller.

    Returns:
        A ``(question_vocab, answer_vocab)`` tuple, i.e. the results of
        ``make_question_vocab`` and ``make_answer_vocab`` respectively.
    """
    # Single-argument print(...) is valid on both Python 2 and Python 3 and
    # produces the same "label value" line as the old print statement.
    # Wrapping the value in a 1-tuple keeps '%s' safe if it is itself a tuple.
    print('making question vocab... %s' % (config.QUESTION_VOCAB_SPACE,))
    # load_data returns a pair; only the first element (questions) is needed.
    qdic, _ = VQADataProvider.load_data(config.QUESTION_VOCAB_SPACE)
    question_vocab = make_question_vocab(qdic)

    print('making answer vocab... %s' % (config.ANSWER_VOCAB_SPACE,))
    # Here only the second element (answers) of the loaded pair is needed.
    _, adic = VQADataProvider.load_data(config.ANSWER_VOCAB_SPACE)
    answer_vocab = make_answer_vocab(adic, config.NUM_OUTPUT_UNITS)

    return question_vocab, answer_vocab
def make_question_vocab(qdic):
    """
    Returns a dictionary that maps words to indices.

    Args:
        qdic: mapping of question id -> record, where each record has a
            'qstr' entry holding the question text.

    Returns:
        A dict mapping each distinct word to an integer index. The empty
        string is always present and mapped to 0; every other word gets the
        next free index (1, 2, ...) in the order it is first encountered
        while iterating over ``qdic``.

    Raises:
        KeyError: if a record in ``qdic`` has no 'qstr' entry.
    """
    vdict = {'': 0}
    for qid in qdic:
        # Split the question string into a list of words
        # (presumably tokenization -- see VQADataProvider.seq_to_list).
        q_str = qdic[qid]['qstr']
        q_list = VQADataProvider.seq_to_list(q_str)
        for w in q_list:
            # `in` replaces dict.has_key(), which no longer exists in
            # Python 3. Indices are dense and start at 0, so the next free
            # index is always the current size of the dict.
            if w not in vdict:
                vdict[w] = len(vdict)
    return vdict