def TestVocabMapping():
    """Round-trip check of the word <-> id vocabulary mapping.

    Builds a vocabulary from the sample dataset through ``DataProcessor``,
    saves it to the two JSON paths below, loads it back, and translates
    every article to ids.  Each article's ``"textInSentencesInId"`` entry is
    then translated back to words and compared, word by word, with the
    result of ``SentenceToWord(article["textInSentences"])``.

    NOTE(review): the paths are relative, so this presumably reads the
    dataset from, and writes both vocab files into, the current working
    directory -- confirm against ``DataProcessor.SaveVocab``.

    Raises:
        AssertionError: if the sentence count, a sentence length, or any
            single word differs between the two representations.
    """
    dataFile = "./dataset/samples/qa-dump-1460090355004_new.json"
    wordToIdFile = "./wordToId.json"
    idToWordFile = "./idToWord.json"

    dataProvider = DataProcessor(dataFile)
    dataProvider.BuildVocab()
    # Save and immediately reload so the comparison below runs against the
    # vocabulary as it comes back from the files.
    dataProvider.SaveVocab(wordToIdFile, idToWordFile)
    dataProvider.LoadVocab(wordToIdFile, idToWordFile)
    dataProvider.TranslateWordToIdPerArticle()

    data = dataProvider.data
    for title in data.keys():
        article = data[title]
        sentencesInId = article["textInSentencesInId"]
        sentencesInWordsFromId = dataProvider.TranslateIdToWord(sentencesInId)
        sentencesInWords = SentenceToWord(article["textInSentences"])

        # zip() stops at the shorter input, so a missing or extra sentence
        # would go unnoticed unless the counts are compared first.
        assert len(sentencesInWords) == len(sentencesInWordsFromId), (
            "article %r: %d sentences in text but %d after id round trip"
            % (title, len(sentencesInWords), len(sentencesInWordsFromId)))

        pairs = zip(sentencesInWords, sentencesInWordsFromId)
        for sentenceIdx, (s0, s1) in enumerate(pairs):
            assert len(s0) == len(s1), (
                "article %r, sentence %d: %d words in text but %d after "
                "id round trip" % (title, sentenceIdx, len(s0), len(s1)))
            for wordIdx, (w0, w1) in enumerate(zip(s0, s1)):
                assert w0 == w1, (
                    "article %r, sentence %d, word %d: %r != %r"
                    % (title, sentenceIdx, wordIdx, w0, w1))

    # Single-argument call form prints the same text on Python 2 and 3.
    print("Vocab Mapping test passed!")