コード例 #1
0
def TestVocabMapping():
    """Round-trip check of the word <-> id vocabulary mapping.

    Builds a vocabulary from the sample dump, saves it to two JSON files,
    loads it back from those files, translates every article to ids and
    asserts that translating the ids back yields the same words as
    ``SentenceToWord`` produces from the article's original sentences.

    Side effects: writes ``./wordToId.json`` and ``./idToWord.json`` through
    ``SaveVocab`` and prints a confirmation line on success.

    Raises:
        AssertionError: if the number of sentences, the length of a
            sentence, or any single word differs after the round trip.
    """
    dataFile = "./dataset/samples/qa-dump-1460090355004_new.json"
    wordToIdFile = "./wordToId.json"
    idToWordFile = "./idToWord.json"
    dataProvider = DataProcessor(dataFile)
    dataProvider.BuildVocab()
    dataProvider.SaveVocab(wordToIdFile, idToWordFile)

    # Load from the files just written before translating, so the saved
    # vocabulary (not only the in-memory one) takes part in the check.
    dataProvider.LoadVocab(wordToIdFile, idToWordFile)
    dataProvider.TranslateWordToIdPerArticle()
    data = dataProvider.data
    for title in data:
        article = data[title]
        sentencesInId = article["textInSentencesInId"]
        # Materialize both sides so their lengths can be compared.
        sentencesInWordsFromId = list(
            dataProvider.TranslateIdToWord(sentencesInId))
        sentencesInWords = list(SentenceToWord(article["textInSentences"]))

        # zip() stops at the shorter input, so without this check a round
        # trip that drops or adds whole sentences would go unnoticed.
        assert len(sentencesInWords) == len(sentencesInWordsFromId), (
            "sentence count mismatch in article %r: %d != %d"
            % (title, len(sentencesInWords), len(sentencesInWordsFromId)))

        for index, (s0, s1) in enumerate(
                zip(sentencesInWords, sentencesInWordsFromId)):
            assert len(s0) == len(s1), (
                "sentence length mismatch in article %r, sentence %d"
                % (title, index))
            for w0, w1 in zip(s0, s1):
                assert w0 == w1, (
                    "word mismatch in article %r, sentence %d: %r != %r"
                    % (title, index, w0, w1))
    # Parenthesized single-argument form prints the same on Python 2 and 3.
    print("Vocab Mapping test passed!")