コード例 #1
0
def measurement(nKnownWords, nUnknownWords):
    """Simulate a vocabulary test for a synthetic person and return the result.

    A mock corpus of ``nKnownWords + nUnknownWords`` words is created, a set
    of known words is drawn from it, every question produced by
    ``Vocabulary.getQuestions`` is answered according to that set, and the
    answered questions are passed to ``Vocabulary.calculate``.

    Args:
        nKnownWords: Number of distinct corpus words the simulated person
            knows.
        nUnknownWords: Number of remaining corpus words; together with
            ``nKnownWords`` it determines the size passed to ``MockCorpora``.

    Returns:
        Whatever ``Vocabulary.calculate`` returns for the answered questions.

    Raises:
        ValueError: Raised by ``numpy.random.choice`` when it cannot draw
            ``nKnownWords`` distinct words (e.g. fewer words with non-zero
            probability than requested).
    """
    corpora = MockCorpora(nKnownWords + nUnknownWords)
    # We're using a theoretical distribution of a real person's vocabulary.
    # It would be interesting to test it later against real vocabularies,
    # but for now let's proceed with it.
    #
    # Sample WITHOUT replacement: with the default (replace=True) repeated
    # draws collapse inside the set, leaving fewer than nKnownWords known
    # words and skewing the ground truth the estimate is compared against.
    knownWords = set(
        numpy.random.choice(corpora.words(),
                            p=corpora.probabilities(),
                            size=nKnownWords,
                            replace=False))
    print(knownWords)
    v = Vocabulary(corpora)
    questions = v.getQuestions()
    for q in questions:
        # A question is answered positively iff its word is in the known set.
        q.setAnswer(q.word in knownWords)
    print([q.word for q in questions])
    return v.calculate(questions)