def measurement(nKnownWords, nUnknownWords):
    """Run one simulated vocabulary measurement and return its result.

    A mock corpus of ``nKnownWords + nUnknownWords`` words is created, the
    simulated person's known words are drawn from it (weighted by the
    corpus probabilities), every question produced by ``Vocabulary`` is
    answered according to membership in that known set, and the answered
    questions are passed to ``Vocabulary.calculate``.

    Args:
        nKnownWords: Number of distinct corpus words the simulated person
            knows.
        nUnknownWords: Number of additional corpus words; together with
            ``nKnownWords`` it determines the size passed to ``MockCorpora``.

    Returns:
        Whatever ``Vocabulary.calculate(questions)`` returns (presumably the
        estimated vocabulary size -- confirm against ``Vocabulary``).

    Raises:
        ValueError: Raised by numpy when the corpus has fewer words with
            non-zero probability than ``nKnownWords``.
    """
    corpora = MockCorpora(nKnownWords + nUnknownWords)

    # We're using a theoretical distribution of a real person's vocabulary.
    # It would be interesting to test it later against real vocabularies,
    # but for now let's proceed with it.
    #
    # replace=False: numpy samples with replacement by default, which would
    # collapse duplicate draws in the set and leave fewer than nKnownWords
    # known words.
    knownWords = set(
        numpy.random.choice(
            corpora.words(),
            p=corpora.probabilities(),
            size=nKnownWords,
            replace=False,
        )
    )
    print(knownWords)

    v = Vocabulary(corpora)
    questions = v.getQuestions()
    for q in questions:
        # The simulated person answers "yes" exactly for the words they know.
        q.setAnswer(q.word in knownWords)
    print([q.word for q in questions])

    return v.calculate(questions)