def test_basic(self):
    """New phrases are numbered 0, 1, 2 in order; repeats return the same id."""
    sequencer = PhraseSequencer(self.corpus)
    phrases = ('a', 'b', 'c')

    # First sighting of each phrase, in order.
    first_ids = [sequencer.sequence(phrase) for phrase in phrases]
    for expected_id, assigned_id in enumerate(first_ids):
        self.assertEqual(expected_id, assigned_id)

    # Asking again for a known phrase must give back the id it already has.
    for phrase, assigned_id in zip(phrases, first_ids):
        self.assertEqual(assigned_id, sequencer.sequence(phrase))
def test_persistence(self):
    """Check which phrase ids other sequencers see before and after an upload.

    Covers: a second sequencer on the same corpus before upload, the
    original sequencer after upload, a fresh sequencer on the same corpus
    after upload, and a sequencer on a different corpus.
    """
    s1 = PhraseSequencer(self.corpus)
    # Only the side effect matters here; the returned ids are not used.
    # The order fixes the ids asserted below ('b' -> 1, 'c' -> 2).
    for phrase in ('a', 'b', 'c'):
        s1.sequence(phrase)

    # new sequencer shouldn't see updates that haven't been persisted
    # note: should never do this in practice--should only ever be one
    # active sequencer per corpus.
    # NOTE(review): 'a' is the first phrase in both sequencers, so it maps
    # to 0 whether or not s1's phrases are visible to s2; asserting on 'c'
    # would likely distinguish the two cases -- confirm before changing.
    s2 = PhraseSequencer(self.corpus)
    self.assertEqual(0, s2.sequence('a'))

    s1.upload_new_phrases()
    self.assertEqual(1, s1.sequence('b'))  # existing phrases still present
    self.assertEqual(3, s1.sequence('d'))  # new phrases can still be added

    s3 = PhraseSequencer(self.corpus)
    self.assertEqual(2, s3.sequence('c'))  # previously uploaded phrase appears
    self.assertEqual(3, s3.sequence('e'))  # but not d=3, which wasn't uploaded

    # sequencer with different corpus doesn't show at all
    s4 = PhraseSequencer(Corpus())
    self.assertEqual(0, s4.sequence('f'))