def save(session, name, counts):
    """Persists a book together with the occurrences of its words.

    For every entry in ``counts`` the word is looked up by its stripped
    form; when no row is found a new word is analysed and created. Each
    word is then linked to the book through an occurrence carrying its
    frequency. The book is added to the session and committed once, after
    all entries have been processed.

    Args:
        session (session): database session
        name (str): the name of the book
        counts (dict): stripped words mapped to ``(frequency, uni_word)``
            pairs
    """
    logger.info(MSG_SAVING, name)

    book = db.Book(name=name)
    progress = Progress(len(counts)).start()

    for index, (key, (freq, unicode_word)) in enumerate(counts.iteritems()):
        progress.update(index)

        # reuse the row if this word was already stored for another book
        matching = session.query(db.Word).filter_by(hebrew=key)
        word = matching.first()

        if not word:
            # word has never been stored: analyse it and build a new row
            clusters = hebrew.clusters(unicode_word)
            strict_syllables = hebrew.syllabify(groups=clusters, strict=True)
            loose_syllables = hebrew.syllabify(groups=clusters, strict=False)

            is_shemot = hebrew.isshemot(unicode_word)
            gematria_value = hebrew.gematria(key)
            word = db.Word(
                hebrew=key,
                shemot=is_shemot,
                gematria=gematria_value,
                syllables=str(strict_syllables),
                syllen=len(strict_syllables),
                syllen_hatafs=len(loose_syllables),
            )

        # association row tying the word to this book with its frequency
        occurrence = db.Occurrence(frequency=freq)
        occurrence.word = word
        book.words.append(occurrence)  # pylint: disable=E1101

    session.add(book)
    session.commit()
    progress.end()

    logger.info(MSG_SAVED, name)
def test_syllabify(self):
    """Checks hebrew.syllabify against the expected syllables of sample words.

    Each case is ``(word, keyword arguments, expected syllables, message)``;
    the cases are evaluated in order and the first mismatch fails the test.
    """
    cases = (
        (u'מַת', {},
         [['mem', 'patah', 'sav']],
         'simple closed syllable'),
        (u'מִי', {},
         [['mem', 'hiriq-male', 'yod']],
         'simple open syllable'),
        (u'לָרֶדֶת', {},
         [['lamed', 'qamats'],
          ['resh', 'segol'],
          ['dalet', 'segol', 'sav']],
         'simple word'),
        (u'בְּ/רֵאשִׁית', {},
         [['bet', 'dagesh-qal', 'sheva-na', 'resh', 'tsere', 'alef'],
          ['shin', 'hiriq-male', 'yod', 'sav']],
         'should parse basic word'),
        # same word twice: without and with strict=True
        (u'אֶֽעֱשֶׂהּ־', {},
         [['alef', 'segol'],
          ['ayin', 'hataf-segol'],
          ['sin', 'segol', 'mapiq-he', 'mapiq']],
         'hatafs in own syllables'),
        (u'אֶֽעֱשֶׂהּ־', {'strict': True},
         [['alef', 'segol'],
          ['ayin', 'hataf-segol', 'sin', 'segol', 'mapiq-he', 'mapiq']],
         'hatafs merged with next syllable'),
        (u'וַ/יִּתְפְּרוּ', {},
         [['vav', 'patah'],
          ['yod', 'dagesh-hazaq', 'hiriq', 'sav', 'sheva-nah'],
          ['pe', 'dagesh-qal', 'sheva-na', 'resh', 'shuruq']],
         'sheva-nah does not break syllable'),
    )

    for word, options, expected, message in cases:
        actual = hebrew.syllabify(word, **options)
        self.assertEqual(actual, expected, message)