Example #1
0
def save(session, name, counts):
    """Saves the contents of a book.

    Builds a ``db.Book`` for ``name`` and attaches one ``db.Occurrence``
    per entry of ``counts``. A word whose stripped form is already stored
    is reused; otherwise a new ``db.Word`` is created from the ``hebrew``
    helpers. The book is added to the session and committed once, after
    all entries have been processed.

    Args:
        session (session): database session
        name (str): the name of the book
        counts (dict): stripped words mapped to ``(frequency, uni_word)``
            pairs; ``uni_word`` is the form handed to the ``hebrew``
            helpers, ``frequency`` is stored on the occurrence
    """
    logger.info(MSG_SAVING, name)

    db_book = db.Book(name=name)

    progress = Progress(len(counts)).start()
    # ``items()`` instead of ``iteritems()``: the latter only exists on
    # Python 2 dicts, while ``items()`` yields the same pairs on 2 and 3.
    for i, (stripped, (frequency, uni_word)) in enumerate(counts.items()):
        progress.update(i)

        # check if word appeared in different book
        db_word = session.query(db.Word)\
                         .filter_by(hebrew=stripped)\
                         .first()

        if not db_word:  # first occurrence of word ever
            # Cluster once and reuse the groups for both syllabifications.
            groups = hebrew.clusters(uni_word)
            syllables = hebrew.syllabify(groups=groups, strict=True)
            syllables_hatafs = hebrew.syllabify(groups=groups, strict=False)
            db_word = db.Word(hebrew=stripped,
                              shemot=hebrew.isshemot(uni_word),
                              gematria=hebrew.gematria(stripped),
                              syllables=str(syllables),
                              syllen=len(syllables),
                              syllen_hatafs=len(syllables_hatafs))

        # Link the word to this book together with its frequency.
        db_rel = db.Occurrence(frequency=frequency)
        db_rel.word = db_word
        db_book.words.append(db_rel)  # pylint: disable=E1101

    session.add(db_book)
    session.commit()

    progress.end()
    logger.info(MSG_SAVED, name)
Example #2
0
    def test_syllabify(self):
        """Expected to syllabify strings."""
        # Each case: (word, extra keyword arguments for syllabify,
        # expected syllables, assertion message). Cases run in order and
        # the first mismatch fails the test.
        cases = (
            (u'מַת', {},
             [['mem', 'patah', 'sav']],
             'simple closed syllable'),

            (u'מִי', {},
             [['mem', 'hiriq-male', 'yod']],
             'simple open syllable'),

            (u'לָרֶדֶת', {},
             [['lamed', 'qamats'],
              ['resh', 'segol'],
              ['dalet', 'segol', 'sav']],
             'simple word'),

            (u'בְּ/רֵאשִׁית', {},
             [['bet', 'dagesh-qal', 'sheva-na', 'resh', 'tsere', 'alef'],
              ['shin', 'hiriq-male', 'yod', 'sav']],
             'should parse basic word'),

            (u'אֶֽעֱשֶׂהּ־', {},
             [['alef', 'segol'],
              ['ayin', 'hataf-segol'],
              ['sin', 'segol', 'mapiq-he', 'mapiq']],
             'hatafs in own syllables'),

            (u'אֶֽעֱשֶׂהּ־', {'strict': True},
             [['alef', 'segol'],
              ['ayin', 'hataf-segol', 'sin', 'segol',
               'mapiq-he', 'mapiq']],
             'hatafs merged with next syllable'),

            (u'וַ/יִּתְפְּרוּ', {},
             [['vav', 'patah'],
              ['yod', 'dagesh-hazaq', 'hiriq', 'sav', 'sheva-nah'],
              ['pe', 'dagesh-qal', 'sheva-na', 'resh', 'shuruq']],
             'sheva-nah does not break syllable'),
        )

        for word, options, expected, message in cases:
            actual = hebrew.syllabify(word, **options)
            self.assertEqual(actual, expected, message)