コード例 #1
0
ファイル: schema.py プロジェクト: dakom/Sefaria-Project
    def toNumber(self, lang, s):
        """
        Convert a Talmud daf/amud address string into its integer index.

        Daf N amud a maps to 2N - 1 and daf N amud b maps to 2N.

        :param lang: "en" or "he". Any other value falls through and None is returned.
        :param s: The address. English: a number optionally followed by "a" or "b"
            (amud a is assumed when no letter is given). Hebrew: a Hebrew numeral
            optionally followed by an amud marker.
        :return: The integer index, or None when lang is not recognized.
        :raises InputError: If s is empty, if an English address cannot be parsed,
            or if the English daf exceeds self.length.
        """
        if lang == "en":
            try:
                if s[-1] in ["a", "b"]:
                    amud = s[-1]
                    daf = int(s[:-1])
                else:
                    amud = "a"
                    daf = int(s)
            except (ValueError, IndexError):
                # IndexError covers an empty string, where s[-1] has nothing to index.
                raise InputError(u"Couldn't parse Talmud reference: {}".format(s))

            if self.length and daf > self.length:
                # TODO: Catch this above and put the book name on it.  Probably change Exception type.
                raise InputError(u"{} exceeds max of {} dafs.".format(daf, self.length))

            indx = daf * 2
            if amud == "a":
                indx -= 1
            return indx
        elif lang == "he":
            if not s:
                # Without this guard the s[-1] checks below raise IndexError.
                raise InputError(u"Couldn't parse Talmud reference: {}".format(s))

            # The daf numeral is everything before the first '.', ':', ',' or whitespace.
            num = re.split(r"[.:,\s]", s)[0]
            daf = decode_hebrew_numeral(num) * 2

            last = s[-1]
            if last == ":":
                return daf  # amud B
            if last == u"\u05d1" and (  # bet
                    (len(s) > 2 and s[-2] in ", ")  # simple bet
                    or (len(s) > 4 and s[-3] == u"\u05e2")  # ayin"bet
                    or (len(s) > 5 and s[-4] == u"\u05e2")  # ayin''bet
            ):
                return daf  # amud B
            return daf - 1
コード例 #2
0
 def test_encodes_and_decodes_correctly(self):
     """Round-trip the integers 1..4999 through the Hebrew numeral encoder and decoder."""
     # known ambiguity with single thousands above 1000
     ambiguous = (2000, 3000, 4000, 5000)
     for number in range(1, 5000):
         if number in ambiguous:
             continue
         assert number == h.decode_hebrew_numeral(h.encode_hebrew_numeral(number))
コード例 #3
0
 def test_encodes_and_decodes_correctly(self):
     """Check that decoding an encoded Hebrew numeral gives back the original integer."""
     for value in range(1, 5000):
         # known ambiguity with single thousands above 1000, so those are skipped
         if value not in (2000, 3000, 4000, 5000):
             encoded = h.encode_hebrew_numeral(value)
             assert value == h.decode_hebrew_numeral(encoded)
コード例 #4
0
    def toNumber(self, lang, s):
        """
        Turn a Talmud daf/amud address string into its integer index.

        Amud a of daf N is index 2N - 1; amud b of daf N is index 2N.

        :param lang: "en" or "he". Other values fall through and None is returned.
        :param s: The address. English: digits with an optional trailing "a" or "b"
            (no letter means amud a). Hebrew: a Hebrew numeral with an optional
            amud marker.
        :return: The integer index, or None when lang is not recognized.
        :raises InputError: If s is empty, if an English address cannot be parsed,
            or if the English daf exceeds self.length.
        """
        if lang == "en":
            # Slicing (s[-1:]) yields "" for an empty string, so an empty address
            # reaches int("") and is reported as InputError rather than leaking
            # an IndexError from s[-1].
            if s[-1:] in ("a", "b"):
                amud, daf_text = s[-1], s[:-1]
            else:
                amud, daf_text = "a", s
            try:
                daf = int(daf_text)
            except ValueError:
                raise InputError(
                    u"Couldn't parse Talmud reference: {}".format(s))

            if self.length and daf > self.length:
                # TODO: Catch this above and put the book name on it.  Probably change Exception type.
                raise InputError(u"{} exceeds max of {} dafs.".format(
                    daf, self.length))

            # Two amudim per daf: b lands on the even index, a on the odd one before it.
            return daf * 2 - 1 if amud == "a" else daf * 2
        elif lang == "he":
            if not s:
                # The s[-1] checks below would otherwise raise IndexError.
                raise InputError(
                    u"Couldn't parse Talmud reference: {}".format(s))

            # The daf numeral is the text before the first '.', ':', ',' or whitespace.
            num = re.split(r"[.:,\s]", s)[0]
            daf = decode_hebrew_numeral(num) * 2

            ends_with_bet = s[-1] == u"\u05d1"  # bet
            bet_marks_amud = (
                (len(s) > 2 and s[-2] in ", ")  # simple bet
                or (len(s) > 4 and s[-3] == u"\u05e2")  # ayin"bet
                or (len(s) > 5 and s[-4] == u"\u05e2")  # ayin''bet
            )
            if s[-1] == ":" or (ends_with_bet and bet_marks_amud):
                return daf  # amud B
            return daf - 1
コード例 #5
0
ファイル: chizkuni.py プロジェクト: smontagu/Sefaria-Data
def parse_text():
    """
    Takes the result of strip_tags() and parses into a level four data structure for easy upload

    Reads 'chizkuni_no-tags.txt' line by line. A line containing <book>, <perek>
    or <pasuk> announces a new book, chapter or verse, whose name/number is read
    from the line that follows it. Lines containing <parsha> are skipped. Every
    other line is accumulated as the raw text of the current verse.

    :return: Dictionary of books, depth 4.
    """

    # initiate data structure and variables
    full_text, chapters, verses, raw_text = {}, {}, {}, u''
    current_book, current_chapter, current_verse = u'', u'', u''

    # The context manager closes the file even if a helper raises part-way through.
    with codecs.open('chizkuni_no-tags.txt', 'r', 'utf-8') as to_parse:

        for line in to_parse:

            # if new book add book to full_text.
            if u'<book>' in line:

                # if this is the first book, there is nothing to store yet
                if current_book != u'':

                    # set up book and add it to full_text
                    verses[current_verse] = process_verse(raw_text)
                    chapters[current_chapter] = convertDictToArray(verses)
                    full_text[current_book] = convertDictToArray(chapters)

                    # reset verses and chapters
                    chapters, verses, raw_text = {}, {}, u''
                    current_chapter, current_verse = u'', u''

                # the book name is on the line after the tag
                current_book = removeAllStrings([u'\n', u'\r', u' '], to_parse.readline())

            # if new chapter, add verses to previous chapter
            elif u'<perek>' in line:

                # if first chapter, set current chapter but do nothing else
                if current_chapter != u'':

                    verses[current_verse] = process_verse(raw_text)
                    chapters[current_chapter] = convertDictToArray(verses)
                    verses, raw_text = {}, u''

                # the chapter number is on the line after the tag
                current_chapter = removeAllStrings([u'.', u'\n'], to_parse.readline())
                current_chapter = decode_hebrew_numeral(current_chapter)
                current_verse = u''

            # if new verse, process raw text and add to verses
            elif u'<pasuk>' in line:

                # add previous verse if not first verse
                if current_verse != u'':
                    verses[current_verse] = process_verse(raw_text)
                    raw_text = u''

                # the verse number is on the line after the tag
                current_verse = removeAllStrings([u'.', u'\n'], to_parse.readline())
                current_verse = decode_hebrew_numeral(current_verse)

            # don't include parsha tags
            elif u'<parsha>' in line:
                continue

            else:

                # add to raw text
                raw_text += line

    # add final book
    verses[current_verse] = process_verse(raw_text)
    chapters[current_chapter] = convertDictToArray(verses)
    full_text[current_book] = convertDictToArray(chapters)

    return full_text
コード例 #6
0
ファイル: schema.py プロジェクト: dakom/Sefaria-Project
 def toNumber(self, lang, s):
     """
     Convert the address string s to an integer.

     English ("en") input is parsed with int(); Hebrew ("he") input is decoded
     with decode_hebrew_numeral(). Any other lang yields None.
     """
     if lang == "he":
         return decode_hebrew_numeral(s)
     if lang == "en":
         return int(s)
     return None
コード例 #7
0
 def toNumber(self, lang, s):
     """
     Parse s as a number in the given language.

     "en" uses int(), "he" uses decode_hebrew_numeral(); for any other lang the
     result is None.
     """
     if lang == "en":
         number = int(s)
     elif lang == "he":
         number = decode_hebrew_numeral(s)
     else:
         number = None
     return number