def toNumber(self, lang, s):
    """
    Convert a Talmud daf/amud reference string into a 1-based amud index.

    The index is ``daf * 2 - 1`` for amud A and ``daf * 2`` for amud B.

    :param lang: "en" or "he"; any other value falls through and returns None.
    :param s: the reference text. For "en": digits with an optional trailing
        "a"/"b" (no suffix means amud A). For "he": a Hebrew numeral followed
        by an optional amud marker.
    :return: the integer index, or None for an unrecognised ``lang``.
    :raises InputError: if ``s`` is empty or (for "en") is not a number, or if
        the English daf is greater than ``self.length`` (when that is truthy).
    """
    if lang == "en":
        # str.endswith is safe on an empty string, so "" reaches int() below
        # and is reported as InputError rather than escaping as IndexError.
        if s.endswith(("a", "b")):
            amud, daf_text = s[-1], s[:-1]
        else:
            amud, daf_text = "a", s
        try:
            daf = int(daf_text)
        except ValueError:
            raise InputError(u"Couldn't parse Talmud reference: {}".format(s))

        if self.length and daf > self.length:
            # todo: Catch this above and put the book name on it. Probably change Exception type.
            raise InputError(u"{} exceeds max of {} dafs.".format(daf, self.length))

        indx = daf * 2
        if amud == "a":
            indx -= 1
        return indx

    elif lang == "he":
        if not s:
            # s[-1] below would otherwise raise IndexError on empty input
            raise InputError(u"Couldn't parse Talmud reference: {}".format(s))

        # The numeral is everything before the first '.', ':', ',' or whitespace.
        num = re.split(r"[.:,\s]", s)[0]
        daf = decode_hebrew_numeral(num) * 2

        ends_with_colon = s[-1] == ":"
        ends_with_bet = s[-1] == u"\u05d1"  # bet
        bet_is_amud_marker = (
            (len(s) > 2 and s[-2] in ", ")           # simple bet
            or (len(s) > 4 and s[-3] == u'\u05e2')   # ayin"bet
            or (len(s) > 5 and s[-4] == u"\u05e2")   # ayin''bet
        )
        if ends_with_colon or (ends_with_bet and bet_is_amud_marker):
            return daf  # amud B
        # everything else is treated as amud A
        return daf - 1
def test_encodes_and_decodes_correctly(self):
    """Check that encode -> decode of a Hebrew numeral is the identity for 1..4999."""
    # known ambiguity with single thousands above 1000
    ambiguous_values = (2000, 3000, 4000, 5000)
    for number in range(1, 5000):
        if number not in ambiguous_values:
            assert number == h.decode_hebrew_numeral(h.encode_hebrew_numeral(number))
def toNumber(self, lang, s):
    """
    Convert a Talmud daf/amud reference string into a 1-based amud index.

    The index is ``daf * 2 - 1`` for amud A and ``daf * 2`` for amud B.

    :param lang: "en" or "he"; any other value falls through and returns None.
    :param s: the reference text. For "en": digits with an optional trailing
        "a"/"b" (no suffix means amud A). For "he": a Hebrew numeral followed
        by an optional amud marker.
    :return: the integer index, or None for an unrecognised ``lang``.
    :raises InputError: if ``s`` is empty or (for "en") is not a number, or if
        the English daf is greater than ``self.length`` (when that is truthy).
    """
    if lang == "en":
        # str.endswith is safe on an empty string, so "" reaches int() below
        # and is reported as InputError rather than escaping as IndexError.
        if s.endswith(("a", "b")):
            amud, daf_text = s[-1], s[:-1]
        else:
            amud, daf_text = "a", s
        try:
            daf = int(daf_text)
        except ValueError:
            raise InputError(
                u"Couldn't parse Talmud reference: {}".format(s))

        if self.length and daf > self.length:
            # todo: Catch this above and put the book name on it. Probably change Exception type.
            raise InputError(u"{} exceeds max of {} dafs.".format(
                daf, self.length))

        indx = daf * 2
        if amud == "a":
            indx -= 1
        return indx

    elif lang == "he":
        if not s:
            # s[-1] below would otherwise raise IndexError on empty input
            raise InputError(
                u"Couldn't parse Talmud reference: {}".format(s))

        # The numeral is everything before the first '.', ':', ',' or whitespace.
        num = re.split(r"[.:,\s]", s)[0]
        daf = decode_hebrew_numeral(num) * 2

        ends_with_colon = s[-1] == ":"
        ends_with_bet = s[-1] == u"\u05d1"  # bet
        bet_is_amud_marker = (
            (len(s) > 2 and s[-2] in ", ")           # simple bet
            or (len(s) > 4 and s[-3] == u'\u05e2')   # ayin"bet
            or (len(s) > 5 and s[-4] == u"\u05e2")   # ayin''bet
        )
        if ends_with_colon or (ends_with_bet and bet_is_amud_marker):
            return daf  # amud B
        # everything else is treated as amud A
        return daf - 1
def parse_text():
    """
    Takes the result of strip_tags() and parses into a level four data structure for easy upload.

    Reads 'chizkuni_no-tags.txt' (UTF-8) line by line. A line containing
    <book>, <perek> or <pasuk> announces that the *following* line holds the
    book name, chapter numeral or verse numeral respectively; that following
    line is consumed with readline(). Lines containing <parsha> are dropped.
    All other lines are accumulated as the raw text of the current verse.

    :return: Dictionary of books, depth 4.
    """
    # initiate data structure and variables
    full_text, chapters, verses, raw_text = {}, {}, {}, u''
    current_book, current_chapter, current_verse = u'', u'', u''

    # 'with' guarantees the file is closed even if one of the helpers raises.
    with codecs.open('chizkuni_no-tags.txt', 'r', 'utf-8') as to_parse:
        for line in to_parse:

            # if new book, add the finished book to full_text
            if u'<book>' in line:

                # if this is the first book, there is nothing to flush yet
                if current_book != u'':
                    # close the open verse and chapter, then store the book
                    verses[current_verse] = process_verse(raw_text)
                    chapters[current_chapter] = convertDictToArray(verses)
                    full_text[current_book] = convertDictToArray(chapters)

                    # reset verses and chapters
                    chapters, verses, raw_text = {}, {}, u''
                    current_chapter, current_verse = u'', u''

                # the book name is on the next line
                current_book = removeAllStrings([u'\n', u'\r', u' '], to_parse.readline())

            # if new chapter, add verses to previous chapter
            elif u'<perek>' in line:

                # if first chapter, set current chapter but do nothing else
                if current_chapter != u'':
                    verses[current_verse] = process_verse(raw_text)
                    chapters[current_chapter] = convertDictToArray(verses)
                    verses, raw_text = {}, u''

                # the chapter numeral is on the next line
                current_chapter = removeAllStrings([u'.', u'\n'], to_parse.readline())
                current_chapter = decode_hebrew_numeral(current_chapter)
                current_verse = u''

            # if new verse, process raw text and add to verses
            elif u'<pasuk>' in line:

                # add previous verse if not first verse
                if current_verse != u'':
                    verses[current_verse] = process_verse(raw_text)
                    raw_text = u''

                # the verse numeral is on the next line
                current_verse = removeAllStrings([u'.', u'\n'], to_parse.readline())
                current_verse = decode_hebrew_numeral(current_verse)

            # don't include parsha tags
            elif u'<parsha>' in line:
                continue

            else:
                # add to raw text
                raw_text += line

    # add final book
    verses[current_verse] = process_verse(raw_text)
    chapters[current_chapter] = convertDictToArray(verses)
    full_text[current_book] = convertDictToArray(chapters)

    return full_text
def toNumber(self, lang, s):
    """
    Turn the address string ``s`` into an integer.

    "en" uses int(); "he" uses decode_hebrew_numeral(). Any other ``lang``
    yields None.
    """
    if lang == "he":
        return decode_hebrew_numeral(s)
    if lang == "en":
        return int(s)
    return None