def checkMishnayot():
    """Run the ``@22`` tag-order check on every non-intro ``*.txt`` file.

    Every file in the current working directory that matches ``*.txt`` and
    whose name does not contain ``"intro"`` is printed and then handed to a
    ``TagTester`` built with the ``@22`` tag, the module-level
    ``perek_checker`` and a regex requiring at least one character in the
    range U+05D0-U+05EA after the tag.

    The value returned by ``in_order_many_sections`` is discarded, so any
    reporting comes from ``TagTester`` itself.
    """
    # Loop-invariant pattern, built once.
    reg = u'@22.*?[\u05d0-\u05ea]+.*?'

    for filename in glob.glob(u"*.txt"):
        if "intro" in filename:
            continue

        print(filename)

        # The handle must stay open while the tester reads from it, and is
        # closed deterministically afterwards instead of being leaked.
        with open(filename) as open_file:
            tt = TagTester("@22", open_file, perek_checker, reg=reg)
            tt.in_order_many_sections(end_tag="@00")
def checkMishnayot():
    """Check ``@22`` tag ordering in every non-intro ``*.txt`` file.

    Every file in the current working directory that matches ``*.txt`` and
    whose name does not contain ``"intro"`` is printed and run through
    ``TagTester.in_order_many_sections`` with ``@00`` as the end tag.

    When the first element of the result is not ``"SUCCESS"`` the function
    drops into ``pdb`` so the failing file can be inspected interactively.
    """
    # Loop-invariant pattern, built once.
    reg = u'@22.*?[\u05d0-\u05ea]+.*?'

    for filename in glob.glob(u"*.txt"):
        if "intro" in filename:
            continue

        print(filename)

        # Close the handle deterministically instead of leaking one per file.
        with open(filename) as open_file:
            tt = TagTester("@22", open_file, reg=reg)
            result = tt.in_order_many_sections(end_tag="@00")

            if result[0] != "SUCCESS":
                # Deliberate debugging hook: the file is still open here so
                # it can be examined from the debugger prompt.
                pdb.set_trace()
def check_mishnayot():
    """Check ``@22`` tag ordering for every card and print a summary.

    For each name returned by ``get_cards()`` the file ``<card>.txt`` is
    opened as UTF-8 and run through ``TagTester.in_order_many_sections``
    with ``@00`` as the end tag and capture group 1 of the ``@22`` regex.

    Cards whose result starts with ``'SUCCESS'`` are counted as successes.
    Every other card is reported immediately, together with the length of
    the second result element, and recorded as a failure. After all cards
    are processed the success, failure and total counts are printed,
    followed by the name of each failing card.
    """
    cards = get_cards()
    success, failure = [], []

    for card in cards:
        with codecs.open('{}.txt'.format(card), 'r', 'utf-8') as infile:
            tester = TagTester(u'@22', infile, u'@22([\u05d0-\u05ea]{1,2})')
            result = tester.in_order_many_sections(end_tag=u'@00', capture_group=1)

            if result[0] == 'SUCCESS':
                success.append(card)
            else:
                # Record the failing card; without this the failure count
                # below is always 0 and the final listing is always empty.
                failure.append(card)
                print('failure: {}'.format(card))
                print(len(result[1]))

    print('successes: {}'.format(len(success)))
    print('failures: {}'.format(len(failure)))
    print('total: {}'.format(len(cards)))

    for item in failure:
        print(item)
def get_num_TYTs_per_perek():
    """Collect the ``@22`` headers found in each non-intro ``*.txt`` file.

    Every file in the current working directory that matches ``*.txt`` and
    whose name does not contain ``"intro"`` is run through
    ``TagTester.in_order_many_sections`` with ``@00`` as the end tag. On a
    ``"SUCCESS"`` result the second result element is taken as the headers
    for that file; otherwise the function drops into ``pdb``.

    The dictionary key is ``"Pirkei Avot"`` for any file whose name
    contains ``"avot"``. For all other files it is ``"Mishnah "`` followed
    by the file name with ``.txt`` removed, underscores turned into spaces,
    and title-cased.

    Returns:
        A ``(num_TYTs, actual_TYTs)`` tuple of dictionaries keyed by that
        name. ``actual_TYTs`` maps to the headers themselves and
        ``num_TYTs`` maps to a list holding ``len()`` of each header entry.
    """
    num_TYTs = {}
    actual_TYTs = {}
    # Loop-invariant pattern, built once.
    reg = u'@22.*?[\u05d0-\u05ea]+.*?'

    for filename in glob.glob(u"*.txt"):
        if "intro" in filename:
            continue

        # Close the handle deterministically instead of leaking one per file.
        with open(filename) as open_file:
            tt = TagTester("@22", open_file, reg=reg)
            headers = tt.in_order_many_sections(end_tag="@00")

        if headers[0] == "SUCCESS":
            headers = headers[1]
        else:
            # Deliberate debugging hook.
            # NOTE(review): if execution continues from the debugger,
            # ``headers`` is still the raw result rather than its second
            # element, so the entries stored below are not meaningful for
            # this file. Confirm whether such files should be skipped.
            pdb.set_trace()

        if "avot" in filename:
            masechet = "Pirkei Avot"
        else:
            base_name = filename.replace(".txt", "").replace("_", " ")
            masechet = "Mishnah " + base_name.title()

        num_TYTs[masechet] = [len(perek) for perek in headers]
        actual_TYTs[masechet] = headers

    return num_TYTs, actual_TYTs
def get_num_TYTs_per_perek():
    """Collect the ``@22`` headers found in each non-intro ``*.txt`` file.

    Every file in the current working directory that matches ``*.txt`` and
    whose name does not contain ``"intro"`` is run through
    ``TagTester.in_order_many_sections`` with ``@00`` as the end tag. On a
    ``"SUCCESS"`` result the second result element is taken as the headers
    for that file; otherwise the function drops into ``pdb``.

    The dictionary key is ``"Pirkei Avot"`` for any file whose name
    contains ``"avot"``. For all other files it is ``"Mishnah "`` followed
    by the file name with ``.txt`` removed, underscores turned into spaces,
    and title-cased.

    Returns:
        A ``(num_TYTs, actual_TYTs)`` tuple of dictionaries keyed by that
        name. ``actual_TYTs`` maps to the headers themselves and
        ``num_TYTs`` maps to a list holding ``len()`` of each header entry.
    """
    num_TYTs = {}
    actual_TYTs = {}
    # Loop-invariant pattern, built once.
    reg = u'@22.*?[\u05d0-\u05ea]+.*?'

    for filename in glob.glob(u"*.txt"):
        if "intro" in filename:
            continue

        # Close the handle deterministically instead of leaking one per file.
        with open(filename) as open_file:
            tt = TagTester("@22", open_file, reg=reg)
            headers = tt.in_order_many_sections(end_tag="@00")

        if headers[0] == "SUCCESS":
            headers = headers[1]
        else:
            # Deliberate debugging hook.
            # NOTE(review): if execution continues from the debugger,
            # ``headers`` is still the raw result rather than its second
            # element, so the entries stored below are not meaningful for
            # this file. Confirm whether such files should be skipped.
            pdb.set_trace()

        if "avot" in filename:
            masechet = "Pirkei Avot"
        else:
            base_name = filename.replace(".txt", "").replace("_", " ")
            masechet = "Mishnah " + base_name.title()

        num_TYTs[masechet] = [len(perek) for perek in headers]
        actual_TYTs[masechet] = headers

    return num_TYTs, actual_TYTs