예제 #1
0
def checkMishnayot():
    """Run the ``@22`` ordering check on every non-intro text file.

    Walks the ``*.txt`` files in the current working directory, skipping any
    whose name contains ``"intro"``. Each remaining file name is printed and
    the open file is handed to a ``TagTester`` (together with
    ``perek_checker``), after which ``in_order_many_sections`` is called with
    ``@00`` as the end tag. The tester's return value is not inspected here.
    """
    # "@22" followed (lazily) by at least one Hebrew letter (alef..tav).
    # Loop-invariant, so it is built once.
    reg = u'@22.*?[\u05d0-\u05ea]+.*?'
    for filename in glob.glob(u"*.txt"):
        if "intro" in filename:
            continue
        # Single-argument form prints the same under Python 2 and 3.
        print(filename)
        # The context manager closes the handle once the tester has run;
        # previously every file was left open.
        with open(filename) as open_file:
            tt = TagTester("@22", open_file, perek_checker, reg=reg)
            tt.in_order_many_sections(end_tag="@00")
예제 #2
0
def checkMishnayot():
    """Check ``@22`` tag ordering in every non-intro ``*.txt`` file.

    For each text file in the current working directory whose name does not
    contain ``"intro"``, the name is printed and a ``TagTester`` is run over
    the file with ``@00`` as the end tag. If the first element of the
    tester's result is not ``"SUCCESS"``, execution drops into ``pdb`` so the
    problem can be inspected interactively.
    """
    # "@22" followed (lazily) by at least one Hebrew letter (alef..tav).
    reg = u'@22.*?[\u05d0-\u05ea]+.*?'
    for filename in glob.glob(u"*.txt"):
        if "intro" in filename:
            continue
        # Single-argument form prints the same under Python 2 and 3.
        print(filename)
        # Use a context manager so the handle is always closed; it used to
        # leak one open file per iteration.
        with open(filename) as open_file:
            tt = TagTester("@22", open_file, reg=reg)
            result = tt.in_order_many_sections(end_tag="@00")
            # Kept inside the ``with`` so the file is still open while
            # debugging, as it was before.
            if result[0] != "SUCCESS":
                pdb.set_trace()
예제 #3
0
def checkMishnayot():
    """Validate the ``@22`` tags of each non-intro text file.

    Every ``*.txt`` file in the current working directory is considered;
    files with ``"intro"`` in their name are skipped. The file name is
    printed, a ``TagTester`` is built for the ``@22`` tag and
    ``in_order_many_sections`` is run with ``@00`` as the end tag. A result
    whose first element differs from ``"SUCCESS"`` triggers a ``pdb``
    breakpoint.
    """
    # "@22" followed (lazily) by at least one Hebrew letter (alef..tav).
    reg = u'@22.*?[\u05d0-\u05ea]+.*?'
    candidates = [
        name for name in glob.glob(u"*.txt") if name.find("intro") == -1
    ]
    for name in candidates:
        # Single-argument form prints the same under Python 2 and 3.
        print(name)
        # ``with`` guarantees the handle is released; the bare ``open`` used
        # previously never closed it.
        with open(name) as open_file:
            tt = TagTester("@22", open_file, reg=reg)
            result = tt.in_order_many_sections(end_tag="@00")
            # Break while the file is still open, matching the state the
            # debugger saw before this change.
            if result[0] != "SUCCESS":
                pdb.set_trace()
예제 #4
0
def check_mishnayot():
    """Run the ``@22`` ordering check on every card and print a summary.

    For each name returned by ``get_cards()``, ``<card>.txt`` is opened as
    UTF-8 and passed to a ``TagTester`` whose regex captures one or two
    Hebrew letters after ``@22``. Cards whose result starts with
    ``'SUCCESS'`` are collected as successes; all others are reported
    immediately (with the length of the result's second element) and
    collected as failures. Finally the success/failure/total counts and the
    failed cards are printed.
    """
    cards = get_cards()
    success, failure = [], []
    for card in cards:
        with codecs.open('{}.txt'.format(card), 'r', 'utf-8') as infile:
            tester = TagTester(u'@22', infile, u'@22([\u05d0-\u05ea]{1,2})')
            result = tester.in_order_many_sections(end_tag=u'@00', capture_group=1)
        if result[0] == 'SUCCESS':
            success.append(card)
        else:
            # Record the failing card. This list was never populated before,
            # so the summary always reported zero failures and the final
            # listing below printed nothing.
            failure.append(card)
            # Single-argument print form behaves the same on Python 2 and 3.
            print('failure: {}'.format(card))
            print(len(result[1]))

    print('successes: {}'.format(len(success)))
    print('failures: {}'.format(len(failure)))
    print('total: {}'.format(len(cards)))
    for item in failure:
        print(item)
예제 #5
0
def check_mishnayot():
    """Check ``@22`` tag ordering for all cards and report the outcome.

    Each card name from ``get_cards()`` is mapped to ``<card>.txt``, which is
    read as UTF-8 through a ``TagTester`` (regex: ``@22`` followed by a
    captured group of one or two Hebrew letters). A result whose first
    element is ``'SUCCESS'`` counts as a success; anything else is printed
    as a failure together with the length of the result's second element.
    A summary of counts and the names of the failed cards is printed at the
    end.
    """
    cards = get_cards()
    success, failure = [], []
    for card in cards:
        path = '{}.txt'.format(card)
        with codecs.open(path, 'r', 'utf-8') as infile:
            tester = TagTester(u'@22', infile, u'@22([\u05d0-\u05ea]{1,2})')
            result = tester.in_order_many_sections(end_tag=u'@00',
                                                   capture_group=1)
        if result[0] == 'SUCCESS':
            success.append(card)
            continue
        # Track the failure; the list used to stay empty, which made the
        # "failures" count and the closing loop meaningless.
        failure.append(card)
        # Single-argument print form behaves the same on Python 2 and 3.
        print('failure: {}'.format(card))
        print(len(result[1]))

    print('successes: {}'.format(len(success)))
    print('failures: {}'.format(len(failure)))
    print('total: {}'.format(len(cards)))
    for item in failure:
        print(item)
예제 #6
0
def get_num_TYTs_per_perek():
    """Collect the ``@22`` headers of every non-intro text file.

    Each ``*.txt`` file in the current working directory (except those with
    ``"intro"`` in the name) is run through a ``TagTester`` for the ``@22``
    tag with ``@00`` as the end tag. The file name is turned into a masechet
    title: ``"Pirkei Avot"`` when the name contains ``"avot"``, otherwise
    ``"Mishnah "`` plus the title-cased file name with ``.txt`` removed and
    underscores replaced by spaces.

    Returns:
        A ``(num_TYTs, actual_TYTs)`` tuple of dicts keyed by masechet title.
        ``actual_TYTs[m]`` is the headers object produced by the tester and
        ``num_TYTs[m]`` is the list of ``len()`` of each of its items.
    """
    num_TYTs = {}
    actual_TYTs = {}
    # "@22" followed (lazily) by at least one Hebrew letter (alef..tav).
    reg = u'@22.*?[\u05d0-\u05ea]+.*?'
    for filename in glob.glob(u"*.txt"):
        if "intro" in filename:
            continue
        # Close the file as soon as the tester is done; the handle used to
        # be left open for every file.
        with open(filename) as open_file:
            tt = TagTester("@22", open_file, reg=reg)
            headers = tt.in_order_many_sections(end_tag="@00")
        if headers[0] == "SUCCESS":
            headers = headers[1]
        else:
            # NOTE(review): if execution is resumed from the debugger,
            # ``headers`` is still the full result rather than its second
            # element -- confirm whether that is intended.
            pdb.set_trace()
        if "avot" in filename:
            masechet = "Pirkei Avot"
        else:
            base_name = filename.replace(".txt", "").replace("_", " ")
            masechet = "Mishnah " + base_name.title()
        actual_TYTs[masechet] = headers
        num_TYTs[masechet] = [len(perek) for perek in headers]
    return num_TYTs, actual_TYTs
예제 #7
0
def get_num_TYTs_per_perek():
    """Build per-masechet header data from the ``@22`` tags in text files.

    Scans the ``*.txt`` files of the current working directory, ignoring
    names that contain ``"intro"``. Each file is checked with a ``TagTester``
    (tag ``@22``, end tag ``@00``). The dictionary key is ``"Pirkei Avot"``
    for a file whose name contains ``"avot"``; otherwise it is ``"Mishnah "``
    followed by the file name without ``.txt``, underscores turned into
    spaces, and title-cased.

    Returns:
        Two dicts keyed by that masechet title, as a tuple
        ``(num_TYTs, actual_TYTs)``: ``actual_TYTs`` maps to the headers
        object returned by the tester, ``num_TYTs`` to a list holding the
        ``len()`` of each item in it.
    """
    num_TYTs = {}
    actual_TYTs = {}
    # "@22" followed (lazily) by at least one Hebrew letter (alef..tav).
    reg = u'@22.*?[\u05d0-\u05ea]+.*?'
    text_files = [
        name for name in glob.glob(u"*.txt") if name.find("intro") == -1
    ]
    for name in text_files:
        # ``with`` releases the handle deterministically; the plain
        # ``open`` used before never closed it.
        with open(name) as open_file:
            tt = TagTester("@22", open_file, reg=reg)
            headers = tt.in_order_many_sections(end_tag="@00")
        if headers[0] == "SUCCESS":
            headers = headers[1]
        else:
            # NOTE(review): after resuming from pdb, ``headers`` still holds
            # the whole result, not just its second element -- verify that
            # this is the desired fallback.
            pdb.set_trace()
        if name.find("avot") >= 0:
            masechet = "Pirkei Avot"
        else:
            masechet = "Mishnah " + name.replace(".txt", "").replace(
                "_", " ").title()
        num_TYTs[masechet] = [len(perek) for perek in headers]
        actual_TYTs[masechet] = headers
    return num_TYTs, actual_TYTs