Beispiel #1
0
def check_demarcation(search_key):
    """
    Sanity check function: make sure a certain search key can be used to find the beginning of the ktav yad rashi in
    text. Prints out files missing the search key, as well as number of files searched and number of keys found.

    A file is counted at most once, no matter how many of its lines contain the key.
    File names are relative paths, so they are resolved against the current working directory.
    :param search_key: A string indicating where ktav yad rashi begins.
    """

    total, count = 0, 0

    # loop through files; the +1 makes the upper bound inclusive
    # NOTE(review): get_page / get_daf presumably convert between (daf, amud) and a running page index - confirm
    for page in range(functions.get_page(72, 'b'), functions.get_page(94, 'a')+1):
        file_name = u'מנחות_{}.txt'.format(functions.get_daf(page))

        # 'with' releases the handle even if reading or decoding a line raises
        with codecs.open(file_name, 'r', 'utf-8') as rashi_file:
            total += 1
            # any() stops reading at the first line that contains the key
            found_key = any(search_key in line for line in rashi_file)

        if found_key:
            count += 1
        else:
            # a single parenthesised argument prints identically under Python 2 and Python 3
            print(file_name)

    print('{} files scanned, found key in {} file'.format(total, count))
Beispiel #2
0
def split_files(search_key):
    """
    Loops through files, splitting Rashi and ktav yad rashi into 2 different files.
    Recommend running check_demarcation first.

    Lines before the first line containing search_key are written to rashi_fixed/<file name>; that line and
    everything after it are written to ktav_yad_rashi/<file name>. If the key never appears, the whole file is
    copied to rashi_fixed/ and the ktav_yad_rashi/ file is left empty.

    The source file is opened before the outputs, so a missing source raises before any output file is created
    or truncated. Both output directories must already exist; this function does not create them.
    :param search_key: key to find end of Rashi and beginning of ktav yad rashi
    """

    # loop through files; the +1 makes the upper bound inclusive
    for page in range(functions.get_page(72, 'b'), functions.get_page(94, 'a') + 1):
        file_name = u'מנחות_{}.txt'.format(functions.get_daf(page))

        # 'with' closes all three handles even if a read or write fails part-way through
        with codecs.open(file_name, 'r', 'utf-8') as original, \
                codecs.open(u'rashi_fixed/{}'.format(file_name), 'w', 'utf-8') as rashi, \
                codecs.open(u'ktav_yad_rashi/{}'.format(file_name), 'w', 'utf-8') as ktav_yad_rashi:

            found = False

            for line in original:
                # once the key has been seen, every remaining line belongs to ktav yad rashi
                if not found and search_key in line:
                    found = True

                if found:
                    ktav_yad_rashi.write(line)
                else:
                    rashi.write(line)
Beispiel #3
0
def separate_ktav_yad_rashi():
    """
    Walks the refs of "Rashi on Menachot" from 72b through 94a and records, for each one, which of the split
    files (rashi_fixed/ and ktav_yad_rashi/) it was found in, as reported by find_TextChunk_in_file.

    :return: A dict named 'results' which contains lists of refs as described below.
        'rashi': refs found only in the rashi_fixed/ file
        'ktav yad rashi': refs found only in the ktav_yad_rashi/ file
        'found in both': refs found in both files
        'found in none': refs found in neither file
    """

    # set up a range of refs
    ref_range = Ref("Rashi on Menachot.72b-94a")

    ref = get_deepest_ref(Ref("Rashi on Menachot.72b"))

    results = {
        'rashi': [],
        'ktav yad rashi': [],
        'found in both': [],
        'found in none': [],
    }

    # NOTE(review): if next_segment_ref() can return None at the end of the text, contains() would receive None
    # here - confirm that the text always continues past 94a
    while ref_range.contains(ref):
        # open the files
        # NOTE(review): the -2 presumably maps ref.sections[0] onto the page index get_daf expects - confirm
        file_name = u'מנחות_{}.txt'.format(functions.get_daf(ref.sections[0]-2))

        # 'with' closes both handles even if the lookup raises
        with codecs.open(u'rashi_fixed/{}'.format(file_name), 'r', 'utf-8') as rashi, \
                codecs.open(u'ktav_yad_rashi/{}'.format(file_name), 'r', 'utf-8') as ktav_yad_rashi:
            # look for ref in files
            in_rashi = find_TextChunk_in_file(ref, rashi)
            in_ktav_yad = find_TextChunk_in_file(ref, ktav_yad_rashi)

        # pick the single bucket this ref belongs to
        if in_rashi and in_ktav_yad:
            bucket = 'found in both'
        elif in_rashi:
            bucket = 'rashi'
        elif in_ktav_yad:
            bucket = 'ktav yad rashi'
        else:
            bucket = 'found in none'
        results[bucket].append(ref)

        # get next ref
        ref = ref.next_segment_ref()

    return results