예제 #1
0
def parse():
    """
    Parse every source file and arrange the text by book, parsha and section name.

    The files yielded by filenames() are matched positionally with the books returned by
    library.get_indexes_in_category('Torah'). Each file is read into a jagged array, cleaned of
    markup, and then re-keyed twice with util.simple_to_complex: first by the parsha names of the
    book, then, inside each parsha, by the names listed for that parsha in node_names().
    :return: dict keyed by book name holding the re-keyed text of that book.
    """
    book_names = library.get_indexes_in_category('Torah')
    names = node_names()
    parsed = {}
    for book_num, filename in enumerate(filenames()):
        with codecs.open(filename, 'r', 'utf-8') as infile:
            current = util.file_to_ja([[[]]], infile, [u'@88', u'@44'], sefat_parse_helper).array()
            # The backslash is doubled so the regex still receives "\?" (a literal question mark)
            # without relying on the invalid string escape "\?", which newer interpreters warn about.
            parsed[book_names[book_num]] = util.clean_jagged_array(current, [u'@[0-9]{2}', u'\\?'])
    for book in book_names:
        parashot = names[book].keys()
        # First level: key the book's text by parsha.
        parsed[book] = util.simple_to_complex(parashot, parsed[book])
        for parsha in parashot:
            # Second level: key each parsha's text by the names registered for it.
            parsed[book][parsha] = util.simple_to_complex(names[book][parsha], parsed[book][parsha])

    return parsed
예제 #2
0
def produce_parsed_data(filename):
    """
    Read a source file and return its text as an array aligned to the section numbers in the file.

    :param filename: Path of the utf-8 encoded file to parse.
    :return: Result of util.convert_dict_to_array applied to the text keyed by section number.
    """
    with codecs.open(filename, 'r', 'utf-8') as datafile:
        jagged = util.file_to_ja(3, datafile, (m_pattern, comment_pattern), nothing)

        # The file has been consumed once already; rewind before scanning it for section names.
        datafile.seek(0)

        section_numbers = []
        for raw_name in util.grab_section_names(m_pattern, datafile, 1):
            section_numbers.append(int(util.getGematria(raw_name)))

    keyed_text = util.simple_to_complex(section_numbers, jagged.array())
    return util.convert_dict_to_array(keyed_text)
예제 #3
0
def produce_parsed_data(filename):
    """
    Parse a source file and lay its text out as an array indexed by the section numbers it declares.

    :param filename: Path of the utf-8 encoded file to parse.
    :return: Result of util.convert_dict_to_array applied to the text keyed by section number.
    """
    split_patterns = (m_pattern, comment_pattern)

    with codecs.open(filename, 'r', 'utf-8') as datafile:
        jagged = util.file_to_ja([[[]]], datafile, split_patterns, nothing)

        # Second pass over the same handle: go back to the start to collect the section names.
        datafile.seek(0)
        raw_names = util.grab_section_names(m_pattern, datafile, 1)

        section_numbers = []
        for raw_name in raw_names:
            section_numbers.append(int(util.getGematria(raw_name)))

    return util.convert_dict_to_array(
        util.simple_to_complex(section_numbers, jagged.array()))
예제 #4
0
def parse():
    """
    Parse every source file and arrange the text of each book by parsha.

    The books returned by library.get_indexes_in_category('Torah') are paired in order with the
    files yielded by filenames(). Each file is read into a jagged array, cleaned of markup, and
    then re-keyed with util.simple_to_complex using the parsha names node_names() holds for the book.
    :return: dict keyed by book name holding the re-keyed text of that book.
    """
    book_names = library.get_indexes_in_category('Torah')
    names = node_names()
    parsed = {}
    for book_name, filename in zip(book_names, filenames()):
        with codecs.open(filename, 'r', 'utf-8') as infile:
            current = util.file_to_ja(2, infile, [u'@88'],
                                      sefat_parse_helper).array()
            # The backslash is doubled so the regex still receives "\?" (a literal question mark)
            # without relying on the invalid string escape "\?", which newer interpreters warn about.
            parsed[book_name] = util.clean_jagged_array(
                current, [u'@[0-9]{2}', u'\\?'])
    for book in book_names:
        parashot = names[book].keys()
        parsed[book] = util.simple_to_complex(parashot, parsed[book])

    return parsed
예제 #5
0
def align_boaz_chapters(source_file, simple_array):
    """
    Pad a naive parse of Boaz so that chapters without text are still represented.

    Boaz is not guaranteed to have text for every chapter. The chapter markers found in the source
    are converted to numbers and used to key the parsed text, which is then converted back into an
    array with empty sections filling the gaps.
    :param source_file: File from which to derive chapter numbers
    :param simple_array: A "naive" parse of the data structured as a nested list.
    :return: Nested array, with proper padding to account for empty chapters.
    """
    chapter_pattern = u'@00פרק ([\u05d0-\u05ea]{1,2})'

    # Collect the number of every chapter that actually appears in the source.
    chapter_numbers = []
    for chapter_name in util.grab_section_names(chapter_pattern, source_file, 1):
        chapter_numbers.append(util.getGematria(chapter_name))

    return util.convert_dict_to_array(
        util.simple_to_complex(chapter_numbers, simple_array))