def produce_parsed_data(filename):
    """
    Parse a UTF-8 source file into a nested array indexed by section number.

    The file is scanned twice: first to build the initial parse with
    util.file_to_ja, then (after rewinding) to collect the section names
    matched by m_pattern. Each name is run through util.getGematria and cast
    to int, and those numbers are handed to util.simple_to_complex together
    with the parsed array.

    :param filename: Path of the file to parse; opened as UTF-8 text.
    :return: The result of util.convert_dict_to_array applied to the
        number-keyed structure (presumably padded for missing sections -
        confirm against util).
    """
    with codecs.open(filename, 'r', 'utf-8') as infile:
        # First pass: build the initial parse of the file.
        # NOTE(review): the leading 3 looks like a depth argument - confirm
        # against the util.file_to_ja signature in use.
        jagged = util.file_to_ja(3, infile, (m_pattern, comment_pattern), nothing)

        # Second pass needs the file from the top again.
        infile.seek(0)
        section_numbers = [
            int(util.getGematria(title))
            for title in util.grab_section_names(m_pattern, infile, 1)
        ]

        keyed_text = util.simple_to_complex(section_numbers, jagged.array())
        return util.convert_dict_to_array(keyed_text)
def produce_parsed_data(filename):
    """
    Parse a UTF-8 source file into a nested array indexed by section number.

    The file is scanned twice: first to build the initial parse with
    util.file_to_ja, then (after rewinding) to collect the section names
    matched by m_pattern. Each name is run through util.getGematria and cast
    to int, and those numbers are handed to util.simple_to_complex together
    with the parsed array.

    :param filename: Path of the file to parse; opened as UTF-8 text.
    :return: The result of util.convert_dict_to_array applied to the
        number-keyed structure (presumably padded for missing sections -
        confirm against util).
    """
    with codecs.open(filename, 'r', 'utf-8') as infile:
        # First pass: build the initial parse of the file.
        # NOTE(review): [[[]]] looks like a nesting template for the parse -
        # confirm against the util.file_to_ja signature in use.
        jagged = util.file_to_ja([[[]]], infile, (m_pattern, comment_pattern), nothing)

        # Second pass needs the file from the top again.
        infile.seek(0)
        section_numbers = [
            int(util.getGematria(title))
            for title in util.grab_section_names(m_pattern, infile, 1)
        ]

        keyed_text = util.simple_to_complex(section_numbers, jagged.array())
        return util.convert_dict_to_array(keyed_text)
def align_boaz_chapters(source_file, simple_array):
    """
    Line up a naive parse of Boaz with its real chapter numbers.

    Boaz may skip chapters entirely, so a straight parse cannot be indexed by
    chapter. The chapter headings found in the source file are converted to
    numbers and used, via the util library, to pad the parse with empty
    sections wherever a chapter has no text.

    :param source_file: File from which the chapter headings are read.
    :param simple_array: The "naive" parse of the data, as a nested list.
    :return: Nested array padded so that empty chapters are accounted for.
    """
    # Capture group 1 of the heading pattern holds the chapter label.
    chapter_labels = util.grab_section_names(
        u'@00פרק ([\u05d0-\u05ea]{1,2})', source_file, 1)
    chapter_numbers = [util.getGematria(label) for label in chapter_labels]

    keyed_by_chapter = util.simple_to_complex(chapter_numbers, simple_array)
    return util.convert_dict_to_array(keyed_by_chapter)