def build_chapters(file: str, verses: List[Verse], verse_translations: List[Translation]) -> List[Chapter]:
    """Build one Chapter per <sura> element of the Quran metadata XML.

    Args:
        file: Path of the XML file. Its root must contain a <suras> element
            whose <sura> children carry the attributes ``ayas``, ``start``,
            ``name``, ``tname``, ``ename``, ``type``, ``order`` and ``rukus``.
        verses: Flat list of every verse; each chapter receives the slice
            ``verses[start:start + ayas]``.
        verse_translations: Translation list attached (by reference, not
            copied) to every chapter.

    Returns:
        The chapters in document order, with sajda markers applied.
    """
    chapters: List[Chapter] = []
    quran = xml.etree.ElementTree.parse(file).getroot()
    suras = quran.find('suras')

    for s in suras.findall('sura'):
        meta = s.attrib
        ayas = int(meta['ayas'])
        start = int(meta['start'])

        sura = Chapter()
        sura.part_type = PartType.Chapter
        sura.titles = {
            Language.AR.value: meta['name'],
            Language.EN.value: meta['ename'],
            Language.ENT.value: meta['tname'],
        }
        sura.reveal_type = meta['type']
        sura.order = int(meta['order'])
        sura.rukus = int(meta['rukus'])
        sura.verses = verses[start:ayas + start]
        sura.verse_translations = verse_translations
        sura.default_verse_translation_ids = {
            "en": "en.qarai",
            "fa": "fa.makarem"
        }
        chapters.append(sura)

    # Sajda data is keyed by (sura, aya) pairs that are treated as 1-based
    # here; the marker is set on both the chapter and the individual verse.
    sajdas = get_sajda_data(quran)
    for (sura_index, aya_index), sajda_type in sajdas.items():
        sajda_chapter = chapters[sura_index - 1]
        sajda_chapter.sajda_type = sajda_type
        sajda_chapter.verses[aya_index - 1].sajda_type = sajda_type

    # TODO: group data (juzs, hizbs/quarters, manzils, rukus, pages) is not
    # attached to the chapters yet.
    return chapters
def build_volume(file, title_en: str, title_ar: str, description: str, last_volume: bool = False) -> Chapter:
    """Create a Volume-level Chapter and fill it with the books parsed from ``file``.

    ``file`` is handed unchanged to ``build_hubeali_book_8`` when
    ``last_volume`` is true and to ``build_hubeali_books`` otherwise.
    The description is stored as a one-element list under the English key.
    """
    build_books = build_hubeali_book_8 if last_volume else build_hubeali_books

    volume = Chapter()
    volume.titles = {
        Language.EN.value: title_en,
        Language.AR.value: title_ar,
    }
    volume.descriptions = {Language.EN.value: [description]}
    volume.chapters = build_books(file)
    volume.part_type = PartType.Volume
    return volume
def _css_classes(element) -> list:
    """Return the CSS classes of a BeautifulSoup tag, or [] when it has no ``class`` attribute."""
    return element.get('class') or []


def build_alhassanain_baabs(file) -> List[Chapter]:
    """Parse one HTML file into a list of Book-level chapters ("baabs").

    The file is split on ``<br clear=all>``. Every section that contains
    ``.Heading1Center`` elements contributes to the book whose English title
    matches those headings (creating the book on first sight). Inside a
    section the ``.Heading2Center`` elements are consumed two at a time as the
    titles of one chapter, and the siblings following the last heading of the
    pair are scanned up to the next ``Heading2Center`` tag: a ``libAr``
    element starts a new hadith, a ``libNormal`` element is appended to the
    current hadith's translation.

    Args:
        file: Path of the UTF-8 encoded HTML file.

    Returns:
        The books in order of first appearance.
    """
    baabs: List[Chapter] = []
    logger.info("Adding Al-Kafi file %s", file)

    translation = Translation()
    translation.name = "HubeAli.com"
    translation.lang = Language.EN.value
    translation.id = HUBEALI_TRANSLATION_ID

    with open(file, 'r', encoding='utf8') as qfile:
        inner_html = qfile.read()

    for section in inner_html.split("<br clear=all>"):
        section_soup = BeautifulSoup(section, 'html.parser')
        headings = section_soup.select(".Heading1Center")
        if not headings:
            continue

        # process "the book of" chapter
        baab_titles = extract_headings(headings)
        en_title = baab_titles[Language.EN.value]
        # Sections belonging to an already-seen book are merged into it.
        baab = next(
            (b for b in baabs if b.titles[Language.EN.value] == en_title),
            None)
        if baab is None:
            baab = Chapter()
            baab.part_type = PartType.Book
            baab.titles = baab_titles
            baab.chapters = []
            baabs.append(baab)

        # process chapters: headings come in pairs, a trailing odd heading
        # forms a chapter on its own
        chapters = section_soup.select(".Heading2Center")
        for pair_start in range(0, len(chapters), 2):
            heading_pair = chapters[pair_start:pair_start + 2]

            chapter = Chapter()
            chapter.part_type = PartType.Chapter
            chapter.titles = extract_headings(heading_pair)
            chapter.verse_translations = [translation]
            chapter.verses = []
            baab.chapters.append(chapter)

            node = heading_pair[-1].next_sibling
            verse = None
            while node is not None:
                node_is_tag = is_tag(node)
                if not node_is_tag and not isinstance(node, NavigableString):
                    break
                # Tags without a class attribute are tolerated instead of
                # raising KeyError.
                classes = _css_classes(node) if node_is_tag else []
                if 'Heading2Center' in classes:
                    break

                if 'libAr' in classes:
                    # push the last verse if it's not the start of the chapter
                    if verse is not None:
                        chapter.verses.append(verse)
                    verse = Verse()
                    verse.part_type = PartType.Hadith
                    verse.translations = {}
                    verse.translations[HUBEALI_TRANSLATION_ID] = []
                    verse.text = [node.get_text(strip=True)]

                if 'libNormal' in classes:
                    if verse is None:
                        # No Arabic paragraph has opened a hadith yet, so
                        # there is nothing to attach this text to.
                        logger.warning(
                            "Skipping translation paragraph found before any Arabic text in %s",
                            file)
                    else:
                        verse.translations[HUBEALI_TRANSLATION_ID].append(
                            node.get_text(strip=True))

                node = node.next_sibling

            if verse is not None:
                chapter.verses.append(verse)

    return baabs
def build_hubeali_book_8(dirname) -> List[Chapter]:
    """Parse the ``c*.xhtml`` files of a directory into a single Book.

    The files are processed in sorted name order. Arabic and English
    paragraphs are buffered in ``hadith_ar`` / ``hadith_en`` (the buffers
    survive file boundaries) and committed with ``add_hadith`` whenever a
    paragraph matching ``V8_HADITH_BEGINNING_PATTERN`` or the closing line of
    the book is met. A chapter-title paragraph only records the Arabic title;
    the Chapter itself is created when the next file's heading is read.

    Args:
        dirname: Directory prefix; it is concatenated directly with
            ``"c*.xhtml"``, so it must already end with a path separator.

    Returns:
        A one-element list holding the book.
    """
    logger.info("Adding Al-Kafi dir %s", dirname)

    # glob returns matches in arbitrary, filesystem-dependent order, but the
    # parsing below relies on reading the files in sequence.
    cfiles = sorted(glob.glob(dirname + "c*.xhtml"))

    book = Chapter()
    book.part_type = PartType.Book
    book.titles = {}
    # Arabic title comes from previous file
    book.titles[Language.AR.value] = "كتاب الرَّوْضَةِ"
    book.titles[Language.EN.value] = "The Book - Garden (of Flowers)"
    book.chapters = []

    # Inline style attributes rewritten to class attributes. The combined
    # italic+bold form must be replaced first.
    style_to_class = (
        ('style="font-style: italic; font-weight: bold"', 'class="ibTxt"'),
        ('style="font-weight: bold"', 'class="bTxt"'),
        ('style="font-style: italic"', 'class="iTxt"'),
    )

    is_the_end = False
    # The first hadith number seen is expected to be previous_hadith_num + 1.
    previous_hadith_num = 14449
    chapter = None
    chapter_title_ar = None
    hadith_ar = []
    hadith_en = []

    for cfile in cfiles:
        if is_the_end:
            break

        logger.info("Processing file %s", cfile)
        with open(cfile, 'r', encoding='utf8') as qfile:
            file_html = qfile.read()
        file_html = file_correction(cfile, file_html)
        soup = BeautifulSoup(file_html, 'html.parser')

        heading = soup.body.h1
        if we_dont_care(heading):
            continue
        if table_of_contents(heading):
            hadith_ar.append(get_contents(soup.body.contents[-2]))
            continue

        heading_en = get_contents(heading.a)
        is_hadith_title = V8_HADITH_TITLE_PATTERN.match(heading_en)
        # sometimes the anchor is early terminated
        if not heading_en or is_hadith_title:
            heading_en = get_contents(heading)

        if chapter_title_ar or not chapter:
            chapter = Chapter()
            chapter.part_type = PartType.Chapter
            chapter.titles = {}
            if chapter_title_ar:
                chapter.titles[Language.AR.value] = chapter_title_ar
            else:
                chapter.titles[
                    Language.AR.value] = "بِسْمِ اللَّهِ الرَّحْمَنِ الرَّحِيمِ"
            if heading_en:
                chapter.titles[Language.EN.value] = heading_en
            else:
                chapter.titles[
                    Language.EN.value] = "In the name of Allah, the Beneficent, the Merciful"
            chapter_title_ar = None
            chapter.verses = []
            chapter.verse_translations = [hubbeali_translation]
            book.chapters.append(chapter)
        elif is_hadith_title:
            hadith_en.append(heading_en)

        last_element = soup.find('p', 'first-in-chapter')
        while last_element:
            if is_newline(last_element):
                last_element = last_element.next_sibling
                continue

            is_a_tag = is_tag(last_element)
            is_paragraph = is_a_tag and last_element.name == 'p'
            is_not_section_break_paragraph = (
                is_paragraph and not is_section_break_tag(last_element))
            is_arabic = is_rtl_tag(last_element)

            element_content = get_contents(last_element)
            for style_attr, class_attr in style_to_class:
                element_content = element_content.replace(
                    style_attr, class_attr)

            is_new_hadith = V8_HADITH_BEGINNING_PATTERN.match(
                last_element.get_text(strip=True))
            # NOTE(review): this flag is recomputed for every element, so the
            # outer loop only stops early when the closing line is the last
            # non-newline element of its file - confirm that is intended.
            is_the_end = element_content.startswith(
                "تَمَّ كِتَابُ الرَّوْضَةِ مِنَ")

            # We commit the hadith that has been building up until now if we
            # encounter a new hadith beginning
            if (is_new_hadith or is_the_end) and hadith_ar and hadith_en:
                add_hadith(chapter, hadith_ar, hadith_en)
                hadith_ar = []
                hadith_en = []

            if is_new_hadith:
                hadith_num = int(is_new_hadith.group(1))
                if previous_hadith_num + 1 != hadith_num:
                    logger.warning("Skipped one hadith %s to %s title: %s",
                                   previous_hadith_num, hadith_num,
                                   element_content)
                previous_hadith_num = hadith_num

            if is_chapter_title(last_element):
                if hadith_ar and hadith_en:
                    add_hadith(chapter, hadith_ar, hadith_en)
                    hadith_ar = []
                    hadith_en = []
                chapter_title_ar = element_content
            elif is_arabic:
                hadith_ar.append(element_content)
            elif is_not_section_break_paragraph:
                hadith_en.append(element_content)

            if is_the_end:
                # The closing line itself is stored as a heading.
                add_hadith(chapter, hadith_ar, hadith_en, PartType.Heading)

            last_element = last_element.next_sibling

    return [book]
def build_hubeali_books(dirname) -> List[Chapter]:
    """Parse the ``c*.xhtml`` files of a directory into Book-level chapters.

    The files are processed in sorted name order, and state is carried from
    one file to the next: a book-title or chapter-title paragraph only
    records the Arabic title, and the Book / Chapter object is created when
    the following file's heading (the English title) is read. Paragraphs are
    buffered in ``hadith_ar`` / ``hadith_en`` and committed with
    ``add_hadith`` when a paragraph matches ``END_OF_HADITH_PATTERN``.

    Args:
        dirname: Directory prefix; it is concatenated directly with
            ``"c*.xhtml"``, so it must already end with a path separator.

    Returns:
        The books in the order they were encountered.
    """
    books: List[Chapter] = []
    logger.info("Adding Al-Kafi dir %s", dirname)

    # glob returns matches in arbitrary, filesystem-dependent order, but the
    # parsing below relies on reading the files in sequence.
    cfiles = sorted(glob.glob(dirname + "c*.xhtml"))

    # Inline style attributes rewritten to class attributes. The combined
    # italic+bold form must be replaced first.
    style_to_class = (
        ('style="font-style: italic; font-weight: bold"', 'class="ibTxt"'),
        ('style="font-weight: bold"', 'class="bTxt"'),
        ('style="font-style: italic"', 'class="iTxt"'),
    )

    book = None
    chapter = None
    book_title_ar = None
    chapter_title_ar = None
    hadith_ar = []
    hadith_en = []

    for cfile in cfiles:
        logger.info("Processing file %s", cfile)
        with open(cfile, 'r', encoding='utf8') as qfile:
            file_html = qfile.read()
        file_html = file_correction(cfile, file_html)
        soup = BeautifulSoup(file_html, 'html.parser')

        heading = soup.body.h1
        if we_dont_care(heading):
            continue
        if table_of_contents(heading):
            book_title_ar = get_contents(soup.body.contents[-2])
            continue

        heading_en = get_contents(heading.a)
        # sometimes the anchor is early terminated
        if not heading_en:
            heading_en = get_contents(heading)

        if book_title_ar:
            book = Chapter()
            book.part_type = PartType.Book
            book.titles = {}
            # Arabic title comes from previous file
            book.titles[Language.AR.value] = book_title_ar
            book.titles[Language.EN.value] = heading_en
            book_title_ar = None
            book.chapters = []
            books.append(book)
        elif (chapter_title_ar
              or not chapter) and heading_en.startswith('Chapter'):
            chapter = Chapter()
            chapter.part_type = PartType.Chapter
            chapter.titles = {}
            # May be None when the chapter is created only because no
            # chapter existed yet.
            chapter.titles[Language.AR.value] = chapter_title_ar
            chapter.titles[Language.EN.value] = heading_en
            chapter_title_ar = None
            chapter.verse_translations = [hubbeali_translation]
            chapter.verses = []
            book.chapters.append(chapter)
        elif chapter_title_ar:
            # A pending chapter title whose English heading does not start
            # with "Chapter" is stored as a heading of the current chapter.
            add_hadith(chapter, [chapter_title_ar], [heading_en],
                       PartType.Heading)
            chapter_title_ar = None

        last_element = soup.find('p', 'first-in-chapter')
        while last_element:
            if is_newline(last_element):
                last_element = last_element.next_sibling
                continue

            is_a_tag = is_tag(last_element)
            is_paragraph = is_a_tag and last_element.name == 'p'
            is_not_section_break_paragraph = (
                is_paragraph and not is_section_break_tag(last_element))
            is_arabic = is_rtl_tag(last_element)

            element_content = get_contents(last_element)
            for style_attr, class_attr in style_to_class:
                element_content = element_content.replace(
                    style_attr, class_attr)

            is_end_of_hadith = END_OF_HADITH_PATTERN.search(element_content)

            if is_book_title(last_element):
                if hadith_ar and hadith_en:
                    add_hadith(chapter, hadith_ar, hadith_en,
                               PartType.Heading)
                    hadith_ar = []
                    hadith_en = []

                # hubeali put chapters on ziyarat as separate book but we're
                # going to stay true to the indexing on other copies out
                # there and have the chapters under this as part of book of
                # Hajj. The separator is normalised so the check also matches
                # on platforms that use "/" in paths.
                if not cfile.replace('\\', '/').endswith(
                        'Al-Kafi-Volume-4/c346.xhtml'):
                    book_title_ar = element_content
                    chapter = None
            elif is_chapter_title(last_element):
                if hadith_ar and hadith_en:
                    if chapter:
                        add_hadith(chapter, hadith_ar, hadith_en)
                    else:
                        # Text collected before the first chapter of a book
                        # becomes the book's description.
                        book.descriptions = {}
                        book.descriptions[Language.AR.value] = hadith_ar
                        book.descriptions[Language.EN.value] = hadith_en
                    hadith_ar = []
                    hadith_en = []
                chapter_title_ar = element_content
            elif is_arabic:
                hadith_ar.append(element_content)
            elif is_not_section_break_paragraph:
                hadith_en.append(element_content)

            if is_end_of_hadith:
                add_hadith(chapter, hadith_ar, hadith_en)
                hadith_ar = []
                hadith_en = []

            last_element = last_element.next_sibling

    return books
def build_quran() -> Chapter:
    """Assemble the Book-level Chapter for the Quran.

    Verses are read from the simple-text file, every translation listed
    below is inserted in order via ``insert_quran_translation``, the chapters
    are built from the metadata XML, and finally the root chapter gets its
    crumb and is indexed with ``set_index``.
    """
    verses = build_verses(get_path("tanzil_net/quran_simple.txt"))
    verse_translations = []

    # (file, id, language, translator name, reference URL) - in insertion order
    translation_sources = (
        ("tanzil_net/translations/fa.ansarian.txt", "ansarian", "fa",
         "Hussain Ansarian",
         "https://fa.wikipedia.org/wiki/%D8%AD%D8%B3%DB%8C%D9%86_%D8%A7%D9%86%D8%B5%D8%A7%D8%B1%DB%8C%D8%A7%D9%86"),
        ("tanzil_net/translations/fa.ayati.txt", "ayati", "fa",
         "AbdolMohammad Ayati",
         "https://fa.wikipedia.org/wiki/%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%85%D8%AD%D9%85%D8%AF_%D8%A2%DB%8C%D8%AA%DB%8C"),
        ("tanzil_net/translations/fa.bahrampour.txt", "bahrampour", "fa",
         "Abolfazl Bahrampour",
         "https://fa.wikipedia.org/wiki/%D8%A7%D8%A8%D9%88%D8%A7%D9%84%D9%81%D8%B6%D9%84_%D8%A8%D9%87%D8%B1%D8%A7%D9%85%E2%80%8C%D9%BE%D9%88%D8%B1"),
        ("tanzil_net/translations/fa.fooladvand.txt", "fooladvand", "fa",
         "Mohammad Mahdi Fooladvand",
         "https://fa.wikipedia.org/wiki/%D9%85%D8%AD%D9%85%D8%AF%D9%85%D9%87%D8%AF%DB%8C_%D9%81%D9%88%D9%84%D8%A7%D8%AF%D9%88%D9%86%D8%AF"),
        ("tanzil_net/translations/fa.ghomshei.txt", "ghomshei", "fa",
         "Mahdi Elahi Ghomshei",
         "https://fa.wikipedia.org/wiki/%D9%85%D9%87%D8%AF%DB%8C_%D8%A7%D9%84%D9%87%DB%8C_%D9%82%D9%85%D8%B4%D9%87%E2%80%8C%D8%A7%DB%8C"),
        ("tanzil_net/translations/fa.khorramdel.txt", "khorramdel", "fa",
         "Mostafa Khorramdel",
         "https://rasekhoon.net/mashahir/Show-904328.aspx"),
        ("tanzil_net/translations/fa.khorramshahi.txt", "khorramshahi", "fa",
         "Baha'oddin Khorramshahi",
         "https://fa.wikipedia.org/wiki/%D8%A8%D9%87%D8%A7%D8%A1%D8%A7%D9%84%D8%AF%DB%8C%D9%86_%D8%AE%D8%B1%D9%85%D8%B4%D8%A7%D9%87%DB%8C"),
        ("tanzil_net/translations/fa.makarem.txt", "makarem", "fa",
         "Naser Makarem Shirazi",
         "https://en.wikipedia.org/wiki/Naser_Makarem_Shirazi"),
        ("tanzil_net/translations/fa.moezzi.txt", "moezzi", "fa",
         "Mohammad Kazem Moezzi", ""),
        ("tanzil_net/translations/fa.mojtabavi.txt", "mojtabavi", "fa",
         "Sayyed Jalaloddin Mojtabavi",
         "http://rasekhoon.net/mashahir/Show-118481.aspx"),
        ("tanzil_net/translations/fa.sadeqi.txt", "sadeqi", "fa",
         "Mohammad Sadeqi Tehrani",
         "https://fa.wikipedia.org/wiki/%D9%85%D8%AD%D9%85%D8%AF_%D8%B5%D8%A7%D8%AF%D9%82%DB%8C_%D8%AA%D9%87%D8%B1%D8%A7%D9%86%DB%8C"),
        ("tanzil_net/translations/en.ahmedali.txt", "ahmedali", "en",
         "Ahmed Ali", "https://en.wikipedia.org/wiki/Ahmed_Ali_(writer)"),
        ("tanzil_net/translations/en.ahmedraza.txt", "ahmedraza", "en",
         "Ahmed Raza Khan",
         "https://en.wikipedia.org/wiki/Ahmed_Raza_Khan_Barelvi"),
        ("tanzil_net/translations/en.arberry.txt", "arberry", "en",
         "A. J. Arberry",
         "https://en.wikipedia.org/wiki/Arthur_John_Arberry"),
        ("tanzil_net/translations/en.daryabadi.txt", "daryabadi", "en",
         "Abdul Majid Daryabadi",
         "https://en.wikipedia.org/wiki/Abdul_Majid_Daryabadi"),
        ("tanzil_net/translations/en.hilali.txt", "hilali", "en",
         "Muhammad Taqi-ud-Din al-Hilali and Muhammad Muhsin Khan",
         "https://en.wikipedia.org/wiki/Noble_Quran_(Hilali-Khan)"),
        ("tanzil_net/translations/en.itani.txt", "itani", "en",
         "Talal Itani", ""),
        ("tanzil_net/translations/en.maududi.txt", "maududi", "en",
         "Abul Ala Maududi",
         "https://en.wikipedia.org/wiki/Abul_A%27la_Maududi"),
        ("tanzil_net/translations/en.mubarakpuri.txt", "mubarakpuri", "en",
         "Safi-ur-Rahman al-Mubarakpuri",
         "https://en.wikipedia.org/wiki/Safiur_Rahman_Mubarakpuri"),
        ("tanzil_net/translations/en.pickthall.txt", "pickthall", "en",
         "Mohammed Marmaduke William Pickthall",
         "https://en.wikipedia.org/wiki/Marmaduke_Pickthall"),
        ("tanzil_net/translations/en.qarai.txt", "qarai", "en",
         "Ali Quli Qarai", ""),
        ("tanzil_net/translations/en.qaribullah.txt", "qaribullah", "en",
         "Hasan al-Fatih Qaribullah and Ahmad Darwish", ""),
        ("tanzil_net/translations/en.sahih.txt", "sahih", "en",
         "Saheeh International", "http://www.saheehinternational.com/"),
        ("tanzil_net/translations/en.sarwar.txt", "sarwar", "en",
         "Muhammad Sarwar",
         "https://en.wikipedia.org/wiki/Shaykh_Muhammad_Sarwar"),
        ("tanzil_net/translations/en.shakir.txt", "shakir", "en",
         "Mohammad Habib Shakir",
         "https://en.wikipedia.org/wiki/Muhammad_Habib_Shakir"),
        ("tanzil_net/translations/en.transliteration.txt", "transliteration",
         "en", "English Transliteration", ""),
        ("tanzil_net/translations/en.wahiduddin.txt", "wahiduddin", "en",
         "Wahiduddin Khan",
         "https://en.wikipedia.org/wiki/Wahiduddin_Khan"),
        ("tanzil_net/translations/en.yusufali.txt", "yusufali", "en",
         "Abdullah Yusuf Ali",
         "https://en.wikipedia.org/wiki/Abdullah_Yusuf_Ali"),
    )
    for relative_path, key, lang, translator, url in translation_sources:
        insert_quran_translation(verses, verse_translations,
                                 get_path(relative_path), key, lang,
                                 translator, url)

    chapters = build_chapters(get_path("tanzil_net/quran-data.xml"), verses,
                              verse_translations)

    q = Chapter()
    q.index = BOOK_INDEX
    q.path = BOOK_PATH
    q.verse_start_index = 0
    q.part_type = PartType.Book
    q.titles = {
        Language.EN.value: "The Holy Quran",
        Language.AR.value: "القرآن الكريم"
    }
    q.descriptions = {Language.EN.value: ["Was revealed to the prophet SAW"]}
    q.chapters = chapters
    q.verse_translations = verse_translations
    q.default_verse_translation_ids = {"en": "en.qarai", "fa": "fa.makarem"}

    # The crumb shares the very same titles dict object as the book.
    crumb = Crumb()
    crumb.titles = q.titles
    crumb.indexed_titles = q.titles
    crumb.path = q.path
    q.crumbs = [crumb]

    set_index(q, [0, 0], 0)
    return q