コード例 #1
0
def build_volume(file,
                 title_en: str,
                 title_ar: str,
                 description: str,
                 last_volume: bool = False) -> Chapter:
    """Assemble the ``Chapter`` node that represents one volume.

    Args:
        file: Location passed through unchanged to the book builder.
        title_en: English title of the volume.
        title_ar: Arabic title of the volume.
        description: English description; stored as a one-item list.
        last_volume: When true the books are produced by
            ``build_hubeali_book_8``, otherwise by ``build_hubeali_books``.

    Returns:
        The populated ``Chapter`` with ``part_type`` set to
        ``PartType.Volume``.
    """
    node = Chapter()
    node.titles = {
        Language.EN.value: title_en,
        Language.AR.value: title_ar,
    }
    node.descriptions = {Language.EN.value: [description]}
    # Only the selected builder name is looked up, exactly like an if/else.
    book_builder = build_hubeali_book_8 if last_volume else build_hubeali_books
    node.chapters = book_builder(file)
    node.part_type = PartType.Volume
    return node
コード例 #2
0
def build_kafi() -> Chapter:
    """Build the root ``Chapter`` for Al-Kafi together with its eight volumes.

    Each volume is produced by ``build_volume`` from a directory resolved
    through ``get_path``; only the eighth volume is flagged as the last one.
    After the volumes are attached, a single breadcrumb is created from the
    book's own titles and path, and ``set_index`` is run over the tree.

    Returns:
        The fully populated and indexed root ``Chapter``.
    """
    # (directory, English title, Arabic title, description, is last volume).
    # NOTE(review): the directory strings use Windows separators; they are
    # kept verbatim so the values handed to get_path do not change.
    volume_specs = [
        ("hubeali_com\\Al-Kafi-Volume-1\\", "Volume One", "الجزء الأول‏",
         "First volume of Al-Kafi", False),
        ("hubeali_com\\Al-Kafi-Volume-2\\", "Volume Two", "الجزء الثاني‏",
         "Second volume of Al-Kafi", False),
        ("hubeali_com\\Al-Kafi-Volume-3\\", "Volume Three", "الجزء الثالث‏",
         "Third volume of Al-Kafi", False),
        ("hubeali_com\\Al-Kafi-Volume-4\\", "Volume Four", "الجزء الرابع‏",
         "Fourth volume of Al-Kafi", False),
        ("hubeali_com\\Al-Kafi-Volume-5\\", "Volume Five", "الجزء الخامس‏",
         "Fifth volume of Al-Kafi", False),
        ("hubeali_com\\Al-Kafi-Volume-6\\", "Volume Six", "الجزء السادس‏",
         "Sixth volume of Al-Kafi", False),
        ("hubeali_com\\Al-Kafi-Volume-7\\", "Volume Seven", "الجزء السابع‏",
         "Seventh volume of Al-Kafi", False),
        ("hubeali_com\\Al-Kafi-Volume-8\\", "Volume Eight", "الجزء الثامن‏",
         "Eighth volume of Al-Kafi", True),
    ]

    kafi = Chapter()
    kafi.index = BOOK_INDEX
    kafi.path = BOOK_PATH
    kafi.verse_start_index = 0
    kafi.titles = {Language.EN.value: "Al-Kafi", Language.AR.value: "الكافي"}
    kafi.descriptions = {
        Language.EN.value: [
            "Of the majestic narrator and the scholar, the jurist, the Sheykh Muhammad Bin Yaqoub Al-Kulayni Well known as ‘The trustworthy of Al-Islam Al-Kulayni’ Who died in the year 329 H"
        ]
    }

    # Volumes are built strictly in table order.
    kafi.chapters = [
        build_volume(get_path(directory), title_en, title_ar, description,
                     is_last)
        for directory, title_en, title_ar, description, is_last in volume_specs
    ]

    # The root breadcrumb reuses the book's own titles and path.
    crumb = Crumb()
    crumb.titles = kafi.titles
    crumb.indexed_titles = kafi.titles
    crumb.path = kafi.path
    kafi.crumbs = [crumb]

    set_index(kafi, [0, 0, 0, 0], 0)

    return kafi
コード例 #3
0
def build_alhassanain_baabs(file) -> List[Chapter]:
    """Parse one HTML file into a list of book-level ``Chapter`` objects.

    The file content is split on ``<br clear=all>``. Every section that
    contains at least one ``.Heading1Center`` element contributes to a book
    ("baab"); a section whose English book title equals that of a book built
    earlier is merged into that book. Inside a section the
    ``.Heading2Center`` elements are consumed two at a time as the titles of
    one chapter, and the siblings following the last heading of the pair are
    scanned up to the next ``Heading2Center`` tag: a ``libAr`` tag opens a
    new hadith with its text, a ``libNormal`` tag adds one translation
    paragraph to the hadith currently being built.

    Args:
        file: Path of the HTML file to read (opened as UTF-8).

    Returns:
        The books in order of first appearance.
    """
    baabs: List[Chapter] = []
    logger.info("Adding Al-Kafi file %s", file)

    translation = Translation()
    translation.name = "HubeAli.com"
    translation.lang = Language.EN.value
    translation.id = HUBEALI_TRANSLATION_ID

    # Read everything first so the file handle is released before parsing.
    with open(file, 'r', encoding='utf8') as qfile:
        inner_html = qfile.read()

    for section in inner_html.split("<br clear=all>"):
        section_soup = BeautifulSoup(section, 'html.parser')

        headings = section_soup.select(".Heading1Center")
        if not headings:
            continue

        # process "the book of" chapter
        baab_titles = extract_headings(headings)
        en_title = baab_titles[Language.EN.value]

        # English titles are unique among the books built so far, because a
        # book is only created when no existing one carries the same title.
        baab = next((existing for existing in baabs
                     if existing.titles[Language.EN.value] == en_title), None)
        if baab is None:
            baab = Chapter()
            baab.part_type = PartType.Book
            baab.titles = baab_titles
            baab.chapters = []
            baabs.append(baab)

        # process chapters: headings are taken in pairs, and an odd trailing
        # heading forms a chapter on its own
        chapter_headings = section_soup.select(".Heading2Center")
        for start in range(0, len(chapter_headings), 2):
            heading_pair = chapter_headings[start:start + 2]

            chapter = Chapter()
            chapter.part_type = PartType.Chapter
            chapter.titles = extract_headings(heading_pair)
            chapter.verse_translations = [translation]
            chapter.verses = []
            baab.chapters.append(chapter)

            verse = None
            element = heading_pair[-1].next_sibling
            while element is not None:
                if isinstance(element, NavigableString):
                    # Bare text between tags carries no hadith content.
                    element = element.next_sibling
                    continue
                if not is_tag(element):
                    break

                # A tag without a class attribute must not abort the scan
                # with a KeyError; treat it as having no classes.
                classes = element.get('class') or []
                if 'Heading2Center' in classes:
                    break

                if 'libAr' in classes:
                    # push the last verse if it's not the start of chapter
                    if verse is not None:
                        chapter.verses.append(verse)

                    verse = Verse()
                    verse.part_type = PartType.Hadith
                    verse.translations = {}
                    verse.translations[HUBEALI_TRANSLATION_ID] = []
                    verse.text = [element.get_text(strip=True)]

                if 'libNormal' in classes:
                    if verse is None:
                        # Translation text before any Arabic text has no
                        # hadith to attach to.
                        logger.warning(
                            "Skipping translation paragraph that precedes "
                            "any Arabic text in %s", file)
                    else:
                        verse.translations[HUBEALI_TRANSLATION_ID].append(
                            element.get_text(strip=True))

                element = element.next_sibling

            if verse is not None:
                chapter.verses.append(verse)

    return baabs
コード例 #4
0
def build_hubeali_book_8(dirname) -> List[Chapter]:
    """Build the single book of the last volume from its ``c*.xhtml`` files.

    Every matching file is corrected with ``file_correction`` and parsed.
    Files whose heading is rejected by ``we_dont_care`` are skipped; for a
    table-of-contents file only the second-to-last child of ``<body>`` is
    kept as pending Arabic text. For the remaining files the paragraphs from
    the first ``p.first-in-chapter`` onwards are classified as chapter title,
    Arabic text or English text, and the accumulated text is committed with
    ``add_hadith`` whenever a new hadith begins, a chapter title appears or
    the closing phrase of the book is reached.

    Args:
        dirname: Directory prefix; ``"c*.xhtml"`` is appended directly, so
            it has to end with a path separator.

    Returns:
        A one-element list holding the book.
    """
    logger.info("Adding Al-Kafi dir %s", dirname)

    # Titles and hadith text are carried over from one file to the next, so
    # the files must be visited in a stable order; glob.glob itself gives no
    # ordering guarantee.
    cfiles = sorted(glob.glob(dirname + "c*.xhtml"))

    book = Chapter()
    book.part_type = PartType.Book
    book.titles = {}
    # Arabic title comes from previous file
    book.titles[Language.AR.value] = (
        "&#1603;&#1578;&#1575;&#1576; &#1575;&#1604;&#1585;&#1617;&#1614;&#1608;&#1618;&#1590;&#1614;&#1577;&#1616;"
    )
    book.titles[Language.EN.value] = "The Book - Garden (of Flowers)"
    book.chapters = []

    is_the_end = False
    # NOTE(review): presumably the number of the last hadith before this
    # volume - TODO confirm.
    previous_hadith_num = 14449
    chapter = None
    chapter_title_ar = None
    hadith_ar = []
    hadith_en = []
    for cfile in cfiles:
        # NOTE(review): is_the_end is recomputed for every element below, so
        # this only stops the loop when the last element processed in the
        # previous file was the closing phrase - confirm this is intended.
        if is_the_end:
            break

        logger.info("Processing file %s", cfile)

        with open(cfile, 'r', encoding='utf8') as qfile:
            file_html = qfile.read()
            file_html = file_correction(cfile, file_html)
            soup = BeautifulSoup(file_html, 'html.parser')

            heading = soup.body.h1
            if we_dont_care(heading):
                continue

            if table_of_contents(heading):
                hadith_ar.append(get_contents(soup.body.contents[-2]))
                continue

            heading_en = get_contents(heading.a)
            is_hadith_title = V8_HADITH_TITLE_PATTERN.match(heading_en)
            # sometimes the anchor is early terminated
            if not heading_en or is_hadith_title:
                heading_en = get_contents(heading)

            if chapter_title_ar or not chapter:
                # A pending Arabic chapter title (or the very first file)
                # starts a new chapter; missing titles fall back to the
                # basmala in both languages.
                chapter = Chapter()
                chapter.part_type = PartType.Chapter
                chapter.titles = {}
                if chapter_title_ar:
                    chapter.titles[Language.AR.value] = chapter_title_ar
                else:
                    chapter.titles[Language.AR.value] = (
                        "&#1576;&#1616;&#1587;&#1618;&#1605;&#1616; &#1575;&#1604;&#1604;&#1617;&#1614;&#1607;&#1616; &#1575;&#1604;&#1585;&#1617;&#1614;&#1581;&#1618;&#1605;&#1614;&#1606;&#1616; &#1575;&#1604;&#1585;&#1617;&#1614;&#1581;&#1616;&#1610;&#1605;&#1616;"
                    )
                if heading_en:
                    chapter.titles[Language.EN.value] = heading_en
                else:
                    chapter.titles[Language.EN.value] = (
                        "In the name of Allah, the Beneficent, the Merciful")
                chapter_title_ar = None
                chapter.verses = []
                chapter.verse_translations = [hubbeali_translation]

                book.chapters.append(chapter)
            elif is_hadith_title:
                # The heading itself is the English title of the hadith.
                hadith_en.append(heading_en)

            last_element = soup.find('p', 'first-in-chapter')

            while last_element:
                if is_newline(last_element):
                    last_element = last_element.next_sibling
                    continue

                is_a_tag = is_tag(last_element)
                is_paragraph = is_a_tag and last_element.name == 'p'
                is_not_section_break_paragraph = (
                    is_paragraph and not is_section_break_tag(last_element))
                is_arabic = is_rtl_tag(last_element)

                # Inline style attributes are rewritten to class attributes.
                element_content = get_contents(last_element)
                element_content = element_content.replace(
                    'style="font-style: italic; font-weight: bold"',
                    'class="ibTxt"')
                element_content = element_content.replace(
                    'style="font-weight: bold"', 'class="bTxt"')
                element_content = element_content.replace(
                    'style="font-style: italic"', 'class="iTxt"')

                is_new_hadith = V8_HADITH_BEGINNING_PATTERN.match(
                    last_element.get_text(strip=True))
                is_the_end = element_content.startswith(
                    "&#1578;&#1614;&#1605;&#1617;&#1614; &#1603;&#1616;&#1578;&#1614;&#1575;&#1576;&#1615; &#1575;&#1604;&#1585;&#1617;&#1614;&#1608;&#1618;&#1590;&#1614;&#1577;&#1616; &#1605;&#1616;&#1606;&#1614;"
                )

                # We commit the hadith that has been building up until now if we encounter a new hadith beginning
                if (is_new_hadith or is_the_end) and hadith_ar and hadith_en:
                    add_hadith(chapter, hadith_ar, hadith_en)
                    hadith_ar = []
                    hadith_en = []

                if is_new_hadith:
                    hadith_num = int(is_new_hadith.group(1))
                    # Report gaps in the running hadith numbering.
                    if previous_hadith_num + 1 != hadith_num:
                        logger.warning(
                            "Skipped one hadith %s to %s title: %s",
                            previous_hadith_num, hadith_num, element_content)
                    previous_hadith_num = hadith_num

                if is_chapter_title(last_element):
                    if hadith_ar and hadith_en:
                        add_hadith(chapter, hadith_ar, hadith_en)
                        hadith_ar = []
                        hadith_en = []

                    # Remembered until the next file creates the chapter.
                    chapter_title_ar = element_content
                elif is_arabic:
                    hadith_ar.append(element_content)
                elif is_not_section_break_paragraph:
                    hadith_en.append(element_content)
                    if is_the_end:
                        # The closing phrase is stored as a heading entry.
                        add_hadith(chapter, hadith_ar, hadith_en,
                                   PartType.Heading)

                last_element = last_element.next_sibling

    return [book]
コード例 #5
0
def build_hubeali_books(dirname) -> List[Chapter]:
    """Build the books of one volume from its ``c*.xhtml`` files.

    Every matching file is corrected with ``file_correction`` and parsed.
    Files whose heading is rejected by ``we_dont_care`` are skipped; a
    table-of-contents file only supplies the pending Arabic book title. For
    the remaining files the heading opens a new book (when an Arabic book
    title is pending), a new chapter (when the heading starts with
    ``"Chapter"``), or is stored as a heading entry. The paragraphs from the
    first ``p.first-in-chapter`` onwards are then classified as book title,
    chapter title, Arabic text or English text and committed with
    ``add_hadith``.

    Args:
        dirname: Directory prefix; ``"c*.xhtml"`` is appended directly, so
            it has to end with a path separator.

    Returns:
        The books in the order they were created.
    """
    books: List[Chapter] = []
    logger.info("Adding Al-Kafi dir %s", dirname)

    # Titles and hadith text are carried over from one file to the next, so
    # the files must be visited in a stable order; glob.glob itself gives no
    # ordering guarantee.
    cfiles = sorted(glob.glob(dirname + "c*.xhtml"))

    book = None
    chapter = None
    book_title_ar = None
    chapter_title_ar = None
    hadith_ar = []
    hadith_en = []
    for cfile in cfiles:
        logger.info("Processing file %s", cfile)

        with open(cfile, 'r', encoding='utf8') as qfile:
            file_html = qfile.read()
            file_html = file_correction(cfile, file_html)
            soup = BeautifulSoup(file_html, 'html.parser')

            heading = soup.body.h1
            if we_dont_care(heading):
                continue

            if table_of_contents(heading):
                book_title_ar = get_contents(soup.body.contents[-2])
                continue

            heading_en = get_contents(heading.a)
            # sometimes the anchor is early terminated
            if not heading_en:
                heading_en = get_contents(heading)

            if book_title_ar:
                book = Chapter()
                book.part_type = PartType.Book
                book.titles = {}
                # Arabic title comes from previous file
                book.titles[Language.AR.value] = book_title_ar
                book.titles[Language.EN.value] = heading_en
                book_title_ar = None
                book.chapters = []

                books.append(book)

            elif (chapter_title_ar
                  or not chapter) and heading_en.startswith('Chapter'):
                chapter = Chapter()
                chapter.part_type = PartType.Chapter
                chapter.titles = {}
                chapter.titles[Language.AR.value] = chapter_title_ar
                chapter.titles[Language.EN.value] = heading_en
                chapter_title_ar = None
                chapter.verse_translations = [hubbeali_translation]
                chapter.verses = []

                # NOTE(review): relies on a book having been opened by an
                # earlier file - confirm the input always guarantees that.
                book.chapters.append(chapter)

            elif chapter_title_ar:
                # A pending Arabic title whose English heading is not a
                # "Chapter ..." heading is stored as a heading entry.
                add_hadith(chapter, [chapter_title_ar], [heading_en],
                           PartType.Heading)

                chapter_title_ar = None

            last_element = soup.find('p', 'first-in-chapter')

            while last_element:
                if is_newline(last_element):
                    last_element = last_element.next_sibling
                    continue

                is_a_tag = is_tag(last_element)
                is_paragraph = is_a_tag and last_element.name == 'p'
                is_not_section_break_paragraph = (
                    is_paragraph and not is_section_break_tag(last_element))
                is_arabic = is_rtl_tag(last_element)

                # Inline style attributes are rewritten to class attributes.
                element_content = get_contents(last_element)
                element_content = element_content.replace(
                    'style="font-style: italic; font-weight: bold"',
                    'class="ibTxt"')
                element_content = element_content.replace(
                    'style="font-weight: bold"', 'class="bTxt"')
                element_content = element_content.replace(
                    'style="font-style: italic"', 'class="iTxt"')

                is_end_of_hadith = END_OF_HADITH_PATTERN.search(
                    element_content)

                if is_book_title(last_element):
                    if hadith_ar and hadith_en:
                        add_hadith(chapter, hadith_ar, hadith_en,
                                   PartType.Heading)
                        hadith_ar = []
                        hadith_en = []

                    # hubeali put chapters on ziyarat as separate book but we're going to stay true to the indexing on
                    # other copies out there and have the chapters under this as part of book of Hajj
                    # Separators are normalised so the special case is
                    # recognised whichever separator the path uses.
                    normalized_path = cfile.replace('\\', '/')
                    if not normalized_path.endswith(
                            'Al-Kafi-Volume-4/c346.xhtml'):
                        book_title_ar = element_content
                        chapter = None
                elif is_chapter_title(last_element):
                    if hadith_ar and hadith_en:
                        if chapter:
                            add_hadith(chapter, hadith_ar, hadith_en)
                        else:
                            # Text collected before the first chapter of a
                            # book becomes the book's description.
                            book.descriptions = {}
                            book.descriptions[Language.AR.value] = hadith_ar
                            book.descriptions[Language.EN.value] = hadith_en
                        hadith_ar = []
                        hadith_en = []

                    chapter_title_ar = element_content
                elif is_arabic:
                    hadith_ar.append(element_content)
                elif is_not_section_break_paragraph:
                    hadith_en.append(element_content)

                if is_end_of_hadith:
                    # NOTE(review): chapter may still be None at this point;
                    # whether add_hadith tolerates that is not visible here.
                    add_hadith(chapter, hadith_ar, hadith_en)
                    hadith_ar = []
                    hadith_en = []

                last_element = last_element.next_sibling

    return books
コード例 #6
0
def build_quran() -> Chapter:
    """Build the root ``Chapter`` for the Quran with all its translations.

    The verses are loaded first, then every translation listed in the table
    below is merged in with ``insert_quran_translation`` (Persian entries
    first, then English, in table order). The chapters are built from the
    Quran metadata file, attached to a new root ``Chapter`` together with a
    single breadcrumb, and the tree is indexed with ``set_index``.

    Returns:
        The fully populated and indexed root ``Chapter``.
    """
    verses = build_verses(get_path("tanzil_net/quran_simple.txt"))
    verse_translations = []

    # (file, translation id, language, translator name, reference URL)
    translation_sources = (
        ("tanzil_net/translations/fa.ansarian.txt", "ansarian", "fa",
         "Hussain Ansarian",
         "https://fa.wikipedia.org/wiki/%D8%AD%D8%B3%DB%8C%D9%86_%D8%A7%D9%86%D8%B5%D8%A7%D8%B1%DB%8C%D8%A7%D9%86"),
        ("tanzil_net/translations/fa.ayati.txt", "ayati", "fa",
         "AbdolMohammad Ayati",
         "https://fa.wikipedia.org/wiki/%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%85%D8%AD%D9%85%D8%AF_%D8%A2%DB%8C%D8%AA%DB%8C"),
        ("tanzil_net/translations/fa.bahrampour.txt", "bahrampour", "fa",
         "Abolfazl Bahrampour",
         "https://fa.wikipedia.org/wiki/%D8%A7%D8%A8%D9%88%D8%A7%D9%84%D9%81%D8%B6%D9%84_%D8%A8%D9%87%D8%B1%D8%A7%D9%85%E2%80%8C%D9%BE%D9%88%D8%B1"),
        ("tanzil_net/translations/fa.fooladvand.txt", "fooladvand", "fa",
         "Mohammad Mahdi Fooladvand",
         "https://fa.wikipedia.org/wiki/%D9%85%D8%AD%D9%85%D8%AF%D9%85%D9%87%D8%AF%DB%8C_%D9%81%D9%88%D9%84%D8%A7%D8%AF%D9%88%D9%86%D8%AF"),
        ("tanzil_net/translations/fa.ghomshei.txt", "ghomshei", "fa",
         "Mahdi Elahi Ghomshei",
         "https://fa.wikipedia.org/wiki/%D9%85%D9%87%D8%AF%DB%8C_%D8%A7%D9%84%D9%87%DB%8C_%D9%82%D9%85%D8%B4%D9%87%E2%80%8C%D8%A7%DB%8C"),
        ("tanzil_net/translations/fa.khorramdel.txt", "khorramdel", "fa",
         "Mostafa Khorramdel",
         "https://rasekhoon.net/mashahir/Show-904328.aspx"),
        ("tanzil_net/translations/fa.khorramshahi.txt", "khorramshahi", "fa",
         "Baha'oddin Khorramshahi",
         "https://fa.wikipedia.org/wiki/%D8%A8%D9%87%D8%A7%D8%A1%D8%A7%D9%84%D8%AF%DB%8C%D9%86_%D8%AE%D8%B1%D9%85%D8%B4%D8%A7%D9%87%DB%8C"),
        ("tanzil_net/translations/fa.makarem.txt", "makarem", "fa",
         "Naser Makarem Shirazi",
         "https://en.wikipedia.org/wiki/Naser_Makarem_Shirazi"),
        ("tanzil_net/translations/fa.moezzi.txt", "moezzi", "fa",
         "Mohammad Kazem Moezzi", ""),
        ("tanzil_net/translations/fa.mojtabavi.txt", "mojtabavi", "fa",
         "Sayyed Jalaloddin Mojtabavi",
         "http://rasekhoon.net/mashahir/Show-118481.aspx"),
        ("tanzil_net/translations/fa.sadeqi.txt", "sadeqi", "fa",
         "Mohammad Sadeqi Tehrani",
         "https://fa.wikipedia.org/wiki/%D9%85%D8%AD%D9%85%D8%AF_%D8%B5%D8%A7%D8%AF%D9%82%DB%8C_%D8%AA%D9%87%D8%B1%D8%A7%D9%86%DB%8C"),
        ("tanzil_net/translations/en.ahmedali.txt", "ahmedali", "en",
         "Ahmed Ali", "https://en.wikipedia.org/wiki/Ahmed_Ali_(writer)"),
        ("tanzil_net/translations/en.ahmedraza.txt", "ahmedraza", "en",
         "Ahmed Raza Khan",
         "https://en.wikipedia.org/wiki/Ahmed_Raza_Khan_Barelvi"),
        ("tanzil_net/translations/en.arberry.txt", "arberry", "en",
         "A. J. Arberry", "https://en.wikipedia.org/wiki/Arthur_John_Arberry"),
        ("tanzil_net/translations/en.daryabadi.txt", "daryabadi", "en",
         "Abdul Majid Daryabadi",
         "https://en.wikipedia.org/wiki/Abdul_Majid_Daryabadi"),
        ("tanzil_net/translations/en.hilali.txt", "hilali", "en",
         "Muhammad Taqi-ud-Din al-Hilali and Muhammad Muhsin Khan",
         "https://en.wikipedia.org/wiki/Noble_Quran_(Hilali-Khan)"),
        ("tanzil_net/translations/en.itani.txt", "itani", "en",
         "Talal Itani", ""),
        ("tanzil_net/translations/en.maududi.txt", "maududi", "en",
         "Abul Ala Maududi",
         "https://en.wikipedia.org/wiki/Abul_A%27la_Maududi"),
        ("tanzil_net/translations/en.mubarakpuri.txt", "mubarakpuri", "en",
         "Safi-ur-Rahman al-Mubarakpuri",
         "https://en.wikipedia.org/wiki/Safiur_Rahman_Mubarakpuri"),
        ("tanzil_net/translations/en.pickthall.txt", "pickthall", "en",
         "Mohammed Marmaduke William Pickthall",
         "https://en.wikipedia.org/wiki/Marmaduke_Pickthall"),
        ("tanzil_net/translations/en.qarai.txt", "qarai", "en",
         "Ali Quli Qarai", ""),
        ("tanzil_net/translations/en.qaribullah.txt", "qaribullah", "en",
         "Hasan al-Fatih Qaribullah and Ahmad Darwish", ""),
        ("tanzil_net/translations/en.sahih.txt", "sahih", "en",
         "Saheeh International", "http://www.saheehinternational.com/"),
        ("tanzil_net/translations/en.sarwar.txt", "sarwar", "en",
         "Muhammad Sarwar",
         "https://en.wikipedia.org/wiki/Shaykh_Muhammad_Sarwar"),
        ("tanzil_net/translations/en.shakir.txt", "shakir", "en",
         "Mohammad Habib Shakir",
         "https://en.wikipedia.org/wiki/Muhammad_Habib_Shakir"),
        ("tanzil_net/translations/en.transliteration.txt", "transliteration",
         "en", "English Transliteration", ""),
        ("tanzil_net/translations/en.wahiduddin.txt", "wahiduddin", "en",
         "Wahiduddin Khan",
         "https://en.wikipedia.org/wiki/Wahiduddin_Khan"),
        ("tanzil_net/translations/en.yusufali.txt", "yusufali", "en",
         "Abdullah Yusuf Ali",
         "https://en.wikipedia.org/wiki/Abdullah_Yusuf_Ali"),
    )

    # Each path is resolved right before its translation is merged, one
    # entry after the other.
    for relative_path, key, lang, translator, url in translation_sources:
        insert_quran_translation(verses, verse_translations,
                                 get_path(relative_path), key, lang,
                                 translator, url)

    chapters = build_chapters(get_path("tanzil_net/quran-data.xml"), verses,
                              verse_translations)

    quran = Chapter()
    quran.index = BOOK_INDEX
    quran.path = BOOK_PATH
    quran.verse_start_index = 0
    quran.part_type = PartType.Book
    quran.titles = {
        Language.EN.value: "The Holy Quran",
        Language.AR.value: "القرآن الكريم"
    }
    quran.descriptions = {
        Language.EN.value: ["Was revealed to the prophet SAW"]
    }
    quran.chapters = chapters
    quran.verse_translations = verse_translations
    quran.default_verse_translation_ids = {
        "en": "en.qarai",
        "fa": "fa.makarem"
    }

    # The root breadcrumb reuses the book's own titles and path.
    root_crumb = Crumb()
    root_crumb.titles = quran.titles
    root_crumb.indexed_titles = quran.titles
    root_crumb.path = quran.path
    quran.crumbs = [root_crumb]

    set_index(quran, [0, 0], 0)

    return quran