Example n. 1
0
        def derivation(word, leveldist):
            """Find derivations of *word* at the requested level.

            Locates the word in the derivation table (by surface form,
            lemma, or root), maps it to the destination level selected by
            *leveldist* (0=word, 1=lemma, 2+=root), and returns every
            surface word sharing that destination entry, de-duplicated.
            Returns [word] when the word is known but has no entry at the
            destination level, and [] when the word is unknown.
            """
            # Source level: the first derivation field that contains the word.
            indexsrc = None
            for level in ("word_", "lemma", "root"):
                if word in derivedict[level]:
                    indexsrc = level
                    break
            # Destination level: unknown/new levels fall back to "root".
            indexdist = {0: "word_", 1: "lemma", 2: "root"}.get(leveldist, "root")

            results = []
            if indexsrc is not None:  # word was found at some source level
                entry = LOCATE(derivedict[indexsrc], derivedict[indexdist], word)
                if entry:  # destination-level entry exists
                    results = FILTER_DOUBLES(
                        FIND(derivedict[indexdist], derivedict["word_"], entry))
                else:
                    results = [word]

            return results
Example n. 2
0
        def tuple(props):
            """Search the words that have all the specified properties.

            *props* maps a property name (an index field of ``worddict``)
            to the value it must have.  The result is the intersection of
            the word sets matching each indexed property; properties with
            no index are silently skipped.  Returns a list of words
            (empty when props is empty or nothing matches).

            NOTE: the name shadows the ``tuple`` builtin; kept for
            backward compatibility with existing callers.
            """
            wset = set()
            firsttime = True
            # Iterate items() instead of keys()+lookup; use the `in`
            # operator instead of has_key(), which was removed in Python 3.
            for propkey, propval in props.items():
                if propkey in worddict:
                    partial_wset = set(
                        FIND(worddict[propkey], worddict["word_"], propval))
                    if firsttime:
                        # Seed with the first matching set rather than
                        # intersecting with the initial empty set.
                        wset = partial_wset
                        firsttime = False
                    else:
                        wset &= partial_wset
                else:
                    # property has no index; ignore it
                    pass

            return list(wset)
Example n. 3
0
    def _search_aya(self, flags):
        """Run an aya (verse) search and return the results as a dict.

        *flags* is a dict of request options (query, sortedby, paging
        options, view, recitation, translation, romanization, highlight,
        script, vocalized, fuzzy, plus individual feature booleans).
        Every missing option falls back to self._defaults["flags"].

        Returns a dict with keys: "words", "runtime", "interval",
        "translation_info", and "ayas" (one entry per result in the
        requested page).

        NOTE(review): this is Python 2 code (has_key, unicode, xrange).
        """
        # flags
        # Each option falls back to the configured default when absent.
        query = flags["query"] if flags.has_key("query") \
            else self._defaults["flags"]["query"]
        sortedby = flags["sortedby"] if flags.has_key("sortedby") \
            else self._defaults["flags"]["sortedby"]
        # NOTE(review): `range` shadows the builtin; it holds the page size
        # ("perpage" wins over "range" when both are present).
        range = int(flags["perpage"]) if flags.has_key("perpage") \
            else flags["range"] if flags.has_key("range") \
            else self._defaults["flags"]["range"]
        ## offset = (page-1) * perpage   --  mode paging
        # 1-based offset; "page" takes precedence over an explicit "offset".
        offset = ((int(flags["page"]) - 1) * range) + 1 if flags.has_key("page") \
            else int(flags["offset"]) if flags.has_key("offset") \
            else self._defaults["flags"]["offset"]
        recitation = flags["recitation"] if flags.has_key("recitation") \
            else self._defaults["flags"]["recitation"]
        translation = flags["translation"] if flags.has_key("translation") \
            else self._defaults["flags"]["translation"]
        romanization = flags["romanization"] if flags.has_key("romanization") \
            else self._defaults["flags"]["romanization"]
        highlight = flags["highlight"] if flags.has_key("highlight") \
            else self._defaults["flags"]["highlight"]
        script = flags["script"] if flags.has_key("script") \
            else self._defaults["flags"]["script"]
        vocalized = IS_FLAG(flags, 'vocalized')
        fuzzy = IS_FLAG(flags, 'fuzzy')
        view = flags["view"] if flags.has_key("view") \
            else self._defaults["flags"]["view"]

        # pre-defined views
        # Each named view preset overrides the individual feature flags;
        # only the final "custom/undefined" branch reads them from *flags*.
        if view == "minimal":
            # fuzzy = True
            # page = 25
            vocalized = False
            recitation = None
            translation = None
            prev_aya = next_aya = False
            sura_info = False
            word_info = False
            word_synonyms = False
            word_derivations = False
            word_vocalizations = False
            aya_position_info = aya_theme_info = aya_sajda_info = False
            aya_stat_info = False
            sura_stat_info = False
            annotation_aya = annotation_word = False
        elif view == "normal":
            prev_aya = next_aya = True
            sura_info = True
            word_info = True
            word_synonyms = False
            word_derivations = True
            word_vocalizations = True
            aya_position_info = aya_theme_info = aya_sajda_info = True
            aya_stat_info = True
            sura_stat_info = False
            annotation_aya = annotation_word = False
        elif view == "full":
            prev_aya = next_aya = True
            sura_info = True
            word_info = True
            word_synonyms = True
            word_derivations = True
            word_vocalizations = True
            aya_position_info = aya_theme_info = aya_sajda_info = True
            aya_stat_info = sura_stat_info = True
            annotation_aya = annotation_word = True
            romanization = "iso"
        elif view == "statistic":
            prev_aya = next_aya = False
            sura_info = True
            word_info = True
            word_synonyms = False
            word_derivations = True
            word_vocalizations = True
            aya_position_info = True
            aya_theme_info = aya_sajda_info = False
            aya_stat_info = True
            sura_stat_info = True
            annotation_aya = False
            annotation_word = False
        elif view == "linguistic":
            prev_aya = next_aya = False
            sura_info = False
            word_info = True
            word_synonyms = True
            word_derivations = True
            word_vocalizations = True
            aya_position_info = False
            aya_theme_info = aya_sajda_info = True
            aya_stat_info = False
            sura_stat_info = False
            annotation_aya = True
            annotation_word = True
            romanization = "buckwalter"
        elif view == "recitation":
            script = "uthmani"
            prev_aya = next_aya = True
            sura_info = True
            word_info = False
            word_synonyms = False
            word_derivations = False
            word_vocalizations = False
            aya_position_info = True
            aya_theme_info = False
            aya_sajda_info = True
            aya_stat_info = False
            sura_stat_info = False
            annotation_aya = False
            annotation_word = False
        else:  # if view == custom or undefined
            prev_aya = IS_FLAG(flags, 'prev_aya')
            next_aya = IS_FLAG(flags, 'next_aya')
            sura_info = IS_FLAG(flags, 'sura_info')
            sura_stat_info = IS_FLAG(flags, 'sura_stat_info')
            word_info = IS_FLAG(flags, 'word_info')
            word_synonyms = IS_FLAG(flags, 'word_synonyms')
            word_derivations = IS_FLAG(flags, 'word_derivations')
            word_vocalizations = IS_FLAG(flags, 'word_vocalizations')

            aya_position_info = IS_FLAG(flags, 'aya_position_info')
            aya_theme_info = IS_FLAG(flags, 'aya_theme_info')
            aya_stat_info = IS_FLAG(flags, 'aya_stat_info')
            aya_sajda_info = IS_FLAG(flags, 'aya_sajda_info')
            annotation_aya = IS_FLAG(flags, 'annotation_aya')
            annotation_word = IS_FLAG(flags, 'annotation_word')

        # print query
        # preprocess query
        # Strip backslashes and ensure the query is a unicode string.
        query = query.replace("\\", "")
        if not isinstance(query, unicode):
            query = unicode(query, 'utf8')

        # A ":" means the query already uses field syntax; otherwise treat
        # it as Buckwalter transliteration and convert it to Arabic.
        if ":" not in query:
            query = unicode(
                transliterate("buckwalter", query,
                              ignore="'_\"%*?#~[]{}:>+-|"))

        # Search
        # Pick the fuzzy or exact Quran search engine.
        SE = self.FQSE if fuzzy else self.QSE
        res, termz, searcher = SE.search_all(
            query, self._defaults["results_limit"]["aya"], sortedby=sortedby)
        # termz items look like (fieldname, text, nb_matches, nb_docs);
        # keep only the first maxkeywords term texts.
        terms = [
            term[1] for term in list(termz)[:self._defaults["maxkeywords"]]
        ]
        terms_uthmani = map(STANDARD2UTHMANI, terms)
        # pagination
        # Clamp offset to >= 1 and page size to [minrange, maxrange].
        offset = 1 if offset < 1 else offset
        range = self._defaults[
            "minrange"] if range < self._defaults["minrange"] else range
        range = self._defaults[
            "maxrange"] if range > self._defaults["maxrange"] else range
        interval_end = offset + range - 1
        end = interval_end if interval_end < len(res) else len(res)
        start = offset if offset <= len(res) else -1
        # reslist is the slice of results for the requested page (1-based).
        reslist = [] if end == 0 or start == -1 else list(res)[start - 1:end]
        # todo pagination should be done inside search operation for better performence
        # closing the searcher
        searcher.close()
        output = {}

        ## disable annotations for aya words if there is more then one result
        if annotation_aya and len(res) > 1:
            annotation_aya = False

        # if True:
        ## strip vocalization when vocalized = true
        # V: normalizer applied to displayed aya text; strips tashkil
        # (diacritics) only when vocalized output was NOT requested.
        V = QArabicSymbolsFilter( \
            shaping=False, \
            tashkil=not vocalized, \
            spellerrors=False, \
            hamza=False \
            ).normalize_all
        # strip_vocalization: always removes tashkil (used as a dict key
        # normalizer for the vocalization lookup below).
        strip_vocalization = QArabicSymbolsFilter( \
            shaping=False, \
            tashkil=True, \
            spellerrors=False, \
            hamza=False \
            ).normalize_all
        # highligh function that consider None value and non-definition
        H = lambda X: self.QSE.highlight(
            X, terms, highlight
        ) if highlight != "none" and X else X if X else u"-----"
        # Numbers are 0 if not defined
        N = lambda X: X if X else 0
        # parse keywords lists , used for Sura names
        # Sura name fields hold comma-separated variants (ASCII or Arabic
        # comma); keywords() splits them and callers take the first.
        kword = re.compile(u"[^,،]+")
        keywords = lambda phrase: kword.findall(phrase)
        ##########################################
        extend_runtime = res.runtime
        # Words & Annotations
        # Build per-search-term info (matches, vocalizations, synonyms,
        # derivations) and, in passing, the word-annotation subquery.
        words_output = {"individual": {}}
        if word_info:
            matches = 0
            docs = 0
            nb_vocalizations_globale = 0
            cpt = 1
            # "( 0" is a neutral OR seed so terms can be appended uniformly.
            annotation_word_query = u"( 0 "
            for term in termz:
                # Only terms from the aya text fields are word candidates.
                if term[0] == "aya" or term[0] == "aya_":
                    if term[2]:
                        matches += term[2]
                    docs += term[3]
                    if term[0] == "aya_":
                        annotation_word_query += u" OR word:%s " % term[1]
                    else:  # if aya
                        annotation_word_query += u" OR normalized:%s " % STANDARD2UTHMANI(
                            term[1])
                    if word_vocalizations:
                        vocalizations = vocalization_dict[strip_vocalization(term[1])] if vocalization_dict.has_key(
                            strip_vocalization(term[1])) \
                            else []
                        nb_vocalizations_globale += len(vocalizations)
                    if word_synonyms:
                        synonyms = syndict[term[1]] if syndict.has_key(term[1]) \
                            else []
                    derivations_extra = []
                    if word_derivations:
                        # Derivations sharing the lemma...
                        lemma = LOCATE(derivedict["word_"],
                                       derivedict["lemma"], term[1])
                        if lemma:  # if different of none
                            derivations = FILTER_DOUBLES(
                                FIND(derivedict["lemma"], derivedict["word_"],
                                     lemma))
                        else:
                            derivations = []
                        # go deeper with derivations
                        # ...plus extra ones sharing only the root.
                        root = LOCATE(derivedict["word_"], derivedict["root"],
                                      term[1])
                        if root:  # if different of none
                            derivations_extra = list(
                                set(
                                    FILTER_DOUBLES(
                                        FIND(derivedict["root"],
                                             derivedict["word_"], lemma))) -
                                set(derivations))

                    words_output["individual"][cpt] = {
                        "word":
                        term[1],
                        "romanization":
                        transliterate(
                            romanization, term[1], ignore="", reverse=True) if
                        romanization in self.DOMAINS["romanization"] else None,
                        "nb_matches":
                        term[2],
                        "nb_ayas":
                        term[3],
                        "nb_vocalizations":
                        len(vocalizations)
                        if word_vocalizations else 0,  # unneeded
                        "vocalizations":
                        vocalizations if word_vocalizations else [],
                        "nb_synonyms":
                        len(synonyms) if word_synonyms else 0,  # unneeded
                        "synonyms":
                        synonyms if word_synonyms else [],
                        "lemma":
                        lemma if word_derivations else "",
                        "root":
                        root if word_derivations else "",
                        "nb_derivations":
                        len(derivations)
                        if word_derivations else 0,  # unneeded
                        "derivations":
                        derivations if word_derivations else [],
                        "nb_derivations_extra":
                        len(derivations_extra),
                        "derivations_extra":
                        derivations_extra,
                    }
                    cpt += 1
            annotation_word_query += u" ) "
            words_output["global"] = {
                "nb_words": cpt - 1,
                "nb_matches": matches,
                "nb_vocalizations": nb_vocalizations_globale
            }
        output["words"] = words_output
        # Magic_loop to built queries of Adjacents,translations and annotations in the same time
        # One pass over the page's results builds up to three OR-queries:
        # adjacent ayas (gid +/- 1), translations (by gid), and per-aya
        # annotations (by sura_id + aya_id).
        if prev_aya or next_aya or translation or annotation_aya:
            adja_query = trad_query = annotation_aya_query = u"( 0"

            for r in reslist:
                if prev_aya:
                    adja_query += u" OR gid:%s " % unicode(r["gid"] - 1)
                if next_aya:
                    adja_query += u" OR gid:%s " % unicode(r["gid"] + 1)
                if translation:
                    trad_query += u" OR gid:%s " % unicode(r["gid"])
                if annotation_aya:
                    annotation_aya_query += u" OR  ( aya_id:%s AND  sura_id:%s ) " % (
                        unicode(r["aya_id"]), unicode(r["sura_id"]))

            adja_query += u" )"
            trad_query += u" )" + u" AND id:%s " % unicode(translation)
            annotation_aya_query += u" )"

        # Adjacents
        if prev_aya or next_aya:
            adja_res, searcher = self.QSE.find_extended(adja_query, "gid")
            # Sentinel entries for the edges of the corpus: gid 0 (before
            # the first aya) and 6237 (after the last) — presumably gids
            # run 1..6236; confirm against the index.
            adja_ayas = {
                0: {
                    "aya_": u"----",
                    "uth_": u"----",
                    "sura": u"---",
                    "aya_id": 0,
                    "sura_arabic": u"---"
                },
                6237: {
                    "aya_": u"----",
                    "uth_": u"----",
                    "sura": u"---",
                    "aya_id": 9999,
                    "sura_arabic": u"---"
                }
            }
            for adja in adja_res:
                adja_ayas[adja["gid"]] = {
                    "aya_": adja["aya_"],
                    "uth_": adja["uth_"],
                    "aya_id": adja["aya_id"],
                    "sura": adja["sura"],
                    "sura_arabic": adja["sura_arabic"]
                }
                # NOTE(review): this += runs once per adjacent hit, so the
                # same runtime is accumulated multiple times — looks like
                # it was meant to sit outside the loop; confirm.
                extend_runtime += adja_res.runtime
            searcher.close()

        # translations
        if translation:
            trad_res, searcher = self.TSE.find_extended(trad_query, "gid")
            extend_runtime += trad_res.runtime
            trad_text = {}
            for tr in trad_res:
                trad_text[tr["gid"]] = tr["text"]
            searcher.close()

        # annotations for aya words
        if annotation_aya or (annotation_word and word_info):
            # Disabled halves collapse to the empty group u"()".
            annotation_word_query = annotation_word_query if annotation_word and word_info else u"()"
            annotation_aya_query = annotation_aya_query if annotation_aya else u"()"
            annotation_query = annotation_aya_query + u" OR  " + annotation_word_query
            # print annotation_query.encode( "utf-8" )
            annot_res, searcher = self.WSE.find_extended(
                annotation_query, "gid")
            extend_runtime += annot_res.runtime
            ## prepare annotations for use
            # annotations_by_word: normalized form -> word -> [annotations]
            # annotations_by_position: (sura_id, aya_id) -> word_id -> annotation
            annotations_by_word = {}
            annotations_by_position = {}
            for annot in annot_res:
                if (annotation_word and word_info):
                    if annot["normalized"] in terms_uthmani:
                        if annotations_by_word.has_key(annot["normalized"]):
                            if annotations_by_word[
                                    annot["normalized"]].has_key(
                                        annot["word"]):
                                annotations_by_word[annot["normalized"]][
                                    annot["word"]].append(annot)
                            else:
                                annotations_by_word[annot["normalized"]][
                                    annot["word"]] = [annot]
                        else:
                            annotations_by_word[annot["normalized"]] = {
                                annot["word"]: [annot]
                            }
                if annotation_aya:
                    if annotations_by_position.has_key(
                        (annot["sura_id"], annot["aya_id"])):
                        annotations_by_position[(
                            annot["sura_id"],
                            annot["aya_id"])][annot["word_id"]] = annot
                    else:
                        annotations_by_position[(annot["sura_id"],
                                                 annot["aya_id"])] = {
                                                     annot["word_id"]: annot
                                                 }
            searcher.close()

        ## merge word annotations to word output
        if (annotation_word and word_info):
            for cpt in xrange(1, len(output["words"]["individual"]) + 1):
                current_word = STANDARD2UTHMANI(
                    output["words"]["individual"][cpt]["word"])
                # print current_word.encode( "utf-8" ), "=>", annotations_by_word, "=>", list( annot_res )
                if annotations_by_word.has_key(current_word):
                    current_word_annotations = annotations_by_word[
                        current_word]
                    output["words"]["individual"][cpt][
                        "annotations"] = current_word_annotations
                    output["words"]["individual"][cpt]["nb_annotations"] = len(
                        current_word_annotations)

        output["runtime"] = round(extend_runtime, 5)
        # Py2 integer division gives the intended floor here.
        output["interval"] = {
            "start": start,
            "end": end,
            "total": len(res),
            "page": ((start - 1) / range) + 1,
            "nb_pages": ((len(res) - 1) / range) + 1
        }
        output["translation_info"] = {}
        ### Ayas
        # Result keys continue the global numbering of the interval, so
        # page 2 with range 10 yields keys 11..20.
        cpt = start - 1
        output["ayas"] = {}
        for r in reslist:
            cpt += 1
            output["ayas"][cpt] = {

                "identifier": {"gid": r["gid"],
                               "aya_id": r["aya_id"],
                               "sura_id": r["sura_id"],
                               "sura_name": keywords(r["sura"])[0],
                               "sura_arabic_name": keywords(r["sura_arabic"])[0],
                               },

                "aya": {
                    "id": r["aya_id"],
                    # "standard" script -> normalized + highlighted simple
                    # text; otherwise the Uthmani text is used.
                    "text": H(V(r["aya_"])) if script == "standard"
                    else H(r["uth_"]),
                    # NOTE(review): reads r["aya"] while the rest of this
                    # method uses r["aya_"] — confirm both fields exist.
                    "text_no_highlight": r["aya"] if script == "standard"
                    else r["uth_"],
                    "translation": trad_text[r["gid"]] if (
                            translation != "None" and translation and trad_text.has_key(r["gid"])) else None,
                    "recitation": None if not recitation or not self._recitations.has_key(recitation) \
                        else u"https://www.everyayah.com/data/" + self._recitations[recitation]["subfolder"].encode(
                        "utf-8") + "/%03d%03d.mp3" % (r["sura_id"], r["aya_id"]),
                    "prev_aya": {
                        "id": adja_ayas[r["gid"] - 1]["aya_id"],
                        "sura": adja_ayas[r["gid"] - 1]["sura"],
                        "sura_arabic": adja_ayas[r["gid"] - 1]["sura_arabic"],
                        "text": V(adja_ayas[r["gid"] - 1]["aya_"]) if script == "standard"
                        else adja_ayas[r["gid"] - 1]["uth_"],
                    } if prev_aya else None
                    ,
                    "next_aya": {
                        "id": adja_ayas[r["gid"] + 1]["aya_id"],
                        "sura": adja_ayas[r["gid"] + 1]["sura"],
                        "sura_arabic": adja_ayas[r["gid"] + 1]["sura_arabic"],
                        "text": V(adja_ayas[r["gid"] + 1]["aya_"]) if script == "standard"
                        else adja_ayas[r["gid"] + 1]["uth_"],
                    } if next_aya else None
                    ,

                },

                "sura": {} if not sura_info
                else {
                    "name": keywords(r["sura"])[0],
                    "arabic_name": keywords(r["sura_arabic"])[0],
                    "english_name": keywords(r["sura_english"])[0],
                    "id": r["sura_id"],
                    "type": r["sura_type"],
                    "arabic_type": r["sura_type_arabic"],
                    "order": r["sura_order"],
                    "ayas": r["s_a"],
                    "stat": {} if not sura_stat_info
                    else {
                        "words": N(r["s_w"]),
                        "godnames": N(r["s_g"]),
                        "letters": N(r["s_l"])
                    }

                },

                "position": {} if not aya_position_info
                else {
                    "manzil": r["manzil"],
                    "juz": r["juz"],
                    "hizb": r["hizb"],
                    "rub": r["rub"] % 4,
                    "page": r["page"],
                    "page_IN": r["page_IN"],
                    "ruku": r["ruku"],
                },

                "theme": {} if not aya_theme_info
                else {
                    "chapter": r["chapter"],
                    "topic": r["topic"],
                    "subtopic": r["subtopic"]
                },

                "stat": {} if not aya_stat_info
                else {
                    "words": N(r["a_w"]),
                    "letters": N(r["a_l"]),
                    "godnames": N(r["a_g"])
                },

                "sajda": {} if not aya_sajda_info
                else {
                    "exist": (r["sajda"] == u"نعم"),
                    "type": r["sajda_type"] if (r["sajda"] == u"نعم") else None,
                    "id": N(r["sajda_id"]) if (r["sajda"] == u"نعم") else None,
                },

                "annotations": {} if not annotation_aya or not annotations_by_position.has_key(
                    (r["sura_id"], r["aya_id"]))
                else annotations_by_position[(r["sura_id"], r["aya_id"])]
            }

        return output