def derivation(word, leveldist):
    """ search in defined field """
    # Destination level: 0 -> surface word, 1 -> lemma, 2 -> root.
    # Any other code falls back to "root" (reserved for new levels).
    _LEVELS = {0: "word_", 1: "lemma", 2: "root"}
    indexdist = _LEVELS.get(leveldist, "root")

    # Source level: the first dictionary level that contains the word.
    indexsrc = None
    for candidate in ("word_", "lemma", "root"):
        if word in derivedict[candidate]:
            indexsrc = candidate
            break

    # Unknown word: nothing to derive from.  (warning)
    if not indexsrc:
        return []

    # Map the word up to the destination level, then collect every
    # surface word that shares that lemma/root, without duplicates.
    itm = LOCATE(derivedict[indexsrc], derivedict[indexdist], word)
    if not itm:  # if different of none
        return [word]
    return FILTER_DOUBLES(FIND(derivedict[indexdist], derivedict["word_"], itm))
def _search_aya( self, flags ):
    """ return the results of aya search as a dictionary data structure

    flags: dict of search options (query, sortedby, perpage/range,
    page/offset, recitation, translation, romanization, highlight,
    script, vocalized, fuzzy, view, and the per-feature booleans used
    by the "custom" view).  Any missing key falls back to
    self._defaults["flags"].  Returns a nested dict with "words",
    "runtime", "interval", "translation_info" and "ayas" entries.

    NOTE(review): Python 2 code (has_key, unicode, xrange); the local
    name `range` shadows the builtin for the rest of the method.
    """
    # -- flags: pull each option from `flags`, defaulting as needed --
    query = flags["query"] if flags.has_key( "query" ) \
            else self._defaults["flags"]["query"]
    sortedby = flags["sortedby"] if flags.has_key( "sortedby" ) \
               else self._defaults["flags"]["sortedby"]
    # "perpage" wins over "range"; both mean "results per page".
    range = int( flags["perpage"] ) if flags.has_key( "perpage" ) \
            else flags["range"] if flags.has_key( "range" ) \
            else self._defaults["flags"]["range"]
    ## offset = (page-1) * perpage -- mode paging
    offset = ( ( int( flags["page"] ) - 1 ) * range ) + 1 if flags.has_key( "page" ) \
             else int( flags["offset"] ) if flags.has_key( "offset" ) \
             else self._defaults["flags"]["offset"]
    recitation = flags["recitation"] if flags.has_key( "recitation" ) \
                 else self._defaults["flags"]["recitation"]
    translation = flags["translation"] if flags.has_key( "translation" ) \
                  else self._defaults["flags"]["translation"]
    romanization = flags["romanization"] if flags.has_key( "romanization" ) \
                   else self._defaults["flags"]["romanization"]
    highlight = flags["highlight"] if flags.has_key( "highlight" ) \
                else self._defaults["flags"]["highlight"]
    script = flags["script"] if flags.has_key( "script" ) \
             else self._defaults["flags"]["script"]
    vocalized = TRUE_FALSE( flags["vocalized"] ) if flags.has_key( "vocalized" ) \
                else self._defaults["flags"]["vocalized"]
    fuzzy = TRUE_FALSE( flags["fuzzy"] ) if flags.has_key( "fuzzy" ) \
            else self._defaults["flags"]["fuzzy"]
    view = flags["view"] if flags.has_key( "view" ) \
           else self._defaults["flags"]["view"]
    # -- pre-defined views: each view fixes the feature booleans below --
    if view == "minimal":
        #fuzzy = True
        #page = 25
        vocalized = False
        recitation = None
        translation = None
        prev_aya = next_aya = False
        sura_info = False
        word_info = False
        word_synonyms = False
        word_derivations = False
        word_vocalizations = False
        aya_position_info = aya_theme_info = aya_sajda_info = False
        aya_stat_info = False
        sura_stat_info = False
        annotation_aya = annotation_word = False
    elif view == "normal":
        prev_aya = next_aya = False
        sura_info = True
        word_info = True
        word_synonyms = False
        word_derivations = True
        word_vocalizations = True
        aya_position_info = aya_theme_info = aya_sajda_info = True
        aya_stat_info = True
        sura_stat_info = False
        annotation_aya = annotation_word = False
    elif view == "full":
        prev_aya = next_aya = True
        sura_info = True
        word_info = True
        word_synonyms = True
        word_derivations = True
        word_vocalizations = True
        aya_position_info = aya_theme_info = aya_sajda_info = True
        aya_stat_info = sura_stat_info = True
        annotation_aya = annotation_word = False
        romanization = "iso"
    elif view == "statistic":
        prev_aya = next_aya = False
        sura_info = True
        word_info = True
        word_synonyms = False
        word_derivations = True
        word_vocalizations = True
        aya_position_info = True
        aya_theme_info = aya_sajda_info = False
        aya_stat_info = True
        sura_stat_info = True
        annotation_aya = False
        annotation_word = False
    elif view == "linguistic":
        prev_aya = next_aya = False
        sura_info = False
        word_info = True
        word_synonyms = True
        word_derivations = True
        word_vocalizations = True
        aya_position_info = False
        aya_theme_info = aya_sajda_info = True
        aya_stat_info = False
        sura_stat_info = False
        annotation_aya = False
        annotation_word = False
        romanization = "buckwalter"
    elif view == "recitation":
        script = "uthmani"
        prev_aya = next_aya = True
        sura_info = True
        word_info = False
        word_synonyms = False
        word_derivations = False
        word_vocalizations = False
        aya_position_info = True
        aya_theme_info = False
        aya_sajda_info = True
        aya_stat_info = False
        sura_stat_info = False
        annotation_aya = False
        annotation_word = False
    else: # if view == custom or undefined: read each boolean from flags
        prev_aya = TRUE_FALSE( flags["prev_aya"] ) if flags.has_key( "prev_aya" ) \
                   else self._defaults["flags"]["prev_aya"]
        next_aya = TRUE_FALSE( flags["next_aya"] ) if flags.has_key( "next_aya" ) \
                   else self._defaults["flags"]["next_aya"]
        sura_info = TRUE_FALSE( flags["sura_info"] ) if flags.has_key( "sura_info" ) \
                    else self._defaults["flags"]["sura_info"]
        sura_stat_info = TRUE_FALSE( flags["sura_stat_info"] ) if flags.has_key( "sura_stat_info" ) \
                         else self._defaults["flags"]["sura_stat_info"]
        word_info = TRUE_FALSE( flags["word_info"] ) if flags.has_key( "word_info" ) \
                    else self._defaults["flags"]["word_info"]
        word_synonyms = TRUE_FALSE( flags["word_synonyms"] ) if flags.has_key( "word_synonyms" ) \
                        else self._defaults["flags"]["word_synonyms"]
        word_derivations = TRUE_FALSE( flags["word_derivations"] ) if flags.has_key( "word_derivations" ) \
                           else self._defaults["flags"]["word_derivations"]
        word_vocalizations = TRUE_FALSE( flags["word_vocalizations"] ) if flags.has_key( "word_vocalizations" ) \
                             else self._defaults["flags"]["word_vocalizations"]
        aya_position_info = TRUE_FALSE( flags["aya_position_info"] ) if flags.has_key( "aya_position_info" ) \
                            else self._defaults["flags"]["aya_position_info"]
        aya_theme_info = TRUE_FALSE( flags["aya_theme_info"] ) if flags.has_key( "aya_theme_info" ) \
                         else self._defaults["flags"]["aya_theme_info"]
        aya_stat_info = TRUE_FALSE( flags["aya_stat_info"] ) if flags.has_key( "aya_stat_info" ) \
                        else self._defaults["flags"]["aya_stat_info"]
        aya_sajda_info = TRUE_FALSE( flags["aya_sajda_info"] ) if flags.has_key( "aya_sajda_info" ) \
                         else self._defaults["flags"]["aya_sajda_info"]
        annotation_aya = TRUE_FALSE( flags["annotation_aya"] ) if flags.has_key( "annotation_aya" ) \
                         else self._defaults["flags"]["annotation_aya"]
        annotation_word = TRUE_FALSE( flags["annotation_word"] ) if flags.has_key( "annotation_word" ) \
                          else self._defaults["flags"]["annotation_word"]
    #print query
    # -- preprocess query: strip backslashes, force unicode, and
    # transliterate from buckwalter when it is not a fielded query --
    query = query.replace( "\\", "" )
    if not isinstance( query, unicode ):
        query = unicode( query , 'utf8' )
    if ":" not in query:
        query = unicode( transliterate( "buckwalter", query, ignore = "'_\"%*?#~[]{}:>+-|" ) )
    # -- Search: fuzzy engine (FQSE) or exact engine (QSE) --
    SE = self.FQSE if fuzzy else self.QSE
    res, termz = SE.search_all( query , self._defaults["results_limit"]["aya"], sortedby = sortedby )
    # keep at most `maxkeywords` matched terms (term tuples: field, text, ...)
    terms = [term[1] for term in list( termz )[:self._defaults["maxkeywords"]]]
    terms_uthmani = map( STANDARD2UTHMANI, terms )
    # -- pagination: clamp offset/range, then slice the result list --
    offset = 1 if offset < 1 else offset;
    range = self._defaults["minrange"] if range < self._defaults["minrange"] else range;
    range = self._defaults["maxrange"] if range > self._defaults["maxrange"] else range;
    interval_end = offset + range - 1
    end = interval_end if interval_end < len( res ) else len( res )
    start = offset if offset <= len( res ) else -1
    reslist = [] if end == 0 or start == -1 else list( res )[start - 1:end]
    output = {}
    ## disable annotations for aya words if there is more then one result
    if annotation_aya and len ( res ) > 1:
        annotation_aya = False
    #if True:
    ## strip vocalization when vocalized = true
    # V normalizes aya text (removes tashkil only when vocalized is off)
    V = QArabicSymbolsFilter( \
        shaping = False, \
        tashkil = not vocalized, \
        spellerrors = False, \
        hamza = False \
        ).normalize_all
    # strip_vocalization always removes tashkil (used as a lookup key)
    strip_vocalization = QArabicSymbolsFilter( \
        shaping = False, \
        tashkil = True, \
        spellerrors = False, \
        hamza = False \
        ).normalize_all
    # highligh function that consider None value and non-definition
    H = lambda X: self.QSE.highlight( X, terms, highlight ) if highlight != "none" and X else X if X else u"-----"
    # Numbers are 0 if not defined
    N = lambda X:X if X else 0
    # parse keywords lists , used for Sura names (split on latin/arabic comma)
    kword = re.compile( u"[^,،]+" )
    keywords = lambda phrase: kword.findall( phrase )
    ##########################################
    # accumulated runtime over the main search plus every sub-search
    extend_runtime = res.runtime
    # -- Words & Annotations: per-matched-term statistics --
    words_output = {"individual":{}}
    if word_info:
        matches = 0
        docs = 0
        nb_vocalizations_globale = 0
        cpt = 1;
        # Whoosh-style OR-group; "( 0" is a neutral seed for appending
        annotation_word_query = u"( 0 "
        for term in termz :
            if term[0] == "aya" or term[0] == "aya_":
                if term[2]:
                    matches += term[2]
                    docs += term[3]
                if term[0] == "aya_":
                    annotation_word_query += u" OR word:%s " % term[1]
                else: #if aya
                    annotation_word_query += u" OR normalized:%s " % STANDARD2UTHMANI( term[1] )
                if word_vocalizations:
                    vocalizations = vocalization_dict[ strip_vocalization( term[1] ) ] if vocalization_dict.has_key( strip_vocalization( term[1] ) ) \
                                    else []
                    nb_vocalizations_globale += len( vocalizations )
                if word_synonyms:
                    synonyms = syndict[term[1]] if syndict.has_key( term[1] ) \
                               else []
                if word_derivations:
                    lemma = LOCATE( derivedict["word_"], derivedict["lemma"], term[1] )
                    root = LOCATE( derivedict["word_"], derivedict["root"], term[1] )
                    if lemma: # if different of none
                        derivations = FILTER_DOUBLES( FIND( derivedict["lemma"], derivedict["word_"], lemma ) )
                    else:
                        derivations = []
                words_output[ "individual" ][ cpt ] = {
                    "word":term[1],
                    "romanization": transliterate( romanization, term[1], ignore = "" , reverse = True ) if romanization in self.DOMAINS["romanization"] else None,
                    "nb_matches":term[2],
                    "nb_ayas":term[3],
                    "nb_vocalizations": len( vocalizations ) if word_vocalizations else 0,#unneeded
                    "vocalizations": vocalizations if word_vocalizations else [],
                    "nb_synonyms": len( synonyms ) if word_synonyms else 0,#unneeded
                    "synonyms": synonyms if word_synonyms else [],
                    "lemma": lemma if word_derivations else "",
                    "root": root if word_derivations else "",
                    "nb_derivations": len( derivations ) if word_derivations else 0, #unneeded
                    "derivations": derivations if word_derivations else []
                }
                cpt += 1
        annotation_word_query += u" ) "
        words_output["global"] = {"nb_words":cpt - 1, "nb_matches":matches, "nb_vocalizations": nb_vocalizations_globale}
    output["words"] = words_output;
    # -- Magic_loop to built queries of Adjacents,translations and
    # annotations in the same time (one pass over the result page) --
    if prev_aya or next_aya or translation or annotation_aya:
        adja_query = trad_query = annotation_aya_query = u"( 0"
        for r in reslist :
            if prev_aya:
                adja_query += u" OR gid:%s " % unicode( r["gid"] - 1 )
            if next_aya:
                adja_query += u" OR gid:%s " % unicode( r["gid"] + 1 )
            if translation:
                trad_query += u" OR gid:%s " % unicode( r["gid"] )
            if annotation_aya:
                annotation_aya_query += u" OR ( aya_id:%s AND sura_id:%s ) " % ( unicode( r["aya_id"] ) , unicode( r["sura_id"] ) )
        adja_query += u" )"
        trad_query += u" )" + u" AND id:%s " % unicode( translation )
        annotation_aya_query += u" )"
    # -- Adjacents: fetch previous/next ayas by gid; sentinel entries
    # cover the edges before the first aya (gid 0) and after the last
    # (gid 6237) --
    if prev_aya or next_aya:
        adja_res = self.QSE.find_extended( adja_query, "gid" )
        adja_ayas = {0:{"aya_":u"----", "uth_":u"----", "sura":u"---", "aya_id":0}, 6237:{"aya_":u"----", "uth_":u"----", "sura":u"---", "aya_id":9999}}
        for adja in adja_res:
            adja_ayas[adja["gid"]] = {"aya_":adja["aya_"], "uth_":adja["uth_"], "aya_id":adja["aya_id"], "sura":adja["sura"]}
        extend_runtime += adja_res.runtime
    # -- translations: map gid -> translated text --
    if translation:
        trad_res = self.TSE.find_extended( trad_query, "gid" )
        extend_runtime += trad_res.runtime
        trad_text = {}
        for tr in trad_res:
            trad_text[tr["gid"]] = tr["text"]
    # -- annotations for aya words: single combined query for both the
    # per-aya and per-word annotation needs --
    if annotation_aya or ( annotation_word and word_info ) :
        annotation_word_query = annotation_word_query if annotation_word and word_info else u"()"
        annotation_aya_query = annotation_aya_query if annotation_aya else u"()"
        annotation_query = annotation_aya_query + u" OR " + annotation_word_query
        #print annotation_query.encode( "utf-8" )
        annot_res = self.WSE.find_extended( annotation_query, "gid" )
        extend_runtime += annot_res.runtime
        ## prepare annotations for use
        # annotations_by_word: normalized word -> word form -> order -> annot
        # annotations_by_position: (sura_id, aya_id) -> word_id -> annot
        annotations_by_word = {}
        annotations_by_position = {}
        for annot in annot_res:
            if ( annotation_word and word_info ) :
                if annot["normalized"] in terms_uthmani:
                    if annotations_by_word.has_key( annot["normalized"] ):
                        if annotations_by_word[annot["normalized"]].has_key( annot["word"] ):
                            annotations_by_word[annot["normalized"]][annot["word"]][annot["order"]] = annot;
                        else:
                            annotations_by_word[annot["normalized"]][annot["word"]] = { annot["order"]: annot} ;
                    else:
                        annotations_by_word[annot["normalized"]] = { annot["word"]: { annot["order"]: annot}}
            if annotation_aya:
                if annotations_by_position.has_key( ( annot["sura_id"], annot["aya_id"] ) ):
                    annotations_by_position[( annot["sura_id"], annot["aya_id"] )][annot["word_id"]] = annot
                else:
                    annotations_by_position[( annot["sura_id"], annot["aya_id"] )] = { annot["word_id"]: annot }
    ## merge word annotations to word output
    if ( annotation_word and word_info ):
        for cpt in xrange( 1, len( output["words"]["individual"] ) + 1 ):
            current_word = STANDARD2UTHMANI( output["words"]["individual"][cpt]["word"] )
            #print current_word.encode( "utf-8" ), "=>", annotations_by_word, "=>", list( annot_res )
            if annotations_by_word.has_key( current_word ):
                current_word_annotations = annotations_by_word[ current_word ]
                output["words"]["individual"][cpt]["annotations"] = current_word_annotations
                output["words"]["individual"][cpt]["nb_annotations"] = len ( current_word_annotations )
    output["runtime"] = round( extend_runtime, 5 )
    # pagination summary (Python 2 "/" on ints is floor division here)
    output["interval"] = {
        "start":start,
        "end":end,
        "total": len( res ),
        "page": ( ( start - 1 ) / range ) + 1,
        "nb_pages": ( ( len( res ) - 1 ) / range ) + 1
    }
    output["translation_info"] = {}
    ### Ayas: build one entry per result, keyed by its absolute rank
    cpt = start - 1
    output["ayas"] = {}
    for r in reslist :
        cpt += 1
        output["ayas"][ cpt ] = {
            "identifier": {"gid":r["gid"],
                           "aya_id":r["aya_id"],
                           "sura_id":r["sura_id"],
                           "sura_name":keywords( r["sura"] )[0],
                           },
            "aya":{
                "id":r["aya_id"],
                # standard script goes through V (vocalization filter);
                # uthmani script is served as stored
                "text": H( V( r["aya_"] ) ) if script == "standard" else H( r["uth_"] ) ,
                "text_no_highlight": V( r["aya_"] ) if script == "standard" else r["uth_"],
                "translation": trad_text[r["gid"]] if ( translation != "None" and translation and trad_text.has_key( r["gid"] ) ) else None,
                # external audio URL, one mp3 per (sura, aya)
                "recitation": None if not recitation or not self._recitations.has_key( recitation ) \
                              else u"http://www.everyayah.com/data/" + self._recitations[recitation]["subfolder"].encode( "utf-8" ) + "/%03d%03d.mp3" % ( r["sura_id"], r["aya_id"] ),
                "prev_aya":{
                    "id":adja_ayas[r["gid"] - 1]["aya_id"],
                    "sura":adja_ayas[r["gid"] - 1]["sura"],
                    "text": V( adja_ayas[r["gid"] - 1]["aya_"] ) if script == "standard" else adja_ayas[r["gid"] - 1]["uth_"] ,
                } if prev_aya else None ,
                "next_aya":{
                    "id":adja_ayas[r["gid"] + 1]["aya_id"],
                    "sura":adja_ayas[r["gid"] + 1]["sura"],
                    "text": V( adja_ayas[r["gid"] + 1]["aya_"] ) if script == "standard" else adja_ayas[r["gid"] + 1]["uth_"] ,
                } if next_aya else None ,
            },
            "sura": {} if not sura_info else {
                "name":keywords( r["sura"] )[0] ,
                "id":r["sura_id"],
                "type": r["sura_type"] ,
                "order":r["sura_order"],
                "ayas":r["s_a"],
                "stat":{} if not sura_stat_info else {
                    "words":N( r["s_w"] ),
                    "godnames":N( r["s_g"] ),
                    "letters":N( r["s_l"] )
                }
            },
            "position": {} if not aya_position_info else {
                "manzil":r["manzil"],
                "juz":r["juz"],
                "hizb":r["hizb"],
                "rub":r["rub"] % 4,
                "page":r["page"],
                "page_IN":r["page_IN"],
                "ruku":r["ruku"],
            },
            "theme":{} if not aya_theme_info else {
                "chapter": r["chapter"],
                "topic": r["topic"] ,
                "subtopic": r["subtopic"]
            },
            "stat": {} if not aya_stat_info else {
                "words":N( r["a_w"] ),
                "letters":N( r["a_l"] ),
                "godnames":N( r["a_g"] )
            } ,
            "sajda":{} if not aya_sajda_info else {
                # the stored field holds Arabic "yes" when a sajda exists
                "exist":( r["sajda"] == u"نعم" ),
                "type": r["sajda_type"] if ( r["sajda"] == u"نعم" ) else None,
                "id":N( r["sajda_id"] ) if ( r["sajda"] == u"نعم" ) else None,
            },
            "annotations": {} if not annotation_aya or not annotations_by_position.has_key( ( r["sura_id"], r["aya_id"] ) ) else annotations_by_position[( r["sura_id"], r["aya_id"] )]
        }
    return output