Example #1
0
        def derivation(word, leveldist):
            """Collect the words related to *word* at the requested level.

            The source level is auto-detected (word form, lemma, or root);
            ``leveldist`` selects the destination level (0=word, 1=lemma,
            anything else=root). Returns a de-duplicated list of word
            forms, ``[word]`` itself when nothing is located, or ``[]``
            when the word is unknown at every level.
            """
            # Resolve the source index: first dictionary column that
            # actually contains the word, else None (word is unknown).
            indexsrc = None
            for level in ("word_", "lemma", "root"):
                if word in derivedict[level]:
                    indexsrc = level
                    break

            # Map the numeric destination level onto a column name;
            # any new/unknown level falls back to "root".
            indexdist = {0: "word_", 1: "lemma", 2: "root"}.get(leveldist, "root")

            results = []
            if indexsrc:  # only search when the source level is known
                located = LOCATE(derivedict[indexsrc], derivedict[indexdist], word)
                if located:
                    results = FILTER_DOUBLES(
                        FIND(derivedict[indexdist], derivedict["word_"], located))
                else:
                    results = [word]

            return results
Example #2
0
	def _search_aya( self, flags ):
		"""
		Run an aya (verse) search and return the results as a nested
		dictionary data structure.

		flags: dict of optional search options (query, sortedby,
		pagination keys, view, recitation/translation/romanization,
		per-feature toggles, ...). Every missing key falls back to
		self._defaults["flags"].

		NOTE(review): Python 2-only code (has_key, unicode, xrange, map).
		"""
		#flags: pull each option from `flags`, defaulting from self._defaults
		query = flags["query"] if flags.has_key( "query" ) \
				else self._defaults["flags"]["query"]
		sortedby = flags["sortedby"] if flags.has_key( "sortedby" ) \
				   else self._defaults["flags"]["sortedby"]
		# page size: "perpage" wins over "range"; NOTE(review): the local
		# name `range` shadows the builtin — kept as-is.
		range = int( flags["perpage"] ) if  flags.has_key( "perpage" )  \
				else flags["range"] if flags.has_key( "range" ) \
									else self._defaults["flags"]["range"]
		## offset = (page-1) * perpage   --  mode paging
		offset = ( ( int( flags["page"] ) - 1 ) * range ) + 1 if flags.has_key( "page" ) \
				 else int( flags["offset"] ) if flags.has_key( "offset" ) \
					  else self._defaults["flags"]["offset"]
		recitation = flags["recitation"] if flags.has_key( "recitation" ) \
					 else self._defaults["flags"]["recitation"]
		translation = flags["translation"] if flags.has_key( "translation" ) \
					  else self._defaults["flags"]["translation"]
		romanization = flags["romanization"] if flags.has_key( "romanization" ) \
					  else self._defaults["flags"]["romanization"]
		highlight = flags["highlight"] if flags.has_key( "highlight" ) \
					else self._defaults["flags"]["highlight"]
		script = flags["script"] if flags.has_key( "script" ) \
				 else self._defaults["flags"]["script"]
		vocalized = TRUE_FALSE( flags["vocalized"] ) if flags.has_key( "vocalized" ) \
					else self._defaults["flags"]["vocalized"]
		fuzzy = TRUE_FALSE( flags["fuzzy"] ) if flags.has_key( "fuzzy" ) \
				else self._defaults["flags"]["fuzzy"]
		view = flags["view"] if flags.has_key( "view" ) \
				else self._defaults["flags"]["view"]

		# pre-defined views: each named view overrides the individual
		# feature toggles with a fixed profile
		if view == "minimal":
			#fuzzy = True
			#page = 25
			vocalized = False
			recitation = None
			translation = None
			prev_aya = next_aya = False
			sura_info = False
			word_info = False
			word_synonyms = False
			word_derivations = False
			word_vocalizations = False
			aya_position_info = aya_theme_info = aya_sajda_info = False
			aya_stat_info = False
			sura_stat_info = False
			annotation_aya = annotation_word = False
		elif view == "normal":
			prev_aya = next_aya = False
			sura_info = True
			word_info = True
			word_synonyms = False
			word_derivations = True
			word_vocalizations = True
			aya_position_info = aya_theme_info = aya_sajda_info = True
			aya_stat_info = True
			sura_stat_info = False
			annotation_aya = annotation_word = False
		elif view == "full":
			prev_aya = next_aya = True
			sura_info = True
			word_info = True
			word_synonyms = True
			word_derivations = True
			word_vocalizations = True
			aya_position_info = aya_theme_info = aya_sajda_info = True
			aya_stat_info = sura_stat_info = True
			annotation_aya = annotation_word = False
			romanization = "iso"
		elif view == "statistic":
			prev_aya = next_aya = False
			sura_info = True
			word_info = True
			word_synonyms = False
			word_derivations = True
			word_vocalizations = True
			aya_position_info = True
			aya_theme_info = aya_sajda_info = False
			aya_stat_info = True
			sura_stat_info = True
			annotation_aya = False
			annotation_word = False
		elif view == "linguistic":
			prev_aya = next_aya = False
			sura_info = False
			word_info = True
			word_synonyms = True
			word_derivations = True
			word_vocalizations = True
			aya_position_info = False
			aya_theme_info = aya_sajda_info = True
			aya_stat_info = False
			sura_stat_info = False
			annotation_aya = False
			annotation_word = False
			romanization = "buckwalter"
		elif view == "recitation":
			script = "uthmani"
			prev_aya = next_aya = True
			sura_info = True
			word_info = False
			word_synonyms = False
			word_derivations = False
			word_vocalizations = False
			aya_position_info = True
			aya_theme_info = False
			aya_sajda_info = True
			aya_stat_info = False
			sura_stat_info = False
			annotation_aya = False
			annotation_word = False
		else: # if view == custom or undefined: read each toggle from flags
			prev_aya = TRUE_FALSE( flags["prev_aya"] ) if flags.has_key( "prev_aya" ) \
						else self._defaults["flags"]["prev_aya"]
			next_aya = TRUE_FALSE( flags["next_aya"] ) if flags.has_key( "next_aya" ) \
						else self._defaults["flags"]["next_aya"]
			sura_info = TRUE_FALSE( flags["sura_info"] ) if flags.has_key( "sura_info" ) \
						else self._defaults["flags"]["sura_info"]
			sura_stat_info = TRUE_FALSE( flags["sura_stat_info"] ) if flags.has_key( "sura_stat_info" ) \
						else self._defaults["flags"]["sura_stat_info"]
			word_info = TRUE_FALSE( flags["word_info"] ) if flags.has_key( "word_info" ) \
						else self._defaults["flags"]["word_info"]
			word_synonyms = TRUE_FALSE( flags["word_synonyms"] ) if flags.has_key( "word_synonyms" ) \
						else self._defaults["flags"]["word_synonyms"]
			word_derivations = TRUE_FALSE( flags["word_derivations"] ) if flags.has_key( "word_derivations" ) \
						else self._defaults["flags"]["word_derivations"]
			word_vocalizations = TRUE_FALSE( flags["word_vocalizations"] ) if flags.has_key( "word_vocalizations" ) \
						else self._defaults["flags"]["word_vocalizations"]

			aya_position_info = TRUE_FALSE( flags["aya_position_info"] ) if flags.has_key( "aya_position_info" ) \
								else self._defaults["flags"]["aya_position_info"]
			aya_theme_info = TRUE_FALSE( flags["aya_theme_info"] ) if flags.has_key( "aya_theme_info" ) \
							 else self._defaults["flags"]["aya_theme_info"]
			aya_stat_info = TRUE_FALSE( flags["aya_stat_info"] ) if flags.has_key( "aya_stat_info" ) \
							else self._defaults["flags"]["aya_stat_info"]
			aya_sajda_info = TRUE_FALSE( flags["aya_sajda_info"] ) if flags.has_key( "aya_sajda_info" ) \
							 else self._defaults["flags"]["aya_sajda_info"]
			annotation_aya = TRUE_FALSE( flags["annotation_aya"] ) if flags.has_key( "annotation_aya" ) \
							 else self._defaults["flags"]["annotation_aya"]
			annotation_word = TRUE_FALSE( flags["annotation_word"] ) if flags.has_key( "annotation_word" ) \
							 else self._defaults["flags"]["annotation_word"]

		#print query
		#preprocess query: strip backslashes, ensure a unicode object
		query = query.replace( "\\", "" )
		if not isinstance( query, unicode ):
			query = unicode( query , 'utf8' )

		# no field prefix (":") in the query => transliterate it from
		# buckwalter, keeping query-syntax characters untouched
		if ":" not in query:
			query = unicode( transliterate( "buckwalter", query, ignore = "'_\"%*?#~[]{}:>+-|" ) )


		#Search: choose the fuzzy or exact search engine
		SE = self.FQSE if fuzzy else self.QSE
		res, termz = SE.search_all( query  , self._defaults["results_limit"]["aya"], sortedby = sortedby )
		# keep only the first maxkeywords matched terms (term tuples:
		# presumably (field, text, nb_matches, nb_docs) — see usage below)
		terms = [term[1] for term in list( termz )[:self._defaults["maxkeywords"]]]
		terms_uthmani = map( STANDARD2UTHMANI, terms )
		#pagination: clamp offset and page size to the configured bounds
		offset = 1 if offset < 1 else offset;
		range = self._defaults["minrange"] if range < self._defaults["minrange"] else range;
		range = self._defaults["maxrange"] if range > self._defaults["maxrange"] else range;
		interval_end = offset + range - 1
		end = interval_end if interval_end < len( res ) else len( res )
		start = offset if offset <= len( res ) else -1
		# the slice of results for the requested page (1-based offset)
		reslist = [] if end == 0 or start == -1 else list( res )[start - 1:end]
		output = {}

		## disable annotations for aya words if there is more then one result
		if annotation_aya and len ( res ) > 1:
			annotation_aya = False

		#if True:
		## strip vocalization when vocalized = true
		# V: normalizer applied to displayed text (removes tashkil unless
		# the caller asked for vocalized output)
		V = QArabicSymbolsFilter( \
								shaping = False, \
								tashkil = not vocalized, \
								spellerrors = False, \
								hamza = False \
								).normalize_all
		# strip_vocalization: always removes tashkil (used as lookup key)
		strip_vocalization = QArabicSymbolsFilter( \
								shaping = False, \
								tashkil = True, \
								spellerrors = False, \
								hamza = False \
								).normalize_all
		# highlight helper: highlights non-empty text when enabled;
		# empty/None text becomes the u"-----" placeholder
		H = lambda X:  self.QSE.highlight( X, terms, highlight ) if highlight != "none" and X else X if X else u"-----"
		# Numbers are 0 if not defined
		N = lambda X:X if X else 0
		# parse keywords lists , used for Sura names (split on Latin or
		# Arabic comma)
		kword = re.compile( u"[^,،]+" )
		keywords = lambda phrase: kword.findall( phrase )
		##########################################
		extend_runtime = res.runtime
		# Words & Annotations: per-matched-word statistics, vocalizations,
		# synonyms and derivations
		words_output = {"individual":{}}
		if word_info:
			matches = 0
			docs = 0
			nb_vocalizations_globale = 0
			cpt = 1;
			# also build the word-annotation query while walking the terms
			annotation_word_query = u"( 0 "
			for term in termz :
				# only terms matched in the aya text fields
				if term[0] == "aya" or term[0] == "aya_":
					if term[2]:
						matches += term[2]
					docs += term[3]
					if term[0] == "aya_":
						annotation_word_query += u" OR word:%s " % term[1]
					else: #if aya
						annotation_word_query += u" OR normalized:%s " % STANDARD2UTHMANI( term[1] )
					if word_vocalizations:
						vocalizations = vocalization_dict[ strip_vocalization( term[1] ) ] if vocalization_dict.has_key( strip_vocalization( term[1] ) ) \
										   else []
						nb_vocalizations_globale += len( vocalizations )
					if word_synonyms:
						synonyms = syndict[term[1]] if syndict.has_key( term[1] ) \
										   else []
					if word_derivations:
						lemma = LOCATE( derivedict["word_"], derivedict["lemma"], term[1] )
						root = LOCATE( derivedict["word_"], derivedict["root"], term[1] )
						if lemma:  # if different of none
							derivations = FILTER_DOUBLES( FIND( derivedict["lemma"], derivedict["word_"], lemma ) )
						else:
							derivations = []

					words_output[ "individual" ][ cpt ] = {
															 "word":term[1],
															 "romanization": transliterate( romanization, term[1], ignore = "" , reverse = True ) if romanization in self.DOMAINS["romanization"] else None,
															 "nb_matches":term[2],
															 "nb_ayas":term[3],
															 "nb_vocalizations": len( vocalizations ) if word_vocalizations else 0,#unneeded
															 "vocalizations": vocalizations if word_vocalizations else [],
															 "nb_synonyms": len( synonyms ) if word_synonyms else 0,#unneeded
															 "synonyms": synonyms if word_synonyms else [],
															 "lemma": lemma if word_derivations else "",
															 "root": root if word_derivations else "",
															 "nb_derivations": len( derivations ) if word_derivations else 0, #unneeded
															 "derivations": derivations if word_derivations else []
														 }
					cpt += 1
			annotation_word_query += u" ) "
			words_output["global"] = {"nb_words":cpt - 1, "nb_matches":matches, "nb_vocalizations": nb_vocalizations_globale}
		output["words"] = words_output;
		#Magic_loop to built queries of Adjacents,translations and annotations in the same time
		if prev_aya or next_aya or translation or  annotation_aya:
			adja_query = trad_query = annotation_aya_query = u"( 0"

			for r in reslist :
				if prev_aya: adja_query += u" OR gid:%s " % unicode( r["gid"] - 1 )
				if next_aya: adja_query += u" OR gid:%s " % unicode( r["gid"] + 1 )
				if translation: trad_query += u" OR gid:%s " % unicode( r["gid"] )
				if annotation_aya: annotation_aya_query += u" OR  ( aya_id:%s AND  sura_id:%s ) " % ( unicode( r["aya_id"] ) , unicode( r["sura_id"] ) )

			adja_query += u" )"
			trad_query += u" )" + u" AND id:%s " % unicode( translation )
			annotation_aya_query += u" )"


		# Adjacents: fetch previous/next ayas by gid, with sentinel rows
		# for the boundaries (gid 0 and gid 6237)
		if prev_aya or next_aya:
			adja_res = self.QSE.find_extended( adja_query, "gid" )
			adja_ayas = {0:{"aya_":u"----", "uth_":u"----", "sura":u"---", "aya_id":0}, 6237:{"aya_":u"----", "uth_":u"----", "sura":u"---", "aya_id":9999}}
			for adja in adja_res:
				adja_ayas[adja["gid"]] = {"aya_":adja["aya_"], "uth_":adja["uth_"], "aya_id":adja["aya_id"], "sura":adja["sura"]}
				extend_runtime += adja_res.runtime

		#translations: fetch translated text per gid from the translation engine
		if translation:
			trad_res = self.TSE.find_extended( trad_query, "gid" )
			extend_runtime += trad_res.runtime
			trad_text = {}
			for tr in trad_res:
				trad_text[tr["gid"]] = tr["text"]

		#annotations for aya words: one combined query for word-level and
		#aya-position-level annotations
		if annotation_aya or ( annotation_word and word_info ) :
			annotation_word_query = annotation_word_query if annotation_word and word_info else u"()"
			annotation_aya_query = annotation_aya_query if annotation_aya else u"()"
			annotation_query = annotation_aya_query + u" OR  " + annotation_word_query
			#print annotation_query.encode( "utf-8" )
			annot_res = self.WSE.find_extended( annotation_query, "gid" )
			extend_runtime += annot_res.runtime
			## prepare annotations for use
			# annotations_by_word: normalized form -> word -> order -> annotation
			annotations_by_word = {}
			# annotations_by_position: (sura_id, aya_id) -> word_id -> annotation
			annotations_by_position = {}
			for annot in annot_res:
				if ( annotation_word and word_info ) :
					if annot["normalized"] in terms_uthmani:
						if annotations_by_word.has_key( annot["normalized"] ):
							if annotations_by_word[annot["normalized"]].has_key( annot["word"] ):
								annotations_by_word[annot["normalized"]][annot["word"]][annot["order"]] = annot;
							else:
								annotations_by_word[annot["normalized"]][annot["word"]] = { annot["order"]: annot} ;
						else:
							annotations_by_word[annot["normalized"]] = { annot["word"]: { annot["order"]: annot}}
				if annotation_aya:
					if annotations_by_position.has_key( ( annot["sura_id"], annot["aya_id"] ) ):
						annotations_by_position[( annot["sura_id"], annot["aya_id"] )][annot["word_id"]] = annot
					else:
						annotations_by_position[( annot["sura_id"], annot["aya_id"] )] = { annot["word_id"]: annot }

		## merge word annotations to word output
		if ( annotation_word and word_info ):
			for cpt in xrange( 1, len( output["words"]["individual"] ) + 1 ):
				current_word = STANDARD2UTHMANI( output["words"]["individual"][cpt]["word"] )
				#print current_word.encode( "utf-8" ), "=>", annotations_by_word, "=>", list( annot_res )
				if annotations_by_word.has_key( current_word ):
					current_word_annotations = annotations_by_word[ current_word ]
					output["words"]["individual"][cpt]["annotations"] = current_word_annotations
					output["words"]["individual"][cpt]["nb_annotations"] = len ( current_word_annotations )

		output["runtime"] = round( extend_runtime, 5 )
		# NOTE(review): Python 2 integer division is intended in the page
		# computations below
		output["interval"] = {
							"start":start,
							"end":end,
							"total": len( res ),
							"page": ( ( start - 1 ) / range ) + 1,
							"nb_pages": ( ( len( res ) - 1 ) / range ) + 1
							}
		output["translation_info"] = {}
		### Ayas: assemble one entry per result, numbered from `start`
		cpt = start - 1
		output["ayas"] = {}
		for r in reslist :
			cpt += 1
			output["ayas"][ cpt ] = {

					  "identifier": {"gid":r["gid"],
									 "aya_id":r["aya_id"],
									 "sura_id":r["sura_id"],
									 "sura_name":keywords( r["sura"] )[0],
									},

		              "aya":{
		              		"id":r["aya_id"],
		              		"text":   H( V( r["aya_"] ) )  if script == "standard"
		              			else   H( r["uth_"] ) ,
                            "text_no_highlight": V( r["aya_"] )   if script == "standard"
                                  else   r["uth_"],
						"translation": trad_text[r["gid"]] if ( translation != "None" and translation and trad_text.has_key( r["gid"] ) ) else None,
		                	"recitation": None if not recitation or not self._recitations.has_key( recitation ) \
		                				  else u"http://www.everyayah.com/data/" + self._recitations[recitation]["subfolder"].encode( "utf-8" ) + "/%03d%03d.mp3" % ( r["sura_id"], r["aya_id"] ),
		                	"prev_aya":{
						    "id":adja_ayas[r["gid"] - 1]["aya_id"],
						    "sura":adja_ayas[r["gid"] - 1]["sura"],
						    "text": V( adja_ayas[r["gid"] - 1]["aya_"] )  if script == "standard"
		              			else  adja_ayas[r["gid"] - 1]["uth_"] ,
						    } if prev_aya else None
						    ,
		                	"next_aya":{
						    "id":adja_ayas[r["gid"] + 1]["aya_id"],
						    "sura":adja_ayas[r["gid"] + 1]["sura"],
						    "text":  V( adja_ayas[r["gid"] + 1]["aya_"] )  if script == "standard"
		              			else   adja_ayas[r["gid"] + 1]["uth_"] ,
						    } if next_aya else None
						    ,

		              },

		    		"sura": {} if not sura_info
					  else  {
						  "name":keywords( r["sura"] )[0] ,
							  "id":r["sura_id"],
							  "type": r["sura_type"] ,
							  "order":r["sura_order"],
							  "ayas":r["s_a"],
						    "stat":{} if not sura_stat_info
							  	  else	{
										  "words":N( r["s_w"] ),
										  "godnames":N( r["s_g"] ),
										  "letters":N( r["s_l"] )
								      }

		    		},

		                "position": {} if not aya_position_info
		                else {
		                	"manzil":r["manzil"],
		                	"juz":r["juz"],
		                	"hizb":r["hizb"],
		                	"rub":r["rub"] % 4,
		                	"page":r["page"],
		                	"page_IN":r["page_IN"],
		                	"ruku":r["ruku"],
		           	},

		           	"theme":{} if not aya_theme_info
		                else	{
				    		"chapter": r["chapter"],
				    		"topic":  r["topic"] ,
				   		 "subtopic": r["subtopic"]
				 	   },

				"stat":  {} if not aya_stat_info
		                else {
						"words":N( r["a_w"] ),
		    				"letters":N( r["a_l"] ),
		    				"godnames":N( r["a_g"] )
				}       ,

				"sajda":{} if not aya_sajda_info
		                else    {
		    				"exist":( r["sajda"] == u"نعم" ),
		    				"type": r["sajda_type"]  if ( r["sajda"] == u"نعم" ) else None,
		    				"id":N( r["sajda_id"] ) if ( r["sajda"] == u"نعم" ) else None,
		    			},

				"annotations": {} if not annotation_aya or not annotations_by_position.has_key( ( r["sura_id"], r["aya_id"] ) )
							else annotations_by_position[( r["sura_id"], r["aya_id"] )]
		    		}
		return output