def results(page):
    """Run the current search and render one page of results.

    On a POST request the query is read from ``request.form`` and remembered
    in the module-level ``tmp_*`` globals; on any other method the remembered
    values are reused (this is how "next page" requests work).

    Args:
        page: 1-based page number, either an ``int`` or a value ``int()``
            accepts (the raw URL segment).

    Returns:
        The rendered ``page_SERP.html`` template. When there are hits,
        ``results`` is a dict keyed by document id; otherwise it is a list of
        "not found" messages.

    Raises:
        ValueError: if ``page`` or one of the numeric form fields is not a
            valid integer.
    """
    # NOTE(review): the remembered query lives in module-level globals, so it
    # is shared by every client served by this process.
    global tmp_text, tmp_star, tmp_min, tmp_max, tmp_director, tmp_lan
    global tmp_country, tmp_loc, tmp_minyear, tmp_maxyear, tmp_cats
    global gresults

    # The <page> URL parameter arrives as text; normalise it to an integer.
    if not isinstance(page, int):
        page = int(page)

    if request.method == 'POST':
        # Initial query: read every field, stripping surrounding whitespace.
        # Numeric fields become int when filled in and stay '' when blank.
        form = request.form
        text_query = form['query'].strip()
        star_query = form['starring'].strip()
        mintime_query = _parse_optional_int(form['mintime'])
        maxtime_query = _parse_optional_int(form['maxtime'])
        director_query = form['director'].strip()
        lan_query = form['language'].strip()
        country_query = form['country'].strip()
        loc_query = form['location'].strip()
        minyear_query = _parse_optional_int(form['minplottime'])
        maxyear_query = _parse_optional_int(form['maxplottime'])
        cats_query = form['categories'].strip()

        # Remember the query for subsequent non-POST (paging) requests.
        tmp_text = text_query
        tmp_star = star_query
        tmp_min = mintime_query
        tmp_max = maxtime_query
        tmp_director = director_query
        tmp_lan = lan_query
        tmp_country = country_query
        tmp_loc = loc_query
        tmp_minyear = minyear_query
        tmp_maxyear = maxyear_query
        tmp_cats = cats_query
    else:
        # Paging request: reuse the values stored by the last POST.
        text_query = tmp_text
        star_query = tmp_star
        mintime_query = tmp_min
        maxtime_query = tmp_max
        director_query = tmp_director
        lan_query = tmp_lan
        country_query = tmp_country
        loc_query = tmp_loc
        minyear_query = tmp_minyear
        maxyear_query = tmp_maxyear
        cats_query = tmp_cats

    # Values echoed back into the search boxes of the UI.
    shows = {
        'text': text_query,
        'star': star_query,
        'maxtime': maxtime_query,
        'mintime': mintime_query,
        'director': director_query,
        'lan': lan_query,
        'country': country_query,
        'loc': loc_query,
        'minyear': minyear_query,
        'maxyear': maxyear_query,
        'cats': cats_query,
    }

    # Keep the untouched text query for the "unknown search term" message;
    # the working copy below has its quoted phrases removed.
    full_text_query = text_query

    s = Search(index=index_name)

    # Split the text query into explicit "quoted phrases" and free terms.
    pattern = re.compile(r'(?:\B\")(.*?)(?:\b\")')
    phrases = pattern.findall(text_query)
    text_query = pattern.sub('', text_query).strip()

    s, all_matched = _build_text_search(s, text_query, phrases)

    # Numeric range filters. These values are ints once filled in, so they
    # are compared against '' instead of using len() (len() of an int raises
    # TypeError); this also keeps a legitimate 0.
    if mintime_query != '':
        s = s.query('range', runtime={'gte': mintime_query})
    if maxtime_query != '':
        s = s.query('range', runtime={'lte': maxtime_query})
    # NOTE(review): the year bounds are applied to the `runtime` field, same
    # as the runtime bounds above. That looks like a copy-paste slip, but the
    # index field holding the year is not visible here -- confirm and fix.
    if minyear_query != '':
        s = s.query('range', runtime={'gte': minyear_query})
    if maxyear_query != '':
        s = s.query('range', runtime={'lte': maxyear_query})

    # Remaining fields are chained as additional match clauses (AND).
    match_filters = (
        ('starring', star_query),
        ('director', director_query),
        ('language', lan_query),
        ('country', country_query),
        ('location', loc_query),
        ('categories', cats_query),
    )
    for field, value in match_filters:
        if value:
            s = s.query('match', **{field: value})

    # Highlight matches; one huge fragment per field so the whole field value
    # comes back with <mark> tags.
    s = s.highlight_options(pre_tags='<mark>', post_tags='</mark>')
    for field in ('text', 'title', 'starring', 'director', 'language',
                  'country', 'location', 'categories'):
        s = s.highlight(field, fragment_size=999999999, number_of_fragments=1)

    # Ten results per page.
    start = (page - 1) * 10
    end = start + 10
    response = s[start:end].execute()

    display_fields = ('title', 'text', 'starring', 'director', 'language',
                      'country', 'location', 'categories')
    resultList = {}
    for hit in response.hits:
        resultList[hit.meta.id] = _collect_hit_fields(hit, display_fields)

    # Make the result list available globally.
    gresults = resultList

    result_num = response.hits.total

    if result_num > 0:
        return render_template('page_SERP.html', results=resultList,
                               res_num=result_num, page_num=page,
                               queries=shows, all_matched=all_matched)

    # Nothing found: report every criterion the user supplied.
    message = []
    if len(full_text_query) > 0:
        message.append('Unknown search term: ' + full_text_query)
    if len(star_query) > 0:
        message.append('Cannot find star: ' + star_query)
    if len(director_query) > 0:
        message.append('Cannot find director: ' + director_query)
    if len(lan_query) > 0:
        message.append('Cannot find language: ' + lan_query)
    if len(country_query) > 0:
        message.append('Cannot find country: ' + country_query)
    if len(loc_query) > 0:
        message.append('Cannot find location: ' + loc_query)
    if len(cats_query) > 0:
        message.append('Cannot find categories: ' + cats_query)
    return render_template('page_SERP.html', results=message,
                           res_num=result_num, page_num=page, queries=shows)


def _parse_optional_int(raw):
    """Strip ``raw``; return it as an int, or '' when it is blank."""
    raw = raw.strip()
    return int(raw) if raw else raw


def _build_text_search(s, text_query, phrases):
    """Add the free-text criteria to ``s``; return ``(search, all_matched)``.

    First requires all free terms AND all phrases over title/text. If that
    search counts zero hits, falls back to free terms OR phrases and reports
    ``all_matched`` as False. With no terms and no phrases ``s`` is returned
    unchanged.
    """
    if not text_query and not phrases:
        return s, True

    # Untouched copy to build the disjunctive fallback from.
    base = s.__copy__()
    # The original consumed phrases from the end of the list; keep that order.
    ordered = list(reversed(phrases))

    if text_query:
        s = s.query('multi_match', query=text_query, type='cross_fields',
                    fields=['title^2', 'text'], operator='and')
    for phrase in ordered:
        s = s.query('multi_match', query=phrase, type='phrase_prefix',
                    fields=['title^2', 'text'])

    if s.count() != 0:
        return s, True

    # Conjunctive search found nothing: OR everything together instead.
    if text_query:
        q = Q('multi_match', query=text_query, type='cross_fields',
              fields=['title^2', 'text'], operator='or')
    else:
        q = Q('multi_match', query=ordered.pop(0), type='phrase_prefix',
              fields=['title^2', 'text'])
    for phrase in ordered:
        q |= Q('multi_match', query=phrase, type='phrase_prefix',
               fields=['title^2', 'text'])
    return base.query(q), False


def _collect_hit_fields(hit, fields):
    """Return the score plus each field, highlighted when a highlight exists."""
    result = {'score': hit.meta.score}
    highlight = hit.meta.highlight if 'highlight' in hit.meta else {}
    for field in fields:
        if field in highlight:
            # Only one fragment is requested per field, so take the first.
            result[field] = getattr(highlight, field)[0]
        else:
            result[field] = getattr(hit, field)
    return result