コード例 #1
0
def results(page):
    """Run the movie search and render one page of ``page_SERP.html``.

    On a POST request the query fields are read from the submitted form and
    cached in module-level globals.  On any other request method (used for
    the "next" result pages) the cached values are reused instead.

    The free-text query is searched conjunctively first (all free terms and
    all quoted phrases must match); if that yields nothing, a disjunctive
    search is used and ``all_matched`` is reported as ``False``.  The other
    form fields are then applied as additional AND-ed criteria.

    NOTE(review): the query state lives in module globals, so it is shared
    by every client of this process rather than kept per user/session.

    Args:
        page: 1-based page number taken from the URL, either an int or a
            numeric string.  Values below 1 are treated as page 1.

    Returns:
        The rendered ``page_SERP.html`` template, either with the hits of the
        requested page or with a list of "not found" messages.

    Raises:
        ValueError: if ``page`` or one of the numeric form fields is not a
            valid integer.
    """
    global tmp_text, tmp_star, tmp_min, tmp_max, tmp_director, tmp_lan
    global tmp_country, tmp_loc, tmp_minyear, tmp_maxyear, tmp_cats
    global gresults

    # Convert the <page> parameter of the URL to an integer and keep it in
    # range: a page below 1 would produce a negative slice further down.
    if not isinstance(page, int):
        page = int(page)
    page = max(page, 1)

    if request.method == 'POST':
        # Initial query: read every field from the form (whitespace stripped).
        form = request.form
        text_query = form['query'].strip()
        star_query = form['starring'].strip()
        mintime_query = _parse_optional_int(form['mintime'])
        maxtime_query = _parse_optional_int(form['maxtime'])
        director_query = form['director'].strip()
        lan_query = form['language'].strip()
        country_query = form['country'].strip()
        loc_query = form['location'].strip()
        minyear_query = _parse_optional_int(form['minplottime'])
        maxyear_query = _parse_optional_int(form['maxplottime'])
        cats_query = form['categories'].strip()

        # Remember the query so that follow-up GET requests can reuse it.
        tmp_text = text_query
        tmp_star = star_query
        tmp_min = mintime_query
        tmp_max = maxtime_query
        tmp_director = director_query
        tmp_lan = lan_query
        tmp_country = country_query
        tmp_loc = loc_query
        tmp_minyear = minyear_query
        tmp_maxyear = maxyear_query
        tmp_cats = cats_query
    else:
        # Follow-up request: reuse the values cached by the last POST.
        text_query = tmp_text
        star_query = tmp_star
        mintime_query = tmp_min
        maxtime_query = tmp_max
        director_query = tmp_director
        lan_query = tmp_lan
        country_query = tmp_country
        loc_query = tmp_loc
        minyear_query = tmp_minyear
        maxyear_query = tmp_maxyear
        cats_query = tmp_cats

    # Query values echoed back into the search boxes of the UI.
    shows = {
        'text': text_query,
        'star': star_query,
        'maxtime': maxtime_query,
        'mintime': mintime_query,
        'director': director_query,
        'lan': lan_query,
        'country': country_query,
        'loc': loc_query,
        'minyear': minyear_query,
        'maxyear': maxyear_query,
        'cats': cats_query,
    }
    # Keep the untouched text query; the quoted phrases are culled out below.
    full_text_query = text_query

    # Create a search object to query our index.  The search API is
    # chainable: every call returns a new object with the criterion added.
    s = Search(index=index_name)

    # Flag telling the template whether all terms have been matched.
    all_matched = True

    # Extract explicit phrases enclosed by "" and keep the remaining free terms.
    pattern = re.compile(r'(?:\B\")(.*?)(?:\b\")')
    phrases = pattern.findall(text_query)
    text_query = pattern.sub('', text_query).strip()

    if text_query or phrases:
        s, all_matched = _apply_text_query(s, text_query, phrases)

    # Numeric range criteria.  These values are ints (or '' when not given),
    # so they must not be tested with len().
    # NOTE(review): the year bounds filter on `runtime`, exactly like the
    # running-time bounds; this looks like a copy/paste slip, but the index
    # field holding the plot year is not visible here - confirm and adjust.
    range_filters = (
        ('gte', mintime_query),
        ('lte', maxtime_query),
        ('gte', minyear_query),
        ('lte', maxyear_query),
    )
    for bound, value in range_filters:
        if _has_value(value):
            s = s.query('range', runtime={bound: value})

    # Text criteria on the individual fields, chained with AND.
    match_filters = (
        ('starring', star_query),
        ('director', director_query),
        ('language', lan_query),
        ('country', country_query),
        ('location', loc_query),
        ('categories', cats_query),
    )
    for field, value in match_filters:
        if value:
            s = s.query('match', **{field: value})

    # Fields shown in the result list; each one is highlighted as a single,
    # effectively unbounded fragment so the whole field value comes back.
    result_fields = ('title', 'text', 'starring', 'director', 'language',
                     'country', 'location', 'categories')
    s = s.highlight_options(pre_tags='<mark>', post_tags='</mark>')
    for field in result_fields:
        s = s.highlight(field, fragment_size=999999999, number_of_fragments=1)

    # Determine the subset of results to display (10 hits per page).
    start = (page - 1) * 10
    end = start + 10

    # Execute the search and fetch only the requested range.
    response = s[start:end].execute()

    # Build the per-hit data for the template, preferring the highlighted
    # version of a field and falling back to its stored value.
    resultList = {}
    for hit in response.hits:
        highlight = hit.meta.highlight if 'highlight' in hit.meta else {}
        result = {'score': hit.meta.score}
        for field in result_fields:
            if field in highlight:
                result[field] = highlight[field][0]
            else:
                result[field] = getattr(hit, field)
        resultList[hit.meta.id] = result

    # Make the result list available globally.
    gresults = resultList

    # Total number of matching results.  Depending on the Elasticsearch
    # version this is a plain int or an object carrying the count in `.value`.
    total = response.hits.total
    result_num = getattr(total, 'value', total)

    if result_num > 0:
        return render_template('page_SERP.html',
                               results=resultList,
                               res_num=result_num,
                               page_num=page,
                               queries=shows,
                               all_matched=all_matched)

    # Nothing found: report every criterion the user supplied.
    not_found = (
        ('Unknown search term: ', full_text_query),
        ('Cannot find star: ', star_query),
        ('Cannot find director: ', director_query),
        ('Cannot find language: ', lan_query),
        ('Cannot find country: ', country_query),
        ('Cannot find location: ', loc_query),
        ('Cannot find categories: ', cats_query),
    )
    message = [prefix + value for prefix, value in not_found if value]

    return render_template('page_SERP.html',
                           results=message,
                           res_num=result_num,
                           page_num=page,
                           queries=shows)


def _parse_optional_int(raw):
    """Return ``raw`` stripped and converted to int, or '' when it is blank."""
    stripped = raw.strip()
    return int(stripped) if stripped else ''


def _has_value(value):
    """Tell whether an optional numeric query value was supplied (0 counts)."""
    return value is not None and value != ''


def _text_match(query, match_type, **extra):
    """Build a multi_match query over the boosted title and the text field."""
    return Q('multi_match',
             query=query,
             type=match_type,
             fields=['title^2', 'text'],
             **extra)


def _apply_text_query(search, free_text, phrases):
    """Add the free-text and phrase criteria to ``search``.

    Returns a ``(search, all_matched)`` tuple.  ``all_matched`` is False when
    the conjunctive search had no hits and the disjunctive one is used.
    """
    if not free_text and not phrases:
        return search, True

    # Conjunctive search: free terms AND every explicit phrase.
    strict = search
    if free_text:
        strict = strict.query(
            _text_match(free_text, 'cross_fields', operator='and'))
    for phrase in reversed(phrases):
        strict = strict.query(_text_match(phrase, 'phrase_prefix'))

    # count() issues a request of its own; only fall back when it is empty.
    if strict.count() != 0:
        return strict, True

    # Disjunctive fallback: free terms OR any of the phrases.  `search` is
    # still the untouched base object because query() returns a new one.
    alternatives = []
    if free_text:
        alternatives.append(
            _text_match(free_text, 'cross_fields', operator='or'))
    alternatives.extend(
        _text_match(phrase, 'phrase_prefix') for phrase in reversed(phrases))

    combined = alternatives[0]
    for alternative in alternatives[1:]:
        combined |= alternative
    return search.query(combined), False