예제 #1
def do_search(keywords):
    # Handle one search request for the raw query string `keywords`.
    #
    # Visible flow:
    #   1. If every character of the query is in math_chars, evaluate the
    #      query as an arithmetic expression and return result_template(...).
    #   2. Otherwise split the query into lower-cased words, merge their
    #      counts into the user's stored search history, look up matching
    #      documents through DBHandler, and return
    #      generate_page_results(page, results, keyword_list, user_email).
    #
    # This is Python 2 code (`<>` operator, builtin reduce, list-returning map).
    #
    # NOTE(review): several statements appear to be missing from this listing
    # (the `try:` matching the `except` below, the remainder of the
    # result_template(...) call, the contents of `ignored_words`, and the
    # bodies of the word-id / doc-id loops), so it does not parse as shown.
    # The comments below describe only what is visible.

    global user_top_20_database

    # Fetch the current session
    request_session = request.environ["beaker.session"]
    # Fetch the user's email for their session; falls back to "Anonymous".
    user_email = request_session.get("user_email", "Anonymous")

    # Calculator mode: taken when every character of the query is in
    # math_chars (defined elsewhere).
    # NOTE(review): reduce() is called without an initial value, so an empty
    # query string raises TypeError here -- confirm callers never pass "".
    if reduce(and_, map(lambda c: c in math_chars, keywords)):
        result = None
            # NOTE(review): the `try:` that should introduce this deeper-indented
            # section is not visible in this listing.
            # "^" is rewritten to "**" and square brackets to parentheses
            # before the expression is evaluated.
            # SECURITY: eval() runs on user-supplied text; the math_chars check
            # above is the only visible guard -- verify that whitelist.
            result = eval(keywords.replace("^", "**").replace("[", "(").replace("]", ")"))
            return result_template(
				<p> {{keywords}} = {{result}} </p>
        except Exception as e:
            # NOTE(review): the handler body is not visible in this listing.

    # A list of all keywords from the search query, lower-cased and split on
    # whitespace.
    keyword_list = map(str.lower, keywords.split())
    # From here on `keywords` refers to that list, not the raw query string.
    keywords = keyword_list
    # -----------------------------------------------------------------------
    # (occurrence count, word) pairs, one per distinct word in this query.
    counted_keyword_list = [(keyword_list.count(x), x) for x in set(keyword_list)]
    # Sort the list in descending order of the wordCount key (wordCount is
    # defined elsewhere; presumably it returns the count -- TODO confirm).
    counted_keyword_list.sort(key=wordCount, reverse=1)

    page = request.query.get("page")
    # The search history is only updated when no explicit page was requested.
    # NOTE(review): the session default above is "Anonymous" (capital A) but
    # this compares against "anonymous", so the default value passes this
    # check -- confirm whether anonymous users are meant to be excluded.
    if user_email <> "anonymous" and page == None:
        # Fetch the top 20 list for that user's email
        user_top_20 = user_top_20_database.get(user_email)

        if user_top_20 != None:
            # Add to the top 20 list and update totals.
            # Iterate through the counted keyword list.
            for keywords1 in counted_keyword_list:
                # If the keyword is already in the top 20 list, merge its count in.
                # `in element` is a membership test on each stored entry; the
                # entries look like (count, keyword) tuples given the indexing
                # below -- TODO confirm the stored shape.
                if any(keywords1[1] in element for element in user_top_20):
                    # Index of the top 20 entry currently being visited.
                    i = 0
                    # Walk the stored pairs and fold the new count into the matching one.
                    for keywords2 in user_top_20:
                        # If the keywords match.
                        if keywords2[1] == keywords1[1]:
                            # Save the count value of the user_top_20 version.
                            keyword_count = keywords2[0]
                            # Delete the old user_top_20 keyword and count.
                            del user_top_20[i]
                            # Add the keyword with updated count to the front of the top_20 list.
                            # (The list is mutated while being iterated; the
                            # del + insert pair leaves its length unchanged.)
                            user_top_20.insert(0, ((keywords1[0] + keyword_count), keywords1[1]))
                        # Advance the index alongside the iteration.
                        i = i + 1

                # NOTE(review): no code that adds a keyword which is not yet in
                # the top 20 list is visible in this listing.

            # Organize the top 20 list in descending order by the wordCount key.
            user_top_20.sort(key=wordCount, reverse=1)

            # Update the database of user search history.
            # NOTE(review): the list was read with the `user_email` variable as
            # key but is written back under the literal string "user_email" --
            # this looks like a bug; confirm.
            user_top_20_database["user_email"] = user_top_20

            # Trimming the list to 20 keywords is currently disabled:
            # while len(user_top_20) > 20:
            # 	del user_top_20[-1]

    # ------------------------------------------------------------------------

    # An empty query yields page 1 with an empty result list.
    if keyword_list == []:
        results_list = []
        return generate_page_results(1, results_list, [], user_email)

    # Default to the first page when none was requested.
    # NOTE(review): int(page) sits inside this branch, so it only converts the
    # literal 1; a page value taken from the query is passed on unconverted.
    # Confirm whether the conversion was meant to follow the branch.
    if page == None:
        page = 1
        page = int(page)

    db = DBHandler()

    # Get the word_ids through a getter in the database
    # NOTE(review): the contents of `ignored_words` and the body of the loop
    # that fills `word_ids` are not visible in this listing.
    word_ids = []
    ignored_words = set(

    for keyword in keyword_list:
        if keyword in ignored_words:

    # Get the doc_ids from the word_ids in the database
    # NOTE(review): the loop body that fills `list_of_doc_id_lists` is not
    # visible in this listing.
    list_of_doc_id_lists = []
    for word_id in word_ids:
        if word_id == None:

    # Find lists of doc_ids that intersect with each other, this will give us doc ids that contain both keywords
    intersecting_doc_ids = find_intersections(list_of_doc_id_lists)

    # Get the url_ranks from pagerank in the database
    ranks = db.get_pageranks(intersecting_doc_ids)

    # Zip the doc_ids with the corresponding url_ranks to make (rank, doc_id) pairs
    ranked_doc_ids = zip(ranks, intersecting_doc_ids)

    # Sort the pairs by rank. No `reverse` is given, so the order is ascending.
    # NOTE(review): confirm that lowest-rank-first is the intended result order.
    ranked_sorted_doc_ids = sorted(ranked_doc_ids, key=itemgetter(0))
    # Look up the URLs for the sorted doc ids and keep item 0 of each returned row.
    results_list = map(itemgetter(0), db.get_urls(map(itemgetter(1), ranked_sorted_doc_ids)))
    return generate_page_results(page, results_list, keyword_list, user_email)
예제 #2
def do_search(keywords):
    # Handle one search request for the raw query string `keywords`.
    #
    # Visible flow:
    #   1. If every character of the query is in math_chars, evaluate the
    #      query as an arithmetic expression and return result_template(...).
    #   2. Otherwise split the query into lower-cased words, merge their
    #      counts into the user's stored search history, look up matching
    #      documents through DBHandler, and return
    #      generate_page_results(page, results, keyword_list, user_email).
    #
    # This is Python 2 code (`<>` operator, builtin reduce, list-returning map).
    #
    # NOTE(review): several statements appear to be missing from this listing
    # (the `try:` matching the `except` below, the remainder of the
    # result_template(...) call, the `user_top_20.insert(0,` line, the
    # contents of `ignored_words`, and the bodies of the word-id / doc-id
    # loops), so it does not parse as shown. The comments below describe only
    # what is visible.

    global user_top_20_database

    # Fetch the current session
    request_session = request.environ['beaker.session']
    # Fetch the user's email for their session; falls back to 'Anonymous'.
    user_email = request_session.get('user_email', 'Anonymous')

    # Calculator mode: taken when every character of the query is in
    # math_chars (defined elsewhere).
    # NOTE(review): reduce() is called without an initial value, so an empty
    # query string raises TypeError here -- confirm callers never pass ''.
    if reduce(and_, map(lambda c: c in math_chars, keywords)):
        result = None
            # NOTE(review): the `try:` that should introduce this deeper-indented
            # section is not visible in this listing.
            # '^' is rewritten to '**' and square brackets to parentheses
            # before the expression is evaluated.
            # SECURITY: eval() runs on user-supplied text; the math_chars check
            # above is the only visible guard -- verify that whitelist.
            result = eval(
                keywords.replace('^', '**').replace('[',
                                                    '(').replace(']', ')'))
            return result_template(
                user_email, keywords,
				<p> {{keywords}} = {{result}} </p>
        except Exception as e:
            # NOTE(review): the handler body is not visible in this listing.

    # A list of all keywords from the search query, lower-cased and split on
    # whitespace.
    keyword_list = map(str.lower, keywords.split())
    # From here on `keywords` refers to that list, not the raw query string.
    keywords = keyword_list
    # (occurrence count, word) pairs, one per distinct word in this query.
    counted_keyword_list = [(keyword_list.count(x), x)
                            for x in set(keyword_list)]
    # Sort the list in descending order of the wordCount key (wordCount is
    # defined elsewhere; presumably it returns the count -- TODO confirm).
    counted_keyword_list.sort(key=wordCount, reverse=1)

    page = request.query.get('page')
    # The search history is only updated when no explicit page was requested.
    # NOTE(review): the session default above is 'Anonymous' (capital A) but
    # this compares against 'anonymous', so the default value passes this
    # check -- confirm whether anonymous users are meant to be excluded.
    if user_email <> 'anonymous' and page == None:
        # Fetch the top 20 list for that user's email
        user_top_20 = user_top_20_database.get(user_email)

        if user_top_20 != None:
            # Add to the top 20 list and update totals.
            # Iterate through the counted keyword list.
            for keywords1 in counted_keyword_list:
                # If the keyword is already in the top 20 list, merge its count in.
                # `in element` is a membership test on each stored entry; the
                # entries look like (count, keyword) tuples given the indexing
                # below -- TODO confirm the stored shape.
                if any(keywords1[1] in element for element in user_top_20):
                    # Index of the top 20 entry currently being visited.
                    i = 0
                    # Walk the stored pairs and fold the new count into the matching one.
                    for keywords2 in user_top_20:
                        # If the keywords match.
                        if keywords2[1] == keywords1[1]:
                            # Save the count value of the user_top_20 version.
                            keyword_count = keywords2[0]
                            # Delete the old user_top_20 keyword and count.
                            del user_top_20[i]
                            # Add the keyword with updated count to the front of the top_20 list.
                            # NOTE(review): the opening of this call (presumably
                            # `user_top_20.insert(0,`) is missing from this listing.
                                ((keywords1[0] + keyword_count), keywords1[1]))
                        # Advance the index alongside the iteration.
                        i = i + 1

                # NOTE(review): no code that adds a keyword which is not yet in
                # the top 20 list is visible in this listing.

            # Organize the top 20 list in descending order by the wordCount key.
            user_top_20.sort(key=wordCount, reverse=1)

            # Update the database of user search history.
            # NOTE(review): the list was read with the `user_email` variable as
            # key but is written back under the literal string "user_email" --
            # this looks like a bug; confirm.
            user_top_20_database["user_email"] = user_top_20

            # Trimming the list to 20 keywords is currently disabled:
            # while len(user_top_20) > 20:
            #	del user_top_20[-1]


    # An empty query yields page 1 with an empty result list.
    if keyword_list == []:
        results_list = []
        return generate_page_results(1, results_list, [], user_email)

    # Default to the first page when none was requested.
    # NOTE(review): int(page) sits inside this branch, so it only converts the
    # literal 1; a page value taken from the query is passed on unconverted.
    # Confirm whether the conversion was meant to follow the branch.
    if page == None:
        page = 1
        page = int(page)

    db = DBHandler()

    # Get the word_ids through a getter in the database
    # NOTE(review): the contents of `ignored_words` and the body of the loop
    # that fills `word_ids` are not visible in this listing.
    word_ids = []
    ignored_words = set([

    for keyword in keyword_list:
        if keyword in ignored_words:

    # Get the doc_ids from the word_ids in the database
    # NOTE(review): the loop body that fills `list_of_doc_id_lists` is not
    # visible in this listing.
    list_of_doc_id_lists = []
    for word_id in word_ids:
        if word_id == None:

    # Find lists of doc_ids that intersect with each other, this will give us doc ids that contain both keywords
    intersecting_doc_ids = find_intersections(list_of_doc_id_lists)

    # Get the url_ranks from pagerank in the database
    ranks = db.get_pageranks(intersecting_doc_ids)

    # Zip the doc_ids with the corresponding url_ranks to make (rank, doc_id) pairs
    ranked_doc_ids = zip(ranks, intersecting_doc_ids)

    # Sort the pairs by rank. No `reverse` is given, so the order is ascending.
    # NOTE(review): confirm that lowest-rank-first is the intended result order.
    ranked_sorted_doc_ids = sorted(ranked_doc_ids, key=itemgetter(0))
    # Look up the URLs for the sorted doc ids and keep item 0 of each returned row.
    results_list = map(itemgetter(0),
                       db.get_urls(map(itemgetter(1), ranked_sorted_doc_ids)))
    return generate_page_results(page, results_list, keyword_list, user_email)