Esempio n. 1
0
# Words skipped before the index lookup: the empty string, a few common
# English stop words and every single letter a-z.
_IGNORED_WORDS = frozenset(
    ["", "the", "of", "at", "on", "in", "is", "it", "and", "or"]
    + list("abcdefghijklmnopqrstuvwxyz")
)


def _merge_into_history(history, counted_keywords):
    """Fold freshly counted keywords into a search history list, in place.

    Both arguments hold ``(count, keyword)`` pairs. A keyword already present
    in ``history`` has its entry replaced by one with the summed count, moved
    to the front of the list; an unknown keyword is appended unchanged.
    """
    for counted in counted_keywords:
        count, keyword = counted[0], counted[1]
        for index, entry in enumerate(history):
            if entry[1] == keyword:
                # The list is only mutated right before leaving the loop, so
                # it is never modified while iteration is still in progress.
                del history[index]
                history.insert(0, (count + entry[0], keyword))
                break
        else:
            history.append(counted)


def do_search(keywords):
    """Answer a search request, either as a calculation or a keyword search.

    If every character of ``keywords`` is in ``math_chars`` the query is
    evaluated as an arithmetic expression (``^`` is treated as ``**`` and
    square brackets as parentheses) and the result is rendered through
    ``result_template``. Otherwise, or when evaluation fails, the query is
    split into lower-cased words, the signed-in user's search history is
    updated (first-page requests only) and the documents containing every
    non-ignored word are looked up through ``DBHandler``.

    Args:
        keywords: The raw query string entered by the user.

    Returns:
        Whatever ``result_template`` (calculator mode) or
        ``generate_page_results`` (search mode) returns -- presumably the
        rendered page; confirm against those helpers.
    """
    global user_top_20_database

    # Fetch the current session and the e-mail of the user it belongs to.
    request_session = request.environ["beaker.session"]
    user_email = request_session.get("user_email", "Anonymous")

    # Calculator mode. The emptiness guard keeps an empty query out of eval().
    if keywords and all(char in math_chars for char in keywords):
        expression = keywords.replace("^", "**").replace("[", "(").replace("]", ")")
        try:
            # SECURITY NOTE(review): eval() runs on user-supplied text. The
            # math_chars whitelist is the only barrier, and an expression such
            # as 9**9**9**9 can still exhaust CPU/memory. Consider replacing
            # this with a real arithmetic parser.
            result = eval(expression)
        except Exception:
            # Not a valid expression after all (syntax error, division by
            # zero, ...): deliberately fall through to a normal search.
            pass
        else:
            # Rendering happens outside the try so template errors are not
            # silently swallowed.
            return result_template(
                user_email,
                keywords,
                template(
                    """
				<p> {{keywords}} = {{result}} </p>
				""",
                    keywords=keywords,
                    result=result,
                ),
            )

    # All words of the query, lower-cased. A real list is built so that
    # .count() and the emptiness test below work on Python 2 and 3 alike.
    keyword_list = [word.lower() for word in keywords.split()]

    # (occurrences, word) pairs, most frequent first.
    counted_keyword_list = [(keyword_list.count(x), x) for x in set(keyword_list)]
    counted_keyword_list.sort(key=wordCount, reverse=True)

    page = request.query.get("page")

    # Only a first-page request (no "page" parameter) counts as a new search.
    # The session default is "Anonymous"; the lower-case spelling is accepted
    # too in case other code stores it that way.
    if user_email not in ("Anonymous", "anonymous") and page is None:
        user_top_20 = user_top_20_database.get(user_email)

        # Users without an existing history entry are skipped; presumably the
        # entry is created elsewhere (e.g. at sign-in) -- TODO confirm.
        if user_top_20 is not None:
            _merge_into_history(user_top_20, counted_keyword_list)
            # Keep the history ordered by descending keyword frequency.
            user_top_20.sort(key=wordCount, reverse=True)
            # Store the history under this user's own e-mail address.
            user_top_20_database[user_email] = user_top_20
            # NOTE(review): despite its name the list is not trimmed to 20
            # entries here; confirm whether consumers slice it themselves.

    # Nothing to search for: render an empty first page.
    if not keyword_list:
        return generate_page_results(1, [], [], user_email)

    if page is None:
        page = 1
    else:
        try:
            page = int(page)
        except ValueError:
            # A malformed "page" query parameter falls back to the first page
            # instead of failing the whole request.
            page = 1

    db = DBHandler()

    # Resolve each meaningful keyword to its word id (None when unknown).
    word_ids = [
        db.get_word_id(keyword)
        for keyword in keyword_list
        if keyword not in _IGNORED_WORDS
    ]

    # One list of doc ids per keyword; an unknown word matches no document.
    list_of_doc_id_lists = [
        [] if word_id is None else db.get_doc_ids(word_id) for word_id in word_ids
    ]

    # Keep only the documents that contain every keyword.
    # NOTE(review): the list is empty when every keyword was ignored; confirm
    # that find_intersections copes with an empty argument.
    intersecting_doc_ids = find_intersections(list_of_doc_id_lists)

    # Pair each document with its page rank and order the pairs by rank.
    ranks = db.get_pageranks(intersecting_doc_ids)
    # NOTE(review): this sorts ascending by rank value. If a larger page rank
    # means a better page, the best results end up last -- confirm the order.
    ranked_sorted_doc_ids = sorted(zip(ranks, intersecting_doc_ids), key=itemgetter(0))
    sorted_doc_ids = [doc_id for _rank, doc_id in ranked_sorted_doc_ids]

    # The first field of every row returned by get_urls is used as the result.
    results_list = [row[0] for row in db.get_urls(sorted_doc_ids)]
    return generate_page_results(page, results_list, keyword_list, user_email)
Esempio n. 2
0
# Words skipped before the index lookup: the empty string, a few common
# English stop words and every single letter a-z.
_IGNORED_WORDS = frozenset(
    ['', 'the', 'of', 'at', 'on', 'in', 'is', 'it', 'and', 'or'] +
    list('abcdefghijklmnopqrstuvwxyz'))


def _merge_into_history(history, counted_keywords):
    """Fold freshly counted keywords into a search history list, in place.

    Both arguments hold ``(count, keyword)`` pairs. A keyword already present
    in ``history`` has its entry replaced by one with the summed count, moved
    to the front of the list; an unknown keyword is appended unchanged.
    """
    for counted in counted_keywords:
        count, keyword = counted[0], counted[1]
        for index, entry in enumerate(history):
            if entry[1] == keyword:
                # The list is only mutated right before leaving the loop, so
                # it is never modified while iteration is still in progress.
                del history[index]
                history.insert(0, (count + entry[0], keyword))
                break
        else:
            history.append(counted)


def do_search(keywords):
    """Answer a search request, either as a calculation or a keyword search.

    If every character of ``keywords`` is in ``math_chars`` the query is
    evaluated as an arithmetic expression (``^`` is treated as ``**`` and
    square brackets as parentheses) and the result is rendered through
    ``result_template``. Otherwise, or when evaluation fails, the query is
    split into lower-cased words, the signed-in user's search history is
    updated (first-page requests only) and the documents containing every
    non-ignored word are looked up through ``DBHandler``.

    Args:
        keywords: The raw query string entered by the user.

    Returns:
        Whatever ``result_template`` (calculator mode) or
        ``generate_page_results`` (search mode) returns -- presumably the
        rendered page; confirm against those helpers.
    """
    global user_top_20_database

    # Fetch the current session and the e-mail of the user it belongs to.
    request_session = request.environ['beaker.session']
    user_email = request_session.get('user_email', 'Anonymous')

    # Calculator mode. The emptiness guard keeps an empty query out of eval().
    if keywords and all(char in math_chars for char in keywords):
        expression = keywords.replace('^', '**').replace('[', '(').replace(
            ']', ')')
        try:
            # SECURITY NOTE(review): eval() runs on user-supplied text. The
            # math_chars whitelist is the only barrier, and an expression such
            # as 9**9**9**9 can still exhaust CPU/memory. Consider replacing
            # this with a real arithmetic parser.
            result = eval(expression)
        except Exception:
            # Not a valid expression after all (syntax error, division by
            # zero, ...): deliberately fall through to a normal search.
            pass
        else:
            # Rendering happens outside the try so template errors are not
            # silently swallowed.
            return result_template(
                user_email, keywords,
                template('''
				<p> {{keywords}} = {{result}} </p>
				''',
                         keywords=keywords,
                         result=result))

    # All words of the query, lower-cased. A real list is built so that
    # .count() and the emptiness test below work on Python 2 and 3 alike.
    keyword_list = [word.lower() for word in keywords.split()]

    # (occurrences, word) pairs, most frequent first.
    counted_keyword_list = [(keyword_list.count(x), x)
                            for x in set(keyword_list)]
    counted_keyword_list.sort(key=wordCount, reverse=True)

    page = request.query.get('page')

    # Only a first-page request (no 'page' parameter) counts as a new search.
    # The session default is 'Anonymous'; the lower-case spelling is accepted
    # too in case other code stores it that way.
    if user_email not in ('Anonymous', 'anonymous') and page is None:
        user_top_20 = user_top_20_database.get(user_email)

        # Users without an existing history entry are skipped; presumably the
        # entry is created elsewhere (e.g. at sign-in) -- TODO confirm.
        if user_top_20 is not None:
            _merge_into_history(user_top_20, counted_keyword_list)
            # Keep the history ordered by descending keyword frequency.
            user_top_20.sort(key=wordCount, reverse=True)
            # Store the history under this user's own e-mail address.
            user_top_20_database[user_email] = user_top_20
            # NOTE(review): despite its name the list is not trimmed to 20
            # entries here; confirm whether consumers slice it themselves.

    # Nothing to search for: render an empty first page.
    if not keyword_list:
        return generate_page_results(1, [], [], user_email)

    if page is None:
        page = 1
    else:
        try:
            page = int(page)
        except ValueError:
            # A malformed 'page' query parameter falls back to the first page
            # instead of failing the whole request.
            page = 1

    db = DBHandler()

    # Resolve each meaningful keyword to its word id (None when unknown).
    word_ids = [
        db.get_word_id(keyword) for keyword in keyword_list
        if keyword not in _IGNORED_WORDS
    ]

    # One list of doc ids per keyword; an unknown word matches no document.
    list_of_doc_id_lists = [[] if word_id is None else db.get_doc_ids(word_id)
                            for word_id in word_ids]

    # Keep only the documents that contain every keyword.
    # NOTE(review): the list is empty when every keyword was ignored; confirm
    # that find_intersections copes with an empty argument.
    intersecting_doc_ids = find_intersections(list_of_doc_id_lists)

    # Pair each document with its page rank and order the pairs by rank.
    ranks = db.get_pageranks(intersecting_doc_ids)
    # NOTE(review): this sorts ascending by rank value. If a larger page rank
    # means a better page, the best results end up last -- confirm the order.
    ranked_sorted_doc_ids = sorted(zip(ranks, intersecting_doc_ids),
                                   key=itemgetter(0))
    sorted_doc_ids = [doc_id for _rank, doc_id in ranked_sorted_doc_ids]

    # The first field of every row returned by get_urls is used as the result.
    results_list = [row[0] for row in db.get_urls(sorted_doc_ids)]
    return generate_page_results(page, results_list, keyword_list, user_email)