# Assumed imports for this handler: itemgetter for the ranking sort, and
# Bottle's request/template (the app runs behind Beaker session middleware).
# result_template, generate_page_results, DBHandler, find_intersections,
# wordCount, math_chars and user_top_20_database are module-level names
# defined elsewhere in this project.
from operator import itemgetter

from bottle import request, template


def do_search(keywords):
    global user_top_20_database
    # Fetch the current session.
    request_session = request.environ["beaker.session"]
    # Fetch the user's email for their session.
    user_email = request_session.get("user_email", "Anonymous")

    # If every character of the query is a math character, treat the query
    # as a calculator expression.
    if keywords and all(c in math_chars for c in keywords):
        try:
            # Note: eval() on user input executes arbitrary Python; this is
            # only as safe as math_chars is restrictive.
            result = eval(keywords.replace("^", "**").replace("[", "(").replace("]", ")"))
            return result_template(
                user_email,
                keywords,
                template(
                    """
                    <p>
                    {{keywords}} = {{result}}
                    </p>
                    """,
                    keywords=keywords,
                    result=result,
                ),
            )
        except Exception:
            # Not a valid expression after all; fall through to a normal search.
            pass

    # A list of all keywords from the search query, lower-cased.
    keyword_list = [word.lower() for word in keywords.split()]

    # -----------------------------------------------------------------------
    # Pair each distinct keyword with its frequency in the query.
    counted_keyword_list = [(keyword_list.count(x), x) for x in set(keyword_list)]
    # Sort the list in descending order of frequency.
    counted_keyword_list.sort(key=wordCount, reverse=True)

    page = request.query.get("page")
    if user_email != "Anonymous" and page is None:
        # Fetch the top-20 list for that user's email.
        user_top_20 = user_top_20_database.get(user_email)
        if user_top_20 is not None:
            # Merge the counted keywords into the top-20 list, updating totals.
            for count, word in counted_keyword_list:
                for i, (old_count, old_word) in enumerate(user_top_20):
                    if old_word == word:
                        # The keyword is already tracked: combine the counts
                        # and move the updated pair to the front of the list.
                        del user_top_20[i]
                        user_top_20.insert(0, (count + old_count, word))
                        break
                else:
                    # The keyword isn't in the top-20 list yet, so add it.
                    user_top_20.append((count, word))
            # Organize the top-20 list in descending order of keyword frequency.
            user_top_20.sort(key=wordCount, reverse=True)
            # If the list has grown past 20 keywords, trim it.
            del user_top_20[20:]
            # Update the database of user search history.
            user_top_20_database[user_email] = user_top_20

    # ------------------------------------------------------------------------
    # An empty query returns an empty results page.
    if not keyword_list:
        return generate_page_results(1, [], [], user_email)

    page = 1 if page is None else int(page)

    db = DBHandler()

    # Look up the word_ids in the database, skipping stop words.
    ignored_words = {
        "", "the", "of", "at", "on", "in", "is", "it",
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
        "and", "or",
    }
    word_ids = [
        db.get_word_id(keyword)
        for keyword in keyword_list
        if keyword not in ignored_words
    ]

    # Get the doc_ids for each word_id; an unknown word matches no documents.
    list_of_doc_id_lists = []
    for word_id in word_ids:
        if word_id is None:
            list_of_doc_id_lists.append([])
        else:
            list_of_doc_id_lists.append(db.get_doc_ids(word_id))

    # Intersect the doc_id lists: only documents containing every keyword remain.
    intersecting_doc_ids = find_intersections(list_of_doc_id_lists)

    # Get the PageRank score for each remaining document.
    ranks = db.get_pageranks(intersecting_doc_ids)

    # Pair each doc_id with its rank and sort with the highest-ranked first.
    ranked_doc_ids = zip(ranks, intersecting_doc_ids)
    ranked_sorted_doc_ids = sorted(ranked_doc_ids, key=itemgetter(0), reverse=True)

    # Fetch the URLs for the sorted doc_ids.
    results_list = [
        row[0] for row in db.get_urls([pair[1] for pair in ranked_sorted_doc_ids])
    ]

    return generate_page_results(page, results_list, keyword_list, user_email)
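
# --- Assumed helpers --------------------------------------------------------
# do_search() relies on several module-level names whose definitions are not
# shown in this file. The sketches below are assumptions reconstructed from
# how the handler uses them, not the original implementations.

# math_chars presumably admits digits and basic operators only, so the
# calculator branch never sees ordinary search text:
math_chars = set("0123456789.+-*/^()[] ")


def wordCount(pair):
    # Sort key for (count, keyword) tuples: order by the count element.
    return pair[0]


def find_intersections(list_of_doc_id_lists):
    # Intersect all the per-keyword doc_id lists; a document survives only
    # if it appears in every list, i.e. contains every searched keyword.
    if not list_of_doc_id_lists:
        return []
    doc_id_sets = [set(ids) for ids in list_of_doc_id_lists]
    return list(set.intersection(*doc_id_sets))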
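
# --- Optional hardening (not part of the original code) ---------------------
# The calculator branch eval()s user input, which executes arbitrary Python
# if math_chars ever lets something unexpected through. One possible
# replacement, sketched here purely as a suggestion, is an AST walker that
# whitelists arithmetic nodes before evaluating:
import ast
import operator

_ALLOWED_BINOPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.Pow: operator.pow,
}


def safe_eval(expression):
    # Evaluate a pure arithmetic expression, e.g. safe_eval("2*(3+4)") == 14.
    def _eval(node):
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _ALLOWED_BINOPS:
            return _ALLOWED_BINOPS[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.USub):
            return -_eval(node.operand)
        # Anything else (names, calls, attributes, ...) is rejected.
        raise ValueError("unsupported expression")

    return _eval(ast.parse(expression, mode="eval"))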