def search():
    """Handle a search form submission and render the results page.

    Reads the ``search_box`` form field, parses it with ``QueryParser`` into a
    search query and a search context, and looks them up through
    ``WebsiteMongo.search_websites``. When that lookup returns no websites,
    ``get_websites_from_duckduckgo`` is used instead; otherwise each website's
    ``pagerank`` is added to its ``score`` and the list is sorted by ``score``.

    Returns:
        The rendered ``search_results.html`` template on success, or a
        redirect to the ``index`` endpoint when the stripped query is shorter
        than two characters or the lookup raises an exception.
    """
    from operator import attrgetter

    DEBUG_INFO = "[ROBLY] webapp.py - /search - "

    # str.strip() returns a new string, so the result has to be rebound;
    # otherwise the length check below sees the untrimmed input.
    query = request.form['search_box'].strip()
    logging.debug("%squery = %s", DEBUG_INFO, query)

    # Reject empty / single-character queries before doing any parsing work.
    if len(query) <= 1:
        return redirect(url_for('index'))

    # Split the user input into the search terms and the search context.
    query_parser = QueryParser(query)
    search_query, search_context = (
        query_parser.extract_context_and_search_query())

    # Lazy %-style arguments avoid a TypeError if either value is not a str.
    logging.debug("%sSearchQuery = %s", DEBUG_INFO, search_query)
    logging.debug("%sSearchContext = %s", DEBUG_INFO, search_context)

    try:
        mongo = WebsiteMongo()
        logging.debug(
            "%sAttempthing to connect with mongodb searchquery='%s' "
            "context='%s'", DEBUG_INFO, search_query, search_context)
        websites, stats = mongo.search_websites(search_query, search_context)

        # Convert microseconds to seconds.
        seconds = stats.time_micros / 1000000

        if not websites:
            # Use the duckduckgo api if no results were found.
            websites = get_websites_from_duckduckgo(search_query)
        else:
            # Add page rank to the full text search score. This step is
            # best-effort: a failure is logged and the results are still shown.
            try:
                for website in websites:
                    website.score += website.pagerank
            except Exception:
                logging.exception(
                    "[ROBLY] Problem calculating search result score with "
                    "page rank")

            # NOTE(review): reverse=False orders the lowest score first;
            # confirm this is the order the template expects.
            websites.sort(key=attrgetter('score'), reverse=False)

        return render_template('search_results.html',
                               search_results=websites,
                               stats=stats,
                               seconds=seconds)
    except Exception:
        # logging.exception records the exception message and traceback.
        logging.exception(
            "%sError searching mongodb with the searchquery '%s'",
            DEBUG_INFO, search_query)
        return redirect(url_for('index'))
def search():
    """Handle a search form submission and render the results page.

    Reads the ``search_box`` form field, parses it with ``QueryParser`` into a
    search query and a search context, and looks them up through
    ``WebsiteMongo.search_websites``. When that lookup returns no websites,
    ``get_websites_from_duckduckgo`` is used instead; otherwise each website's
    ``pagerank`` is added to its ``score`` and the list is sorted by ``score``.

    Returns:
        The rendered ``search_results.html`` template on success, or a
        redirect to the ``index`` endpoint when the stripped query is shorter
        than two characters or the lookup raises an exception.
    """
    from operator import attrgetter

    DEBUG_INFO = "[ROBLY] webapp.py - /search - "

    # str.strip() returns a new string, so the result has to be rebound;
    # otherwise the length check below sees the untrimmed input.
    query = request.form['search_box'].strip()
    logging.debug("%squery = %s", DEBUG_INFO, query)

    # Reject empty / single-character queries before doing any parsing work.
    if len(query) <= 1:
        return redirect(url_for('index'))

    # Split the user input into the search terms and the search context.
    query_parser = QueryParser(query)
    search_query, search_context = (
        query_parser.extract_context_and_search_query())

    # Lazy %-style arguments avoid a TypeError if either value is not a str.
    logging.debug("%sSearchQuery = %s", DEBUG_INFO, search_query)
    logging.debug("%sSearchContext = %s", DEBUG_INFO, search_context)

    try:
        mongo = WebsiteMongo()
        logging.debug(
            "%sAttempthing to connect with mongodb searchquery='%s' "
            "context='%s'", DEBUG_INFO, search_query, search_context)
        websites, stats = mongo.search_websites(search_query, search_context)

        # Convert microseconds to seconds.
        seconds = stats.time_micros / 1000000

        if not websites:
            # Use the duckduckgo api if no results were found.
            websites = get_websites_from_duckduckgo(search_query)
        else:
            # Add page rank to the full text search score. This step is
            # best-effort: a failure is logged and the results are still shown.
            try:
                for website in websites:
                    website.score += website.pagerank
            except Exception:
                logging.exception(
                    "[ROBLY] Problem calculating search result score with "
                    "page rank")

            # NOTE(review): reverse=False orders the lowest score first;
            # confirm this is the order the template expects.
            websites.sort(key=attrgetter('score'), reverse=False)

        return render_template('search_results.html',
                               search_results=websites,
                               stats=stats,
                               seconds=seconds)
    except Exception:
        # logging.exception records the exception message and traceback.
        logging.exception(
            "%sError searching mongodb with the searchquery '%s'",
            DEBUG_INFO, search_query)
        return redirect(url_for('index'))
def test_search_query_and_context_with_invalid_context(self):
    """Parse a query whose context prefix is ``randomContext``.

    After ``extract_context_and_search_query`` runs, the parser is expected
    to expose an empty ``search_context`` and ``"custom search"`` as its
    ``search_query``.
    """
    raw_query = "randomContext:my custom search"
    parser = QueryParser(raw_query)
    parser.extract_context_and_search_query()

    expected_context, expected_query = "", "custom search"
    self.assertEqual(expected_context, parser.search_context)
    self.assertEqual(expected_query, parser.search_query)
def test_search_query_and_context_with_doc(self):
    """Parse a query whose context prefix is ``doc``.

    After ``extract_context_and_search_query`` runs, the parser is expected
    to expose ``"doc"`` as its ``search_context`` and ``"custom search"`` as
    its ``search_query``.
    """
    raw_query = "doc:my custom search"
    parser = QueryParser(raw_query)
    parser.extract_context_and_search_query()

    expected_context, expected_query = "doc", "custom search"
    self.assertEqual(expected_context, parser.search_context)
    self.assertEqual(expected_query, parser.search_query)
def test_remove_stop_words(self):
    """Check that ``remove_stop_words`` keeps only ``"covert"`` from the sample."""
    sample_tokens = ["and", "any", "covert", "are"]
    surviving_tokens = ["covert"]

    # The parser instance is not referenced by the assertion below.
    # NOTE(review): confirm whether constructing it is still required.
    parser = QueryParser()

    filtered = remove_stop_words(sample_tokens)
    self.assertEqual(surviving_tokens, filtered)
def test_stem_token_list(self):
    """Check that ``stem_token_list`` maps each sample token to its expected stem."""
    sample_tokens = ["actually", "running", "covert", "money", "rabbits"]
    expected_stems = ["actual", "run", "covert", "money", "rabbit"]

    # The parser instance is not referenced by the assertion below.
    # NOTE(review): confirm whether constructing it is still required.
    parser = QueryParser()

    stemmed = stem_token_list(sample_tokens)
    self.assertEqual(expected_stems, stemmed)
def test_tokenise_string(self):
    """Check that ``tokenise_string`` splits the sample sentence into its five words."""
    # The parser instance is not referenced by the assertion below.
    # NOTE(review): confirm whether constructing it is still required.
    parser = QueryParser("test")

    sentence = "this is a test string"
    expected_words = ["this", "is", "a", "test", "string"]

    words = tokenise_string(sentence)
    self.assertEqual(expected_words, words)
def test_remove_unwanted_chars(self):
    """Check that ``remove_unwanted_chars`` reduces the noisy sample to ``"abcdef"``."""
    # The parser instance is not referenced by the assertion below.
    # NOTE(review): confirm whether constructing it is still required.
    parser = QueryParser()

    noisy_input = "[}a>..,b()>,<c::d~ef~~"
    cleaned_expected = "abcdef"

    cleaned = remove_unwanted_chars(noisy_input)
    self.assertEqual(cleaned_expected, cleaned)