def insert_keywords_from_search_history(self):
    """Extract frequent queries from the search history and store them in Redis.

    Reads the minimum-frequency threshold from config, extracts the
    search-history keywords meeting that threshold, and inserts them into
    the Redis set named by the ``search_history_keywords`` config entry.

    Returns:
        The result of ``redis_connect.insert_set`` (backend-specific).
    """
    min_freq = config.get("keywordsextractor", "search_history_autosuggest_min_freq")
    search_log_data = self.extract_search_history(int(min_freq))
    # Lazy %-style args: formatted only if INFO logging is actually enabled.
    logger.info("No of keywords from search history = %s", len(search_log_data))
    search_history = config.get("redis", "search_history_keywords")
    redis_connect = redis_connector.get_instance()
    return redis_connect.insert_set(search_log_data, search_history)
def insert_keywords_from_search_history_and_documents_rank(self):
    """Store term phrases in Redis ranked by frequency, highest count first.

    Sorts ``self.term_frequency_dict`` by count in descending order and
    inserts the ordered keywords into the Redis set named by the
    ``key_search_history_rank`` config entry.

    Returns:
        The result of ``redis_connect.insert_set`` (backend-specific).
    """
    # Comprehension replaces the original append loop; same ordering.
    keywords = [
        term
        for term, _count in sorted(
            self.term_frequency_dict.items(), key=itemgetter(1), reverse=True
        )
    ]
    keywords_rank = config.get("redis", "key_search_history_rank")
    redis_connect = redis_connector.get_instance()
    return redis_connect.insert_set(keywords, keywords_rank)
def get_suggested_query_list(in_query):
    """Return up to 8 related query suggestions for *in_query* from Redis.

    Args:
        in_query: Raw query string entered by the user.

    Returns:
        A list of at most 8 suggestion strings ordered by ascending rank
        score; an empty list when Redis returns no related keys.
    """
    no_of_suggested_queries = 8  # max number of related suggestions returned
    redis_obj = redis_connector.get_instance()
    query = utils.clean_text(in_query)
    # Mapping of suggestion -> rank score, or None when nothing matches.
    related_keys = redis_obj.query_db_with_scores(query)
    if related_keys is None:  # identity check instead of `!= None`
        return []
    # Drop the query itself (both raw and cleaned forms) from the suggestions.
    related_keys.pop(in_query, None)
    related_keys.pop(query, None)
    # Sort by rank ascending. NOTE(review): the rank-insertion path elsewhere
    # sorts descending — confirm ascending is the intended display order here.
    ordered = sorted(related_keys.items(), key=lambda kv: kv[1])
    return [suggestion for suggestion, _score in ordered][:no_of_suggested_queries]
def insert_keywords_from_documents(self):
    """Extract document keywords, accumulate their counts, and store in Redis.

    Merges the per-document term counts into ``self.term_frequency_dict``
    (keys lower-cased), then inserts the lower-cased keywords into the
    Redis set named by the ``key_suggestion_keywords`` config entry.

    Returns:
        The result of ``redis_connect.insert_set`` (backend-specific).
    """
    data = self.extract_keywords()
    terms = data['term_phrase'].tolist()
    count = data['count'].tolist()
    # Accumulate counts per lower-cased term. dict.get with a default
    # replaces the original membership-test-then-lookup double access,
    # and value.lower() is computed once per term instead of three times.
    for i, value in enumerate(terms):
        term = value.lower()
        self.term_frequency_dict[term] = self.term_frequency_dict.get(term, 0) + count[i]
    keywords = [keyword.lower() for keyword in data["term_phrase"]]
    # Lazy %-style args: formatted only if INFO logging is actually enabled.
    logger.info("No of keywords extracted from documents = %s", len(keywords))
    key_suggestion_keywords = config.get("redis", "key_suggestion_keywords")
    redis_connect = redis_connector.get_instance()
    return redis_connect.insert_set(keywords, key_suggestion_keywords)
def suggest(is_authenticated, is_authorized):
    """Return keyword suggestions matching the query in the request JSON.

    Connects to the Redis server and fetches the keys matching the cleaned
    query string.

    Args:
        is_authenticated: Authentication flag (not used in this handler).
        is_authorized: Authorization flag; unauthorized callers receive the
            401 page.

    Returns:
        A Flask ``Response`` with a JSON body: the query plus up to 5
        related keywords on success, or a ``failure`` message otherwise.
        Unexpected errors yield a 400 response with the error text.
    """
    if not is_authorized:
        return render_template("unauthorized_user.html"), 401

    data_json = request.get_json()
    payload_data = {'query': data_json.get('query')}
    for field in payload_data:
        response_msg = check_field_validations(payload_data, field)
        if response_msg:
            return Response(json.dumps(response_msg), status=400,
                            mimetype='application/json')

    def _json_ok(body):
        # Every reply below shares status 200 and the JSON mimetype.
        return Response(json.dumps(body), status=200, mimetype='application/json')

    try:
        redis_obj = redis_connector.get_instance()
        if payload_data['query']:
            query = utils.clean_text(payload_data['query'])
            # Only the keys matching the query are returned from Redis.
            related_keys = redis_obj.query_db(query)
            if related_keys:
                # Clean each suggestion before handing it back to the client.
                related_keywords = [utils.clean_text(str(word)) for word in related_keys]
                resp = _json_ok({
                    "query": payload_data['query'],
                    "related_keywords": related_keywords[:5],
                })
            else:
                # Covers both an empty result and a None reply from Redis;
                # the original left ``resp`` unbound when related_keys was
                # None, turning that case into a NameError-driven 400.
                resp = _json_ok({
                    "query": payload_data['query'],
                    "failure": "sorry We dont have a matching keyword",
                })
        else:
            resp = _json_ok({
                "query": payload_data['query'],
                "failure": "No keywords available in redis",
            })
    except Exception as e:
        logger.exception(e)
        resp = Response(str(e), status=400, mimetype='application/json')
    return resp