def post(self, request, *args, **kwargs):
    inputs = self.serializer_class(data=request.data)
    inputs.is_valid(raise_exception=True)
    payload = inputs.validated_data

    # Extract weighted keywords from the submitted text with the chosen algorithm.
    keyword_weight_mapping = getKeyword(payload["text"],
                                        model=payload["algorithm"],
                                        num=payload["num_of_keywords"])
    if payload["wiki_filter"]:
        # Keep only keywords that resolve to Wikipedia pages and normalize
        # their weights before returning them.
        wiki_keyword_redirect_mapping, keyword_weight_mapping = wikifilter(
            keyword_weight_mapping)
        keyword_weight_mapping = normalize(keyword_weight_mapping)
    return Response(keyword_weight_mapping)
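# A minimal sketch (not part of the original source) of the serializer this
# view assumes. The field names mirror the payload keys used above; the class
# name, field types, and defaults are assumptions.
from rest_framework import serializers


class KeywordExtractionSerializer(serializers.Serializer):
    text = serializers.CharField()
    algorithm = serializers.CharField(default="Yake")        # assumed default
    num_of_keywords = serializers.IntegerField(default=20)   # assumed default
    wiki_filter = serializers.BooleanField(default=False)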
def fetchTopics(algorithm):
    init_time = time.time()

    # Build one large document from all LAK paper titles and abstracts.
    lak_data = getData()
    lak_data['abstitle'] = lak_data['title'] + " " + lak_data['abstract']
    text = ' '.join(list(lak_data['abstitle'].values))

    # Extract the top 30 keywords, plus the Wikipedia-filtered subset.
    keywords = getKeyword(text, algorithm, 30)
    print(keywords)
    keywords_noquotes = str(keywords).replace("'", "").replace("(", "").replace(")", "")
    wikis = str(wikifilter(keywords)[1])
    wikis_noquotes = wikis.replace("'", "").replace("(", "").replace(")", "")

    conn = None
    cursor = None
    try:
        up.uses_netloc.append("postgres")
        url = up.urlparse(
            "postgres://*****:*****@ruby.db.elephantsql.com:5432/yrrhzsue")
        conn = psycopg2.connect(database=url.path[1:],
                                user=url.username,
                                password=url.password,
                                host=url.hostname,
                                port=url.port)
        cursor = conn.cursor()
        # Use a parameterized query instead of string concatenation to avoid
        # SQL injection and quoting problems.
        insert_query = ("insert into Topics(year, topics, algorithm, keywords) "
                        "values (%s, %s, %s, %s)")
        cursor.execute(insert_query,
                       ("all years", wikis_noquotes, str(algorithm),
                        keywords_noquotes))
        conn.commit()
    except (Exception, psycopg2.Error) as error:
        print("Error while writing topics to PostgreSQL", error)
    finally:
        # Close the database connection if it was opened.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
            print("PostgreSQL connection is closed")

    print("Execution time: ", time.time() - init_time)
    return "success"
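# A hedged sketch of the Topics table the insert above assumes. The column
# names come from the query itself; the column types and the CREATE statement
# are assumptions for illustration only.
CREATE_TOPICS_TABLE = """
    CREATE TABLE IF NOT EXISTS Topics (
        year      text,
        topics    text,
        algorithm text,
        keywords  text
    )
"""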
def generate_short_term_model(user_id, source):
    blacklisted_keywords = list(
        BlacklistedKeyword.objects.filter(user_id=user_id).values_list(
            "keyword__name", flat=True))

    if source == ShortTermInterest.TWITTER:
        tweet_candidates = Tweet.objects.filter(user_id=user_id,
                                                used_in_calc=False)

        # Group tweet text by month so a separate interest model can be
        # computed per month.
        month_wise_text = {}
        for tweet in tweet_candidates:
            key = f"{tweet.created_at.month}_{tweet.created_at.year}"
            if key not in month_wise_text:
                month_wise_text[key] = ""
            month_wise_text[key] = f"{month_wise_text[key]} {tweet.full_text}"

        for key, text in month_wise_text.items():
            month, year = key.split("_")
            try:
                keywords = getKeyword(text or "", model="Yake", num=20)
            except Exception:
                # Silencing errors like
                # interests/Keyword_Extractor/utils/datarepresentation.py:106:
                # RuntimeWarning: Mean of empty slice
                continue
            print(f"got keywords {keywords}")
            if not keywords:
                print("No keywords found")
                continue

            wiki_keyword_redirect_mapping, keyword_weight_mapping = wikifilter(
                keywords)
            print(keyword_weight_mapping)
            if not keyword_weight_mapping:
                print("No keywords found in weight mapping")
                continue

            keywords = normalize(keyword_weight_mapping)
            for keyword, weight in keywords.items():
                original_keyword_name = wiki_keyword_redirect_mapping.get(
                    keyword, keyword)
                keyword = keyword.lower()
                if keyword in blacklisted_keywords:
                    print("Skipping {} as it is blacklisted".format(keyword))
                    continue

                keyword_instance, created = Keyword.objects.get_or_create(
                    name=keyword)
                if created:
                    # New keyword: attach its Wikipedia categories.
                    print("getting wiki categories")
                    categories = wikicategory(keyword)
                    for category in categories:
                        category_instance, _ = Category.objects.get_or_create(
                            name=category)
                        keyword_instance.categories.add(category_instance)
                    keyword_instance.save()

                # Remember the original (pre-redirect) spelling of the keyword.
                try:
                    original_keywords = json.loads(
                        keyword_instance.original_keywords)
                except (TypeError, ValueError):
                    original_keywords = []
                original_keywords.append(original_keyword_name.lower())
                keyword_instance.original_keywords = json.dumps(
                    list(set(original_keywords)))
                keyword_instance.save()

                s_interest, _ = ShortTermInterest.objects.update_or_create(
                    user_id=user_id,
                    keyword=keyword_instance,
                    model_month=month,
                    model_year=year,
                    defaults={"source": source, "weight": weight},
                )
                for t in tweet_candidates.filter(full_text__icontains=keyword):
                    s_interest.tweets.add(t)
        tweet_candidates.update(used_in_calc=True)

    if source == ShortTermInterest.SCHOLAR:
        paper_candidates = Paper.objects.filter(user_id=user_id,
                                                used_in_calc=False)

        # Group paper titles and abstracts by publication year.
        year_wise_text = {}
        for paper in paper_candidates:
            if paper.year not in year_wise_text:
                year_wise_text[paper.year] = ""
            year_wise_text[
                paper.year] = f"{year_wise_text[paper.year]} {paper.title} {paper.abstract}"

        for year, text in year_wise_text.items():
            try:
                keywords = getKeyword(text, model="SingleRank", num=20)
            except Exception:
                # Silencing errors like
                # interests/Keyword_Extractor/utils/datarepresentation.py:106:
                # RuntimeWarning: Mean of empty slice
                continue
            print(f"got keywords {keywords}")
            if not keywords:
                print("No keywords found")
                continue

            wiki_keyword_redirect_mapping, keyword_weight_mapping = wikifilter(
                keywords)
            if not keyword_weight_mapping:
                print("No keywords found in weight mapping")
                continue

            keywords = normalize(keyword_weight_mapping)
            for keyword, weight in keywords.items():
                original_keyword_name = wiki_keyword_redirect_mapping.get(
                    keyword, keyword)
                keyword = keyword.lower()
                if keyword in blacklisted_keywords:
                    print("Skipping {} as it is blacklisted".format(keyword))
                    continue

                keyword_instance, created = Keyword.objects.get_or_create(
                    name=keyword)
                if created:
                    # New keyword: attach its Wikipedia categories.
                    print("getting wiki categories")
                    categories = wikicategory(keyword)
                    for category in categories:
                        category_instance, _ = Category.objects.get_or_create(
                            name=category)
                        keyword_instance.categories.add(category_instance)
                    keyword_instance.save()

                # Remember the original (pre-redirect) spelling of the keyword.
                try:
                    original_keywords = json.loads(
                        keyword_instance.original_keywords)
                except (TypeError, ValueError):
                    original_keywords = []
                original_keywords.append(original_keyword_name.lower())
                keyword_instance.original_keywords = json.dumps(
                    list(set(original_keywords)))
                keyword_instance.save()

                s_interest, _ = ShortTermInterest.objects.update_or_create(
                    user_id=user_id,
                    keyword=keyword_instance,
                    model_month=1,  # papers only carry a year, so default to January
                    model_year=year,
                    defaults={"source": source, "weight": weight},
                )
                for p in paper_candidates.filter(
                        Q(title__icontains=keyword)
                        | Q(abstract__icontains=keyword)):
                    s_interest.papers.add(p)
        paper_candidates.update(used_in_calc=True)
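# Hypothetical usage sketch: rebuild the short-term interest models for one
# user from both sources. The helper name and the call pattern are assumptions;
# only the source constants come from the code above.
def regenerate_short_term_models(user_id):
    for source in (ShortTermInterest.TWITTER, ShortTermInterest.SCHOLAR):
        generate_short_term_model(user_id, source)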
def get_recommended_tweets(tags):
    user_interest_model_list = []  # texts of the user's interest keywords
    full_result = []

    # 1. Collect tweets for each interest keyword (tag) via the Twitter search
    #    API, optionally restricted by geolocation and language.
    for tag in tags:
        extra_kwargs = {}
        geo_code = generate_geo_code(tag)
        if geo_code is not None:
            extra_kwargs['geocode'] = geo_code
        language = tag.get("lang", None)
        if language is not None:
            extra_kwargs['lang'] = language
        user_interest_model_list.append(tag["text"])
        response = API.search(q=tag["text"],
                              tweet_mode="extended",
                              count=tag["n_tweets"],
                              **extra_kwargs)
        results = [
            extract_tweet_from_response(x, tag) for x in response["statuses"]
        ]
        full_result.extend(results)

    # 2. Deduplicate tweets by their ID.
    unique_tweets = {each['id_str']: each for each in full_result}.values()
    print(len(full_result))
    print(len(unique_tweets))

    # 3. Extract keywords from each tweet's full text and score the tweet by
    #    its similarity to the user interest model; keep tweets scoring above 40.
    tweets_with_scores = []
    for result in unique_tweets:
        text = result.get("full_text")
        extract_keywords_from_tweet = getKeyword(text, "Yake")
        # Optionally restrict to Wikipedia-backed keywords:
        # wiki_keyword_redirect_mapping, keywords_extracted = wikifilter(extract_keywords_from_tweet)
        # keywords_list = list(keywords_extracted.keys())
        keywords_list = list(extract_keywords_from_tweet.keys())
        # Similarity-based score; a random placeholder score can be swapped in
        # here for local testing before Docker deployment.
        score = round((get_interest_similarity_score(
            user_interest_model_list, keywords_list) or 0) * 100, 2)
        if score > 40:
            result["score"] = score
            tweets_with_scores.append(result)

    # 4. Sort the recommended tweets by score, highest first.
    return sorted(tweets_with_scores, key=lambda k: k['score'], reverse=True)
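# Hypothetical usage sketch: the tag dictionaries mirror the keys accessed in
# get_recommended_tweets ("text", "n_tweets", and optionally "lang"); the
# concrete values are illustrative only.
example_tags = [
    {"text": "learning analytics", "n_tweets": 25, "lang": "en"},
    {"text": "recommender systems", "n_tweets": 25},
]
recommended = get_recommended_tweets(example_tags)
for tweet in recommended:
    print(tweet["score"], tweet.get("full_text"))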