def generate_long_term_model(user_id):
    """Rebuild the long-term interest model for *user_id*.

    Merges the user's short-term interests that have not yet been used in a
    calculation into the existing long-term interests, replaces every
    LongTermInterest row for the user with the merged result, and links each
    new interest to the tweets/papers whose text mentions its keyword.

    Returns None. Side effects: deletes/recreates LongTermInterest rows,
    marks the consumed ShortTermInterest rows with used_in_calc=True, and may
    create Keyword/Category rows.
    """
    print("updating long term model for {}".format(user_id))
    short_term_model = ShortTermInterest.objects.filter(
        user_id=user_id, used_in_calc=False)
    short_term_data = {
        item.keyword.name: item.weight for item in short_term_model
    }
    long_term_data = {
        item.keyword.name: item.weight
        for item in LongTermInterest.objects.filter(user_id=user_id)
    }
    if not short_term_data:
        # Nothing new to merge; keep the existing long-term model intact.
        return
    new_data = update_interest_models(short_term_data, long_term_data)
    LongTermInterest.objects.filter(user_id=user_id).delete()
    short_term_model.update(used_in_calc=True)
    for keyword, weight in new_data.items():
        print(keyword, weight)
        # Hoisted: the lowercased form is reused in four lookups below.
        keyword_lower = keyword.lower()
        keyword_instance, created = Keyword.objects.get_or_create(
            name=keyword_lower)
        if created:
            print("getting wiki categories")
            categories = wikicategory(keyword)
            for category in categories:
                category_instance, _ = Category.objects.get_or_create(
                    name=category)
                keyword_instance.categories.add(category_instance)
            keyword_instance.save()
        else:
            print("Keyword found in db")
        print("keyword obtained")
        long_term_model = LongTermInterest.objects.create(
            user_id=user_id, keyword=keyword_instance, weight=weight)
        # list(queryset) instead of a pass-through comprehension (PERF402).
        tweet_list = list(
            Tweet.objects.filter(user_id=user_id,
                                 full_text__icontains=keyword_lower))
        paper_list = list(
            Paper.objects.filter(
                Q(user_id=user_id)
                & (Q(abstract__icontains=keyword_lower)
                   | Q(title__icontains=keyword_lower))))
        if tweet_list:
            long_term_model.tweets.add(*tweet_list)
            long_term_model.source = ShortTermInterest.TWITTER
        if paper_list:
            long_term_model.papers.add(*paper_list)
            long_term_model.source = ShortTermInterest.SCHOLAR
        if tweet_list and paper_list:
            # Keyword is backed by both sources; record the combined label.
            long_term_model.source = (
                f"{ShortTermInterest.SCHOLAR} & {ShortTermInterest.TWITTER}")
        long_term_model.save()
def post(self, request, *args, **kwargs):
    """Map each interest in the validated payload to its wiki categories.

    Expects the request body to validate against ``self.serializer_class``
    with an ``interests`` list; responds with ``{interest: categories}``.
    """
    serializer = self.serializer_class(data=request.data)
    serializer.is_valid(raise_exception=True)
    interests = serializer.validated_data["interests"]
    return Response({interest: wikicategory(interest) for interest in interests})
def _blacklisted_keyword_names(user_id):
    """Return the set of blacklisted keyword names for *user_id*.

    A set (not a list) so the per-keyword membership test below is O(1).
    """
    return set(
        BlacklistedKeyword.objects.filter(user_id=user_id).values_list(
            "keyword__name", flat=True))


def _get_or_create_keyword(keyword, original_keyword_name):
    """Fetch or create the Keyword row for *keyword* (lowercased).

    On first creation, attaches the keyword's Wikipedia categories. Always
    appends *original_keyword_name* (lowercased) to the keyword's JSON list
    of original spellings. Returns the Keyword instance.
    """
    keyword_instance, created = Keyword.objects.get_or_create(
        name=keyword.lower())
    if created:
        print("getting wiki categories")
        for category in wikicategory(keyword):
            category_instance, _ = Category.objects.get_or_create(
                name=category)
            keyword_instance.categories.add(category_instance)
        keyword_instance.save()
    # Narrowed from a bare except: malformed/empty JSON starts a fresh list,
    # but KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        original_keywords = json.loads(keyword_instance.original_keywords)
    except Exception:
        original_keywords = []
    original_keywords.append(original_keyword_name.lower())
    keyword_instance.original_keywords = json.dumps(
        list(set(original_keywords)))
    keyword_instance.save()
    return keyword_instance


def generate_short_term_model(user_id, source):
    """Recompute short-term interests for *user_id* from the given *source*.

    TWITTER: buckets unprocessed tweets by "<month>_<year>" and extracts Yake
    keywords per bucket. SCHOLAR: buckets unprocessed papers by year and
    extracts SingleRank keywords. Each surviving (non-blacklisted, wiki-
    filtered, normalized) keyword becomes/updates a ShortTermInterest row
    linked to the tweets/papers that mention it. Processed candidates are
    marked used_in_calc=True.
    """
    blacklisted_keywords = _blacklisted_keyword_names(user_id)

    if source == ShortTermInterest.TWITTER:
        tweet_candidates = Tweet.objects.filter(user_id=user_id,
                                                used_in_calc=False)
        # Concatenate tweet text per "<month>_<year>" bucket.
        month_wise_text = {}
        for tweet in tweet_candidates:
            key = f"{tweet.created_at.month}_{tweet.created_at.year}"
            if key not in month_wise_text:
                month_wise_text[key] = ""
            month_wise_text[key] = f"{month_wise_text[key]} {tweet.full_text}"

        for key, text in month_wise_text.items():
            month, year = key.split("_")
            try:
                keywords = getKeyword(text or "", model="Yake", num=20)
            except Exception:  # narrowed from bare except:
                # silencing errors like
                # interests/Keyword_Extractor/utils/datarepresentation.py:106: RuntimeWarning: Mean of empty slice
                continue
            print(f"got keywords {keywords}")
            if not keywords:
                print("No keywords found")
                continue
            wiki_keyword_redirect_mapping, keyword_weight_mapping = wikifilter(
                keywords)
            print(keyword_weight_mapping)
            if not keyword_weight_mapping:
                print("No keywords found in weight mapping")
                continue
            keywords = normalize(keyword_weight_mapping)
            for keyword, weight in keywords.items():
                original_keyword_name = wiki_keyword_redirect_mapping.get(
                    keyword, keyword)
                keyword = keyword.lower()
                if keyword in blacklisted_keywords:
                    print("Skipping {} as its blacklisted".format(keyword))
                    continue
                keyword_instance = _get_or_create_keyword(
                    keyword, original_keyword_name)
                s_interest, _ = ShortTermInterest.objects.update_or_create(
                    user_id=user_id,
                    keyword=keyword_instance,
                    model_month=month,
                    model_year=year,
                    defaults={"source": source, "weight": weight},
                )
                for t in tweet_candidates.filter(full_text__icontains=keyword):
                    s_interest.tweets.add(t)
        tweet_candidates.update(used_in_calc=True)

    if source == ShortTermInterest.SCHOLAR:
        paper_candidates = Paper.objects.filter(user_id=user_id,
                                                used_in_calc=False)
        # Concatenate title + abstract per publication year.
        year_wise_text = {}
        for paper in paper_candidates:
            if paper.year not in year_wise_text:
                year_wise_text[paper.year] = ""
            year_wise_text[paper.year] = (
                f"{year_wise_text[paper.year]} {paper.title} {paper.abstract}")

        for year, text in year_wise_text.items():
            try:
                keywords = getKeyword(text, model="SingleRank", num=20)
            except Exception:  # narrowed from bare except:
                # silencing errors like
                # interests/Keyword_Extractor/utils/datarepresentation.py:106: RuntimeWarning: Mean of empty slice
                continue
            print(f"got keywords {keywords}")
            if not keywords:
                print("No keywords found")
                continue
            wiki_keyword_redirect_mapping, keyword_weight_mapping = wikifilter(
                keywords)
            if not keyword_weight_mapping:
                print("No keywords found in weight mapping")
                continue
            keywords = normalize(keyword_weight_mapping)
            for keyword, weight in keywords.items():
                original_keyword_name = wiki_keyword_redirect_mapping.get(
                    keyword, keyword)
                keyword = keyword.lower()
                if keyword in blacklisted_keywords:
                    print("Skipping {} as its blacklisted".format(keyword))
                    continue
                keyword_instance = _get_or_create_keyword(
                    keyword, original_keyword_name)
                # Papers carry no month; the model pins month=1 for the year.
                s_interest, _ = ShortTermInterest.objects.update_or_create(
                    user_id=user_id,
                    keyword=keyword_instance,
                    model_month=1,
                    model_year=year,
                    defaults={"source": source, "weight": weight},
                )
                for p in paper_candidates.filter(
                        Q(title__icontains=keyword)
                        | Q(abstract__icontains=keyword)):
                    s_interest.papers.add(p)
        paper_candidates.update(used_in_calc=True)