Code Example #1
    # POST handler on a DRF view; self.serializer_class validates the payload.
    def post(self, request, *args, **kwargs):
        inputs = self.serializer_class(data=request.data)
        inputs.is_valid(raise_exception=True)
        payload = inputs.validated_data
        keyword_weight_mapping = getKeyword(payload["text"],
                                            model=payload["algorithm"],
                                            num=payload["num_of_keywords"])
        if payload["wiki_filter"]:
            # Keep only keywords that resolve to Wikipedia pages.
            wiki_keyword_redirect_mapping, keyword_weight_mapping = wikifilter(
                keyword_weight_mapping)
        # Normalize the weights before returning them.
        keywords = normalize(keyword_weight_mapping)
        return Response(keywords)
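The view above assumes a serializer_class exposing the four payload keys it reads. A minimal sketch of such a serializer, assuming Django REST Framework; the class name and default values are illustrative, only the field names are taken from the view:

from rest_framework import serializers


class KeywordExtractionSerializer(serializers.Serializer):
    # Field names mirror the payload keys read by the view above.
    # The class name and the defaults are assumptions.
    text = serializers.CharField()
    algorithm = serializers.CharField(default="Yake")
    num_of_keywords = serializers.IntegerField(default=20)
    wiki_filter = serializers.BooleanField(default=False)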
Code Example #2
import time
import urllib.parse as up

import psycopg2


def fetchTopics(algorithm):
    start_time = time.time()
    # getData() is expected to return a DataFrame with 'title' and 'abstract' columns.
    lak_data = getData()
    lak_data['abstitle'] = lak_data['title'] + " " + lak_data['abstract']
    text = ' '.join(list(lak_data['abstitle'].values))
    keywords = getKeyword(text, algorithm, 30)
    print(keywords)
    # Strip quotes and parentheses from the stringified mappings before storage.
    strip_chars = str.maketrans("", "", "'()")
    keywords_noquotes = str(keywords).translate(strip_chars)
    wikis_noquotes = str(wikifilter(keywords)[1]).translate(strip_chars)
    conn = None
    cursor = None
    try:
        up.uses_netloc.append("postgres")
        url = up.urlparse(
            "postgres://*****:*****@ruby.db.elephantsql.com:5432/yrrhzsue"
        )
        conn = psycopg2.connect(database=url.path[1:],
                                user=url.username,
                                password=url.password,
                                host=url.hostname,
                                port=url.port)
        cursor = conn.cursor()
        # Parameterized query to avoid SQL injection.
        insert_query = ("INSERT INTO Topics (year, topics, algorithm, keywords) "
                        "VALUES (%s, %s, %s, %s)")
        cursor.execute(insert_query,
                       ("all years", wikis_noquotes, algorithm, keywords_noquotes))
        conn.commit()
    except Exception as error:
        print("Error while inserting data into PostgreSQL", error)
    finally:
        # Close the cursor and connection if they were opened.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
            print("PostgreSQL connection is closed")

    end_time = time.time()
    print("Execution time:", end_time - start_time)
    return "success"
Code Example #3
def generate_short_term_model(user_id, source):
    # Build month/year-wise short-term interests for a user from tweets
    # (Twitter) or papers (Scholar) that have not yet been used in a
    # calculation.
    blacklisted_keywords = list(
        BlacklistedKeyword.objects.filter(user_id=user_id).values_list(
            "keyword__name", flat=True))

    if source == ShortTermInterest.TWITTER:
        tweet_candidates = Tweet.objects.filter(user_id=user_id,
                                                used_in_calc=False)
        month_wise_text = {}

        for tweet in tweet_candidates:
            key = f"{tweet.created_at.month}_{tweet.created_at.year}"
            if key not in month_wise_text:
                month_wise_text[key] = ""
            month_wise_text[key] = f"{month_wise_text[key]} {tweet.full_text}"

        for key, text in month_wise_text.items():
            month, year = key.split("_")
            try:
                keywords = getKeyword(text or "", model="Yake", num=20)
            except Exception:
                # Silence errors like
                # interests/Keyword_Extractor/utils/datarepresentation.py:106: RuntimeWarning: Mean of empty slice
                continue
            print(f"got keywords {keywords}")
            if not keywords:
                print("No keywords found")
                continue
            wiki_keyword_redirect_mapping, keyword_weight_mapping = wikifilter(
                keywords)
            print(keyword_weight_mapping)
            if not keyword_weight_mapping:
                print("No keywords found in weight mapping")
                continue
            keywords = normalize(keyword_weight_mapping)
            for keyword, weight in keywords.items():
                original_keyword_name = wiki_keyword_redirect_mapping.get(
                    keyword, keyword)
                keyword = keyword.lower()
                if keyword in blacklisted_keywords:
                    print("Skipping {} as its blacklisted".format(keyword))
                    continue
                keyword_instance, created = Keyword.objects.get_or_create(
                    name=keyword)
                if created:
                    print("getting wiki categories")
                    categories = wikicategory(keyword)
                    for category in categories:
                        category_instance, _ = Category.objects.get_or_create(
                            name=category)
                        keyword_instance.categories.add(category_instance)
                    keyword_instance.save()
                try:
                    original_keywords = json.loads(
                        keyword_instance.original_keywords)
                except (TypeError, ValueError):
                    # Field may be empty or hold invalid JSON.
                    original_keywords = []
                original_keywords.append(original_keyword_name.lower())
                keyword_instance.original_keywords = json.dumps(
                    list(set(original_keywords)))
                keyword_instance.save()

                s_interest, _ = ShortTermInterest.objects.update_or_create(
                    user_id=user_id,
                    keyword=keyword_instance,
                    model_month=month,
                    model_year=year,
                    defaults={
                        "source": source,
                        "weight": weight
                    },
                )
                for t in tweet_candidates.filter(full_text__icontains=keyword):
                    s_interest.tweets.add(t)
        tweet_candidates.update(used_in_calc=True)

    if source == ShortTermInterest.SCHOLAR:
        paper_candidates = Paper.objects.filter(user_id=user_id,
                                                used_in_calc=False)
        year_wise_text = {}
        for paper in paper_candidates:
            if paper.year not in year_wise_text:
                year_wise_text[paper.year] = ""
            year_wise_text[paper.year] = (
                f"{year_wise_text[paper.year]} {paper.title} {paper.abstract}")

        for year, text in year_wise_text.items():
            try:
                keywords = getKeyword(text, model="SingleRank", num=20)
            except Exception:
                # Silence errors like
                # interests/Keyword_Extractor/utils/datarepresentation.py:106: RuntimeWarning: Mean of empty slice
                continue
            print(f"got keywords {keywords}")
            if not keywords:
                print("No keywords found")
                continue
            wiki_keyword_redirect_mapping, keyword_weight_mapping = wikifilter(
                keywords)
            if not keyword_weight_mapping:
                print("No keywords found in weight mapping")
                continue
            keywords = normalize(keyword_weight_mapping)
            for keyword, weight in keywords.items():
                original_keyword_name = wiki_keyword_redirect_mapping.get(
                    keyword, keyword)
                keyword = keyword.lower()
                if keyword in blacklisted_keywords:
                    print("Skipping {} as its blacklisted".format(keyword))
                    continue
                keyword_instance, created = Keyword.objects.get_or_create(
                    name=keyword)
                if created:
                    print("getting wiki categories")
                    categories = wikicategory(keyword)
                    for category in categories:
                        category_instance, _ = Category.objects.get_or_create(
                            name=category)
                        keyword_instance.categories.add(category_instance)
                    keyword_instance.save()
                try:
                    original_keywords = json.loads(
                        keyword_instance.original_keywords)
                except (TypeError, ValueError):
                    # Field may be empty or hold invalid JSON.
                    original_keywords = []
                original_keywords.append(original_keyword_name.lower())
                keyword_instance.original_keywords = json.dumps(
                    list(set(original_keywords)))

                keyword_instance.save()

                s_interest, _ = ShortTermInterest.objects.update_or_create(
                    user_id=user_id,
                    keyword=keyword_instance,
                    model_month=1,
                    model_year=year,
                    defaults={
                        "source": source,
                        "weight": weight
                    },
                )
                for p in paper_candidates.filter(
                        Q(title__icontains=keyword)
                        | Q(abstract__icontains=keyword)):
                    s_interest.papers.add(p)
        paper_candidates.update(used_in_calc=True)
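Both branches above funnel the filtered mapping through normalize() before iterating the weights. Its implementation is not shown in these examples; a plausible sketch that rescales weights into the [0, 1] range (the actual scaling scheme is an assumption):

def normalize(keyword_weight_mapping):
    # Hypothetical stand-in for the normalize() used above; the real
    # implementation is not shown in these examples.
    if not keyword_weight_mapping:
        return {}
    max_weight = max(keyword_weight_mapping.values())
    if not max_weight:
        return dict(keyword_weight_mapping)
    # Rescale each weight relative to the largest one.
    return {k: w / max_weight for k, w in keyword_weight_mapping.items()}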
Code Example #4
def get_recommended_tweets(tags):
    user_interest_model_list = []  # creates a list of user interest model
    full_result = []
    for tag in tags:
        extra_kwargs = {}
        geo_code = generate_geo_code(tag)
        if geo_code is not None:
            extra_kwargs['geocode'] = geo_code
        language = tag.get("lang", None)
        if language is not None:
            extra_kwargs['lang'] = language
        user_interest_model_list.append(tag["text"])
        response = API.search(
            q=tag["text"],
            tweet_mode="extended",
            count=tag["n_tweets"],
            # count=25,
            **extra_kwargs)

        results = [
            extract_tweet_from_response(x, tag) for x in response["statuses"]
        ]
        full_result.extend(results)

    # TODO:
    #   1. Get five tweets for each user interest model and put it in an array
    #   2. Take the full_text of each tweet and perform the keywords extraction function
    #       2.1 use full_result[0].get("full_text") to get the text of the tweet
    #       2.2 extracted_keywords = getKeyword("tweet full_text", "TopicRank")
    #       -> returns an object { "Timesheet": "5" } -> need to use the key
    #   3. Take the extracted_keywords and tags and calculate similarity
    #   4. Sort the list according to score

    # Extract unique tweets according to their IDs
    unique_tweets = {each['id_str']: each for each in full_result}.values()

    print(f"{len(full_result)} tweets fetched, {len(unique_tweets)} unique")

    tweets_with_scores = []
    for result in unique_tweets:
        text = result.get("full_text", "")
        algorithm = "Yake"
        extract_keywords_from_tweet = getKeyword(text, algorithm)
        # wiki_keyword_redirect_mapping, keywords_extracted = wikifilter(extract_keywords_from_tweet)
        # keywords_list = list(keywords_extracted.keys())
        keywords_list = list(extract_keywords_from_tweet.keys())
        # Compute the real similarity score; swap in the commented-out
        # random score below for local testing without the similarity model
        # (the original note said to restore the real score before the
        # Docker deployment).
        score = round((get_interest_similarity_score(
            user_interest_model_list, keywords_list) or 0) * 100, 2)
        # score = round(random.random() * 100, 2)

        # Keep only tweets scoring above the 40% similarity threshold.
        if score > 40:
            result["score"] = score
            tweets_with_scores.append(result)

    sorted_list = sorted(tweets_with_scores,
                         key=lambda k: k['score'],
                         reverse=True)
    return sorted_list
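get_recommended_tweets expects tags to be a list of dicts; the keys it reads are text, n_tweets, and optionally lang (plus whatever generate_geo_code consumes). A hypothetical call, with illustrative values:

tags = [
    # Keys mirror the lookups inside get_recommended_tweets;
    # the values here are purely illustrative.
    {"text": "learning analytics", "n_tweets": 25, "lang": "en"},
    {"text": "keyword extraction", "n_tweets": 10},
]
for tweet in get_recommended_tweets(tags):
    print(tweet["score"], tweet.get("full_text", "")[:80])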