Beispiel #1
0
def generate_recommendations(data, title):
    """Return the names of the three rows most similar to ``title``.

    For every row a bag of words is built from the lower-cased ``City`` value
    and the RAKE keywords of the ``Details`` and ``Cause`` columns. The bags
    are vectorised with ``CountVectorizer`` and compared by cosine similarity.

    Args:
        data: DataFrame with at least the columns ``Name``, ``Cause``,
            ``City`` and ``Details``. It is not modified.
        title: Value of the ``Name`` column to find recommendations for.

    Returns:
        list: Up to three ``Name`` values, best match first. The top-ranked
        row is skipped on the assumption that it is ``title`` itself.

    Raises:
        IndexError: If ``title`` does not occur in the ``Name`` column.
    """
    # Work on a fresh frame so the caller's data is untouched; missing values
    # become the literal string "null", as before.
    df = data[['Name', 'Cause', 'City', 'Details']].fillna("null")

    def _keyword_string(text):
        # Space-separated RAKE keywords of one text field.
        rake = Rake()
        rake.extract_keywords_from_text(text)
        return " ".join(rake.get_word_degrees().keys())

    # Build the bags in a plain list: assigning to the row yielded by
    # iterrows() only changes a copy and never reaches the frame.
    bags = []
    for city, cause, details in zip(df['City'], df['Cause'], df['Details']):
        # Compare by value; "is not" on a string tests object identity.
        city_part = city.lower() if city != "null" else ""
        bags.append(" ".join(
            (city_part, _keyword_string(details), _keyword_string(cause))))

    names = list(df['Name'])
    count_matrix = CountVectorizer().fit_transform(bags)
    cosine_sim = cosine_similarity(count_matrix, count_matrix)

    indices = pd.Series(names)
    idx = indices[indices == title].index[0]
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending=False)
    # Position 0 is expected to be the queried row itself, so skip it.
    return [names[i] for i in score_series.iloc[1:4].index]
Beispiel #2
0
def rec_events(data, title):
    """Return the names of the three events most similar to ``title``.

    The RAKE keywords of each row's ``Purpose`` text form its bag of words;
    bags are vectorised with ``CountVectorizer`` and compared by cosine
    similarity.

    Args:
        data: DataFrame with at least the columns ``Name`` and ``Purpose``.
            It is not modified.
        title: Value of the ``Name`` column to find recommendations for.

    Returns:
        list: Up to three ``Name`` values, best match first. The top-ranked
        row is skipped on the assumption that it is ``title`` itself.

    Raises:
        IndexError: If ``title`` does not occur in the ``Name`` column.
    """
    names = list(data['Name'])

    # Collect the bags in a list: writing to the row yielded by iterrows()
    # only changes a copy, which left every bag empty.
    bags = []
    for purpose in data['Purpose']:
        rake = Rake()
        rake.extract_keywords_from_text(purpose)
        bags.append(" ".join(rake.get_word_degrees().keys()))

    count_matrix = CountVectorizer().fit_transform(bags)
    cosine_sim = cosine_similarity(count_matrix, count_matrix)

    indices = pd.Series(names)
    idx = indices[indices == title].index[0]
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending=False)
    # Position 0 is expected to be the queried row itself, so skip it.
    return [names[i] for i in score_series.iloc[1:4].index]
def clean(data):
    """Fill ``Key_words`` with RAKE keywords and drop the source columns.

    For every row the keywords of ``Plot``, ``Actors``, ``Director`` and
    ``Genre`` are extracted and appended, space-separated, to the row's
    existing ``Key_words`` value. The four source columns are then dropped
    from ``data`` in place.

    Args:
        data: DataFrame with the columns ``Title``, ``Plot``, ``Actors``,
            ``Director``, ``Genre`` and ``Key_words``. Modified in place
            (``Key_words`` rewritten, source columns dropped).

    Returns:
        DataFrame: A copy of the cleaned frame indexed by ``Title``. (The
        re-indexed frame used to be discarded and ``None`` returned.)
    """
    # Rake uses English stop words from NLTK by default and discards
    # punctuation; one instance is reused for every text.
    r = Rake()

    combined = []
    for _, row in data.iterrows():
        parts = [row['Key_words']]
        for item in (row['Plot'], row['Actors'], row['Director'],
                     row['Genre']):
            r.extract_keywords_from_text(item)
            parts.append(' '.join(r.get_word_degrees().keys()))
        # Join the groups with a space so the last keyword of one field is
        # not glued to the first keyword of the next.
        combined.append(' '.join(part for part in parts if part))

    # Assign the whole column at once; writing to the row yielded by
    # iterrows() only changes a copy.
    data['Key_words'] = combined

    data.drop(columns=['Plot', 'Actors', 'Director', 'Genre'], inplace=True)
    return data.set_index('Title')
Beispiel #4
0
def preprocessing_overview(data):
    """Return the words RAKE extracts from the text ``data`` as a list."""
    extractor = Rake()
    extractor.extract_keywords_from_text(data)
    word_degrees = extractor.get_word_degrees()
    return list(word_degrees.keys())
Beispiel #5
0
def keyword(str):
    """Return the distinct stemmed RAKE keywords of a text.

    Args:
        str: Text to extract keywords from. (The name shadows the builtin but
            is kept so keyword-argument callers keep working.)

    Returns:
        list: Unique, non-empty Porter stems of the extracted words, in
        arbitrary order (they pass through a ``set``).
    """
    # Remove stop words, etc.
    r = Rake()
    r.extract_keywords_from_text(str)

    # Special case for topic: words starting with "xd" plus a digit lose
    # those three characters; anything left with at most one character is
    # blanked so it is filtered out below.
    cleaned = []
    for word in r.get_word_degrees().keys():
        if re.search(r'^xd\d', word):
            word = word[3:]
            if len(word) <= 1:
                word = ""
        cleaned.append(word)

    # Stem the list and discard empty results.
    ps = PorterStemmer()
    stemmed = {ps.stem(word) for word in cleaned}
    return [stem for stem in stemmed if stem]
def recommend(item_id, num, df_1):
    """Return the ``num`` items most similar to ``item_id``.

    Each ``description`` is reduced to its RAKE keywords, the keyword strings
    are vectorised with TF-IDF (1- to 4-grams) and ranked by cosine
    similarity to the requested item.

    Args:
        item_id: Identifier to look up; converted with ``str`` before it is
            compared with the ``item_id`` column.
        num: Maximum number of recommendations to return.
        df_1: DataFrame with the columns ``item_id`` and ``description``.
            It is not modified.

    Returns:
        list: ``item_id`` values of the most similar other rows, best first.

    Raises:
        KeyError: If ``item_id`` is not present in the ``item_id`` column.
    """
    item_id = str(item_id)

    # Build the keyword documents in a list: writing to the row yielded by
    # iterrows() only changes a copy and never reached the frame.
    r = Rake()
    documents = []
    for description in df_1['description']:
        r.extract_keywords_from_text(description)
        documents.append(' '.join(r.get_word_degrees().keys()))

    item_ids = list(df_1['item_id'])
    positions = [pos for pos, value in enumerate(item_ids) if value == item_id]
    if not positions:
        raise KeyError(item_id)
    # With duplicate ids the last row wins, as the former results dict did.
    target = positions[-1]

    # min_df=1 selects the same vocabulary as the former min_df=0, since
    # every term occurs in at least one document.
    tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 4), min_df=1,
                         stop_words='english')
    tfidf_matrix = tf.fit_transform(documents)

    # Only the target's similarity row is needed.
    scores = cosine_similarity(tfidf_matrix[target], tfidf_matrix).ravel()

    # argsort() is ascending; reverse it so the most similar rows come first
    # and leave out the target itself.
    ranked = [pos for pos in scores.argsort()[::-1] if pos != target]
    return [item_ids[pos] for pos in ranked[:num]]
def extract_keywords(text):
    """Return RAKE word degrees for ``text`` using the project stop words.

    The stop-word list is read from the file named by ``STOP_WORDS``, one
    entry per line.

    Args:
        text: Text to extract keywords from.

    Returns:
        Mapping of each extracted word to its degree, as returned by
        ``Rake.get_word_degrees``.
    """
    # The context manager closes the file even if reading fails.
    with open(STOP_WORDS, 'r') as stop_words_file:
        contents = stop_words_file.read().split('\n')
    rake_object = Rake(stopwords=contents)
    rake_object.extract_keywords_from_text(text)
    return rake_object.get_word_degrees()
    def token_proc_thread(self):
        """Return the known words of ``self.docs`` ranked by RAKE degree.

        The lower-cased document is tokenised; only tokens that start with at
        least two ASCII letters and are keys of ``self.dict_of_words`` are
        kept. RAKE (with ``self.stop_words``) is run on the remaining text.

        Returns:
            list: ``(word, degree)`` tuples sorted by descending degree,
            restricted to words longer than four characters.
        """
        known_words = self.dict_of_words.keys()
        # Filter in one pass instead of calling list.remove() per token,
        # which was quadratic and wrapped in a bare ``except``.
        tokens = [
            x for x in nltk.word_tokenize(self.docs.lower())
            if re.match("[a-zA-Z]{2,}", x) and x in known_words
        ]
        text = ' '.join(tokens)

        r = Rake(stopwords=self.stop_words)
        r.extract_keywords_from_text(text)
        word_degrees = sorted(r.get_word_degrees().items(),
                              key=lambda item: item[1],
                              reverse=True)
        # Printed before short words are filtered out, as before.
        print(word_degrees)

        return [pair for pair in word_degrees if len(pair[0]) > 4]
    def main(self, df):
        """Build bag-of-words features and the cosine similarity matrix.

        ``Director``, ``Cast`` and ``Writers`` are joined and lower-cased,
        RAKE keywords are extracted from ``Short Summary``, and the columns
        ``Genres``, ``Director``, ``Writers``, ``Cast`` and ``Key_words`` are
        merged into one ``bag_of_words`` string per row.

        Args:
            df: DataFrame with the columns ``Title``, ``Genres``,
                ``Director``, ``Writers``, ``Cast`` and ``Short Summary``.
                Modified in place: afterwards it is indexed by ``Title`` and
                holds only ``bag_of_words``. Also stored as ``self.df``.

        Returns:
            tuple: ``(titles, similarity)`` where ``titles`` is a Series of
            the ``Title`` index and ``similarity`` the pairwise cosine
            similarity matrix of the bags.
        """
        self.df = df

        # Assign whole columns; writing to the row yielded by iterrows()
        # only changes a copy and never reached the frame.
        for column in ('Director', 'Cast', 'Writers'):
            df[column] = [''.join(value).lower() for value in df[column]]

        key_words = []
        for summary in df['Short Summary']:
            r = Rake()
            r.extract_keywords_from_text(summary)
            # Stored as one string: the ' '.join aggregation below needs
            # every cell to be a str, not a list.
            key_words.append(' '.join(r.get_word_degrees().keys()))
        df['Key_words'] = key_words

        df.drop(columns=['Short Summary'], inplace=True)
        df.set_index('Title', inplace=True)
        df['bag_of_words'] = df[[
            'Genres', 'Director', 'Writers', 'Cast', 'Key_words'
        ]].agg(' '.join, axis=1)
        df.drop(columns=[col for col in df.columns if col != 'bag_of_words'],
                inplace=True)
        count = CountVectorizer()
        count_matrix = count.fit_transform(df['bag_of_words'])

        return pd.Series(df.index), cosine_similarity(count_matrix,
                                                      count_matrix)
Beispiel #10
0
def extract_keyword(text):
    """Return the RAKE words of every sentence of ``text`` in one flat list.

    The text is split with ``splitSentence``; each sentence is processed by
    its own ``Rake`` instance, so a word can appear once per sentence.
    """
    collected = []
    for sentence in splitSentence(text):
        extractor = Rake()
        extractor.extract_keywords_from_text(sentence)
        collected.extend(extractor.get_word_degrees().keys())
    return collected
def get_RAKE_keywords(sample, stoplist, T_RATIO=0.33):
    """Return the top-ranked RAKE phrases of ``sample.abstract``.

    The number of phrases kept is ``T_RATIO`` times the number of distinct
    words RAKE found, truncated to an integer.

    Args:
        sample: Object whose ``abstract`` attribute holds the text.
        stoplist: Stop words passed to ``Rake``.
        T_RATIO: Fraction of the distinct-word count to keep.

    Returns:
        list: The leading ranked phrases.
    """
    extractor = Rake(stopwords=stoplist)
    extractor.extract_keywords_from_text(sample.abstract)

    distinct_words = len(extractor.get_word_degrees())
    cutoff = int(distinct_words * T_RATIO)

    ranked_phrases = extractor.get_ranked_phrases()
    return ranked_phrases[:cutoff]
Beispiel #12
0
def recomendacionChampion(request):
    """Render the champions most similar to the one named in the form.

    On a valid POST every champion is described by a text built from its
    tiers (``l<level> <position name> w<winrate>`` per tier). The RAKE
    keywords of these texts are vectorised with ``CountVectorizer`` and up to
    ten champions closest by cosine similarity to the requested one are
    loaded.

    Args:
        request: The incoming HTTP request.

    Returns:
        The rendered ``campeones_recomendados.html`` response. ``campeones``
        is ``None`` unless a valid form was posted, and an empty list when
        the requested champion has no tier data.
    """
    formulario = ChampionBusquedaForm()
    campeones = None
    if request.method == 'POST':
        formulario = ChampionBusquedaForm(request.POST)
        if formulario.is_valid():
            # Champion name -> description text, newest tier first.
            datos = {}
            for champ in Champion.objects.all():
                for tie in Tier.objects.filter(idChampion=champ.idChampion):
                    position = Position.objects.get(name=tie.idPosition)
                    actual = 'l' + str(tie.level) + ' ' + \
                        position.name + ' ' + 'w' + str(tie.winrate)
                    anterior = datos.get(champ.name)
                    if anterior is None:
                        datos[champ.name] = actual
                    else:
                        datos[champ.name] = actual + ' ' + anterior

            nombres = list(datos)
            # Collect the keywords in a list: writing to the row yielded by
            # DataFrame.iterrows() only changes a copy, which left every
            # keyword string empty.
            key_words = []
            for valor in datos.values():
                r = Rake()
                r.extract_keywords_from_text(valor)
                key_words.append(' '.join(r.get_word_degrees().keys()))

            count_matrix = CountVectorizer().fit_transform(key_words)
            cosine_sim = cosine_similarity(count_matrix, count_matrix)

            indices = pd.Series(nombres)
            champion_name = formulario.cleaned_data['champion_name']
            matches = indices[indices == champion_name].index
            campeones = []
            # An unknown name yields no recommendations instead of an
            # IndexError.
            if len(matches) > 0:
                score_series = pd.Series(
                    cosine_sim[matches[0]]).sort_values(ascending=False)
                # Position 0 is expected to be the champion itself.
                for i in score_series.iloc[1:11].index:
                    campeones.append(Champion.objects.get(name=nombres[i]))
    return render(request, 'campeones_recomendados.html', {
        'campeones': campeones,
        'STATIC_URL': settings.STATIC_URL
    })
Beispiel #13
0
def extract_keywords(df_work):
    """Add a ``Key_words`` column holding the RAKE words of ``description``.

    Args:
        df_work: DataFrame with a ``description`` text column. Modified in
            place: ``Key_words`` is created (or overwritten) with one list of
            words per row.
    """
    def _keywords(descrip):
        # RAKE words of a single description.
        r = Rake()
        r.extract_keywords_from_text(descrip)
        return list(r.get_word_degrees().keys())

    # Assign the whole column; writing to the row yielded by iterrows() only
    # changes a copy and never reached the frame.
    df_work['Key_words'] = df_work['description'].apply(_keywords)
Beispiel #14
0
def recomendarPeliculas(id_pelicula):
    """Return the titles of up to three films with the most similar genres.

    Every film is described by its comma-separated genre names; the RAKE
    keywords of these texts are vectorised with ``CountVectorizer`` and
    compared with the requested film by cosine similarity.

    Args:
        id_pelicula: Primary key of the film to recommend for; a 404 is
            raised by ``get_object_or_404`` when it does not exist.

    Returns:
        list: Titles of the most similar other films, best first.
    """
    pelicula = get_object_or_404(Pelicula, pk=id_pelicula)

    # All other films (compared by title) first, the requested film last.
    catalogo = [
        movie for movie in Pelicula.objects.all()
        if str(movie.titulo) != str(pelicula.titulo)
    ]
    catalogo.append(pelicula)
    titulos = [movie.titulo for movie in catalogo]

    # Collect the keywords in a list: writing to the row yielded by
    # DataFrame.iterrows() only changes a copy, which left every keyword
    # string empty.
    key_words = []
    for movie in catalogo:
        generos = ', '.join(genero.nombre for genero in movie.generos.all())
        r = Rake()
        r.extract_keywords_from_text(generos)
        key_words.append(' '.join(r.get_word_degrees().keys()))

    count_matrix = CountVectorizer().fit_transform(key_words)

    target = len(catalogo) - 1
    scores = cosine_similarity(count_matrix[target], count_matrix).ravel()
    # Drop the requested film explicitly instead of assuming it ranks first;
    # this also works when fewer than five films exist.
    score_series = pd.Series(scores).drop(target).sort_values(ascending=False)
    return [titulos[i] for i in score_series.iloc[:3].index]
Beispiel #15
0
def shorten_title(title, max_title_len, alpha_only=True):
    """
    Shortens a title using important phrases and keywords in the title.

    The strategies are tried in order until one fits: the lower-cased title
    itself, its first sentence without determiners, the highest ranked RAKE
    phrase, and finally as many of the highest-degree RAKE words as fit.

    Args:
        title (str): Title to shorten.
        max_title_len (int): Maximum length of the final title.
        alpha_only (bool): Whether to strip everything except ``a-z`` and
            spaces before the RAKE-based steps.

    Returns:
        str: Shortened, all lower-case title with a length of at most
        `max_title_len`.
    """
    title = title.lower()
    if len(title) <= max_title_len:
        # Title is already short enough.
        return title

    blob = TextBlob(title)
    first_blob = blob if len(blob.sentences) == 1 else blob.sentences[0]
    # Drop determiners ("DT" tags) from the first sentence.
    tags_kept = [(word, tag) for word, tag in first_blob.tags if tag != "DT"]
    new_title = _join_tags(tags_kept)
    if len(new_title) <= max_title_len:
        return new_title

    if alpha_only:
        title = re.sub("[^a-z ]", "", title)

    # Try using the highest ranked phrase from the title.
    r = Rake()
    r.extract_keywords_from_text(title)
    ranked_phrases = r.get_ranked_phrases()
    if not ranked_phrases:
        # Nothing but stop words or punctuation: fall back to truncation
        # instead of raising IndexError.
        return title[:max_title_len]
    new_title = ranked_phrases[0]
    if len(new_title) <= max_title_len:
        return new_title

    # Title is still too long. Use as many of the important words as will fit
    # within the max title length: highest degree first, ties alphabetical.
    degrees = r.get_word_degrees()
    words = sorted(degrees, key=lambda word: (-degrees[word], word))
    new_title = words[0]
    if len(new_title) > max_title_len:
        # Cut the single-word title short.
        return new_title[:max_title_len]

    for w in words[1:]:
        append_title = "{} {}".format(new_title, w)
        if len(append_title) > max_title_len:
            break
        new_title = append_title

    return new_title
Beispiel #16
0
def extract_keywords(text):
    """Return the RAKE words of ``text`` as one space-separated string."""
    extractor = Rake()
    extractor.extract_keywords_from_text(text)
    words = list(extractor.get_word_degrees().keys())
    # Leading whitespace is stripped from the joined result.
    return " ".join(words).lstrip()
Beispiel #17
0
    def rakeResult(self):
        """Run RAKE on the document returned by ``self.getDoc()``.

        Returns:
            tuple: ``(ranked phrases with scores, ranked phrases,
            word degrees, word frequency distribution)``.
        """
        document = self.getDoc()
        extractor = Rake()
        extractor.extract_keywords_from_text(document)
        return (
            extractor.get_ranked_phrases_with_scores(),
            extractor.get_ranked_phrases(),
            extractor.get_word_degrees(),
            extractor.get_word_frequency_distribution(),
        )
Beispiel #18
0
    def clean_Data(self):
        """Replace every entry of ``self.plot`` with its RAKE words.

        Each entry is overwritten in place by the extracted words joined
        with single spaces.
        """
        entry_count = len(self.plot)
        for position in range(entry_count):
            # Default Rake: English stop words from NLTK, punctuation dropped.
            extractor = Rake()
            extractor.extract_keywords_from_text(self.plot[position])

            # The keys of the degree mapping are the extracted words.
            words = list(extractor.get_word_degrees().keys())
            self.plot[position] = " ".join(words)
def main(query=None, serving=True):
    """Print recommendations for ``query`` from ``./Program list.csv``.

    RAKE keywords are extracted from ``Program Info``, concatenated with
    ``Dept/Domain`` into a lower-cased ``desc`` text per program, vectorised
    with ``CountVectorizer`` and compared by cosine similarity. The result of
    ``get_recommendation`` is printed.

    Args:
        query: Search keyword; ignored and read from standard input when
            ``serving`` is false.
        serving: When false, prompt for the query interactively.
    """
    df = pd.read_csv('./Program list.csv')

    # Collect the keywords in a list and assign the column once: writing to
    # the row yielded by iterrows() only changes a copy.
    key_words = []
    for info in df['Program Info']:
        # Rake uses English stop words from NLTK by default and discards
        # punctuation.
        r = Rake()
        r.extract_keywords_from_text(str(info))
        key_words.append(
            concatenate_list(list(r.get_word_degrees().keys())))
    df['Key_words'] = key_words

    df.drop(columns=['Program Info', 'Timing'], inplace=True)

    df['desc'] = df[['Dept/Domain',
                     'Key_words']].apply(lambda x: ''.join(x.map(str)).lower(),
                                         axis=1)
    df['Program Name'] = df['Program Name'].apply(lambda x: str(x).lower())

    df3 = df.drop(['Dept/Domain', 'Key_words'], axis=1)
    df3.set_index(['Program Name'], inplace=True)

    # Count matrix and pairwise cosine similarity of the descriptions.
    count = CountVectorizer()
    count_matrix = count.fit_transform(df3['desc'])
    cosine_sim = cosine_similarity(count_matrix, count_matrix)

    # Program names in row order, used to map names to matrix positions.
    indices = pd.Series(df3.index)

    if not serving:
        query = input("Enter search keyword: \n")

    print(get_recommendation(query, indices, cosine_sim))
Beispiel #20
0
def similar_items(item_id):
    """Return the ids of up to 50 movies most similar to ``item_id``.

    Movies are read from ``movies_utils.data_path``. The RAKE keywords of
    each movie's title and plot are vectorised with TF-IDF and compared by
    cosine similarity.

    Args:
        item_id: ``movie_id`` of the movie to find similar items for.

    Returns:
        list: ``movie_id`` values of the most similar movies, best first.
        The top-ranked row is skipped on the assumption that it is the
        queried movie itself.

    Raises:
        IndexError: If ``item_id`` is not present in the ``movie_id`` column.
    """
    movie = pd.read_csv(movies_utils.data_path)
    item_ids = pd.Index(movie['movie_id'])

    # Collect the keyword documents in a list: writing to the row yielded by
    # iterrows() only changes a copy, which left every bag of words empty.
    documents = []
    for item_data in movie['movie_title'] + ' ' + movie['movie_plot']:
        r = Rake()
        r.extract_keywords_from_text(item_data)
        documents.append(' '.join(r.get_word_degrees().keys()))

    count_matrix = TfidfVectorizer().fit_transform(documents)

    indices = pd.Series(item_ids)
    idx = indices[indices == item_id].index[0]

    # Only the queried movie's similarity row is needed.
    scores = cosine_similarity(count_matrix[idx], count_matrix).ravel()
    score_series = pd.Series(scores).sort_values(ascending=False)

    # Ranks 1..50; rank 0 is expected to be the queried movie.
    top_indexes = list(score_series.iloc[1:51].index)
    return [item_ids[i] for i in top_indexes]
def calculate_rake_ranking(just_words):
    """Return a RAKE ranking scaled to [0, 2] for every entry.

    The first element of each entry is taken as the word. All words are
    joined into one text, RAKE is run on it, and each word is ranked as
    ``degree / frequency``. Words RAKE did not report get degree 1 and
    frequency 1.

    Args:
        just_words: Sequence of indexable entries whose element ``0`` is the
            word as a string.

    Returns:
        list: One float per entry, min-max scaled to the range [0, 2]; an
        empty list for empty input.
    """
    # Nothing to rank; MinMaxScaler cannot be fitted on an empty array.
    if len(just_words) == 0:
        return []

    r = Rake()

    # RAKE ranks the words of one space-separated string.
    words_string = ''.join(word_array[0] + " " for word_array in just_words)
    r.extract_keywords_from_text(words_string)

    # Both are mappings keyed by word.
    frequency_distribution = r.get_word_frequency_distribution()
    word_degrees = r.get_word_degrees()

    # Direct lookups replace the former linear scans; .get() keeps the
    # default of 1 for words RAKE did not report.
    rake_not_scaled = [
        word_degrees.get(word_array[0], 1)
        / frequency_distribution.get(word_array[0], 1)
        for word_array in just_words
    ]

    # Scales the values of the RAKE rankings to [0, 2].
    scaler = MinMaxScaler(feature_range=(0, 2))
    rake_scaled = scaler.fit_transform(
        np.asarray(rake_not_scaled).reshape(-1, 1))

    # Flatten the (n, 1) result to plain floats; calling float() on
    # one-element arrays is deprecated in NumPy.
    return rake_scaled.ravel().tolist()
Beispiel #22
0
    def test_build_word_co_occurance_graph(self):
        """Check the word degrees computed for a small, fixed phrase list."""
        phrases = [['red', 'apples'], ['good'], ['red'], ['flavour']]

        expected = defaultdict(lambda: 0)
        expected.update({'apples': 2, 'good': 1, 'flavour': 1, 'red': 3})

        rake = Rake()
        rake._build_word_co_occurance_graph(phrases)
        self.assertEqual(rake.get_word_degrees(), expected)
Beispiel #23
0
def extract_keywords(df, feature, limit=1000):
    """Return one space-separated RAKE keyword string per row.

    Args:
        df: DataFrame holding the text column.
        feature: Name of the text column to extract keywords from.
        limit: Maximum number of leading rows to process (default 1000, the
            previously hard-coded value); ``None`` processes every row.

    Returns:
        list: Keyword strings for the first ``limit`` rows, in row order.
        Frames with fewer rows are processed completely instead of failing.
    """
    r = Rake()
    keyword_lists = []
    # Slice positionally so short frames simply yield fewer rows.
    for descr in df[feature].iloc[:limit]:
        r.extract_keywords_from_text(descr)
        keyword_lists.append(" ".join(r.get_word_degrees().keys()))

    return keyword_lists
Beispiel #24
0
    def test_build_word_co_occurance_graph(self):
        """Check the word degrees computed for a small, fixed phrase list."""
        phrases = [["red", "apples"], ["good"], ["red"], ["flavour"]]

        expected = defaultdict(lambda: 0)
        expected.update({"apples": 2, "good": 1, "flavour": 1, "red": 3})

        rake = Rake()
        rake._build_word_co_occurance_graph(phrases)
        self.assertEqual(rake.get_word_degrees(), expected)
def do_keyword_extraction(words):
    """Return the word of ``words`` with the highest corpus-wide degree.

    RAKE is run over the distinct ``context`` values of the module-level
    frame ``_t`` to obtain corpus word degrees, and over ``words`` to obtain
    its keywords. Each keyword is re-scored with its corpus degree.

    Args:
        words: Text to extract the keyword from.

    Returns:
        tuple: ``(word, corpus degree)`` of the best-scoring keyword.

    Raises:
        KeyError: If a keyword of ``words`` is missing from the corpus
            degrees.
        IndexError: If ``words`` yields no keyword.
    """
    if debug:
        print("---\n", words)

    corpus_rake = Rake()
    corpus_sentences = _t["context"].value_counts().index.values
    corpus_rake.extract_keywords_from_sentences(corpus_sentences)
    corpus_degrees = dict(corpus_rake.get_word_degrees())

    local_rake = Rake()
    local_rake.extract_keywords_from_text(words)
    local_degrees = dict(local_rake.get_word_degrees())

    if debug:
        print(local_degrees)

    # Swap each local degree for the word's degree in the whole corpus.
    rescored = {word: corpus_degrees[word] for word in local_degrees}

    if debug:
        print(rescored)

    best = Counter(rescored).most_common(1)
    return best[0]
Beispiel #26
0
def recomendacionPlayer(request):
    """Render the players most similar to the one named in the form.

    On a valid POST every player is described by the list of their champion
    names followed by ``w<winrate>``. The RAKE keywords of these texts are
    vectorised with ``CountVectorizer`` and up to ten players closest by
    cosine similarity to the requested one are loaded.

    Args:
        request: The incoming HTTP request.

    Returns:
        The rendered ``jugadores_recomendados.html`` response. ``jugadores``
        is ``None`` unless a valid form was posted, and an empty list when
        the requested player is unknown.
    """
    formulario = PlayerBusquedaForm()
    jugadores = None
    if request.method == 'POST':
        formulario = PlayerBusquedaForm(request.POST)
        if formulario.is_valid():
            nombres = []
            key_words = []
            for player in Player.objects.all():
                # Keep every champion of the player; the list used to be
                # reset per champion, and the name came from a variable that
                # was stale or unbound for players without champions.
                actual = [champ.name for champ in player.idsChampion.all()]
                valor = str(actual) + ' w' + str(player.winrate)

                # Collect the keywords in a list: writing to the row yielded
                # by DataFrame.iterrows() only changes a copy.
                r = Rake()
                r.extract_keywords_from_text(valor)
                nombres.append(player.name)
                key_words.append(' '.join(r.get_word_degrees().keys()))

            count_matrix = CountVectorizer().fit_transform(key_words)
            cosine_sim = cosine_similarity(count_matrix, count_matrix)

            indices = pd.Series(nombres)
            player_name = formulario.cleaned_data['player_name']
            matches = indices[indices == player_name].index
            jugadores = []
            # An unknown name yields no recommendations instead of an
            # IndexError.
            if len(matches) > 0:
                score_series = pd.Series(
                    cosine_sim[matches[0]]).sort_values(ascending=False)
                # Position 0 is expected to be the player itself.
                for i in score_series.iloc[1:11].index:
                    jugadores.append(Player.objects.get(name=nombres[i]))
    return render(request, 'jugadores_recomendados.html', {
        'jugadores': jugadores,
        'STATIC_URL': settings.STATIC_URL
    })
def key_words(content_id):
    """Return the RAKE words of a movie's first plot text as one string.

    Args:
        content_id: Identifier passed to ``imdb.get_movie``.

    Returns:
        str: The extracted words joined with single spaces.
    """
    content = imdb.get_movie(content_id)
    plot = content['plot'][0]

    # Raw string: "\," is not a valid escape sequence in a normal literal.
    # The set of characters is unchanged.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    # Delete all punctuation in a single pass.
    plot = plot.translate(str.maketrans('', '', punctuations))

    r = Rake()
    r.extract_keywords_from_text(plot)
    key_words_dict_scores = r.get_word_degrees()
    return " ".join(str(word) for word in key_words_dict_scores.keys())
Beispiel #28
0
def getKeywords(df, column_name, new_column_name):
    """Store the RAKE words of ``column_name`` in ``new_column_name``.

    Args:
        df: DataFrame to process. Modified in place.
        column_name: Name of the text column to extract keywords from.
        new_column_name: Name of the column that receives one list of words
            per row; created or overwritten.
    """
    def _keywords(column_data):
        # Rake applies its default stop words and drops punctuation.
        r = Rake()
        r.extract_keywords_from_text(column_data)
        # The keys of the degree mapping are the extracted words.
        return list(r.get_word_degrees().keys())

    # Assign the whole column; writing to the row yielded by iterrows() only
    # changes a copy and never reached the frame.
    df[new_column_name] = df[column_name].apply(_keywords)
def extract_bag_of_words(row):
    """Build a bag-of-words string from one record.

    Elements 1, 2 and 3 of ``row`` are lower-cased and joined with spaces,
    followed by the RAKE words of the lower-cased element 4.

    Args:
        row: Indexable record whose elements 1 to 4 are strings.

    Returns:
        str: The combined, space-separated bag of words.
    """
    leading_fields = ' '.join(row[position].lower() for position in (1, 2, 3))

    # Keyword extraction from the overview text using Rake.
    extractor = Rake()
    extractor.extract_keywords_from_text(row[4].lower())
    overview_keywords = ' '.join(extractor.get_word_degrees().keys())

    return leading_fields + ' ' + overview_keywords
def make_keywords(description):
    '''Return the list of words "Rake" extracts from a description.'''
    # A default Rake uses the English stop words from NLTK and discards
    # punctuation characters.
    extractor = Rake()
    extractor.extract_keywords_from_text(description)

    # The degree mapping is keyed by the extracted words.
    word_degrees = extractor.get_word_degrees()
    return list(word_degrees.keys())