-
Notifications
You must be signed in to change notification settings - Fork 0
/
flaskNode.py
99 lines (72 loc) · 2.89 KB
/
flaskNode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import functools
import json
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from flask import Flask, request, jsonify
# Flask application object; the route decorators in this file register on it.
app = Flask(__name__)
@app.route("/")
def index():
    """Answer requests to the root URL with a fixed plain-text banner."""
    banner = "Flask Server"
    return banner
# Remote CSV the recommender is built from.
_DATASET_URL = 'https://query.data.world/s/uikepcpffyo2nhig52xxeevdialfl7'


@functools.lru_cache(maxsize=1)
def _load_model():
    """Download the movie dataset and build the similarity model once.

    The result is cached for the lifetime of the process, so the CSV download
    and the vectorisation are not repeated on every request.  A failed build
    raises and is not cached, so the next request retries.

    Returns:
        A ``(titles, positions, cosine_sim)`` tuple where ``titles`` is the
        list of movie titles in row order, ``positions`` maps each title to
        the row of its first occurrence, and ``cosine_sim`` is the square
        matrix of pairwise cosine similarities between the movies' bags of
        words.
    """
    # Imported lazily, as the original handler did: these packages are not
    # part of the file's top-level imports.
    from rake_nltk import Rake
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    pd.set_option('display.max_columns', 100)
    df = pd.read_csv(_DATASET_URL)
    # Missing cells become empty strings so keyword extraction and string
    # concatenation below never see NaN.
    df = df[['Title', 'Genre', 'Director', 'Actors', 'Plot']].fillna('').astype(str)

    def plot_keywords(plot):
        """Return the RAKE keywords of one plot summary, space-separated."""
        rake = Rake()
        rake.extract_keywords_from_text(plot)
        return ' '.join(rake.get_word_degrees())

    # Build the columns with vectorised operations.  Assigning to the rows
    # yielded by DataFrame.iterrows() is not guaranteed to write back to the
    # frame, and ' '.join() on a plain string splits it into single
    # characters that CountVectorizer's default tokenizer discards.
    key_words = df['Plot'].map(plot_keywords)
    bag_of_words = (
        df['Genre'] + ' ' + df['Director'] + ' ' + df['Actors'] + ' ' + key_words
    )

    count_matrix = CountVectorizer().fit_transform(bag_of_words)
    cosine_sim = cosine_similarity(count_matrix, count_matrix)

    titles = df['Title'].tolist()
    positions = {}
    for row, title in enumerate(titles):
        # Keep the first occurrence, matching the original `.index[0]` lookup.
        positions.setdefault(title, row)
    return titles, positions, cosine_sim


def _recommend(title, titles, positions, cosine_sim, limit=10):
    """Return up to ``limit`` titles most similar to ``title``.

    Returns an empty list when ``title`` is not a string or is not present in
    the dataset, instead of raising.
    """
    print("You are in the recommendations section")
    if not isinstance(title, str) or title not in positions:
        app.logger.warning("No movie titled %r in the dataset; skipping", title)
        return []
    idx = positions[title]
    # Drop the queried movie explicitly rather than assuming it sorts first:
    # another movie with an identical bag of words would tie with it.
    scores = pd.Series(cosine_sim[idx]).drop(idx)
    ranked = scores.sort_values(ascending=False, kind='mergesort')
    return [titles[i] for i in ranked.index[:limit]]


@app.route("/postdata", methods=['POST'])
def postdata():
    """Recommend movies similar to the titles posted as a JSON object.

    The request body must be a JSON object; every value in it is treated as a
    movie title.  The response body is a JSON array holding, for each posted
    title in turn, up to ten similar titles.  Titles that are not in the
    dataset contribute nothing.

    Returns:
        The JSON-encoded list of recommended titles, or a JSON error object
        with status 400 when the body is not a JSON object.
    """
    data = request.get_json()
    print(data)
    if not isinstance(data, dict):
        error = {"error": "expected a JSON object whose values are movie titles"}
        return json.dumps(error), 400

    titles, positions, cosine_sim = _load_model()
    recommended_movies = []
    for value in data.values():
        recommended_movies.extend(_recommend(value, titles, positions, cosine_sim))
    return json.dumps(recommended_movies)
# Start the built-in server only when this file is executed as a script.
if __name__ == "__main__":
    app.run(port=5000, host='127.0.0.1')