/
imdb.py
47 lines (38 loc) · 1.47 KB
/
imdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def main():
    """Look up a title on OMDb, scrape its IMDb user reviews, and hand them to ``predict.main``.

    Prompts on stdin for a title and an optional year, resolves them to an
    IMDb ID through the OMDb API, then walks IMDb's paginated ``reviews/_ajax``
    endpoint collecting the text of every review block.

    The OMDb API key defaults to the one shipped with this script and can be
    overridden through the ``OMDB_API_KEY`` environment variable.

    Returns:
        An empty list when OMDb does not report a match; otherwise ``None``,
        after ``predict.main(reviews, title=..., year=...)`` has been called.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status from either service.
    """
    import json
    import os

    import requests
    from bs4 import BeautifulSoup

    import predict

    # Seconds to wait on any single HTTP request; without a timeout a stalled
    # connection would block the script indefinitely.
    timeout = 10

    title = input('Search by title: ')
    year = input('Year of title (Optional): ')

    reviews = []
    with requests.Session() as s:
        # Passing the query as ``params`` lets requests URL-encode it, so
        # titles containing spaces, '&', '#', etc. are sent intact.
        r = s.get(
            'https://www.omdbapi.com/',
            params={
                't': title,
                'y': year,
                'apikey': os.environ.get('OMDB_API_KEY', '1fb69a60'),
            },
            timeout=timeout,
        )
        r.raise_for_status()
        omdbapi_response = json.loads(r.text)

        # Treat anything other than an explicit 'True' as "not found" so that
        # an unexpected payload cannot leave imdbID undefined further down.
        if omdbapi_response.get('Response') != 'True':
            print(omdbapi_response.get('Error', 'Unexpected response from OMDb.'))
            input('No movie is found.\nPress Enter to continue...')
            return reviews

        imdbID = omdbapi_response['imdbID']
        # Replace the user's input with OMDb's canonical title and year.
        title = omdbapi_response['Title']
        year = omdbapi_response['Year']

        reviews_url = 'https://www.imdb.com/title/' + imdbID + '/reviews/_ajax'
        page_params = {}
        seen_keys = set()
        while True:
            r = s.get(reviews_url, params=page_params, timeout=timeout)
            r.raise_for_status()
            soup = BeautifulSoup(r.text, 'html.parser')

            for div in soup.find_all('div', 'text show-more__control'):
                reviews.append(div.text)
                # '\r' keeps the running counter on a single console line.
                print(len(reviews), ' reviews retrieved.', end='\r')

            # The 'load-more-data' div carries the key for the next page; its
            # absence means this was the last page.
            load_more = soup.find('div', 'load-more-data')
            next_key = load_more.get('data-key') if load_more is not None else None
            # Also stop on a repeated key, otherwise the loop would never end.
            if not next_key or next_key in seen_keys:
                break
            seen_keys.add(next_key)
            page_params = {'paginationKey': next_key}

    print('\nAll reviews retrieved.')
    predict.main(reviews, title=title, year=year)
# Run the interactive scraper only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == '__main__':
    main()