-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
85 lines (69 loc) · 2.77 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from flask import Flask, render_template
import requests
from bs4 import BeautifulSoup, re
# Entry pages of the two sites this application scrapes.
MOVEEK_URL = "https://moveek.com/en/"
HBO_URL = "https://www.hbo.com/series"

# The Flask application object that the route decorators below register on.
app = Flask(__name__)
def get_URL(URL, timeout=10):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        URL: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` may wait indefinitely on a
            stalled connection, which would hang the calling request.

    Returns:
        A ``BeautifulSoup`` object built from the response text using the
        ``html.parser`` backend.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    response = requests.get(URL, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')
def crawl_moveek(URL):
    """Collect the movies linked from a Moveek listing page.

    Every element whose ``href`` matches ``/phim/`` and that wraps an
    ``<img>`` is treated as a movie entry.

    Args:
        URL: Address of the listing page to scrape.

    Returns:
        A list of dicts, one per movie, with the keys ``"title"``,
        ``"link"`` (the ``href`` exactly as found in the page) and ``"img"``
        (the poster's ``data-src`` attribute).
    """
    soup = get_URL(URL)
    movies_list = []
    for anchor in soup.find_all(href=re.compile("/phim/")):
        poster = anchor.img
        if poster is None:
            # Links to a movie that carry no poster image are not entries.
            continue

        title = anchor.get("title")
        image = poster.get("data-src")
        if title is None or image is None:
            # Incomplete markup: skip this anchor instead of letting a
            # KeyError abort the whole crawl.
            continue

        movies_list.append({
            "title": title,
            "link": anchor["href"],
            "img": image,
        })
    return movies_list
def _strip_en_prefix(link):
    """Return *link* without a leading ``/en`` language segment."""
    # str.strip("/en") would remove the *characters* '/', 'e' and 'n' from
    # both ends and corrupt slugs such as ".../endgame"; cut the prefix only.
    if link.startswith("/en/"):
        return link[len("/en"):]
    return link


def crawl_rating_moveek(URL):
    """Scrape a Moveek listing and enrich each movie from its detail page.

    For every movie returned by ``crawl_moveek`` the detail page is fetched
    and three keys are added to the movie's dict:

    * ``"gerne"`` - genre text with surrounding whitespace and dashes
      removed (the key spelling is kept as-is; NOTE(review): templates
      presumably read this exact key - confirm before renaming).
    * ``"description"`` - text of the description element; when the first
      page has none, the same movie is looked up again without the ``/en``
      prefix.
    * ``"rating"`` - text of the review link, or ``"No Review"`` when the
      link is missing or only shows the label ``"Reviews"`` / ``"Đánh giá"``.

    Missing elements produce an empty string (or ``"No Review"``) instead
    of raising, so one incomplete page does not fail the whole crawl.

    Args:
        URL: Address of the listing page to scrape.

    Returns:
        The list produced by ``crawl_moveek``, with each dict enriched in
        place.
    """
    movies_list = crawl_moveek(URL)
    for movie in movies_list:
        soup = get_URL("https://moveek.com" + movie["link"])

        genre_tag = soup.find(class_="mb-0 text-muted text-truncate")
        genre = genre_tag.string if genre_tag is not None else None
        movie["gerne"] = genre.strip().strip("-").strip() if genre else ""

        description_tag = soup.find(class_="mb-3 text-justify")
        if description_tag is None:
            fallback_link = _strip_en_prefix(movie["link"])
            if fallback_link != movie["link"]:
                # No description on the /en page: retry on the page without
                # the language prefix. The rating below is then read from
                # that page as well, hence the Vietnamese label check.
                soup = get_URL("https://moveek.com" + fallback_link)
                description_tag = soup.find(class_="mb-3 text-justify")
        movie["description"] = (
            description_tag.text if description_tag is not None else ""
        )

        rating_tag = soup.find(href=re.compile("/review/"))
        rating = rating_tag.text.strip() if rating_tag is not None else "No Review"
        if rating in ("Reviews", "Đánh giá"):
            # The link shows only its label, i.e. no score to report.
            rating = "No Review"
        movie["rating"] = rating
    return movies_list
def crawl_hbo(URL):
    """Collect the series cards shown on an HBO listing page.

    Args:
        URL: Address of the listing page to scrape.

    Returns:
        A list of dicts, one per card. Each dict always has ``"title"``,
        ``"link"`` and ``"img"`` (the latter two prefixed with
        ``https://www.hbo.com``). ``"description"`` and ``"show_time"`` are
        added only when the card contains the corresponding element.

    Raises:
        AttributeError, KeyError, TypeError: If a card lacks its title
            element, ``href`` attribute, or image.
    """
    soup = get_URL(URL)
    cards = soup.find_all(class_="components/Card--card components/Card--promotional components/Card--withBottomBorder")

    movie_list = []
    for card in cards:
        entry = {
            "title": card.find(class_="components/CardText--title").string,
            "link": "https://www.hbo.com" + card["href"],
            "img": "https://www.hbo.com" + card.find(class_="components/CardImage--imageContainer").img["src"],
        }

        # The two optional fields are looked up independently, so a card
        # without a description still keeps its show time (and vice versa).
        details = card.find(class_="components/CardText--details")
        if details is not None and details.p is not None:
            entry["description"] = details.p.string

        label = card.find(class_="components/CardText--contextualLabel")
        if label is not None:
            entry["show_time"] = label.string

        movie_list.append(entry)
    return movie_list
@app.route('/')
def home():
    """Serve the landing page rendered from ``home.html``."""
    page = render_template('home.html')
    return page
@app.route('/movies')
def movies():
    """Scrape Moveek on each request and render the result with ``movies.html``.

    The scraped list is passed to the template as ``data``.
    """
    return render_template('movies.html', data=crawl_rating_moveek(MOVEEK_URL))
@app.route('/series')
def series():
    """Scrape the HBO series page on each request and render ``series.html``.

    The scraped list is passed to the template as ``data``.
    """
    return render_template('series.html', data=crawl_hbo(HBO_URL))
if __name__ == '__main__':
    import os

    # The server listens on every interface (0.0.0.0). Flask's debug mode
    # enables an interactive debugger that can execute arbitrary code, so it
    # must not be switched on unconditionally here. Opt in for local
    # development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes", "on")
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)