-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
80 lines (69 loc) · 2.6 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from bs4 import BeautifulSoup
from movie import Movie
import admin
import requests
import os
import display
def _print_ok(label):
    """Print ``label`` followed by a green check mark."""
    # \033[92m switches the terminal to bright green, \033[0m resets it.
    print(label + ': \033[92m' + u'\u2713' + '\033[0m')


def main():
    """Scrape miradetodo.co page by page and save every movie found.

    Steps:
      1. Call ``display.bienvenida()``.
      2. If ``admin.checkDb()`` is falsy, create the database and both
         tables, reporting each step; otherwise only report the database.
      3. Fetch ``https://miradetodo.co/page/<n>`` for n = 1, 2, 3, ... and,
         for every listed item that is not a series, build a ``Movie``,
         call its ``save()`` and report progress.

    The crawl ends when a page has no ``div.items`` container or the
    container holds no ``div.item`` entries, which is taken to mean the
    listing is exhausted.
    """
    display.bienvenida()

    # First check if the db exists. If it doesn't, create it and its tables.
    if admin.checkDb():
        _print_ok('DB')
    else:
        admin.createDb()
        _print_ok('DB')
        admin.createMovieTable()
        _print_ok('TABLE pelicula')
        admin.createMovieLinkTable()
        _print_ok('TABLE linkpelicula')

    page = 1
    while True:
        r = requests.get('https://miradetodo.co/page/' + str(page))
        soup = BeautifulSoup(r.content, 'html.parser')

        # Container of all movies. find() returns None when it is absent,
        # so stop here instead of failing on None.find_all(...).
        contenedorDiv = soup.find('div', {'class': 'items'})
        if contenedorDiv is None:
            break
        itemDivs = contenedorDiv.find_all('div', {'class': 'item'})
        if not itemDivs:
            # An empty listing would otherwise be requested forever.
            break

        for itemDiv in itemDivs:
            playerLink = itemDiv.a.get('href')
            # Series are ignored for now; skip them before parsing the card.
            if '/series/' in playerLink:
                continue

            infoDiv = itemDiv.find('div', {'class': 'fixyear'})
            # [0:-7] drops the last seven characters of the heading
            # (presumably a " (YYYY)" suffix - confirm against the markup).
            name = infoDiv.find('h2').string[0:-7]
            year = infoDiv.find('span').string

            boxDiv = itemDiv.find('div', {'class': 'boxinfo'})
            imdbDiv = boxDiv.find('div', {'class': 'cocs imdb_r'})
            puntuacion = imdbDiv.find('span').text
            # Second <b> inside div.b, minus its last six characters
            # (presumably a trailing unit word - confirm against the markup).
            cantVotos = imdbDiv.find('div', {'class': 'b'}).find_all('b')[1].text[0:-6]

            pelicula = Movie({
                'name': name,
                'year': year,
                'original-link': playerLink,
                'links': movieLinks(playerLink),
                'score': puntuacion,
                'totalVotes': cantVotos
            })
            pelicula.save()
            display.progress('Se guardo ' + pelicula.name)

        print("\n")
        page += 1
def movieLinks(playerLink):
    """Return the href of every anchor on a movie's sources page.

    Fetches ``playerLink``, finds the ``div`` whose id is ``div2``, reads the
    ``data-lazy-src`` attribute of its iframe, fetches that URL and collects
    ``link.get('href')`` for each ``<a>`` in the response (``None`` for an
    anchor that has no href).

    This is best effort: if anything after the first request fails (missing
    player div or iframe, unusable source URL, request error, ...), a message
    is printed and an empty list is returned. Errors raised while fetching
    ``playerLink`` itself are not handled here and propagate to the caller.
    """
    r = requests.get(playerLink)
    soup = BeautifulSoup(r.content, 'html.parser')
    player = soup.find('div', {'id': 'div2'})
    try:
        sourcesLink = player.iframe.get('data-lazy-src')
        r = requests.get(sourcesLink)
        soup = BeautifulSoup(r.content, 'html.parser')
        return [link.get('href') for link in soup.find_all('a')]
    except Exception:
        # Exception rather than a bare except: KeyboardInterrupt and
        # SystemExit must still be able to stop the scraper.
        print('No se pudo obtener los links de: ' + playerLink)
        return []
# Start the scraper. There is no __name__ guard, so this call also runs
# whenever the module is imported.
main()