def handle(self, *args, **options):
    """Scrape HD videos from the eporner.com hd1080p category.

    Walks the first 3 listing pages, skips videos already stored in the
    database, then for each new video downloads its metadata, thumbnail
    and 12 preview images, and persists everything via subir_video().
    """
    url_inicial = 'http://www.eporner.com/category/hd1080p/'
    pagina_tube = PaginaTube.objects.get(nombre='eporner.com')

    for pagina in range(0, 3):
        # Page 0 is the bare category URL; later pages append "N/".
        if pagina == 0:
            peticion = requests.get(url_inicial)
        else:
            peticion = requests.get(url_inicial + '%s/' % str(pagina))
        sopa = BeautifulSoup(peticion.content, 'html.parser')

        # Collect the URLs of videos not yet stored.
        videos_pagina = []
        for celda in sopa.find_all('div', {'class': 'mbhd'}):
            url_video = 'http://www.eporner.com' + celda.find('a').get('href')
            # EAFP: skip the video if it already exists in the DB.
            # (Renamed loop var so the ORM object no longer shadows it.)
            try:
                Video.objects.get(url_video=url_video)
                continue
            except Video.DoesNotExist:
                pass
            videos_pagina.append(url_video)

        # Fetch each new video page and extract its metadata.
        for url_video in videos_pagina:
            peticion = requests.get(url_video)
            soup = BeautifulSoup(peticion.content, 'html.parser')
            titulo = soup.find('h1').getText()
            publicado = datetime.datetime.now()

            # Cast and tags live in rows of the tag table.
            cast = []
            tags = []
            tabla = soup.find('td', {'id': 'hd-p**n-tags'})
            for tr in tabla.find_all('tr'):
                if tr.strong.string == 'Pornstars:':
                    for link in tr.find_all('a'):
                        if 'pornstar' in link.get('href'):
                            cast.append(link.string)
                if tr.strong.string == 'Tags:':
                    tags = [t.string for t in tr.find_all('a')]

            # The embed code is HTML inside a <textarea>; parse it again
            # to pull out the iframe src.
            codigo_iframe = soup.find('div', {'class': 'textare1'})
            codigo_iframe = BeautifulSoup(codigo_iframe.textarea.string,
                                          'html.parser')
            codigo_iframe = codigo_iframe.iframe.get('src')

            # Hoisted: the cutscene boxes are used for the thumbnail (6th
            # box) and for all 12 previews below — query the DOM once.
            cutscenes = soup.find_all('div', {'class': 'cutscenesbox'})
            url_thumbnail = cutscenes[5].a.get('href')

            # Download the thumbnail.
            thumbnail = None
            request_img = requests.get(url_thumbnail, stream=True)
            if request_img.status_code == 200:
                url_imagen = 'static/imagenes/eporner/%s.jpg' % trim(titulo)
                with open(url_imagen, 'wb') as f:
                    # iter_content with an explicit chunk size instead of
                    # iterating the response (128-byte chunks by default).
                    for chunk in request_img.iter_content(1024):
                        f.write(chunk)
                thumbnail = url_imagen

            # Download the 12 preview cutscene images.
            previews = []
            for n in range(0, 12):
                url_prev = cutscenes[n].a.get('href')
                prev_n = requests.get(url_prev, stream=True)
                if prev_n.status_code == 200:
                    url_prev_n = 'static/imagenes/eporner/%s-thumb-%s.jpg' % (
                        trim(titulo), str(n))
                    with open(url_prev_n, 'wb') as f:
                        for chunk in prev_n.iter_content(1024):
                            f.write(chunk)
                    previews.append(url_prev_n)

            # Persist the video (empty list = no paysite info here).
            subir_video(
                previews, cast, [], pagina_tube, tags, titulo,
                thumbnail, publicado, url_video, codigo_iframe,
            )
def handle(self, *args, **options):
    """Scrape HD videos from porndoe.com and store the new ones.

    Walks 100 listing pages (about 1000 videos are reached around loop
    38), keeps only HD entries not yet in the database, then downloads
    each video's metadata and thumbnail and persists it via subir_video().
    """
    url_base = 'http://www.porndoe.com'
    cookies = dict(__language="en")  # force the English version of the site
    pagina_tube = PaginaTube.objects.get(nombre='porndoe.com')

    for i in range(0, 100):
        # Parenthesized print works in both Python 2 and 3.
        print('Pagina %s de 100 ...' % str(i))

        # Page 0 is the bare base URL; later pages use ?page=N (1-based).
        if i == 0:
            peticion = requests.get(url_base, cookies=cookies)
        else:
            peticion = requests.get(url_base + '/?page=%s' % str(i + 1),
                                    cookies=cookies)
        soup = BeautifulSoup(peticion.content, 'html.parser')

        # Map video URL -> thumbnail URL for HD videos not yet stored.
        lista_videos = {}
        for video in soup.find_all('article', {'class': 'video-item'}):
            if video.find('span', {'class': 'ico-hd'}):
                link = url_base + video.a.get('href')
                # EAFP: skip videos that already exist in the DB.
                try:
                    Video.objects.get(url_video=link)
                    continue
                except Video.DoesNotExist:
                    pass
                lista_videos[link] = video.img.get('src')

        # items() instead of py2-only iteritems() (same behavior).
        for url_video, url_thumbnail in lista_videos.items():
            peticion = requests.get(url_video, cookies=cookies)
            soup = BeautifulSoup(peticion.content, 'html.parser')

            titulo = soup.h1.text
            publicado = datetime.datetime.now()

            # Embed code is HTML inside an <input value="...">.
            c = soup.find('div', {'id': 'my-embed'}).input.get('value')
            codigo_iframe = BeautifulSoup(c, 'html.parser').iframe.get('src')

            pagina_pago = [
                soup.find('div', {'class': 'channel-about'}).a.get('title')
            ]

            casting = [s.text for s in
                       soup.find_all('span', {'class': 'performer-name'})]
            # Drop the site's "Suggest performer" placeholder if present;
            # ValueError (not a bare except) is what list.remove raises.
            try:
                casting.remove('Suggest performer')
            except ValueError:
                pass

            # BUG FIX: attrs must be a dict — the original passed the set
            # {'class', 'data-row'} (comma instead of colon).
            tags = []
            for p in soup.find_all('p', {'class': 'data-row'}):
                if 'Tags:' in p.getText():
                    tags = [a.get('title') for a in p.find_all('a')]

            # Download the thumbnail.
            thumbnail = None
            peticion_img = requests.get(url_thumbnail, stream=True)
            if peticion_img.status_code == 200:
                path_imagen = ('static/imagenes/porndoe/%s.jpg'
                               % trim(url_thumbnail))
                thumbnail = path_imagen
                with open(path_imagen, 'wb') as f:
                    # Explicit chunk size; iterating the response directly
                    # yields tiny 128-byte chunks.
                    for chunk in peticion_img.iter_content(1024):
                        f.write(chunk)

            # Persist the video.
            subir_video(casting, pagina_pago, pagina_tube, tags, titulo,
                        thumbnail, publicado, url_video, codigo_iframe)

    print('FIN sandbox_porndoe_1')
def handle(self, *args, **options):
    """Scrape full-HD videos from porndig.com via its AJAX endpoint.

    Posts to the load_more_posts endpoint (fewer than 50 requests already
    yield 1000 videos), keeps full-HD entries not yet stored, then pulls
    each video's metadata, thumbnail and 21 preview frames and persists
    everything via subir_video().
    """
    url = 'http://www.porndig.com/posts/load_more_posts'
    pagina_tube = PaginaTube.objects.get(nombre='porndig.com')

    for i in range(0, 1):
        # NOTE(review): requests form-encodes nested dicts oddly
        # ('filters'/'category_id'); the endpoint apparently accepts it —
        # confirm before changing the payload shape.
        payload = {
            'main_category_id': 1,
            'type': 'post',
            'name': 'category_videos',
            'filters': {'filter_type': 'date', 'filter_period': ''},
            'category_id': {'': 882},
            # i * 100 is already 0 when i == 0; conditional was redundant.
            'offset': i * 100,
        }

        peticion = requests.post(url, data=payload)
        respuesta = peticion.json()['data']['content']
        soup = BeautifulSoup(respuesta, 'html.parser')

        # Keep only full-HD videos we have not stored yet.
        lista_videos = {}
        for elemento in soup.find_all('div', {'class': 'video_item_wrapper'}):
            if 'icon-video_full_hd' in str(elemento):
                link = 'http://www.porndig.com' + elemento.a.get('href')
                # EAFP: skip videos that already exist in the DB.
                try:
                    Video.objects.get(url_video=link)
                    continue
                except Video.DoesNotExist:
                    pass
                # Swap in the larger thumbnail variant.
                lista_videos[link] = elemento.img.get('src').replace(
                    '320x180', '400x225')

        # items() instead of py2-only iteritems() (same behavior).
        for url_video, url_thumbnail in lista_videos.items():
            peticion = requests.get(url_video)
            sopa = BeautifulSoup(peticion.content, 'html.parser')

            titulo = sopa.h1.text
            casting = []
            publicado = sopa.find_all(
                'div', {'class': 'video_class_value'})[3].text
            publicado = dateutil.parser.parse(publicado)

            # Studio / categories / performers share the same row markup;
            # getText() hoisted so it runs once per element.
            pagina_pago, tags = [], []
            for elemento in sopa.find_all(
                    'div', {'class': 'video_description_item'}):
                texto = elemento.getText()
                if 'Studio:' in texto:
                    pagina_pago = [elemento.a.text]
                if 'Categories:' in texto:
                    tags = [a.text for a in elemento.find_all('a')]
                if 'Pornstar(s)' in texto:
                    casting = [a.text for a in elemento.find_all('a')]

            codigo_iframe = sopa.find('div', {'class': 'js_video_embed'})
            codigo_iframe = codigo_iframe.textarea.iframe.get('src')

            # Best effort: try to read the paysite name from the iframe
            # page. Narrowed from a bare except (which also swallowed
            # KeyboardInterrupt/SystemExit); failure is deliberately
            # non-fatal.
            if not pagina_pago:
                try:
                    headers = {'referer': 'http://www.porndig.com'}
                    sopa_iframe = requests.get(codigo_iframe, headers=headers)
                    sopa_iframe = BeautifulSoup(sopa_iframe.content,
                                                'html.parser')
                    el = sopa_iframe.find(
                        'span',
                        {'id': 'producer_overlay_content_top_left_text'})
                    pagina_pago = [el.a.text]
                except Exception:
                    pass

            # Download the thumbnail.
            request_img = requests.get(url_thumbnail, stream=True)
            thumbnail = None
            if request_img.status_code == 200:
                url_imagen = 'static/imagenes/porndig/%s.jpg' % trim(titulo)
                with open(url_imagen, 'wb') as f:
                    # Explicit chunk size; iterating the response directly
                    # yields tiny 128-byte chunks.
                    for chunk in request_img.iter_content(1024):
                        f.write(chunk)
                thumbnail = url_imagen

            # 21 preview frames live next to the thumbnail, named N.jpg;
            # rewrite the tail of the thumbnail URL to address each one.
            previews = []
            for n in range(0, 21):
                if '/' in url_thumbnail[-7:]:
                    img = url_thumbnail.replace(url_thumbnail[-7:],
                                                '/%s.jpg' % str(n))
                else:
                    img = url_thumbnail.replace(url_thumbnail[-8:],
                                                '/%s.jpg' % str(n))
                prev_n = requests.get(img, stream=True)
                if prev_n.status_code == 200:
                    url_prev_n = ('static/imagenes/porndig/%s-thumb-%s.jpg'
                                  % (trim(titulo), str(n)))
                    with open(url_prev_n, 'wb') as f:
                        for chunk in prev_n.iter_content(1024):
                            f.write(chunk)
                    previews.append(url_prev_n)

            # Persist the video.
            subir_video(previews, casting, pagina_pago, pagina_tube, tags,
                        titulo, thumbnail, publicado, url_video,
                        codigo_iframe)