def handle(self, *args, **options):
    """Scrape HD videos from porndoe.com (first 100 listing pages) and store them.

    For every HD-flagged video not already in the DB, fetch its detail page,
    extract title/cast/tags/embed-iframe/paysite, download the thumbnail to
    static/imagenes/porndoe/, then persist it through subir_video().
    We reach ~1000 videos around loop iteration 38.
    """
    url_base = 'http://www.porndoe.com'
    cookies = dict(__language="en")  # force the English site version
    pagina_tube = PaginaTube.objects.get(nombre='porndoe.com')
    for i in range(0, 100):
        # Progress info
        print('Pagina %s de 100 ...' % str(i))
        # Request the listing page and build the soup
        if i == 0:
            peticion = requests.get(url_base, cookies=cookies)
        else:
            peticion = requests.get(url_base + '/?page=%s' % str(i + 1),
                                    cookies=cookies)
        soup = BeautifulSoup(peticion.content, 'html.parser')
        # Collect every new HD video on this page: {url: thumbnail_url}
        lista_videos = {}
        for video in soup.find_all('article', {'class': 'video-item'}):
            # Only keep videos flagged as HD
            if video.find('span', {'class': 'ico-hd'}):
                link = url_base + video.a.get('href')
                # Skip videos we already stored
                try:
                    Video.objects.get(url_video=link)
                    continue
                except Video.DoesNotExist:
                    pass
                thumbnail = video.img.get('src')
                lista_videos[link] = thumbnail
        # Visit each new video page and extract its metadata
        for url_video, url_thumbnail in lista_videos.items():
            peticion = requests.get(url_video, cookies=cookies)
            soup = BeautifulSoup(peticion.content, 'html.parser')
            titulo = soup.h1.text
            publicado = datetime.datetime.now()  # page shows no upload date
            # Embed iframe src is stored inside a hidden <input> value
            c = soup.find('div', {'id': 'my-embed'}).input.get('value')
            codigo_iframe = BeautifulSoup(c, 'html.parser').iframe.get('src')
            # Paysite / channel name
            pagina_pago = [
                soup.find('div', {'class': 'channel-about'}).a.get('title')
            ]
            # Cast, dropping the site's "Suggest performer" placeholder
            casting = [
                s.text
                for s in soup.find_all('span', {'class': 'performer-name'})
            ]
            try:
                casting.remove('Suggest performer')
            except ValueError:  # placeholder not present in the list
                pass
            # Tags live in the <p class="data-row"> that contains 'Tags:'.
            # (Bug fix: the original passed the set {'class', 'data-row'}
            # instead of a dict, which BS4 cannot use as an attrs filter.)
            tags = []
            for p in soup.find_all('p', {'class': 'data-row'}):
                if 'Tags:' in p.getText():
                    tags = [a.get('title') for a in p.find_all('a')]
            # Download the thumbnail; keep None when the request fails
            thumbnail = None
            peticion_img = requests.get(url_thumbnail, stream=True)
            if peticion_img.status_code == 200:
                path_imagen = ('static/imagenes/porndoe/%s.jpg'
                               % trim(url_thumbnail))
                thumbnail = path_imagen
                with open(path_imagen, 'wb') as f:
                    for chunk in peticion_img:
                        f.write(chunk)
            # Persist the video
            subir_video(casting, pagina_pago, pagina_tube, tags, titulo,
                        thumbnail, publicado, url_video, codigo_iframe)
    print('FIN sandbox_porndoe_1')
def handle(self, *args, **options):
    """Scrape 1080p videos from eporner.com (first 3 listing pages) and store them.

    For every listed video not already in the DB, fetch its detail page,
    extract title/cast/tags/embed-iframe, download the main thumbnail and up
    to 12 preview images, then persist everything through subir_video().
    """
    # numero_videos = int(args[0]) if args else 999
    url_inicial = 'http://www.eporner.com/category/hd1080p/'
    pagina_tube = PaginaTube.objects.get(nombre='eporner.com')
    for i in range(0, 3):
        # Request the listing page and build the soup.
        # NOTE(review): for i > 0 this requests page 'i', not 'i+1', so
        # page 1 may effectively be fetched twice -- confirm against site.
        if i == 0:
            peticion = requests.get(url_inicial)
        else:
            peticion = requests.get(url_inicial + '%s/' % str(i))
        sopa = BeautifulSoup(peticion.content, 'html.parser')
        # Collect urls of videos not yet stored
        videos_pagina = []
        for video in sopa.find_all('div', {'class': 'mbhd'}):
            url_video = 'http://www.eporner.com' + video.find('a').get('href')
            try:
                Video.objects.get(url_video=url_video)
                continue  # already stored
            except Video.DoesNotExist:
                pass
            videos_pagina.append(url_video)
        # Extract the data of each new video
        for url_video in videos_pagina:
            peticion = requests.get(url_video)
            soup = BeautifulSoup(peticion.content, 'html.parser')
            titulo = soup.find('h1').getText()
            publicado = datetime.datetime.now()  # page shows no upload date
            # Cast and tags live in rows of the tag table
            cast = []
            tags = []
            tabla = soup.find('td', {'id': 'hd-p**n-tags'})
            for tr in tabla.find_all('tr'):
                if tr.strong.string == 'Pornstars:':
                    for link in tr.find_all('a'):
                        if 'pornstar' in link.get('href'):
                            cast.append(link.string)
                if tr.strong.string == 'Tags:':
                    tags = [t.string for t in tr.find_all('a')]
            # Embed iframe src is stored inside a <textarea>
            codigo_iframe = soup.find('div', {'class': 'textare1'})
            codigo_iframe = BeautifulSoup(codigo_iframe.textarea.string,
                                          'html.parser')
            codigo_iframe = codigo_iframe.iframe.get('src')
            # Scene boxes, fetched ONCE (the original re-ran this find_all
            # 13 times per video: once for the thumbnail + once per preview)
            cutscenes = soup.find_all('div', {'class': 'cutscenesbox'})
            url_thumbnail = cutscenes[5].a.get('href')
            # Download the main thumbnail; keep None when the request fails
            request_img = requests.get(url_thumbnail, stream=True)
            thumbnail = None
            if request_img.status_code == 200:
                url_imagen = 'static/imagenes/eporner/%s.jpg' % trim(titulo)
                with open(url_imagen, 'wb') as f:
                    for chunk in request_img:
                        f.write(chunk)
                thumbnail = url_imagen
            # Download up to 12 preview images
            previews = []
            for n in range(0, 12):
                url_prev = cutscenes[n].a.get('href')
                prev_n = requests.get(url_prev, stream=True)
                if prev_n.status_code == 200:
                    url_prev_n = ('static/imagenes/eporner/%s-thumb-%s.jpg'
                                  % (trim(titulo), str(n)))
                    with open(url_prev_n, 'wb') as f:
                        for chunk in prev_n:
                            f.write(chunk)
                    previews.append(url_prev_n)
            # Persist the video (this tube exposes no paysite info)
            subir_video(
                previews,
                cast,
                [],
                pagina_tube,
                tags,
                titulo,
                thumbnail,
                publicado,
                url_video,
                codigo_iframe,
            )
codigo_iframe = codigo_iframe.iframe.get('src') # thumbnail url_thumbnail = soup.find_all('div', {'class': 'cutscenesbox'})[5] url_thumbnail = url_thumbnail.a.get('href') # Descargamos el thumbnail # request_img = requests.get(url_thumbnail, stream = True ) thumbnail = None # if request_img.status_code == 200: # url_imagen = 'static/imagenes/eporner/%s.jpg' % trim(titulo) # with open( url_imagen , 'wb') as f: # for chunk in request_img: # f.write(chunk) # thumbnail = url_imagen # Guardamos el objeto subir_video( cast, [], pagina_tube, tags, titulo, thumbnail, publicado, video_malo, codigo_iframe, ) print 'FIN '
def handle(self, *args, **options):
    """Scrape HD videos from porntube.com (first 11 listing pages) and store them.

    For every HD-flagged video not already in the DB, fetch its detail page,
    extract title/cast/tags/upload-date/embed-iframe/paysite, download the
    thumbnail plus up to 40 preview frames, then persist everything through
    subir_video().
    """
    url_paginatube = 'http://www.porntube.com'
    pagina_tube = PaginaTube.objects.get(nombre='porntube.com')
    # Loop over the first listing pages
    for pagina in range(0, 11):
        # Request the page and build the soup
        if pagina == 0:
            peticion = requests.get(url_paginatube)
        else:
            peticion = requests.get(
                url_paginatube + '/videos?p=%s' % str(pagina + 1))
        soup = BeautifulSoup(peticion.content, 'html.parser')
        # Collect all HD videos of this page: {url: thumbnail_url}
        videos = {}
        elementos_video = soup.find_all('div', {'class': 'thumb_video'})
        for video in elementos_video:
            # Keep only videos carrying the HD badge
            hd = video.find('li', {'class': 'topHD'})
            if hd is None:
                continue
            url_video = video.find('a').get('href')
            imagen_video = video.find('img').get('data-original')
            videos[url_video] = imagen_video
        # Visit every collected video
        for video, imagen in videos.items():
            # Sometimes the listing mixes in videos hosted elsewhere
            if '4tube' in video:
                continue
            if 'pornerbros' in video:
                continue
            # Skip empty urls and videos we already stored
            if not video:
                continue
            try:
                Video.objects.get(url_video=video)
                continue
            except Video.DoesNotExist:
                pass
            # The new site version links straight to the /video path,
            # so prepend scheme+host ourselves before requesting it.
            video = 'http://www.porntube.com%s' % video
            peticion = requests.get(video)
            soup = BeautifulSoup(peticion.content, 'html.parser')
            # Embed iframe src is stored inside a <textarea>
            iframe = soup.find('textarea', {'id': 'textarea-iframe'}).string
            iframe = BeautifulSoup(iframe, 'html.parser')
            codigo_iframe = iframe.find('iframe').get('src')
            pagina_pago = [
                soup.find('a', {'class': 'item-to-subscribe'}).string
            ]
            # <title> carries a fixed prefix/suffix; slice them off
            titulo = str(soup.title)[7:][:-22]
            url_video = video
            # Cast list is optional on the page
            casting = []
            try:
                actores = soup.find(
                    'ul', {'class': 'pornlist'}).find_all('span')
                for actor in actores:
                    casting.append(actor.string)
            except AttributeError:  # no cast section -> find() was None
                pass
            pub = soup.find('div', {'class': 'upload-date'}).find('li')
            publicado = dateutil.parser.parse(pub.getText())
            tags = []
            for li in soup.find('div', {'class': 'tags'}).find_all('li'):
                tags.append(li.string.strip())
            # Download the thumbnail, preferring the larger rendition;
            # on a network failure fall back to the original size
            try:
                request_img = requests.get(
                    imagen.replace('240x180', '628x472'), stream=True)
            except requests.RequestException:
                request_img = requests.get(imagen, stream=True)
            thumbnail = None
            if request_img.status_code == 200:
                url_imagen = 'static/imagenes/porntube/%s.jpg' % video[31:]
                with open(url_imagen, 'wb') as f:
                    for chunk in request_img:
                        f.write(chunk)
                thumbnail = url_imagen
            # Previews: probe up to 40 numbered frames next to the thumb,
            # keeping only the ones that actually exist (HTTP 200)
            previews = []
            for n in range(0, 40):
                if '/' in imagen[-7:]:
                    img = imagen.replace(imagen[-7:], '/%s.jpeg' % str(n))
                else:
                    img = imagen.replace(imagen[-8:], '/%s.jpeg' % str(n))
                prev_n = requests.get(img, stream=True)
                if prev_n.status_code == 200:
                    url_prev_n = (
                        'static/imagenes/porntube/%s-thumb-%s.jpg'
                        % (video[31:], str(n)))
                    with open(url_prev_n, 'wb') as f:
                        for chunk in prev_n:
                            f.write(chunk)
                    previews.append(url_prev_n)
            # Skip broken pages where no paysite could be extracted
            if pagina_pago == [None]:
                continue
            subir_video(
                previews,
                casting,
                pagina_pago,
                pagina_tube,
                tags,
                titulo,
                thumbnail,
                publicado,
                url_video,
                codigo_iframe,
            )
def handle(self, *args, **options):
    """Scrape HD videos from porntube.com (first 150 listing pages) and store them.

    Same flow as the 11-page porntube command but without preview frames:
    collect HD-flagged videos, skip known ones, extract the metadata and the
    thumbnail, then persist through subir_video().
    """
    url_paginatube = 'http://www.porntube.com'
    pagina_tube = PaginaTube.objects.get(nombre='porntube.com')
    # Loop over the first 150 listing pages
    for pagina in range(0, 150):
        # Progress info
        print('Pagina %s de 150 ...' % str(pagina))
        # Request the page and build the soup
        if pagina == 0:
            peticion = requests.get(url_paginatube)
        else:
            peticion = requests.get(
                url_paginatube + '/videos?p=%s' % str(pagina + 1))
        soup = BeautifulSoup(peticion.content, 'html.parser')
        # Collect all HD videos of this page: {url: thumbnail_url}
        videos = {}
        elementos_video = soup.find_all('div', {'class': 'thumb_video'})
        for video in elementos_video:
            # Keep only videos carrying the HD badge
            hd = video.find('li', {'class': 'topHD'})
            if hd is None:
                continue
            url_video = video.find('a').get('href')
            imagen_video = video.find('img').get('data-original')
            videos[url_video] = imagen_video
        # Visit every collected video
        for video, imagen in videos.items():
            # Sometimes the listing mixes in 4tube-hosted videos
            if '4tube' in video:
                continue
            # Skip empty urls and videos we already stored
            if not video:
                continue
            try:
                Video.objects.get(url_video=video)
                continue
            except Video.DoesNotExist:
                pass
            peticion = requests.get(video)
            soup = BeautifulSoup(peticion.content, 'html.parser')
            # Embed iframe src is stored inside a <textarea>
            iframe = soup.find('textarea', {
                'id': 'textarea-iframe'
            }).string
            iframe = BeautifulSoup(iframe, 'html.parser')
            codigo_iframe = iframe.find('iframe').get('src')
            pagina_pago = [
                soup.find('a', {
                    'class': 'item-to-subscribe'
                }).string
            ]
            # <title> carries a fixed prefix/suffix; slice them off
            titulo = str(soup.title)[7:][:-22]
            url_video = video
            # Cast list is optional on the page
            casting = []
            try:
                actores = soup.find('ul', {
                    'class': 'pornlist'
                }).find_all('span')
                for actor in actores:
                    casting.append(actor.string)
            except AttributeError:  # no cast section -> find() was None
                pass
            # Upload date: look for the 'upload' row in the info tab.
            # Bug fix: the original reused `publicado` for the <ul> Tag, so
            # when no 'upload' row matched, a bs4 Tag leaked into
            # subir_video().  Default to now() like the sibling scrapers.
            info_ul = soup.find('div', {'id': 'tab1'}).find('ul')
            publicado = datetime.datetime.now()
            for li in info_ul.find_all('li'):
                if 'upload' in str(li):
                    publicado = dateutil.parser.parse(li.getText())
            tags = []
            for li in soup.find('div', {'class': 'tags'}).find_all('li'):
                tags.append(li.string.strip())
            # Download the thumbnail, preferring the larger rendition;
            # on a network failure fall back to the original size
            try:
                request_img = requests.get(imagen.replace(
                    '240x180', '628x472'), stream=True)
            except requests.RequestException:
                request_img = requests.get(imagen, stream=True)
            thumbnail = None
            if request_img.status_code == 200:
                url_imagen = 'static/imagenes/porntube/%s.jpg' % video[31:]
                with open(url_imagen, 'wb') as f:
                    for chunk in request_img:
                        f.write(chunk)
                thumbnail = url_imagen
            # Skip broken pages where no paysite could be extracted
            if pagina_pago == [None]:
                continue
            subir_video(casting, pagina_pago, pagina_tube, tags, titulo,
                        thumbnail, publicado, url_video, codigo_iframe)
            # break
    print('FIN porntube_sandbox_1')
def handle(self, *args, **options):
    """Scrape full-HD videos from porndig.com via its AJAX endpoint and store them.

    POSTs to /posts/load_more_posts for each listing batch, keeps the videos
    carrying the full-HD icon that are not yet in the DB, extracts
    title/cast/studio/tags/embed-iframe, downloads the thumbnail and up to
    21 preview frames, then persists everything through subir_video().
    Fewer than 50 batches already yield ~1000 videos.
    """
    url = 'http://www.porndig.com/posts/load_more_posts'
    pagina_tube = PaginaTube.objects.get(nombre='porndig.com')
    for i in range(0, 1):
        # Listing filters posted to the endpoint.
        # NOTE(review): requests form-encodes the nested dicts ('filters',
        # 'category_id') in a lossy way; the endpoint seems to accept it,
        # confirm before restructuring the payload.
        payload = {
            'main_category_id': 1,
            'type': 'post',
            'name': 'category_videos',
            'filters': {
                'filter_type': 'date',
                'filter_period': ''
            },
            'category_id': {
                '': 882
            },
            # Simplified from `i * 100 if i != 0 else 0` -- identical values
            'offset': i * 100
        }
        # Request the batch; the HTML fragment comes back inside JSON
        peticion = requests.post(url, data=payload)
        respuesta = peticion.json()['data']['content']
        soup = BeautifulSoup(respuesta, 'html.parser')
        # Keep every new full-HD video: {url: thumbnail_url}
        lista_videos = {}
        for elemento in soup.find_all('div', {'class': 'video_item_wrapper'}):
            if 'icon-video_full_hd' in str(elemento):
                link = 'http://www.porndig.com' + elemento.a.get('href')
                # Skip videos we already stored
                try:
                    Video.objects.get(url_video=link)
                    continue
                except Video.DoesNotExist:
                    pass
                thumbnail = elemento.img.get('src').replace(
                    '320x180', '400x225')
                lista_videos[link] = thumbnail
        # Visit every collected video and save it
        for url_video, url_thumbnail in lista_videos.items():
            peticion = requests.get(url_video)
            sopa = BeautifulSoup(peticion.content, 'html.parser')
            titulo = sopa.h1.text
            casting = []
            publicado = sopa.find_all(
                'div', {'class': 'video_class_value'})[3].text
            publicado = dateutil.parser.parse(publicado)
            # Studio (paysite), tags and cast from the description rows
            pagina_pago, tags = [], []
            for elemento in sopa.find_all(
                    'div', {'class': 'video_description_item'}):
                if 'Studio:' in elemento.getText():
                    pagina_pago = [elemento.a.text]
                if 'Categories:' in elemento.getText():
                    tags = [a.text for a in elemento.find_all('a')]
                if 'Pornstar(s)' in elemento.getText():
                    casting = [a.text for a in elemento.find_all('a')]
            codigo_iframe = sopa.find('div', {'class': 'js_video_embed'})
            codigo_iframe = codigo_iframe.textarea.iframe.get('src')
            # Fallback: best-effort paysite extraction from the iframe page
            if not pagina_pago:
                try:
                    headers = {'referer': 'http://www.porndig.com'}
                    sopa_iframe = requests.get(codigo_iframe, headers=headers)
                    sopa_iframe = BeautifulSoup(
                        sopa_iframe.content, 'html.parser')
                    el = sopa_iframe.find(
                        'span',
                        {'id': 'producer_overlay_content_top_left_text'})
                    pagina_pago = [el.a.text]
                except Exception:
                    # network or parse failure -- paysite stays unknown
                    pass
            # Download the thumbnail; keep None when the request fails
            request_img = requests.get(url_thumbnail, stream=True)
            thumbnail = None
            if request_img.status_code == 200:
                url_imagen = 'static/imagenes/porndig/%s.jpg' % trim(titulo)
                with open(url_imagen, 'wb') as f:
                    for chunk in request_img:
                        f.write(chunk)
                thumbnail = url_imagen
            # Previews: probe up to 21 numbered frames next to the thumb,
            # keeping only the ones that actually exist (HTTP 200)
            previews = []
            for n in range(0, 21):
                if '/' in url_thumbnail[-7:]:
                    img = url_thumbnail.replace(
                        url_thumbnail[-7:], '/%s.jpg' % str(n))
                else:
                    img = url_thumbnail.replace(
                        url_thumbnail[-8:], '/%s.jpg' % str(n))
                prev_n = requests.get(img, stream=True)
                if prev_n.status_code == 200:
                    url_prev_n = (
                        'static/imagenes/porndig/%s-thumb-%s.jpg'
                        % (trim(titulo), str(n)))
                    with open(url_prev_n, 'wb') as f:
                        for chunk in prev_n:
                            f.write(chunk)
                    previews.append(url_prev_n)
            # Save the video
            subir_video(previews, casting, pagina_pago, pagina_tube, tags,
                        titulo, thumbnail, publicado, url_video,
                        codigo_iframe)
def handle(self, *args, **options):
    """Scrape videos from beeg.com's home page and store them.

    Video ids are embedded as a JSON array of 7-digit numbers inside one of
    the page's <script> tags; each id maps to http://www.beeg.com/<id>.
    For every id not already in the DB we extract title/cast/tags/paysites/
    upload-date, download the thumbnail, and persist via subir_video()
    (no iframe code for this tube).
    """
    # Required variables
    pagina_tube = PaginaTube.objects.get(nombre='beeg.com')
    url_paginatube = 'http://www.beeg.com'
    paginas_video = []
    # Request the home page and build the soup
    peticion = requests.get(url_paginatube)
    soup = BeautifulSoup(peticion.content, 'html.parser')
    # Pull the id list ([1234567,...]) out of the inline javascript
    for script in soup.find_all('script'):
        try:
            regex = re.search(r'\[(\d{7},?)+\]', script.string)
            paginas_video += json.loads(regex.group())
        except (TypeError, AttributeError, ValueError):
            # script.string may be None, the pattern may not match, or the
            # matched text may not be valid JSON -- try the next script tag
            continue
    # Visit every video page and extract its info
    for pagina in paginas_video:
        # Skip videos we already stored
        try:
            Video.objects.get(url_video=url_paginatube + '/' + str(pagina))
            continue
        except Video.DoesNotExist:
            pass
        # Request the video page and build the soup
        peticion = requests.get(url_paginatube + '/' + str(pagina))
        soup = BeautifulSoup(peticion.content, 'html.parser')
        # Paysite plus its network
        pag_pago_1 = soup.find(title="Visit Paysite").string
        pag_pago_2 = soup.find(title="Visit Network").string
        pagina_pago = [pag_pago_1, pag_pago_2]
        titulo = soup.find('title').string[:-8]  # drop fixed site suffix
        url_video = url_paginatube + '/' + str(pagina)
        meta_tags = soup.findAll(attrs={"name": "keywords"})[0]
        tags = meta_tags.get('content').split(',')
        # Cast and publish date live in a table.  Default the date to now()
        # so a missing 'Published' row can neither raise NameError nor leak
        # the previous iteration's value.
        casting = []
        publicado = datetime.datetime.now()
        for elemento in soup.find_all('tr'):
            if 'Cast' in str(elemento):
                if ',' in str(elemento):
                    for c in elemento.find('td').string.split(','):
                        casting.append(c)
                else:
                    casting.append(elemento.find('td').string)
            elif 'Published' in str(elemento):
                publicado = dateutil.parser.parse(
                    elemento.find('td').string)
        # Download the thumbnail last; keep None when the request fails
        request_img = requests.get('http://img.beeg.com/320x240/%s.jpg'
                                   % str(pagina), stream=True)
        thumbnail = None
        if request_img.status_code == 200:
            url_imagen = 'static/imagenes/beeg/%s.jpg' % str(pagina)
            with open(url_imagen, 'wb') as f:
                for chunk in request_img:
                    f.write(chunk)
            thumbnail = url_imagen
        # Now that every variable is ready, persist the video.
        # Signature: subir_video(casting, pagina_pago, pagina_tube, tags,
        #                        titulo, thumbnail, publicado=False,
        #                        url_video='', codigo_iframe='')
        subir_video(casting, pagina_pago, pagina_tube, tags, titulo,
                    thumbnail, publicado, url_video)
def handle(self, *args, **options):
    """Scrape videos from faapy.com (first 20 'latest updates' pages) and store them.

    For every video not already in the DB, fetch its page, extract
    title/cast/tags/embed-url/paysite, download the thumbnail to
    static/imagenes/faapy/, then persist through subir_video().
    """
    # Base config
    url_inicial = 'http://www.faapy.com'
    pagina_tube = PaginaTube.objects.get(nombre='faapy.com')
    for i in range(0, 20):
        # Progress info
        print('Pagina %s de 20 ...' % str(i))
        # Video urls/thumbs found on this listing page
        videos_pagina = {}
        # Request the listing page and build the soup
        if i == 0:
            peticion = requests.get(url_inicial)
        else:
            peticion = requests.get(
                url_inicial + '/latest-updates/%s/' % str(i + 1))
        soup = BeautifulSoup(peticion.content, 'html.parser')
        # Grab video urls and thumbnails
        for elemento_video in soup.find_all('div', {'class': 'thumb'}):
            url_video = elemento_video.find('a').get('href')
            # Skip videos we already stored
            try:
                Video.objects.get(url_video=url_video)
                continue
            except Video.DoesNotExist:
                pass
            thumbnail = elemento_video.find('img').get('src')
            videos_pagina[url_video] = thumbnail
        # Visit every collected video page
        for url_video, thumbnail in videos_pagina.items():
            peticion = requests.get(url_video)
            soup = BeautifulSoup(peticion.content, 'html.parser')
            titulo = soup.h1.string
            casting = [
                m.string
                for m in soup.find_all('a', {'class': 'model-link'})
            ]
            # Embed url sits in inline javascript.  Initialise first so a
            # miss can't leak the previous iteration's value (or raise
            # NameError on the first video).
            codigo_iframe = ''
            for script_tag in soup.find_all('script'):
                try:
                    regex_exp = re.search(
                        r'http://faapy\.com/embed/\d{1,10}',
                        script_tag.string)
                    codigo_iframe = regex_exp.group()
                except (TypeError, AttributeError):
                    # script_tag.string may be None / pattern may not match
                    pass
            # Paysite link is optional
            try:
                pagina_pago = [soup.find(attrs={"rel": "nofollow"}).string]
            except AttributeError:
                pagina_pago = []
            publicado = datetime.datetime.now()  # page shows no upload date
            # Tags row; initialise so a miss can't leak across videos
            tags = []
            for elemento in soup.find_all('div', {'class': 'row'}):
                try:
                    if 'Tags' in elemento.getText():
                        tags = [t.string for t in elemento.find_all('a')]
                except TypeError:
                    pass
            # Download the thumbnail; keep None when the request fails
            thumb = None
            peticion_img = requests.get(thumbnail, stream=True)
            if peticion_img.status_code == 200:
                u = url_video.replace(
                    'http://faapy.com/videos/', '').replace('/', '_')
                path_imagen = 'static/imagenes/faapy/%s.jpg' % u
                with open(path_imagen, 'wb') as f:
                    for chunk in peticion_img:
                        f.write(chunk)
                thumb = path_imagen
            # Persist the video
            subir_video(casting, pagina_pago, pagina_tube, tags, titulo,
                        thumb, publicado, url_video, codigo_iframe)
    print(' FIN faapy_sandbox_1')