def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Resolve a filebox page into a list of playable [label, url] pairs.

    Login/premium parameters are accepted for interface compatibility but
    are unused. Returns an empty list only if the final append never runs.
    """
    logger.info("[filebox.py] get_video_url(page_url='%s')" % page_url)
    video_urls = []
    # Reference snippet of the hidden form fields scraped below:
    ''' <input type="hidden" name="op" value="download2"> <input type="hidden" name="id" value="235812b1j9w1"> <input type="hidden" name="rand" value="na73zeeooqyfkndsv4uxzzpbajwi6mhbmixtogi"> <input type="hidden" name="referer" value="http://www.seriesyonkis.com/s/ngo/2/5/1/8/773"> '''
    logger.info("[filebox.py] URL ")
    data = scrapertools.cache_page(page_url)
    import time
    # Wait before re-submitting — presumably the site enforces a countdown
    # before the download form may be posted; confirm against the site.
    time.sleep(5)
    # Hidden form values needed to request the "download2" step.
    codigo = scrapertools.get_match(data,'<input type="hidden" name="id" value="([^"]+)">[^<]+')
    rand = scrapertools.get_match(data,'<input type="hidden" name="rand" value="([^"]+)">')
    #op=download2&id=xuquejiv6xdf&rand=r6dq7hn7so2ygpnxv2zg2i3cu3sbdsunf57gtni&referer=&method_free=&method_premium=&down_direct=1
    post = "op=download2&id="+codigo+"&rand="+rand+"&referer=&method_free=&method_premium=&down_direct=1"
    data = scrapertools.cache_page( page_url , post=post, headers=[['User-Agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14'],['Referer',page_url]] )
    logger.info("data="+data)
    # The response initialises the player with this.play('<url>').
    media_url = scrapertools.get_match(data,"this.play\('([^']+)'")
    # Label the entry with the last 4 chars of the filename (its extension).
    video_urls.append( [ scrapertools.get_filename_from_url(media_url)[-4:]+" [filebox]",media_url])
    for video_url in video_urls:
        logger.info("[filebox.py] %s - %s" % (video_url[0],video_url[1]))
    return video_urls
def play(item):
    """Resolve the final video link for *item* and wrap it into playable items."""
    logger.info("streamondemand.streamingfilmit play")
    page = scrapertools.cache_page(item.url, headers=headers)
    page = scrapertools.decodeHtmlentities(page).replace('http://cineblog01.pw', 'http://k4pp4.pw')
    target = scrapertools.find_single_match(page, r'<a\s*href="([^"]+)"><h1')
    page = scrapertools.cache_page(target, headers=headers)
    if "go.php" in target:
        # Redirector page: the real link is inside a JS redirect.
        media = scrapertools.get_match(page, 'window.location.href = "([^"]+)";')
    elif "/link/" in target:
        # Protected page: try to unpack the obfuscated JS first.
        from lib.jsbeautifier.unpackers import packer
        try:
            page = scrapertools.get_match(page, "(eval.function.p,a,c,k,e,.*?)</script>")
            page = packer.unpack(page)
        except IndexError:
            # Content was not packed; scrape it as-is.
            pass
        media = scrapertools.get_match(page, 'var link(?:\s)?=(?:\s)?"([^"]+)";')
    else:
        # Direct link: hand the url itself to the server detector.
        media = target
    itemlist = servertools.find_video_items(data=media)
    for videoitem in itemlist:
        videoitem.title = item.show
        videoitem.fulltitle = item.fulltitle
        videoitem.thumbnail = item.thumbnail
        videoitem.channel = __channel__
    return itemlist
def info(item):
    """Display title and synopsis of a torrentestrenos entry in a TextBox dialog."""
    logger.info("[filebox.py] get_video_url" if False else "pelisalacarta.torrentestrenos info")
    url=item.url
    data = scrapertools.cachePage(url)
    # Flatten whitespace so the regexes can match across the whole page.
    data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
    title= scrapertools.get_match(data,'<h4>(.*?)</h4>')
    # Wrap the whole title in Kodi colour/bold tags.
    title = title.replace(title,"[COLOR aqua][B]"+title+"[/B][/COLOR]")
    scrapedplot = scrapertools.get_match(data,'</p><p>([^<]+)</p><p>')
    scrapedplot = scrapedplot.replace(scrapedplot,"[COLOR white]"+scrapedplot+"[/COLOR]")
    plot_tag="[COLOR green][B]Sinopsis[/B][/COLOR]" + "[CR]"
    scrapedplot= plot_tag + scrapedplot
    # Strip accents — presumably to work around encoding/font issues in the
    # dialog; confirm before changing any of these mappings.
    scrapedplot = scrapedplot.replace("á","a")
    scrapedplot = scrapedplot.replace("í","i")
    scrapedplot = scrapedplot.replace("é","e")
    scrapedplot = scrapedplot.replace("ó","o")
    scrapedplot = scrapedplot.replace("ú","u")
    scrapedplot = scrapedplot.replace("ñ","–")
    scrapedplot = scrapedplot.replace("Á","A")
    scrapedplot = scrapedplot.replace("Í","I")
    scrapedplot = scrapedplot.replace("É","E")
    scrapedplot = scrapedplot.replace("Ó","O")
    scrapedplot = scrapedplot.replace("Ú","U")
    scrapedplot = scrapedplot.replace("Ñ","„")
    fanart="http://s11.postimg.org/qu66qpjz7/zentorrentsfanart.jpg"
    tbd = TextBox("DialogTextViewer.xml", os.getcwd(), "Default")
    tbd.ask(title, scrapedplot,fanart)
    del tbd
    return
def get_video_url_from_page(page_url):
    """Build an rtmp url ("host app=... playpath=...") from an embed page.

    Returns "" when any of the pieces cannot be scraped.
    """
    data = scrapertools.cache_page(page_url)
    try:
        stream = scrapertools.get_match(data,"url\:'(mp4\%3A[^']+)'")
        base = scrapertools.get_match(data,"netConnectionUrl\: '([^']+)'")
        # Some CDNs (aragon/iasoft) need the escaped "mp4%3A" prefix dropped.
        if urllib.unquote(base).startswith(("rtmp://aragon", "rtmp://iasoft")):
            url = base + "/" + stream[9:]
        else:
            url = base + "/" + stream
        url = urllib.unquote(url)
        # Split into the pieces an rtmp player expects.
        host = scrapertools.find_single_match(url,'(rtmp://[^/]+)')
        app = scrapertools.find_single_match(url,'rtmp://[^/]+/(.*?)/mp4\:')
        playpath = scrapertools.find_single_match(url,'rtmp://[^/]+/.*?/(mp4\:.*?)$')
        url = host + ' app=' + app + ' playpath=' + playpath
        logger.info("url="+url)
    except:
        # Any scrape failure means "not found".
        url = ""
        logger.info("url NO encontrada")
    return url
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve an openload embed into [label, url] pairs.

    Falls back to the download page ("/f/") when the embed has no
    "videocontainer" marker, in which case a download link is returned
    instead of a stream. Login/premium parameters are unused.
    Fix: idiomatic truthiness test instead of "video == True".
    """
    logger.info("pelisalacarta.servers.openload url=" + page_url)
    video_urls = []
    video = True
    data = scrapertools.downloadpageWithoutCookies(page_url)
    if "videocontainer" not in data:
        video = False
        url = page_url.replace("/embed/","/f/")
        data = scrapertools.downloadpageWithoutCookies(url)
        # get_match also validates the page structure (raises when absent),
        # so this assignment is kept even though the value itself is unused.
        text_encode = scrapertools.get_match(data,"Click to start Download.*?<script[^>]+>(.*?)</script")
        text_decode = decode(data)
    else:
        text_encode = scrapertools.get_match(data,"<video[^<]+<script[^>]+>(.*?)</script>")
        text_decode = decode(data)
    # Appended so the player/downloader sends the right User-Agent.
    header_down = "|User-Agent="+headers['User-Agent']+"|"
    if video:
        videourl = scrapertools.get_match(text_decode, "(http.*?true)")
        # Follow the redirect to the real media host.
        videourl = scrapertools.get_header_from_response(videourl,header_to_get="location")
        videourl = videourl.replace("https://","http://").replace("?mime=true","")
        extension = videourl[-4:]
        video_urls.append([ extension + " [Openload]", videourl+header_down+extension])
    else:
        videourl = scrapertools.find_single_match(text_decode, '"href",(?:\s|)\'([^\']+)\'')
        videourl = videourl.replace("https://","http://")
        extension = videourl[-4:]
        video_urls.append([ extension + " [Openload]", videourl+header_down+extension])
    for video_url in video_urls:
        logger.info("pelisalacarta.servers.openload %s - %s" % (video_url[0],video_url[1]))
    return video_urls
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Resolve a 180upload page into [label, url] pairs for online playback.

    Login/premium parameters are accepted for interface compatibility but unused.
    """
    logger.info("[one80upload.py] get_video_url(page_url='%s')" % page_url)
    video_urls = []
    data = scrapertools.cache_page(page_url)
    #op=download2&id=yz6lx411cshb&rand=3wqqg6mjw3nxu254dfw4icuxknqfkzdjnbluhty&referer=&method_free=&method_premium=&down_direct=1
    # Hidden form values needed to submit the "download2" step.
    codigo = scrapertools.get_match(data,'<input type="hidden" name="id" value="([^"]+)">[^<]+')
    rand = scrapertools.get_match(data,'<input type="hidden" name="rand" value="([^"]+)">')
    post = "op=download2&id="+codigo+"&rand="+rand+"&referer=&method_free=&method_premium=&down_direct=1"
    data = scrapertools.cache_page( page_url , post=post, headers=[['User-Agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14'],['Referer',page_url]] )
    #logger.info("data="+data)
    # Look for a download file, otherwise for an online video.
    patron = 'href="([^"]+)" target="_parent"><span class="style1">Download'
    matches = re.compile(patron,re.DOTALL).findall(data)
    #scrapertools.printMatches(matches)
    if len(matches)>0:
        # NOTE(review): the download link is only logged, never appended to
        # video_urls — presumably intentional (online playback only); confirm.
        logger.info("[180upload.py] encuentra archivo de descarga="+matches[0])
    else:
        logger.info("[180upload.py] buscando video para ver online")
        patron = "this\.play\('([^']+)'"
        matches = re.compile(patron,re.DOTALL).findall(data)
        if len(matches)>0:
            # Label with "." plus the url's file extension.
            video_urls.append( ["."+matches[0].rsplit('.',1)[1]+" [180upload]",matches[0]])
    for video_url in video_urls:
        logger.info("[180upload.py] %s - %s" % (video_url[0],video_url[1]))
    return video_urls
def play(item): logger.info("[cineblog01.py] play") data = scrapertools.cache_page(item.url) print "##############################################################" if "go.php" in item.url: data = scrapertools.get_match(data, 'window.location.href = "([^"]+)";') print "##### play go.php data ##\n%s\n##" % data elif "/link/" in item.url: from lib.jsbeautifier.unpackers import packer try: data = scrapertools.get_match(data, "(eval.function.p,a,c,k,e,.*?)</script>") data = packer.unpack(data) print "##### play /link/ unpack ##\n%s\n##" % data except IndexError: print "##### The content is yet unpacked" data = scrapertools.get_match(data, 'var link(?:\s)?=(?:\s)?"([^"]+)";') print "##### play /link/ data ##\n%s\n##" % data else: data = item.url print "##### play else data ##\n%s\n##" % data print "##############################################################" itemlist = servertools.find_video_items(data=data) for videoitem in itemlist: videoitem.title = item.show videoitem.fulltitle = item.fulltitle videoitem.thumbnail = item.thumbnail videoitem.channel = __channel__ return itemlist
def info(item):
    """Display title and details of a sinluces entry in a TextBox dialog."""
    logger.info("pelisalacarta.sinluces trailer")
    url=item.url
    data = scrapertools.cachePage(url)
    # Flatten whitespace so the regexes can match across the whole page.
    data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
    title= scrapertools.get_match(data,'<div class=\'post hentry\'.*?<meta content.*?<meta content.*?<meta content.*?<meta content=\'(.*?)\.*? \(')
    # Wrap the title in colour/bold tags and drop the word "Ver".
    title = title.replace(title,"[COLOR aqua][B]"+title+"[/B][/COLOR]")
    title = title.replace("Ver","")
    scrapedplot = scrapertools.get_match(data,'<div class=\'fltl ipost-de\'><div><span><i class=\'icon icon-ok\'>(.*?)</div></div>')
    # Each "</i> value</span>" field value gets its own highlight colour.
    plotformat = re.compile('</i> (.*?)</span>',re.DOTALL).findall(scrapedplot)
    scrapedplot = scrapedplot.replace(scrapedplot,"[COLOR white]"+scrapedplot+"[/COLOR]")
    for plot in plotformat:
        scrapedplot = scrapedplot.replace(plot,"[COLOR skyblue][B]"+plot+"[/B][/COLOR]")
    # Convert or strip the remaining markup and stray characters.
    scrapedplot = scrapedplot.replace("</span>","[CR]")
    scrapedplot = scrapedplot.replace("</i>","")
    scrapedplot = scrapedplot.replace("“","")
    scrapedplot = scrapedplot.replace("<b>","")
    scrapedplot = scrapedplot.replace("</b>","")
    scrapedplot = scrapedplot.replace(" ​​","")
    scrapedplot = scrapedplot.replace("…","")
    scrapedplot = scrapedplot.replace("</div> </div> <div class='clear'>","")
    scrapedplot = scrapedplot.replace("</div><div><span><i class='icon icon-ok'>","[CR]")
    fanart="http://s11.postimg.org/qu66qpjz7/zentorrentsfanart.jpg"
    tbd = TextBox("DialogTextViewer.xml", os.getcwd(), "Default")
    tbd.ask(title, scrapedplot,fanart)
    del tbd
    return
def info(item):
    """Show title and synopsis of a zentorrents entry in a TextBox dialog."""
    logger.info("pelisalacarta.zentorrents info")
    url=item.url
    data = scrapertools.cachePage(url)
    # Flatten whitespace so the regexes can match across the whole page.
    data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
    # Quality releases keep a bracketed tag in <title>; match accordingly.
    if "web" in item.title or "1080" in item.title or "bluray" in item.title or "HDRip" in item.title:
        title= scrapertools.get_match(data,'<title>([^"]+) \[')
    else:
        title= scrapertools.get_match(data,'<title>([^"]+) -')
    title = title.replace(title,"[COLOR aqua][B]"+title+"[/B][/COLOR]")
    plot = scrapertools.get_match(data,'onload="imgLoaded.*?</div>(.*?)<div class="zentorrents_download">')
    plot = plot.replace(plot,"[COLOR orange]"+plot+"[/COLOR]")
    # Repair mojibake: each "Ã¡"-style pair (utf-8 bytes shown as latin-1)
    # is mapped back to its accented character.
    plot = plot.replace("á","á")
    plot = plot.replace("í","í")
    plot = plot.replace("é","é")
    plot = plot.replace("ó","ó")
    plot = plot.replace("ú","ú")
    plot = plot.replace("ñ","ñ")
    plot = plot.replace("Á","Á")
    plot = plot.replace("Í","Í")
    plot = plot.replace("É","É")
    plot = plot.replace("Ó","Ó")
    plot = plot.replace("Ú","Ú")
    plot = plot.replace("Ñ","Ñ")
    plot = plot.replace("<p>","")
    plot = plot.replace("</p>","")
    fanart="http://s11.postimg.org/qu66qpjz7/zentorrentsfanart.jpg"
    tbd = TextBox("DialogTextViewer.xml", os.getcwd(), "Default")
    tbd.ask(title, plot,fanart)
    del tbd
    return
def listadofichas(item): logger.info("[enlacia.py] listadofichas") itemlist = [] ### Listado ### data = scrapertools.cache_page(item.url) listado = scrapertools.get_match(data,'<h2>Listado de fichas</h2>(.*?)</div></div></div>') patron = '<a href="([^"]+)" class="ficha ficha2"><img src="([^"]+)" border="0" alt="([^"]+)"/>' patron+= '.*?<span class="categoria">([^<]+)</span>' matches = re.compile(patron,re.DOTALL).findall(listado) for path,thumbnail,title,categoria in matches: item_extra = item.extra if item.extra == "ver etiquetas": title = "[COLOR blue]"+categoria+":[/COLOR] "+title itemlist.append( Item(channel=__channel__, title=title , action="temporadas", url=SITE+path, thumbnail=SITE+"/"+thumbnail.replace('.jpg','_g.jpg'), fanart="http://pelisalacarta.mimediacenter.info/fanart/enlacia.jpg", show=title, extra=item_extra) ) ### Paginación ### try: pagina_actual = scrapertools.get_match(data, '<span class="pagina pag_actual">([^<]+)</span>') pagina_siguiente = scrapertools.get_match(data, '<a href="([^"]+)" class="pagina pag_sig">[^<]+</a>') pagina_final = scrapertools.get_match(data, 'class="pagina">([^<]+)</a><a href="[^"]+" class="pagina pag_sig">') print "### pagina_siguiente: %s" % pagina_siguiente #if pagina_actual != pagina_final: if pagina_siguiente != "": if "tag/" in pagina_siguiente: pagina_siguiente = "/"+pagina_siguiente itemlist.append( Item(channel=__channel__, title=">> Página siguiente", action="listadofichas", url=SITE+pagina_siguiente, fanart="http://pelisalacarta.mimediacenter.info/fanart/enlacia.jpg", extra=item_extra) ) except: pass return itemlist
def detalle_programa(item,data=""):
    """Fill thumbnail, plot and title of a show item from its detail page.

    The caller may pass a pre-downloaded page in *data* to skip the request.
    """
    logger.info("[shurweb.py] detalle_programa")
    # Download the page unless it was supplied.
    if data=="":
        data = scrapertools.cache_page(item.url)
    # Thumbnail is optional on some pages.
    try:
        item.thumbnail = scrapertools.get_match(data,'<div class="serie_thumb"><img src="([^"]+)"/>')
    except:
        pass
    # Plot: strip language/quality lines and the "Sinopsis:" label.
    plot = scrapertools.get_match(data,'<div class="synopsis clearfix">(.*?)</div>')
    for noise in ("<strong>Idiom[^<]+</strong>[^<]+<br />",
                  "<strong>Calid[^<]+</strong>[^<]+<br />",
                  "Sinopsis\:"):
        plot = re.compile(noise,re.DOTALL).sub("",plot)
    item.plot = scrapertools.htmlclean(plot).strip()
    # Title is optional as well.
    try:
        item.title = scrapertools.get_match(data,'<h1 class="cat_head">([^<]+)</h1>').strip()
    except:
        pass
    return item
def episodios(item):
    """Build a show's episode list, plus a next-page entry when present."""
    logger.info("[watchcartoononline.py] episodios")
    # Refresh show metadata, then fetch the episode table.
    item = detalle_programa(item)
    data = scrapertools.cache_page(item.url)
    data = scrapertools.get_match(data,'<table[^<]+<tr[^<]+<td[^<]+<h3>Episode List</h3></td[^<]+</tr[^<]+<tr[^<]+<td[^<]+<div class="menu"[^<]+<div class="menustyle">(.*?)</ul>')
    itemlist = []
    for scrapedurl, scrapedtitle in re.compile('<li><a href="([^"]+)"[^>]+>([^<]+)</a></li>',re.DOTALL).findall(data):
        title = scrapedtitle.strip()
        url = urlparse.urljoin(item.url,scrapedurl)
        thumbnail = item.thumbnail
        plot = item.plot
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")
        itemlist.append( Item(channel=__channel__, action="findvideos" , title=title , url=url, thumbnail=thumbnail, plot=plot, viewmode="movie_with_plot", fanart="http://pelisalacarta.mimediacenter.info/fanart/watchcartoononline.jpg"))
    # The "Previous Entries" link points at the next page of older episodes.
    try:
        siguiente = scrapertools.get_match(data,'<a href="([^"]+)" >\«\; Previous Entries</a></div>')
        itemlist.append( Item(channel=__channel__, action="episodios", title=">> Pagina Siguiente" , url=urlparse.urljoin(item.url,siguiente) , thumbnail="" , plot="" , folder=True, fanart="http://pelisalacarta.mimediacenter.info/fanart/watchcartoononline.jpg") )
    except:
        pass
    return itemlist
def youtube_search(texto):
    """Search YouTube's gdata v2 feed; return [title, thumbnail, video_id] triples."""
    results = []
    query = texto.replace(" ", "+")
    # Fetch the feed (Spanish results, newest first, 50 max).
    feed = scrapertools.cache_page(
        "https://gdata.youtube.com/feeds/api/videos?q=" + query
        + "&orderby=published&start-index=1&max-results=50&v=2&lr=es"
    )
    for entry in re.compile("<entry(.*?)</entry>", re.DOTALL).findall(feed):
        logger.info("entry=" + entry)
        # Quick-and-dirty XML parsing via regex (feed layout is stable).
        title = scrapertools.get_match(entry, "<titl[^>]+>([^<]+)</title>")
        thumbnail = scrapertools.get_match(entry, "<media\:thumbnail url='([^']+)'")
        try:
            url = scrapertools.get_match(entry, "http\://www.youtube.com/watch\?v\=([0-9A-Za-z_-]{11})")
        except:
            # Newer entries use https links.
            url = scrapertools.get_match(entry, "https\://www.youtube.com/watch\?v\=([0-9A-Za-z_-]{11})")
        results.append([title, thumbnail, url])
    return results
def calidades(item):
    """List the available quality versions (bitrates) of a clip.

    Fix: the loop variable shadowed the builtin ``object``; renamed to
    ``version``. Behaviour is otherwise unchanged.
    """
    logger.info("[" + CHANNELNAME + ".py] calidades")
    # Download the clip page and extract the rtmp connection pieces.
    data = scrapertools.cache_page(item.url)
    if (DEBUG): logger.info("data=" + data)
    ititle = scrapertools.get_match(data, '<h1 id="title">(.*?)</h1>')
    sRtmp = scrapertools.get_match(data, 'streamer: "(rtmp://.*?)/.*?",')
    sApp = scrapertools.get_match(data, 'streamer: "rtmp://.*?/(.*?)",')
    sSwfUrl = MAIN_URL + scrapertools.get_match(data, 'flashplayer: "(.*?)",')
    # Ask the site for the clip's available versions (JSON response).
    clip_id = scrapertools.get_match(item.url, '/(\d+)/')
    if (DEBUG): logger.info('ID:' + clip_id)
    data = scrapertools.cachePage(MAIN_URL + '/clip/ajax/' + clip_id)
    if (DEBUG): logger.info('Json:' + data)
    objects = simplejson.loads(data)
    itemlist = []
    for version in objects['versions']:
        sPlaypath = 'mp4:' + version['src']
        sStreamUrl = sRtmp + ' app=' + sApp + ' swfurl=' + sSwfUrl + ' playpath=' + sPlaypath
        if (DEBUG): logger.info("stream=" + sStreamUrl)
        # One playable entry per quality.
        itemlist.append( Item(channel=CHANNELNAME, title=version['name'].title()+' ('+version['bitrate']+'kbps)', action="play", url=sStreamUrl, thumbnail=item.thumbnail, extra=ititle, folder=False ) )
    return itemlist
def capitulos(item):
    """List the chapters of a programme; fall back to calidades() for clips.

    Fix: the loop variable shadowed the builtin ``object``; renamed to
    ``chapter``. Behaviour is otherwise unchanged.
    """
    logger.info("[" + CHANNELNAME + ".py] capitulos")
    try:
        # The programme id is embedded in the url path.
        programa_id = scrapertools.get_match(item.url, '/(\d+)/')
        if (DEBUG): logger.info('ID:' + programa_id)
        # Request the programme's chapters (JSON response).
        data = scrapertools.cachePage(MAIN_URL + '/chapters/ajax/' + programa_id)
        if (DEBUG): logger.info('Json:' + data)
        objects = simplejson.loads(data, object_hook=to_utf8)
        itemlist = []
        for chapter in objects['chapters']:
            try:
                # Drop a leading "<programme name>: " prefix when present.
                ititle = scrapertools.get_match(chapter['title'], '.*?: (.*)')
            except:
                ititle = chapter['title']
            itemlist.append( Item(channel=CHANNELNAME, title=ititle, action="calidades", url=MAIN_URL+'/clip/'+chapter['id']+'/', thumbnail=item.thumbnail, folder=True ) )
        return itemlist
    except:
        # No chapter list means the url points at a single clip.
        return calidades(item)
def get_video_detail(item):
    """Populate *item*'s title and plot from the page's meta tags."""
    page = scrapertools.cache_page(item.url)
    meta_title = scrapertools.get_match(page,'<meta name="title" content="([^"]+)" />')
    meta_description = scrapertools.get_match(page,'<meta name="description" content="(.*?)" rel="canonical" />')
    item.title = meta_title
    item.plot = meta_description
    return item
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Resolve a videomega page into [label, url] pairs.

    Fix: the page was downloaded twice and the first response immediately
    overwritten; the redundant request has been removed. Login/premium
    parameters are unused.
    """
    logger.info("pelisalacarta.videomega get_video_url(page_url='%s')" % page_url)
    video_urls = []
    # Download the page with a desktop User-Agent.
    headers = [ ['User-Agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'] ]
    data = scrapertools.cache_page(page_url , headers = headers)
    #document.write(unescape("%3c%73%63%72%69%70%74%20%74e"));
    # The player markup is url-escaped inside a document.write call.
    location = scrapertools.get_match(data,'document.write\(unescape\("([^"]+)"\)\)')
    logger.info("pelisalacarta.videomega location="+location)
    location = urllib.unquote(location)
    logger.info("pelisalacarta.videomega location="+location)
    location = scrapertools.get_match(location,'file\: "([^"]+)"')
    logger.info("pelisalacarta.videomega location="+location)
    # "&start=0" requests playback from the beginning.
    location = location+"&start=0"
    logger.info("pelisalacarta.videomega location="+location)
    #http://st100.u1.videomega.tv/v/bf38b3577874d7ce424c1c87d6d1b8d9.mp4?st=kuiAz1XJ7XFzOCnaleGVxA&start=0
    #http://st100.u1.videomega.tv/v/bf38b3577874d7ce424c1c87d6d1b8d9.mp4?st=kuiAz1XJ7XFzOCnaleGVxA
    video_urls.append( [ scrapertools.get_filename_from_url(location)[-4:]+" [videomega]" , location ] )
    for video_url in video_urls:
        logger.info("pelisalacarta.videomega %s - %s" % (video_url[0],video_url[1]))
    return video_urls
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Resolve a documentary.es page by delegating to the embedded server.

    Fix: the server module was loaded by exec-ing dynamically built source;
    it now uses ``__import__``, which is equivalent for these single-level
    module names and avoids executing assembled code strings.
    """
    logger.info("documentary get_video_url(page_url='%s')" % page_url)
    video_urls = []
    data = scrapertools.cache_page(page_url)
    #var videoVars = {"videoNonceVar":"94767795ce","post_id":"2835"};
    videoNonceVar = scrapertools.get_match(data,'var\s*videoVars\s*\=\s*\{"videoNonceVar"\:"([^"]+)","post_id"\:"\d+"')
    post_id = scrapertools.get_match(data,'var\s*videoVars\s*\=\s*\{"videoNonceVar"\:"[^"]+","post_id"\:"(\d+)"')
    #http://documentary.es/wp-admin/admin-ajax.php?postId=2835&videoNonce=94767795ce&action=getVideo&_=1385893877929
    import random
    # The trailing "_" is a cache-buster, mimicking the site's own requests.
    url = "http://documentary.es/wp-admin/admin-ajax.php?postId="+post_id+"&videoNonce="+videoNonceVar+"&action=getVideo&_="+str(random.randint(10000000000,9999999999999))
    data = scrapertools.cache_page(url)
    #{"videoUrl":"http:\/\/www.dailymotion.com\/embed\/video\/xioggh?autoplay=1&defaultSubtitle=es"}
    data = data.replace("\\","")
    import servertools
    real_urls = servertools.find_video_items(data=data)
    if len(real_urls)>0:
        # Delegate to the last detected server's own resolver.
        item = real_urls[len(real_urls)-1]
        servermodule = __import__(item.server)
        video_urls = servermodule.get_video_url(item.url)
    for video_url in video_urls:
        logger.info("documentary %s - %s" % (video_url[0],video_url[1]))
    return video_urls
def info(item):
    """Show title, synopsis and technical info of a movie in a TextBox1 dialog."""
    logger.info("pelisalacarta.zentorrents info")
    url=item.url
    data = scrapertools.cachePage(url)
    # Flatten whitespace so the regexes can match across the whole page.
    data = re.sub(r"\n|\r|\t|\s{2}| ","",data)
    title= scrapertools.get_match(data,'<title>Ver Película(.*?) \(')
    title = title.replace(title,"[COLOR orange][B]"+title+"[/B][/COLOR]")
    plot = scrapertools.get_match(data,'<span class="clms">Sinopsis: </span>(.*?)</div></div></td>')
    plot = plot.replace(plot,"[COLOR white][B]"+plot+"[/B][/COLOR]")
    # Colour every "Label:" prefix found in the info block.
    scrapedinfo= scrapertools.get_match(data,'<div class="infopeli">(.*?)<table class="ventana2" border="0">')
    infoformat = re.compile('(.*?:) .*?<br/>',re.DOTALL).findall(scrapedinfo)
    for info in infoformat:
        scrapedinfo= scrapedinfo.replace(info,"[COLOR orange][B]"+info+"[/B][/COLOR]")
        # NOTE(review): this assignment is overwritten right after the loop,
        # so it has no visible effect — presumably leftover code; confirm.
        info= scrapedinfo.replace(info,"[COLOR white][B]"+info+"[/B][/COLOR]")
    info = scrapedinfo
    # Strip markup that must not reach the dialog text.
    info = re.sub(r'<a href=".*?">|<span>|</a>|</div></td></tr></table>|<span class=".*?".*?>','',scrapedinfo)
    info = info.replace("</span><br/>"," ")
    info = info.replace("</span>"," ")
    info = info.replace("<br/>"," ")
    info = info.replace("</B>","")
    info = info.replace("Calificación IMDb:","[COLOR orange][B]Calificación IMDb:[/B][/COLOR]")
    info = info.replace("Calificación IMDb:[/B]","Calificación IMDb:")
    info = info.replace("Premios:","[COLOR orange][B]Premios:[/B][/COLOR]")
    # show/extra carry fanart and thumbnail — presumably set by the caller;
    # confirm against the listing code.
    foto = item.show
    photo= item.extra
    ventana2 = TextBox1(title=title, plot=plot, info=info, thumbnail=photo, fanart=foto)
    ventana2.doModal()
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Resolve a streamcloud page; returns [] when the adblock wall triggers."""
    logger.info("url="+page_url)
    # First request with a desktop User-Agent.
    ua = [['User-Agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14']]
    data = scrapertools.cache_page( page_url , headers=ua )
    try:
        # The link may already be present on the first page.
        media_url = scrapertools.get_match( data , 'file\: "([^"]+)"' )
    except:
        # Otherwise re-submit the wait form, switched to the "download2" step.
        form_fields = scrapertools.find_multiple_matches(data, '<input.*?name="([^"]+)".*?value="([^"]*)">')
        post = ""
        for field_name, field_value in form_fields:
            post += field_name + "=" + field_value + "&"
        post = post.replace("op=download1","op=download2")
        data = scrapertools.cache_page( page_url , post=post)
        if 'id="justanotice"' in data:
            logger.info("data="+data)
            logger.info("Ha saltado el detector de adblock")
            return []
        # Extract the url from the re-submitted page.
        media_url = scrapertools.get_match( data , 'file\: "([^"]+)"' )
    video_urls = [ [ scrapertools.get_filename_from_url(media_url)[-4:]+" [streamcloud]", media_url ] ]
    for video_url in video_urls:
        logger.info("%s - %s" % (video_url[0],video_url[1]))
    return video_urls
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Return the mp4 stream of a xiptv page, or an rtmp fallback."""
    logger.info("[xiptv.py] get_video_url(page_url='%s')" % page_url)
    video_urls = []
    data = scrapertools.cache_page(page_url)
    try:
        # Preferred: direct progressive mp4.
        mp4 = scrapertools.get_match(data,'src="([^"]+)" type="video/mp4"')
        video_urls.append( [ "mp4 [xiptv]" , mp4 ] )
    except:
        # Fallback: assemble the rtmp url from the JSON fields, e.g.
        # "url":"legacy/h264/20572","connection_url":"rtmp://cdn.xiptv.cat/vod"
        path = scrapertools.get_match(data,'"url"\:"([^"]+)"')
        logger.info("url="+path)
        connection_url = scrapertools.get_match(data,'"connection_url"\:"([^"]+)"')
        logger.info("connection_url="+connection_url)
        # Avoid a double slash when either side already provides one.
        separador = "" if path.startswith("/") or connection_url.endswith("/") else "/"
        rtmp = connection_url+separador+path+" playpath="+path
        video_urls.append( [ "rtmp [xiptv]" , rtmp ] )
    for video_url in video_urls:
        logger.info("[xiptv.py] %s - %s" % (video_url[0],video_url[1]))
    return video_urls
def episodios(item):
    """Build the per-season episode list of a guardaserie show."""
    logger.info("streamondemand.channels.guardaserie episodios")
    itemlist = []
    data = scrapertools.cache_page( item.url )
    serie_id = scrapertools.get_match( data, '/?id=(\d+)" rel="nofollow"' )
    data = scrapertools.get_match( data, '<div id="episode">(.*?)</div>' )
    for scrapedseason, scrapedepisodes in re.compile( '<select name="episode" id="(\d+)">(.*?)</select>', re.DOTALL ).findall( data ):
        for scrapedepisode in re.compile( '<option value="(\d+)"', re.DOTALL ).findall( scrapedepisodes ):
            # Season/episode indexes are 0-based in the markup; display 1-based
            # with a zero-padded episode number.
            season = str( int( scrapedseason ) + 1 )
            episode = str( int( scrapedepisode ) + 1 ).zfill(2)
            title = season + "x" + episode + " - " + item.title
            ## 'findvideos' receives a url in three parts separated by "?":
            ## [host+path]?[arguments]?[Referer]
            url = host + "/wp-admin/admin-ajax.php?action=get_episode&id=" + serie_id + "&season=" + scrapedseason + "&episode=" + scrapedepisode + "?" + item.url
            itemlist.append( Item( channel=__channel__, action="findvideos", title= title, url=url, fulltitle=item.title, show=item.title, thumbnail=item.thumbnail ) )
    # Library helpers (add to Kodi library / download everything).
    if config.get_library_support():
        itemlist.append( Item(channel=__channel__, title="[COLOR azure]Aggiungi [/COLOR]" + item.title + "[COLOR azure] alla libreria di Kodi[/COLOR]", url=item.url, action="add_serie_to_library", extra="episodios", show=item.show) )
        itemlist.append( Item(channel=__channel__, title="[COLOR azure]Scarica tutti gli episodi della serie[/COLOR]", url=item.url, action="download_all_episodes", extra="episodios", show=item.show) )
    return itemlist
def get_long_url( short_url ):
    """Resolve an ad-shortened url through linkdecrypter.com.

    Fix: ``location`` was only bound when the result page matched, so
    exhausting the retries raised NameError at the final return; it is now
    initialised to "" (returned when decryption fails).
    """
    logger.info("servers.adfly get_long_url(short_url='%s')" % short_url)
    request_headers = []
    request_headers.append(["User-Agent","Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12"])
    request_headers.append(["Referer","http://linkdecrypter.com"])
    post=urllib.urlencode({"pro_links":short_url,"modo_links":"text","modo_recursivo":"on","link_cache":"on"})
    url = "http://linkdecrypter.com/"
    # Workaround: python's cookie handling misses the phpsessid header, so
    # the session cookie is extracted and re-sent by hand below.
    body,response_headers = scrapertools.read_body_and_headers(url,post=post,headers=request_headers)
    location = ""
    n = 1
    while True:
        # Forward any Set-Cookie values as a Cookie request header.
        for name,value in response_headers:
            if name=="set-cookie":
                logger.info("Set-Cookie: "+value)
                cookie_name = scrapertools.get_match(value,'(.*?)\=.*?\;')
                cookie_value = scrapertools.get_match(value,'.*?\=(.*?)\;')
                request_headers.append(["Cookie",cookie_name+"="+cookie_value])
        body,response_headers = scrapertools.read_body_and_headers(url,headers=request_headers)
        logger.info("body="+body)
        try:
            location = scrapertools.get_match(body,'<textarea.*?class="caja_des">([^<]+)</textarea>')
            logger.info("location="+location)
            break
        except:
            # Result not ready yet; retry up to 3 times.
            n = n + 1
            if n>3:
                break
    return location
def novedades_episodios(item):
    """List the latest episodes published on animeid."""
    logger.info("pelisalacarta.channels.animeid novedades_episodios")
    # The "lastcap" section holds one <article> per newly published episode,
    # each with link, header title, cover image and a synopsis paragraph.
    data = scrapertools.cache_page(item.url)
    data = scrapertools.get_match(data,'<section class="lastcap">(.*?)</section>')
    patronvideos = '<a href="([^"]+)">[^<]+<header>([^<]+)</header>[^<]+<figure><img[^>]+src="([^"]+)"[\s\S]+?<p>(.+?)</p>'
    itemlist = []
    for url,title,thumbnail,plot in re.compile(patronvideos,re.DOTALL).findall(data):
        scrapedtitle = scrapertools.entityunescape(title)
        scrapedurl = urlparse.urljoin(item.url,url)
        if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+thumbnail+"]")
        # Titles look like "Show name #12": split off the episode number and
        # keep the show name as the content title.
        episodio = scrapertools.get_match(scrapedtitle,'\s+#(\d+)$')
        contentTitle = scrapedtitle.replace('#'+ episodio, '')
        itemlist.append( Item(channel=item.channel, action="findvideos" , title=scrapedtitle , url=scrapedurl, thumbnail=thumbnail, plot=plot, hasContentDetails="true", contentSeason=1, contentTitle=contentTitle, contentEpisodeNumber=int(episodio)))
    return itemlist
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Resolve a nowdownload page to a direct download url.

    Returns a single-element list [url]. Login/premium parameters are unused.
    """
    logger.info("[nowdownload.py] get_video_url (page_url='%s')" % page_url)
    # Example of the link scraped in the simple case:
    ''' <a href="http://f02.nowdownload.co/dl/91efaa9ec507ef4de023cd62bb9a0fe2/50ab76ac/6711c9c90ebf3_family.guy.s11e02.italian.subbed.hdtv.xvid_gannico.avi" class="btn btn-danger"><i class="icon-white icon-download"></i> Download Now</a> '''
    data = scrapertools.cache_page( page_url )
    logger.debug("[nowdownload.py] data:" + data)
    try:
        # Direct link already present on the page.
        url = scrapertools.get_match(data,'<a href="([^"]*)" class="btn btn-danger"><i class="icon-white icon-download"></i> Download Now</a>')
    except:
        #$.get("/api/token.php?token=7e1ab09df2775dbea02506e1a2651883");
        token = scrapertools.get_match(data,'(/api/token.php\?token=[^"]*)')
        logger.debug("[nowdownload.py] token:" + token)
        # Hitting the token endpoint; its response body is never used.
        d= scrapertools.cache_page( "http://www.nowdownload.co"+ token )
        # NOTE(review): this searches the ORIGINAL page ("data"), not the token
        # response ("d") — presumably the expiryText link is already embedded
        # in the first page; confirm, otherwise it should match against "d".
        url = scrapertools.get_match(data,'expiryText: \'<a class="btn btn-danger" href="([^"]*)')
        logger.debug("[nowdownload.py] url_1:" + url)
        data = scrapertools.cache_page("http://www.nowdownload.co" + url )
        logger.debug("[nowdownload.py] data:" + data)
        #<a href="http://f03.nowdownload.co/dl/8ec5470153bb7a2177847ca7e1638389/50ab71b3/f92882f4d33a5_squadra.antimafia_palermo.oggi.4x01.episodio.01.ita.satrip.xvid_upz.avi" class="btn btn-success">Click here to download !</a>
        url = scrapertools.get_match(data,'<a href="([^"]*)" class="btn btn-success">Click here to download !</a>')
    logger.debug("[nowdownload.py] url_final:" + url)
    video_urls = [url]
    return video_urls
def detalle_programa(item,data=""):
    """Complete thumbnail, plot and title of a seriesyonkis show item.

    Accepts an optional pre-downloaded page in *data*. Each field is scraped
    independently; failures leave the item's existing value untouched.
    """
    #http://www.seriesyonkis.sx/serie/gungrave
    #http://www.seriesyonkis.sx/ficha/serie/gungrave
    url = item.url
    # The "ficha" variant of the url carries the metadata.
    if "seriesyonkis.com/serie/" in url:
        url = url.replace("seriesyonkis.com/serie/","seriesyonkis.com/ficha/serie/")
    # Download the page unless the caller supplied it.
    if data=="":
        data = scrapertools.cache_page(url)
    # Thumbnail (optional).
    try:
        item.thumbnail = scrapertools.get_match(data,'<div class="profile-info"[^<]+<a[^<]+<img src="([^"]+)"')
    except:
        pass
    # Plot (optional), stripped of HTML.
    try:
        item.plot = scrapertools.htmlclean( scrapertools.get_match(data,'<div class="details">(.*?)</div>') )
    except:
        pass
    logger.info("plot="+item.plot)
    # Title (optional).
    try:
        item.title = scrapertools.get_match(data,'<h1 class="underline"[^>]+>([^<]+)</h1>').strip()
    except:
        pass
    return item
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Return the vimple.ru media url with the session cookie appended."""
    logger.info("[vimple.ru] get_video_url(page_url=%s)" % page_url)
    # Flatten the page so the JSON blob can be matched in one pass.
    page = re.sub( r'\t|\n|\r|\s', '', scrapertools.cache_page(page_url) )
    media_url = scrapertools.get_match( page, '"video"[^,]+,"url":"([^"]+)"' ).replace('\\','')
    # The media request needs the UniversalUserID cookie from the session.
    cookie = scrapertools.get_match( config.get_cookie_data(), '.vimple.ru.*?(UniversalUserID\t[a-f0-9]+)' ).replace('\t', '=')
    media_url += "|Cookie=" + cookie
    video_urls = [ [ scrapertools.get_filename_from_url(media_url)[-4:]+" [vimple.ru]", media_url ] ]
    for video_url in video_urls:
        logger.info("streamondemand.servers.vimpleru %s - %s" % (video_url[0],video_url[1]))
    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Derive the direct MP4 URL of a boing.es episode from its Brightcove
    player parameters and the thumbnail's CDN path."""
    logger.info("servers.tvalacarta get_video_url(page_url='%s')" % page_url)

    # Download the page, e.g. http://www.boing.es/serie/video/la-verdad
    data = scrapertools.cache_page(page_url)

    player_id = scrapertools.get_match(data, '<param name="playerID" value="([^"]+)"')
    logger.info("servers.tvalacarta player_id=" + player_id)

    publisher_id = scrapertools.get_match(data, '<param name="publisherID" value="([^"]+)"')
    logger.info("servers.tvalacarta publisher_id=" + publisher_id)

    video_id = scrapertools.get_match(data, '<param name="videoId" value="([^"]+)"')
    logger.info("servers.tvalacarta video_id=" + video_id)

    # The video lives on the same CDN path as the thumbnail:
    # http://i.cdn.turner.com/.../thumbs/X.jpg -> http://ht.cdn.turner.com/.../videos/X.mp4
    thumbnail = scrapertools.get_match(data, '(http\://i.cdn.turner.com/.*?.jpg)')
    logger.info("servers.tvalacarta thumbnail=" + thumbnail)

    video = thumbnail.replace("i.cdn", "ht.cdn").replace("/thumbs/", "/videos/").replace(".jpg", ".mp4")
    logger.info("servers.tvalacarta video=" + video)

    url = video + "?videoId=" + video_id + "&lineUpId=&pubId=" + publisher_id + "&playerId=" + player_id + "&affiliateId="
    logger.info("url=" + url)

    video_urls = [["(mp4) [boing]", url]]
    for video_url in video_urls:
        logger.info("servers.tvalacarta %s - %s" % (video_url[0], video_url[1]))

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Return the media URL(s) of a streamin.to page, offering both the
    .flv and .mp4 container variants when one of them is exposed."""
    logger.info("pelisalacarta.servers.streaminto url=" + page_url)
    data = re.sub(r'\n|\t|\s+', '', scrapertools.cache_page(page_url))
    video_urls = []

    try:
        # Plain page: the jwplayer setup carries the file URL directly.
        media_url = scrapertools.get_match(data, """.setup\({file:"([^"]+)",image""")
    except:
        # Packed page: unpack the p,a,c,k,e,d javascript first.
        js_data = scrapertools.find_single_match(data, "(eval.function.p,a,c,k,e.*?)</script>")
        js_data = unPack(js_data)
        media_url = scrapertools.get_match(js_data, """.setup\({file:"([^"]+)",image""")

    # Offer the sibling container first (flv for mp4 and vice versa)...
    if media_url.endswith("v.mp4"):
        flv_url = re.sub(r'/v.mp4$', '/v.flv', media_url)
        video_urls.append([scrapertools.get_filename_from_url(flv_url)[-4:] + " [streaminto]", flv_url])
    if media_url.endswith("v.flv"):
        mp4_url = re.sub(r'/v.flv$', '/v.mp4', media_url)
        video_urls.append([scrapertools.get_filename_from_url(mp4_url)[-4:] + " [streaminto]", mp4_url])
    # ...then the URL actually found on the page.
    video_urls.append([scrapertools.get_filename_from_url(media_url)[-4:] + " [streaminto]", media_url])

    for video_url in video_urls:
        logger.info("pelisalacarta.servers.streaminto %s - %s" % (video_url[0], video_url[1]))

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Extract both the HTTP and the RTMP stream URLs from a gamovideo page."""
    logger.info("pelisalacarta.servers.gamovideo get_video_url(page_url='%s')" % page_url)
    data = scrapertools.cache_page(page_url)

    # Unpack the obfuscated p,a,c,k,e,d javascript when present.
    packer = scrapertools.find_single_match(data, "<script type='text/javascript'>(eval.function.p,a,c,k,e,d..*?)</script>")
    unpacker = jsunpack.unpack(data) if packer != "" else ""
    if unpacker != "":
        data = unpacker
    data = re.sub(r'\n|\t|\s+', '', data)

    # HTTP stream: the host comes from the poster image; the file entry may
    # be relative to it.
    host = scrapertools.get_match(data, '\[\{image:"(http://[^/]+/)')
    mediaurl = scrapertools.get_match(data, ',\{file:"([^"]+)"')
    if not mediaurl.startswith(host):
        mediaurl = host + mediaurl

    # RTMP stream: split url/playpath and point at the player swf.
    rtmp_url = scrapertools.get_match(data, 'file:"(rtmp[^"]+)"')
    playpath = scrapertools.get_match(rtmp_url, 'vod\?h=[\w]+/(.*$)')
    rtmp_url = rtmp_url.split(playpath)[0] + " playpath=" + playpath + " swfUrl=http://gamovideo.com/player61/jwplayer.flash.swf"

    video_urls = [
        [scrapertools.get_filename_from_url(mediaurl)[-4:] + " [gamovideo]", mediaurl],
        ["RTMP [gamovideo]", rtmp_url],
    ]
    for video_url in video_urls:
        logger.info("pelisalacarta.servers.gamovideo %s - %s" % (video_url[0], video_url[1]))

    return video_urls
def novedades_documentales(item):
    """List newly added documentaries (shurweb): isolate the 'docus' tab
    block and delegate the actual parsing to peliculas()."""
    logger.info("[shurweb.py] novedades_documentales")
    html = scrapertools.cachePage(item.url)
    html = scrapertools.unescape(html)
    html = scrapertools.get_match(html, '<div class="tab-pane fade" id="docus">(.*?)<div class="panel panel-primary">')
    return peliculas(item, data=html)
def episodios(item):
    """Build the episode list of a boing.es series.

    The /serie/ URL is rewritten to /videos/, the 'Contenedor100' block is
    isolated, and each episode's (url, thumbnail, title) is scraped from it.
    Two markup layouts exist, so a fallback pattern is tried when the first
    one yields nothing.
    """
    logger.info("[boing.py] episodios")

    # Download the page; the listing lives under /videos/, e.g.
    # http://www.boing.es/serie/hora-de-aventuras
    # http://www.boing.es/videos/hora-de-aventuras
    data = scrapertools.cachePage(item.url.replace("/serie/", "/videos/"))
    bloque = scrapertools.get_match(data, '<div class="Contenedor100">(.*?)<\!-- \/Contenedor100 -->', 1)
    logger.info(str(bloque))

    # First layout: <img style=... height=... width=... src=...> followed by
    # a <div class="title"> anchor with the episode name.
    patron = '<div class="pic3"[^<]+'
    patron += '<a href="([^"]+)"[^<]+<img style="[^"]+" height="\d+" width="\d+" src="([^"]+)".*?'
    patron += '<div class="title"><a[^>]+>([^<]+)</a>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)
    #if DEBUG: scrapertools.printMatches(matches)

    # Fallback layout (imagecache markup): a plain <img src=...>.
    if len(matches) == 0:
        patron = '<div class="pic3"[^<]+'
        patron += '<a href="([^"]+)"[^<]+<img src="([^"]+)".*?'
        patron += '<div class="title"><a[^>]+>([^<]+)</a>'
        matches = re.compile(patron, re.DOTALL).findall(bloque)
        scrapertools.printMatches(matches)
        #if DEBUG: scrapertools.printMatches(matches)

    itemlist = []
    for scrapedurl, scrapedthumbnail, scrapedtitle in matches:
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        url = urlparse.urljoin(item.url, scrapedurl)
        itemlist.append(
            Item(channel=CHANNELNAME, title=scrapedtitle, action="play", server="boing",
                 url=url, thumbnail=scrapedthumbnail, page=url, show=item.show, folder=False))

    return itemlist
def findvideos(item):
    """Collect the playable links of a movie page.

    Each mirror lives in a tab whose body is rendered by a small javascript
    helper defined in /Js/videod.js, so that script is downloaded too in
    order to translate (helper name, video id) into a real embed URL.
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"<!--.*?-->", "", data)
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)

    # Tab headers: (anchor, language css class, quality) triples.
    bloque_tab = scrapertools.find_single_match(
        data, '<div id="verpelicula">(.*?)<div class="tab_container">')
    patron = '<li><a href="#([^<]+)"><span class="re">\d<\/span><span class="([^<]+)"><\/span><span class=.*?>([^<]+)<\/span>'
    check = re.compile(patron, re.DOTALL).findall(bloque_tab)

    # Tab bodies: (tab id, js helper name, video id). Some pages serve the
    # script as text/rocketscript, others as a plain <script>.
    servers_data_list = []
    patron = '<div id="(tab\d+)" class="tab_content"><script type="text/rocketscript">(\w+)\("([^"]+)"\)</script></div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) == 0:
        patron = '<div id="(tab\d+)" class="tab_content"><script>(\w+)\("([^"]+)"\)</script></div>'
        matches = re.compile(patron, re.DOTALL).findall(data)

    for check_tab, server, id in matches:
        if check_tab in str(check):
            idioma, calidad = scrapertools.find_single_match(
                str(check), "" + check_tab + "', '(.*?)', '(.*?)'")
            servers_data_list.append([server, id, idioma, calidad])

    # Map each js helper to the embed-URL template it generates.
    url = host + "/Js/videod.js"
    data = httptools.downloadpage(url).data
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    data = data.replace(
        '<iframe width="100%" height="400" scrolling="no" frameborder="0"', '')
    patron = 'function (\w+)\(id\).*?'
    patron += 'data-src="([^"]+)"'
    matches = scrapertools.find_multiple_matches(data, patron)

    for server, url in matches:
        for enlace, id, idioma, calidad in servers_data_list:
            if server == enlace:
                # Normalise the template and append the video id.
                video_url = re.sub(r"embed\-|\-.*?x.*?\.html|u\'|\'\(", "", str(url))
                video_url = re.sub(r"'\+codigo\+'", "", video_url)
                video_url = video_url.replace('embed//', 'embed/')
                video_url = video_url + id
                if "goo.gl" in video_url:
                    # Shortened links must be resolved; skip them on failure.
                    try:
                        from unshortenit import unshorten
                        url = unshorten(video_url)
                        video_url = scrapertools.get_match(str(url), "u'([^']+)'")
                    except:
                        continue
                # %s is filled in later with the server name (see lambda below).
                title = "Ver en: %s [" + idioma + "][" + calidad + "]"
                itemlist.append(
                    item.clone(title=title, url=video_url, action="play",
                               thumbnail=item.category, language=idioma, quality=calidad))

    tmdb.set_infoLabels(itemlist)
    itemlist = servertools.get_servers_itemlist(
        itemlist, lambda i: i.title % i.server.capitalize())
    if __comprueba_enlaces__:
        itemlist = servertools.check_list_links(itemlist, __comprueba_enlaces_num__)

    # Offer adding the movie to the local video library.
    if item.library and config.get_videolibrary_support() and len(itemlist) > 0:
        infoLabels = {'tmdb_id': item.infoLabels['tmdb_id'], 'title': item.fulltitle}
        itemlist.append(
            Item(channel=item.channel, title="Añadir esta película a la videoteca",
                 action="add_pelicula_to_library", url=item.url, infoLabels=infoLabels,
                 text_color="0xFFff6666", thumbnail='http://imgur.com/0gyYvuC.png'))

    return itemlist
def episodios(item):
    """List the episodes of an itafilm.tv series, tagging each season as
    ITA or SUB ITA and bundling every mirror URL ('|'-separated)."""
    logger.info("[itafilmtv.py] episodios")
    itemlist = []

    # Download the page.
    data = scrapertools.cache_page(item.url, headers=headers)

    # Series plot, shared by every episode item.
    patron = '<div class="main-news-text main-news-text2">(.*?)</div>'
    plot = scrapertools.find_single_match(data, patron)
    plot = scrapertools.htmlclean(plot).strip()

    # Episodes: "1x01 ... links ..." delimited by ad anchors, with a simpler
    # fallback pattern when the ad markup is absent.
    patron = '<br />(\d+x\d+).*?href="//ads.ad-center.com/[^<]+</a>(.*?)<a href="//ads.ad-center.com/[^<]+</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) == 0:
        patron = ' />(\d+x\d+)(.*?)<br'
        matches = re.compile(patron, re.DOTALL).findall(data)

    # One language tag (sub/ITA) per season header, consumed in order.
    patron = '<b>.*?STAGIONE.*?(sub|ITA).*?</b>'
    lang = re.compile(patron, re.IGNORECASE).findall(data)
    lang_index = 0

    for scrapedepisode, scrapedurls in matches:
        # Episode 1 marks a new season: advance to its language tag.
        if int(scrapertools.get_match(scrapedepisode, '\d+x(\d+)')) == 1:
            lang_title = lang[lang_index]
            if lang_title.lower() == "sub":
                lang_title += " ITA"
            lang_index += 1
        title = scrapedepisode + " (" + lang_title + ")"

        scrapedurls = scrapedurls.replace("playreplay", "moevideo")
        matches_urls = re.compile('href="([^"]+)"', re.DOTALL).findall(scrapedurls)
        urls = ""
        for url in matches_urls:
            urls += url + "|"
        if urls != "":
            itemlist.append(
                Item(channel=__channel__, action="findvideos", title=title, url=urls[:-1],
                     thumbnail=item.thumbnail, plot=plot, fulltitle=item.fulltitle,
                     show=item.show))

    if config.get_library_support() and len(itemlist) != 0:
        itemlist.append(
            Item(channel=__channel__, title=item.show, url=item.url,
                 action="add_serie_to_library", extra="episodios", show=item.show))
        itemlist.append(
            Item(channel=item.channel, title="Scarica tutti gli episodi della serie",
                 url=item.url, action="download_all_episodes", extra="episodios",
                 show=item.show))

    return itemlist
def episodios(item):
    """List the episodes of an italiaserie title, split per season so each
    episode carries the right (ITA / SUB ITA) language tag."""

    def load_episodios(html, item, itemlist, lang_title):
        # Each match is a run of <a ...>mirror</a> markup for one episode.
        patron = '((?:.*?<a[^h]+href="[^"]+"[^>]+>[^<][^<]+<(?:b|\/)[^>]+>)+)'
        matches = re.compile(patron).findall(html)
        for data in matches:
            # The episode label precedes the first <a>.
            scrapedtitle = data.split('<a ')[0]
            scrapedtitle = re.sub(r'<[^>]*>', '', scrapedtitle).strip()
            if scrapedtitle != 'Categorie':
                scrapedtitle = scrapedtitle.replace('×', 'x')
                scrapedtitle = scrapedtitle.replace('×', 'x')
                itemlist.append(
                    Item(channel=__channel__, action="findvideos",
                         title="[COLOR azure]%s[/COLOR]" % (scrapedtitle + " (" + lang_title + ")"),
                         url=data, thumbnail=item.thumbnail, extra=item.extra,
                         fulltitle=scrapedtitle + " (" + lang_title + ")" + ' - ' + item.show,
                         show=item.show))

    logger.info("[italiaserie.py] episodios")
    itemlist = []

    # Download and isolate the spoiler block containing the season lists.
    data = scrapertools.cache_page(item.url)
    data = scrapertools.decodeHtmlentities(data)
    #data = scrapertools.get_match(data, '<span class="rating">(.*?)<div class="clear">')
    data = scrapertools.get_match(
        data, '<div class="su-spoiler-title">(.*?)<span style="color: #e0e0e0;">')

    # Find every "Stagione ... ITA" header; remember its language tag and
    # where its episode markup starts.
    lang_titles = []
    starts = []
    patron = r"Stagione.*?ITA"
    matches = re.compile(patron, re.IGNORECASE).finditer(data)
    for match in matches:
        season_title = match.group()
        if season_title != '':
            lang_titles.append('SUB ITA' if 'SUB' in season_title.upper() else 'ITA')
            starts.append(match.end())

    # Parse each season slice with its own language tag.
    i = 1
    len_lang_titles = len(lang_titles)
    while i <= len_lang_titles:
        inizio = starts[i - 1]
        fine = starts[i] if i < len_lang_titles else -1
        html = data[inizio:fine]
        lang_title = lang_titles[i - 1]
        load_episodios(html, item, itemlist, lang_title)
        i += 1

    if config.get_library_support() and len(itemlist) != 0:
        itemlist.append(
            Item(channel=__channel__, title="Aggiungi alla libreria " + item.title,
                 url=item.url, action="add_serie_to_library",
                 extra="episodios" + "###" + item.extra, show=item.show))
        itemlist.append(
            Item(channel=__channel__, title="Scarica tutti gli episodi della serie",
                 url=item.url, action="download_all_episodes",
                 extra="episodios" + "###" + item.extra, show=item.show))

    return itemlist
def peliculas(item):
    """List the movies of a cineblog01 index page and append a 'next page'
    item when the wp_page_numbers paginator advertises one."""
    logger.info("[streamondemand-pureita cineblog01] peliculas")
    itemlist = []

    if item.url == "":
        item.url = sito

    # Download the page.
    data = httptools.downloadpage(item.url, headers=headers).data

    # Entries: thumbnail, url, title, then two plot fragments.
    patronvideos = '<div class="span4".*?<a.*?<p><img src="([^"]+)".*?'
    patronvideos += '<div class="span8">.*?<a href="([^"]+)"> <h1>([^"]+)</h1></a>.*?'
    patronvideos += '<strong>([^<]*)</strong>.*?<br />([^<+]+)'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = urlparse.urljoin(item.url, match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))
        scrapedthumbnail = scrapedthumbnail.replace(" ", "%20")
        scrapedplot = scrapertools.unescape(
            "[COLOR orange]" + match.group(4) + "[/COLOR]\n" + match.group(5).strip())
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        # Normalise mis-encoded punctuation in the title.
        scrapedtitle = scrapedtitle.replace("–", "-").replace(
            "×", "x").replace("[Sub-ITA]", "(Sub Ita)")
        scrapedtitle = scrapedtitle.replace("/", " - ").replace(
            "’", "'").replace("…", "...").replace("ò", "o")
        itemlist.append(
            Item(channel=__channel__, action="findvid_film", contentType="movie",
                 fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 extra=item.extra, viewmode="movie_with_plot"))

    # Next page mark
    try:
        bloque = scrapertools.get_match(data, "<div id='wp_page_numbers'>(.*?)</div>")
        patronvideos = '<a href="([^"]+)">></a></li>'
        matches = re.compile(patronvideos, re.DOTALL).findall(bloque)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            itemlist.append(
                Item(channel=__channel__, action="peliculas",
                     title="[COLOR orange]Successivi >>[/COLOR]", url=matches[0],
                     thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png",
                     extra=item.extra, plot=""))
    except:
        pass

    return itemlist
def play(item):
    """Resolve a cineblog01 link into playable video items, following the
    site's /film/, /goto/, go.php and /link/ wrappers plus any vcrypt
    redirect chains, then hand the result to servertools."""
    logger.info("[streamondemand-pureita cineblog01] play")
    itemlist = []

    ### Handling new cb01 wrapper
    if host[9:] + "/film/" in item.url:
        iurl = httptools.downloadpage(item.url, only_headers=True,
                                      follow_redirects=False).headers.get("location", "")
        logger.info("/film/ wrapper: %s" % iurl)
        if iurl:
            item.url = iurl

    # /goto/ links carry the real target base64-encoded in the path.
    if '/goto/' in item.url:
        item.url = item.url.split('/goto/')[-1].decode('base64')

    item.url = item.url.replace('http://cineblog01.uno', 'http://k4pp4.pw')

    logger.debug("##############################################################")
    if "go.php" in item.url:
        data = httptools.downloadpage(item.url, headers=headers).data
        try:
            data = scrapertools.get_match(data, 'window.location.href = "([^"]+)";')
        except IndexError:
            try:
                # data = scrapertools.get_match(data, r'<a href="([^"]+)">clicca qui</a>')
                # Sometimes the page says "Clicca qui per proseguire" instead:
                data = scrapertools.get_match(
                    data, r'<a href="([^"]+)".*?class="btn-wrapper">.*?licca.*?</a>')
            except IndexError:
                data = httptools.downloadpage(
                    item.url, only_headers=True,
                    follow_redirects=False).headers.get("location", "")
        # Follow the vcrypt redirect chain until a real host appears.
        while 'vcrypt' in data:
            data = httptools.downloadpage(data, only_headers=True,
                                          follow_redirects=False).headers.get("location", "")
        logger.debug("##### play go.php data ##\n%s\n##" % data)
    elif "/link/" in item.url:
        data = httptools.downloadpage(item.url, headers=headers).data
        from lib import jsunpack
        try:
            data = scrapertools.get_match(
                data, "(eval\(function\(p,a,c,k,e,d.*?)</script>")
            data = jsunpack.unpack(data)
            logger.debug("##### play /link/ unpack ##\n%s\n##" % data)
        except IndexError:
            logger.debug("##### The content is yet unpacked ##\n%s\n##" % data)
        data = scrapertools.find_single_match(data, 'var link(?:\s)?=(?:\s)?"([^"]+)";')
        while 'vcrypt' in data:
            data = httptools.downloadpage(data, only_headers=True,
                                          follow_redirects=False).headers.get("location", "")
        if not "http" in data:
            data = urlparse.urljoin("http://swzz.xyz", data)
            data = httptools.downloadpage(data, headers=headers).data
        logger.debug("##### play /link/ data ##\n%s\n##" % data)
    else:
        data = item.url
        logger.debug("##### play else data ##\n%s\n##" % data)
    logger.debug("##############################################################")

    try:
        itemlist = servertools.find_video_items(data=data)
        for videoitem in itemlist:
            videoitem.title = item.show
            videoitem.fulltitle = item.fulltitle
            videoitem.show = item.show
            videoitem.thumbnail = item.thumbnail
            videoitem.channel = __channel__
    except AttributeError:
        logger.error("vcrypt data doesn't contain expected URL")

    return itemlist
def get_b64_data(new_id_video, headers):
    """Download the hqq.tv embed player for *new_id_video* and return the
    base64 payload embedded in its markup."""
    player_url = "http://hqq.tv/player/embed_player.php?vid=" + new_id_video + "&autoplay=no"
    player_html = scrapertools.cache_page(player_url, headers=headers)
    return scrapertools.get_match(player_html, 'base64,([^"]+)"')
def agregadas(item):
    """List newly added movies (descargacineclasico) plus a 'next page'
    item when the nextpostslink paginator is present."""
    logger.info()
    itemlist = []

    # Download the page.
    data = scrapertools.cache_page(item.url)
    logger.info("data=" + data)

    # Isolate the review area and collapse runs of whitespace.
    fichas = re.sub(
        r"\n|\s{2}", "",
        scrapertools.get_match(data, '<div class="review-box-container">(.*?)wp-pagenavi'))

    # Sample entry:
    # <a href=".../quatermass-2-1957/" title="Quatermass II ... Descargar y ver Online">
    # <img ... src="//.../Quatermass-II-2-1957.jpg" ...>
    patron = '<div class="post-thumbnail"><a href="([^"]+)".*?'  # url
    patron += 'title="([^"]+)".*?'  # title
    patron += 'src="([^"]+).*?'  # thumbnail
    patron += '<p>([^<]+)'  # plot
    matches = re.compile(patron, re.DOTALL).findall(fichas)

    for url, title, thumbnail, plot in matches:
        # Drop the trailing SEO text from the title.
        title = title[0:title.find("Descargar y ver Online")]
        url = urlparse.urljoin(item.url, url)
        thumbnail = urlparse.urljoin(url, thumbnail)
        itemlist.append(
            Item(channel=item.channel, action="findvideos", title=title + " ",
                 fulltitle=title, url=url, thumbnail=thumbnail, plot=plot, show=title))

    # Pagination (best effort).
    try:
        patron_nextpage = r'<a class="nextpostslink" rel="next" href="([^"]+)'
        next_page = re.compile(patron_nextpage, re.DOTALL).findall(data)
        itemlist.append(
            Item(channel=item.channel, action="agregadas", title="Página siguiente >>",
                 url=next_page[0], viewmode="movie_with_plot"))
    except:
        pass

    return itemlist
def findvideos(item):
    """List streaming and download mirrors for a title.

    Streaming ("online") mirrors are parsed from the links block; when the
    item is flagged 'dd'/'descarga' the direct-download block is parsed
    instead. Also offers library-add and "show download links" entries.
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data

    if item.extra != "dd" and item.extra != "descarga":
        # --- streaming mirrors ---
        if item.contentType != "movie":
            bloque_links = scrapertools.find_single_match(
                data, '<div class="links">(.*?)<\/i>Selecciona un')
            if bloque_links == "":
                bloque_links = scrapertools.find_single_match(
                    data, '<div class="links">(.*?)<div class="enlaces">')
        else:
            bloque_links = scrapertools.find_single_match(
                data, '<div class="links">(.*?)<\/i>Descargar')
            if bloque_links == "":
                bloque_links = scrapertools.find_single_match(
                    data, '<div class="links">(.*?)<div class="enlaces">')

        patron = '<a class="goto" rel="nofollow".*?data-id="([^<]+)".*?'
        patron += 'src="([^"]+)">'
        patron += '([^<]+)<.*?'
        patron += 'src="([^"]+)'
        patron += '">([^<]+).*?'
        patron += '<span>([^<]+)'
        links = scrapertools.find_multiple_matches(bloque_links, patron)
        for id, thumb, server, idiomapng, idioma, calidad in links:
            idioma = idioma.strip()
            calidad = calidad.lower()
            calidad = re.sub(r' ', '-', calidad)
            if calidad == "ts":
                calidad = re.sub(r'ts', 'ts-hq', calidad)
            url = host + "/goto/"
            url_post = urllib.urlencode({'id': id})
            # waaw links are handled by the netutv connector.
            server_name = scrapertools.get_match(server, '(\w+)\.').replace("waaw", "netutv")
            server_parameters = servertools.get_server_parameters(server_name)
            icon_server = server_parameters.get("thumbnail", "")
            extra = "online"
            title = server_name + " (" + calidad + ") (" + idioma + ")"
            itemlist.append(
                item.clone(title=title, url=url, action="play", thumbnail=icon_server,
                           folder=True, id=url_post, language=idioma, quality=calidad,
                           server=server_name))
    else:
        # --- direct-download mirrors ---
        bloque_dd = scrapertools.find_single_match(
            data, '<\/i>Descargar(.*?)<div class="enlaces">')
        links_dd = scrapertools.find_multiple_matches(
            bloque_dd,
            '<a class="low".*?data-id="(.*?)".*?src="([^"]+)">([^<]+)<.*?src[^<]+>([^<]+).*?<span>([^<]+)')
        for id, thumb, server, idioma, calidad in links_dd:
            idioma = idioma.strip()
            calidad = calidad.lower()
            calidad = re.sub(r' ', '-', calidad)
            if calidad == "ts":
                calidad = re.sub(r'ts', 'ts-hq', calidad)
            # Translate quality/language through the lookup tables; unknown
            # values are shown in brown.
            if CALIDADES.get(calidad):
                calidad = CALIDADES.get(calidad)
            else:
                calidad = "[COLOR brown]" + calidad + "[/COLOR]"
            if IDIOMAS.get(idioma):
                idioma = IDIOMAS.get(idioma)
            else:
                idioma = "[COLOR brown]" + idioma + "[/COLOR]"
            url = host + "/goto/"
            data_post = urllib.urlencode({'id': id})
            server_name = scrapertools.get_match(server, '(.*?)\.').strip()
            icon_server = os.path.join(config.get_runtime_path(), "resources",
                                       "images", "servers",
                                       "server_" + server_name + ".png")
            icon_server = icon_server.replace('streamin', 'streaminto')
            icon_server = icon_server.replace('ul', 'uploadedto')
            if not os.path.exists(icon_server):
                icon_server = thumb
            extra = "descarga"
            itemlist.append(
                item.clone(title="[COLOR floralwhite][B]" + server + "[/B][/COLOR] " + calidad + " " + idioma,
                           url=url, action="play", thumbnail=icon_server, id=data_post))

    if item.infoLabels["year"]:
        tmdb.set_infoLabels(itemlist)

    # Offer adding the movie to the video library.
    if item.contentType == "movie" and item.extra != "descarga" and item.extra != "online":
        if config.get_videolibrary_support() and len(itemlist) > 0:
            itemlist.append(
                Item(channel=item.channel, title="Añadir película a la videoteca",
                     action="add_pelicula_to_library", url=item.url, text_color="green",
                     infoLabels={'title': item.fulltitle},
                     thumbnail="http://imgur.com/xjrGmVM.png",
                     fulltitle=item.fulltitle, extra=extra))

    # Offer switching to the download-links view when that block exists.
    if item.extra != "dd" and item.extra != "descarga" and item.extra != "online":
        bloque_dd = scrapertools.find_single_match(
            data, '<\/i>Descargar(.*?)<div class="enlaces">')
        if bloque_dd:
            itemlist.append(
                item.clone(title="[COLOR aqua][B]Ver enlaces Descarga[/B][/COLOR] ",
                           action="findvideos", thumbnail=thumb, fanart="",
                           contentType=item.contentType, bloque_dd=bloque_dd, extra="dd"))

    return itemlist
def novedades_episodios(item):
    """List the latest episodes from the hdfull JSON API, advancing the
    paging 'start' parameter by 24 for the 'next page' item."""
    logger.info("pelisalacarta.channels.hdfull novedades_episodios")
    itemlist = []

    ## Load watch-status flags.
    status = jsontools.load_json(
        scrapertools.cache_page(host + '/a/status/all'))

    ## Episodes: item.url is "<url>?<post-data>"; bump 'start' by one page.
    url = item.url.split("?")[0]
    post = item.url.split("?")[1]
    old_start = scrapertools.get_match(post, 'start=([^&]+)&')
    start = "%s" % (int(old_start) + 24)
    post = post.replace("start=" + old_start, "start=" + start)
    next_page = url + "?" + post

    data = scrapertools.cache_page(url, post=post)
    episodes = jsontools.load_json(data)

    for episode in episodes:
        thumbnail = host + "/thumbs/" + episode['thumbnail']
        temporada = episode['season']
        episodio = episode['episode']
        if len(episodio) == 1:
            episodio = '0' + episodio

        # Language flags, e.g. "( ES EN )".
        if episode['languages'] != "[]":
            idiomas = "( [COLOR teal][B]"
            for idioma in episode['languages']:
                idiomas += idioma + " "
            idiomas += "[/B][/COLOR])"
            idiomas = bbcode_kodi2html(idiomas)
        else:
            idiomas = ""

        # Prefer the Spanish show title, fall back to English.
        try:
            show = episode['show']['title']['es'].strip()
        except:
            show = episode['show']['title']['en'].strip()
        show = bbcode_kodi2html("[COLOR whitesmoke][B]" + show + "[/B][/COLOR]")

        if episode['title']:
            try:
                title = episode['title']['es'].strip()
            except:
                title = episode['title']['en'].strip()
        if len(title) == 0:
            title = "Temporada " + temporada + " Episodio " + episodio

        # Compose "SxEE - Show: Title", tolerating utf-8 and latin-1 input.
        try:
            title = temporada + "x" + episodio + " - " + show.decode(
                'utf-8') + ": " + title.decode('utf-8') + ' ' + idiomas
        except:
            title = temporada + "x" + episodio + " - " + show.decode(
                'iso-8859-1') + ": " + title.decode(
                    'iso-8859-1') + ' ' + idiomas

        # Append the watched/pending marker, if any.
        status_str = get_status(status, 'episodes', episode['id'])
        if status_str != "":
            title += status_str

        try:
            title = title.encode('utf-8')
        except:
            title = title.encode('iso-8859-1')
        #try: show = show.encode('utf-8')
        #except: show = show.encode('iso-8859-1')

        url = urlparse.urljoin(
            host, '/serie/' + episode['permalink'] + '/temporada-' + temporada +
            '/episodio-' + episodio) + "###" + episode['id'] + ";3"
        itemlist.append(
            Item(channel=item.channel, action="findvideos", title=title,
                 fulltitle=title, url=url, thumbnail=thumbnail, folder=True))

    ## A full page (24 results) implies there may be more.
    if len(itemlist) == 24:
        itemlist.append(
            Item(channel=item.channel, action="novedades_episodios",
                 title=">> Página siguiente", url=next_page, folder=True))

    return itemlist
def get_video_url(page_url, premium=False, user="", password="",
                  video_password=""):
    """Resolve the final m3u8 media URL for a netu.tv / hqq.tv embed.

    Two entry paths: yaske.net embed pages (video id scraped from the page)
    and direct netu/hqq URLs (id taken from the query string).  Both paths
    end by de-obfuscating the player payload via the module helpers
    get_b64_data()/double_b64()/get_obfuscated()/tb().
    Returns a list of [label, url] pairs (single entry).
    """
    logger.info("[netutv.py] url=" + page_url)
    headers = [[
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0'
    ]]
    ## "/netu/tv/"
    if "www.yaske.net" in page_url:
        # URL-encode the page url; it is passed along as a request parameter.
        urlEncode = urllib.quote_plus(page_url)
        # Fetch the embed page.
        id_video = scrapertools.get_match(page_url, "embed_([A-Za-z0-9]+)")
        data = scrapertools.cache_page(page_url, headers=headers)
        headers.append(['Referer', page_url])
        try:
            # New video id is served from an external script.
            page_url_the_new_video_id = scrapertools.get_match(
                data, 'script src="([^"]+)"></script>')
            data_with_new_video_id = scrapertools.cache_page(
                page_url_the_new_video_id, headers=headers)
            # Some links need this unquote step before the id can be found.
            data_with_new_video_id = urllib.unquote(data_with_new_video_id)
            new_id_video = scrapertools.get_match(data_with_new_video_id,
                                                  "var vid='([^']+)';")
            # Query hqq.tv with the new video id.
            b64_data = get_b64_data(new_id_video, headers)
            # Double base64 decode + escape.
            utf8 = double_b64(b64_data)
        except:
            # Fallback: the payload is embedded inline as a base64 data: URI.
            b64_data = scrapertools.get_match(
                data,
                '<script language="javascript" type="text/javascript" src="data:text/javascript;charset=utf-8;base64,([^"]+)"></script>'
            )
            # Double base64 decode + escape.
            utf8 = double_b64(b64_data)
            # New video id hides in the decoded payload.
            new_id_video = scrapertools.get_match(utf8, 'value="([^"]+)"')
            # Query hqq.tv again with that id.
            b64_data = get_b64_data(new_id_video, headers)
            utf8 = double_b64(b64_data)
        ### at ###
        match_at = '<input name="at" id="text" value="([^"]+)">'
        at = scrapertools.get_match(utf8, match_at)
        ### m3u8 ###
        # Obfuscated bytes that contain the m3u8 url.
        b_m3u8_2 = get_obfuscated(new_id_video, at, urlEncode, headers)
        ### tb_m3u8 ###
        # Decode into the playable m3u8 url.
        url_m3u8 = tb(b_m3u8_2)
    else:
        # Direct netu url: swap host to hqq and encode for the parameter.
        urlEncode = urllib.quote_plus(page_url.replace("netu", "hqq"))
        ### at ###
        id_video = page_url.split("=")[1]
        # Query hqq.tv with the video id and decode the payload.
        b64_data = get_b64_data(id_video, headers)
        utf8 = double_b64(b64_data)
        # NOTE(review): this path matches name="at" type="text", the yaske
        # path matches name="at" id="text" — apparently intentional, confirm.
        match_at = '<input name="at" type="text" value="([^"]+)">'
        at = scrapertools.get_match(utf8, match_at)
        ### b_m3u8 ###
        headers.append(['Referer', page_url])
        # Obfuscated bytes that contain the m3u8 url.
        b_m3u8_2 = get_obfuscated(id_video, at, urlEncode, headers)
        ### tb ###
        url_m3u8 = tb(b_m3u8_2)
    ### m3u8 ###
    media_url = url_m3u8
    video_urls = []
    video_urls.append([
        scrapertools.get_filename_from_url(media_url)[-4:] + " [netu.tv]",
        media_url
    ])
    for video_url in video_urls:
        logger.info("[netutv.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def fichas(item):
    """List hdfull fichas (movies/series cards), including search results.

    When item.title == "Buscar..." the request is a search POST
    (item.extra carries the payload); otherwise item.url is fetched as-is.
    The card markup is first normalized with re.sub into a flat
    'url'/'image'/'langs'/'rating'/'title'/'id' record string, then parsed.
    Returns a list of Item objects plus an optional next-page item.
    """
    logger.info("pelisalacarta.channels.hdfull series")
    itemlist = []
    # Load watched/following status for all fichas; used to decorate titles.
    status = jsontools.load_json(
        scrapertools.cache_page(host + '/a/status/all'))
    if item.title == "Buscar...":
        data = agrupa_datos(scrapertools.cache_page(item.url, post=item.extra))
        # Search pages have up to two "section-title" blocks; keep both.
        s_p = scrapertools.get_match(
            data, '<h3 class="section-title">(.*?)<div id="footer-wrapper">').split(
                '<h3 class="section-title">')
        if len(s_p) == 1:
            data = s_p[0]
            if 'Lo sentimos</h3>' in s_p[0]:
                # NOTE(review): `texto` is not defined in this function —
                # presumably a module-level global set by the search entry
                # point; confirm, otherwise this branch raises NameError.
                return [
                    Item(
                        channel=item.channel,
                        title=bbcode_kodi2html(
                            "[COLOR gold][B]HDFull:[/B][/COLOR] [COLOR blue]" +
                            texto.replace('%20', ' ') +
                            "[/COLOR] sin resultados"))
                ]
        else:
            data = s_p[0] + s_p[1]
    else:
        data = agrupa_datos(scrapertools.cache_page(item.url))
    # Rewrite each card's HTML into a flat, easily-parsed record line.
    data = re.sub(
        r'<div class="span-6[^<]+<div class="item"[^<]+' + \
        '<a href="([^"]+)"[^<]+' + \
        '<img.*?src="([^"]+)".*?' + \
        '<div class="left"(.*?)</div>' + \
        '<div class="right"(.*?)</div>.*?' + \
        'title="([^"]+)".*?' + \
        'onclick="setFavorite.\d, (\d+),',
        r"'url':'\1';'image':'\2';'langs':'\3';'rating':'\4';'title':\5;'id':'\6';",
        data
    )
    patron = "'url':'([^']+)';'image':'([^']+)';'langs':'([^']+)';'rating':'([^']+)';'title':([^;]+);'id':'([^']+)';"
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedthumbnail, scrapedlangs, scrapedrating, scrapedtitle, scrapedid in matches:
        # Use the full-size thumb instead of the 130x190 miniature.
        thumbnail = scrapedthumbnail.replace("/tthumb/130x190/", "/thumbs/")
        title = scrapedtitle.strip()
        show = title
        # ">" is what an empty captured <div> collapses to after the rewrite.
        if scrapedlangs != ">":
            textoidiomas = extrae_idiomas(scrapedlangs)
            title += bbcode_kodi2html(" ( [COLOR teal][B]" + textoidiomas +
                                      "[/B][/COLOR])")
        if scrapedrating != ">":
            valoracion = re.sub(r'><[^>]+>(\d+)<b class="dec">(\d+)</b>',
                                r'\1,\2', scrapedrating)
            title += bbcode_kodi2html(" ([COLOR orange]" + valoracion +
                                      "[/COLOR])")
        url = urlparse.urljoin(item.url, scrapedurl)
        # "###<id>;<type>" suffix: 1 = series, 2 = movie (parsed downstream).
        if "/serie" in url or "/tags-tv" in url:
            action = "episodios"
            url += "###" + scrapedid + ";1"
            type = "shows"
        else:
            action = "findvideos"
            url += "###" + scrapedid + ";2"
            type = "movies"
        # NOTE: `str` and `type` shadow builtins (existing file convention).
        str = get_status(status, type, scrapedid)
        if str != "": title += str
        if item.title == "Buscar...":
            # Tag each search result with its section (Peliculas/Series/...).
            tag_type = scrapertools.get_match(url, 'l.tv/([^/]+)/')
            title += bbcode_kodi2html(" - [COLOR blue]" +
                                      tag_type.capitalize() + "[/COLOR]")
        itemlist.append(
            Item(channel=item.channel,
                 action=action,
                 title=title,
                 url=url,
                 fulltitle=title,
                 thumbnail=thumbnail,
                 show=show,
                 folder=True))
    ## Pagination ("&raquo;" link)
    next_page_url = scrapertools.find_single_match(
        data, '<a href="([^"]+)">.raquo;</a>')
    if next_page_url != "":
        itemlist.append(
            Item(channel=item.channel,
                 action="fichas",
                 title=">> Página siguiente",
                 url=urlparse.urljoin(item.url, next_page_url),
                 folder=True))
    return itemlist
def episodios(item): def load_episodios(html, item, itemlist, lang_title): patron = '.*?<a href="[^"]+"[^o]+ofollow[^>]+>[^<]+<\/a><(?:b|\/)[^>]+>' matches = re.compile(patron).findall(html) for data in matches: # Extrae las entradas scrapedtitle = data.split('<a ')[0] scrapedtitle = re.sub(r'<[^>]*>', '', scrapedtitle).strip() if scrapedtitle != 'Categorie': scrapedtitle = scrapedtitle.replace('×', 'x') itemlist.append( Item(channel=__channel__, action="findvideos_tv", contentType="episode", title="[COLOR azure]%s[/COLOR]" % (scrapedtitle + " (" + lang_title + ")"), url=data, thumbnail=item.thumbnail, extra=item.extra, fulltitle=scrapedtitle + " (" + lang_title + ")" + ' - ' + item.show, show=item.show)) logger.info("[misterstreaming.py] episodios") itemlist = [] # Descarga la página data = scrapertools.anti_cloudflare(item.url) data = scrapertools.decodeHtmlentities(data) data = scrapertools.get_match( data, '<div class="entry-content">(.*?)<p class="post-meta entry-meta">') lang_titles = [] starts = [] patron = r"Stagione.*?ITA" matches = re.compile(patron, re.IGNORECASE).finditer(data) for match in matches: season_title = match.group() if season_title != '': lang_titles.append('SUB ITA' if 'SUB' in season_title.upper() else 'ITA') starts.append(match.end()) i = 1 len_lang_titles = len(lang_titles) while i <= len_lang_titles: inizio = starts[i - 1] fine = starts[i] if i < len_lang_titles else -1 html = data[inizio:fine] lang_title = lang_titles[i - 1] load_episodios(html, item, itemlist, lang_title) i += 1 if config.get_library_support() and len(itemlist) != 0: itemlist.append( Item(channel=__channel__, title="Aggiungi alla libreria", url=item.url, action="add_serie_to_library", extra="episodios", show=item.show)) itemlist.append( Item(channel=__channel__, title="Scarica tutti gli episodi della serie", url=item.url, action="download_all_episodes", extra="episodios", show=item.show)) return itemlist
def _bing_imdb_lookup(search_title, year, tipo_ps, is_tv):
    # Resolve a TMDB id via a Bing "site:imdb.com" search (the original
    # function duplicated this block four times).  Returns the id or None.
    if is_tv:
        urlbing_imdb = "http://www.bing.com/search?q=%s+%s+tv+series+site:imdb.com" % (
            search_title.replace(' ', '+'), year)
    else:
        urlbing_imdb = "http://www.bing.com/search?q=%s+%s+site:imdb.com" % (
            search_title.replace(' ', '+'), year)
    data = browser(urlbing_imdb)
    data = re.sub(
        r"\n|\r|\t|\s{2}| |http://ssl-proxy.my-addr.org/myaddrproxy.php/",
        "", data)
    if is_tv:
        subdata_imdb = scrapertools.find_single_match(
            data, '<li class="b_algo">(.*?)h="ID.*?<strong>.*?TV Series')
    else:
        subdata_imdb = scrapertools.find_single_match(
            data, '<li class="b_algo">(.*?)h="ID.*?<strong>')
    # Two URL shapes appear in results: /title/tt123/ and /xx/title/tt123/.
    try:
        imdb_id = scrapertools.get_match(
            subdata_imdb, '<a href=.*?http.*?imdb.com/title/(.*?)/.*?"')
    except:
        try:
            imdb_id = scrapertools.get_match(
                subdata_imdb, '<a href=.*?http.*?imdb.com/.*?/title/(.*?)/.*?"')
        except:
            imdb_id = ""
    otmdb = tmdb.Tmdb(external_id=imdb_id, external_source="imdb_id",
                      tipo=tipo_ps, idioma_busqueda="es")
    return otmdb.result.get("id")


def get_art(item):
    """Populate item.fanart, item.extra ("url1|url2") and item.thumbnail
    with artwork from TMDB and fanart.tv.

    Resolution order for the TMDB id: item.infoLabels['tmdb_id'], a title
    search (with then without year), a Bing/IMDb lookup, and finally a
    Bing/IMDb lookup on the parenthesised part of the title.  Mutates
    *item* in place; returns None.

    Fixes vs. the previous version:
      * len(imagenes)==1 branch concatenated the list itself with a string
        (TypeError) — now uses imagenes[0].
      * "clearlogo" branch read the "hdmovielogo" key — now reads clearlogo.
      * "moviebanner" branch assigned item.thumbnail_ (dead attribute, typo)
        — now assigns item.thumbnail.
      * first Bing lookup used `contentType == "movie"` for the tv-series
        query, inverted w.r.t. the identical fallback below — now `!=`.
    """
    logger.info()
    id = item.infoLabels['tmdb_id']
    check_fanart = item.infoLabels['fanart']
    if item.contentType != "movie":
        tipo_ps = "tv"
    else:
        tipo_ps = "movie"
    if not id:
        year = item.extra  # caller stores the release year in item.extra
        # 1) Title + year search on TMDB.
        otmdb = tmdb.Tmdb(texto_buscado=item.fulltitle, year=year,
                          tipo=tipo_ps)
        id = otmdb.result.get("id")
        if id is None:
            # 2) Retry without the year constraint.
            otmdb = tmdb.Tmdb(texto_buscado=item.fulltitle, tipo=tipo_ps)
            id = otmdb.result.get("id")
        if id is None:
            # 3) Bing -> IMDb -> TMDB (FIX: tv/movie condition was inverted).
            id = _bing_imdb_lookup(item.fulltitle, year, tipo_ps,
                                   item.contentType != "movie")
        if id is None and "(" in item.fulltitle:
            # 4) Last resort: search on the parenthesised part of the title.
            alt_title = scrapertools.find_single_match(item.fulltitle,
                                                       '\(.*?\)')
            id = _bing_imdb_lookup(alt_title, year, tipo_ps,
                                   item.contentType != "movie")
    # Collect every TMDB image URL (backdrops/posters) at original size.
    id_tvdb = ""
    imagenes = []
    itmdb = tmdb.Tmdb(id_Tmdb=id, tipo=tipo_ps)
    images = itmdb.result.get("images")
    if images:
        for key, value in images.iteritems():
            for detail in value:
                imagenes.append('http://image.tmdb.org/t/p/original' +
                                detail["file_path"])
    # Pick fanart and two "extra" images, avoiding duplicates of the fanart
    # already set on the item (check_fanart).
    if len(imagenes) >= 4:
        if imagenes[0] != check_fanart:
            item.fanart = imagenes[0]
        else:
            item.fanart = imagenes[1]
        if imagenes[1] != check_fanart and imagenes[1] != item.fanart \
                and imagenes[2] != check_fanart:
            item.extra = imagenes[1] + "|" + imagenes[2]
        elif imagenes[1] != check_fanart and imagenes[1] != item.fanart:
            item.extra = imagenes[1] + "|" + imagenes[3]
        elif imagenes[2] != check_fanart:
            item.extra = imagenes[2] + "|" + imagenes[3]
        else:
            item.extra = imagenes[3] + "|" + imagenes[3]
    elif len(imagenes) == 3:
        if imagenes[0] != check_fanart:
            item.fanart = imagenes[0]
        else:
            item.fanart = imagenes[1]
        if imagenes[1] != check_fanart and imagenes[1] != item.fanart \
                and imagenes[2] != check_fanart:
            item.extra = imagenes[1] + "|" + imagenes[2]
        elif imagenes[1] != check_fanart and imagenes[1] != item.fanart:
            item.extra = imagenes[0] + "|" + imagenes[1]
        elif imagenes[2] != check_fanart:
            item.extra = imagenes[1] + "|" + imagenes[2]
        else:
            item.extra = imagenes[1] + "|" + imagenes[1]
    elif len(imagenes) == 2:
        if imagenes[0] != check_fanart:
            item.fanart = imagenes[0]
        else:
            item.fanart = imagenes[1]
        if imagenes[1] != check_fanart and imagenes[1] != item.fanart:
            item.extra = imagenes[0] + "|" + imagenes[1]
        else:
            item.extra = imagenes[1] + "|" + imagenes[0]
    elif len(imagenes) == 1:
        # FIX: previously concatenated the list object itself (TypeError).
        item.extra = imagenes[0] + "|" + imagenes[0]
    else:
        item.extra = item.fanart + "|" + item.fanart
    # Overlay logos/banners from fanart.tv when available.
    images_fanarttv = fanartv(item, id_tvdb, id)
    if images_fanarttv:
        if item.contentType == "movie":
            if images_fanarttv.get("moviedisc"):
                item.thumbnail = images_fanarttv.get("moviedisc")[0].get("url")
            elif images_fanarttv.get("hdmovielogo"):
                item.thumbnail = images_fanarttv.get("hdmovielogo")[0].get(
                    "url")
            elif images_fanarttv.get("moviethumb"):
                item.thumbnail = images_fanarttv.get("moviethumb")[0].get(
                    "url")
            elif images_fanarttv.get("moviebanner"):
                # FIX: was item.thumbnail_ (typo — assigned a dead attribute).
                item.thumbnail = images_fanarttv.get("moviebanner")[0].get(
                    "url")
            else:
                item.thumbnail = item.thumbnail
        else:
            if images_fanarttv.get("hdtvlogo"):
                item.thumbnail = images_fanarttv.get("hdtvlogo")[0].get("url")
            elif images_fanarttv.get("clearlogo"):
                # FIX: was reading the "hdmovielogo" key in this branch.
                item.thumbnail = images_fanarttv.get("clearlogo")[0].get("url")
            if images_fanarttv.get("tvbanner"):
                item.extra = item.extra + "|" + images_fanarttv.get(
                    "tvbanner")[0].get("url")
            elif images_fanarttv.get("tvthumb"):
                item.extra = item.extra + "|" + images_fanarttv.get(
                    "tvthumb")[0].get("url")
            else:
                item.extra = item.extra + "|" + item.thumbnail
    else:
        item.extra = item.extra + "|" + item.thumbnail
def episodios(item):
    """List every episode of an hdfull series, season by season.

    item.url carries a "###<id>;<type>" suffix (id==0 means: scrape the sid
    from the page).  Also prepends follow/unfollow entries when the user is
    logged in, and appends library/download entries.  Mutates item.url
    (strips the suffix).  Returns a list of Item objects.
    """
    logger.info("pelisalacarta.channels.hdfull episodios")
    itemlist = []
    # Load watched/following status for all fichas.
    status = jsontools.load_json(
        scrapertools.cache_page(host + '/a/status/all'))
    url_targets = item.url
    # Unpack the "###<id>;<type>" suffix appended by fichas()/listados.
    if "###" in item.url:
        id = item.url.split("###")[1].split(";")[0]
        type = item.url.split("###")[1].split(";")[1]
        item.url = item.url.split("###")[0]
    ## Seasons page
    data = agrupa_datos(scrapertools.cache_page(item.url))
    if id == "0":
        # Coming from listado_series: scrape the series id from the page.
        id = scrapertools.get_match(data,
                                    "<script>var sid = '([^']+)';</script>")
        url_targets = url_targets.replace('###0', '###' + id)
    # NOTE: `str`/`type`/`id` shadow builtins (existing file convention).
    str = get_status(status, "shows", id)
    # Already-following series: offer library-add and "Abandonar" entries.
    if str != "" and account and item.category != "Series" and "XBMC" not in item.title:
        if config.get_library_support():
            title = bbcode_kodi2html(" ( [COLOR gray][B]" + item.show +
                                     "[/B][/COLOR] )")
            itemlist.append(
                Item(channel=item.channel,
                     action="episodios",
                     title=title,
                     fulltitle=title,
                     url=url_targets,
                     thumbnail=item.thumbnail,
                     show=item.show,
                     folder=False))
        title = str.replace('green', 'red').replace('Siguiendo', 'Abandonar')
        itemlist.append(
            Item(channel=item.channel,
                 action="set_status",
                 title=title,
                 fulltitle=title,
                 url=url_targets,
                 thumbnail=item.thumbnail,
                 show=item.show,
                 folder=True))
    # Not yet following: offer library-add and "Seguir" entries.
    elif account and item.category != "Series" and "XBMC" not in item.title:
        if config.get_library_support():
            title = bbcode_kodi2html(" ( [COLOR gray][B]" + item.show +
                                     "[/B][/COLOR] )")
            itemlist.append(
                Item(channel=item.channel,
                     action="episodios",
                     title=title,
                     fulltitle=title,
                     url=url_targets,
                     thumbnail=item.thumbnail,
                     show=item.show,
                     folder=False))
        title = bbcode_kodi2html(" ( [COLOR orange][B]Seguir[/B][/COLOR] )")
        itemlist.append(
            Item(channel=item.channel,
                 action="set_status",
                 title=title,
                 fulltitle=title,
                 url=url_targets,
                 thumbnail=item.thumbnail,
                 show=item.show,
                 folder=True))
    # One link per season on the series page.
    patron = "<li><a href='([^']+)'>[^<]+</a></li>"
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl in matches:
        ## Episodes of this season, fetched from the JSON endpoint.
        data = agrupa_datos(scrapertools.cache_page(scrapedurl))
        sid = scrapertools.get_match(data, "<script>var sid = '(\d+)'")
        ssid = scrapertools.get_match(scrapedurl, "temporada-(\d+)")
        post = "action=season&start=0&limit=0&show=%s&season=%s" % (sid, ssid)
        url = host + "/a/episodes"
        data = scrapertools.cache_page(url, post=post)
        episodes = jsontools.load_json(data)
        for episode in episodes:
            thumbnail = host + "/thumbs/" + episode['thumbnail']
            temporada = episode['season']
            episodio = episode['episode']
            # Zero-pad single-digit episode numbers.
            if len(episodio) == 1: episodio = '0' + episodio
            if episode['languages'] != "[]":
                idiomas = "( [COLOR teal][B]"
                for idioma in episode['languages']:
                    idiomas += idioma + " "
                idiomas += "[/B][/COLOR])"
                idiomas = bbcode_kodi2html(idiomas)
            else:
                idiomas = ""
            if episode['title']:
                # Prefer the Spanish title, fall back to English.
                try:
                    title = episode['title']['es'].strip()
                except:
                    title = episode['title']['en'].strip()
            # NOTE(review): if episode['title'] is falsy, `title` keeps the
            # previous iteration's value — confirm upstream data.
            if len(title) == 0:
                title = "Temporada " + temporada + " Episodio " + episodio
            # Python 2 byte/unicode juggling: utf-8 first, then latin-1.
            try:
                title = temporada + "x" + episodio + " - " + title.decode(
                    'utf-8') + ' ' + idiomas
            except:
                title = temporada + "x" + episodio + " - " + title.decode(
                    'iso-8859-1') + ' ' + idiomas
            str = get_status(status, 'episodes', episode['id'])
            if str != "": title += str
            try:
                title = title.encode('utf-8')
            except:
                title = title.encode('iso-8859-1')
            # "###<id>;3" marks this as an episode url for findvideos().
            url = urlparse.urljoin(
                scrapedurl, 'temporada-' + temporada + '/episodio-' +
                episodio) + "###" + episode['id'] + ";3"
            itemlist.append(
                Item(channel=item.channel,
                     action="findvideos",
                     title=title,
                     fulltitle=title,
                     url=url,
                     thumbnail=thumbnail,
                     show=item.show,
                     folder=True))
    if config.get_library_support() and len(itemlist) > 0:
        itemlist.append(
            Item(channel=item.channel,
                 title="Añadir esta serie a la biblioteca de XBMC",
                 url=url_targets,
                 action="add_serie_to_library",
                 extra="episodios",
                 show=item.show))
        itemlist.append(
            Item(channel=item.channel,
                 title="Descargar todos los episodios de la serie",
                 url=url_targets,
                 action="download_all_episodes",
                 extra="episodios",
                 show=item.show))
    return itemlist
def episodios(item):
    """List all episodes of an animeflv series.

    Fetches item.url via get_page(); on HTTP 200, parses the
    "Listado de episodios" <ul> and maps each entry to a SxNN title via
    numbered_for_tratk() (trakt.tv numbering).  On any other status an
    error Item is returned instead.  Appends library/download entries.
    """
    logger.info("pelisalacarta.channels.animeflv episodios")
    itemlist = []
    status_code, data = get_page(item.url)
    logger.info("pelisalacarta.channels.animeflv **** {0}".format(item.url))
    logger.info("pelisalacarta.channels.animeflv **** {0}".format(status_code))
    if status_code == requests.codes.ok:
        # Sample of the markup being parsed:
        '''
        <div class="tit">Listado de episodios <span class="fecha_pr">Fecha Próximo: 2013-06-11</span></div>
        <ul class="anime_episodios" id="listado_epis">
            <li><a href="/ver/aiura-9.html">Aiura 9</a></li>
            ...
            <li><a href="/ver/aiura-1.html">Aiura 1</a></li>
        </ul>
        '''
        data = scrapertools.get_match(
            data,
            '<div class="tit">Listado de episodios.*?</div>(.*?)</ul>')
        patron = '<li><a href="([^"]+)">([^<]+)</a></li>'
        matches = re.compile(patron, re.DOTALL).findall(data)
        for scrapedurl, scrapedtitle in matches:
            title = scrapertools.unescape(scrapedtitle)
            url = urlparse.urljoin(item.url, scrapedurl)
            thumbnail = item.thumbnail
            plot = item.plot
            # TODO: build a helper that, given the title and a series table,
            # derives season/episode properly so trakt.tv scrobbling works.
            season = 1
            episode = 1
            # Episode number = digits following the show name in the title.
            patron = "{0}{1}".format(re.escape(item.show), "\s+(\d+)")
            try:
                episode = scrapertools.get_match(title, patron)
                episode = int(episode)
            except IndexError:
                # Title does not match "<show> <number>" — keep defaults.
                pass
            except ValueError:
                pass
            # Remap to trakt.tv numbering (e.g. long-running shows).
            season, episode = numbered_for_tratk(item.show, season, episode)
            # Zero-pad the episode number in the "SxNN" label.
            if len(str(episode)) == 1:
                title = "{0}x0{1}".format(season, episode)
            else:
                title = "{0}x{1}".format(season, episode)
            title = "{0} {1}".format(item.show, title)
            if DEBUG:
                logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(
                    title, url, thumbnail))
            itemlist.append(Item(channel=__channel__,
                                 action="findvideos",
                                 title=title,
                                 url=url,
                                 thumbnail=thumbnail,
                                 plot=plot,
                                 show=item.show,
                                 fulltitle="{0} {1}"
                                 .format(item.show, title),
                                 fanart=thumbnail,
                                 viewmode="movies_with_plot",
                                 folder=True))
    else:
        # Page failed to load: surface the HTTP status to the user.
        itemlist.append(Item(channel=__channel__,
                             action="mainlist",
                             title="No se ha podido cargar la pagina ERROR:{0}".format(
                                 status_code),
                             url="",
                             thumbnail="",
                             plot=""))
    if config.get_library_support() and len(itemlist) > 0:
        itemlist.append(Item(channel=__channel__,
                             title="Añadir esta serie a la biblioteca de XBMC",
                             url=item.url,
                             action="add_serie_to_library",
                             extra="episodios",
                             show=item.show))
        itemlist.append(Item(channel=item.channel,
                             title="Descargar todos los episodios de la serie",
                             url=item.url,
                             action="download_all_episodes",
                             extra="episodios",
                             show=item.show))
    return itemlist
def items_usuario(item):
    """List the user's hdfull fichas (favourites / pending / seen ...).

    item.url is "<endpoint>?<post-payload>"; the payload's `start=` offset
    is advanced by its own `limit=` to build the next-page link.  Each ficha
    may be a series, a single episode (when 'show_title' is present) or —
    via the except branch — a movie.  Returns a list of Item objects.
    """
    logger.info("pelisalacarta.channels.hdfull menupeliculas")
    itemlist = []
    # Load watched/following status for all fichas.
    status = jsontools.load_json(
        scrapertools.cache_page(host + '/a/status/all'))
    ## User fichas: split endpoint from POST payload, advance paging offset.
    url = item.url.split("?")[0]
    post = item.url.split("?")[1]
    old_start = scrapertools.get_match(post, 'start=([^&]+)&')
    limit = scrapertools.get_match(post, 'limit=(\d+)')
    start = "%s" % (int(old_start) + int(limit))
    post = post.replace("start=" + old_start, "start=" + start)
    next_page = url + "?" + post
    ## Fetch the user's fichas from the JSON endpoint.
    data = scrapertools.cache_page(url, post=post)
    fichas_usuario = jsontools.load_json(data)
    for ficha in fichas_usuario:
        # Prefer the Spanish title, fall back to English.
        try:
            title = ficha['title']['es'].strip()
        except:
            title = ficha['title']['en'].strip()
        try:
            title = title.encode('utf-8')
        except:
            pass
        show = title
        try:
            thumbnail = host + "/thumbs/" + ficha['thumbnail']
        except:
            thumbnail = host + "/thumbs/" + ficha['thumb']
        # EAFP dispatch: series fichas have 'permalink'; a KeyError anywhere
        # in this block means the ficha is a movie (handled in except).
        try:
            url = urlparse.urljoin(
                host,
                '/serie/' + ficha['permalink']) + "###" + ficha['id'] + ";1"
            action = "episodios"
            str = get_status(status, 'shows', ficha['id'])
            if "show_title" in ficha:
                # Ficha is a single episode of a show.
                action = "findvideos"
                try:
                    serie = ficha['show_title']['es'].strip()
                except:
                    serie = ficha['show_title']['en'].strip()
                temporada = ficha['season']
                episodio = ficha['episode']
                serie = bbcode_kodi2html("[COLOR whitesmoke][B]" + serie +
                                         "[/B][/COLOR]")
                if len(episodio) == 1: episodio = '0' + episodio
                try:
                    title = temporada + "x" + episodio + " - " + serie + ": " + title
                except:
                    title = temporada + "x" + episodio + " - " + serie.decode(
                        'iso-8859-1') + ": " + title.decode('iso-8859-1')
                url = urlparse.urljoin(
                    host, '/serie/' + ficha['permalink'] + '/temporada-' +
                    temporada + '/episodio-' +
                    episodio) + "###" + ficha['id'] + ";3"
        except:
            # Movie ficha ("perma" key, type suffix ;2).
            url = urlparse.urljoin(
                host,
                '/pelicula/' + ficha['perma']) + "###" + ficha['id'] + ";2"
            action = "findvideos"
            str = get_status(status, 'movies', ficha['id'])
        # NOTE: `str` shadows the builtin (existing file convention).
        if str != "": title += str
        itemlist.append(
            Item(channel=item.channel,
                 action=action,
                 title=title,
                 fulltitle=title,
                 url=url,
                 thumbnail=thumbnail,
                 show=show,
                 folder=True))
    # A full page has exactly `limit` entries; fewer means the last page.
    if len(itemlist) == int(limit):
        itemlist.append(
            Item(channel=item.channel,
                 action="items_usuario",
                 title=">> Página siguiente",
                 url=next_page,
                 folder=True))
    return itemlist
def find_videos(text):
    """Scan *text* for shortener/redirector links, resolve each one to its
    destination and feed the results to servertools.findvideos().

    Handles gestyy/rapidteria/sprysphere, vcrypt/linkup (including their
    document.cookie gate and the 4snip / wstream special cases) and a long
    list of classic ad-shorteners resolved via expurl.expand_url().

    Fixes vs. the previous version:
      * removed a stray Python-2 debug statement (`print mcookie`);
      * `import requests` and the invariant headers dict are hoisted out of
        the per-match loop;
      * inner regex locals no longer shadow the loop's `patron`/`matches`.
    """
    # Known non-video URLs (site furniture) — skipped; also accumulates
    # every URL already processed so duplicates are only handled once.
    encontrados = {
        'https://vcrypt.net/images/logo', 'https://vcrypt.net/css/out',
        'https://vcrypt.net/images/favicon', 'https://vcrypt.net/css/open',
        'http://linkup.pro/js/jquery', 'https://linkup.pro/js/jquery',
        'http://www.rapidcrypt.net/open'
    }
    devuelve = []
    patronvideos = [
        r'(https?://(gestyy|rapidteria|sprysphere)\.com/[a-zA-Z0-9]+)',
        r'(https?://(?:www\.)?(vcrypt|linkup)\.[^/]+/[^/]+/[a-zA-Z0-9_]+)'
    ]
    # Loop-invariant setup (previously re-done for every matched URL).
    import requests
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:59.0) Gecko/20100101 Firefox/59.0'
    }
    for patron in patronvideos:
        logger.info(" find_videos #" + patron + "#")
        matches = re.compile(patron).findall(text)
        for url, host in matches:
            if url in encontrados:
                logger.info(" url duplicada=" + url)
                continue
            logger.info(" url=" + url)
            encontrados.add(url)
            if host == 'gestyy':
                # gestyy answers with a redirect; just read Location.
                resp = httptools.downloadpage(
                    url,
                    follow_redirects=False,
                    cookies=False,
                    only_headers=True,
                    replace_headers=True,
                    headers={'User-Agent': 'curl/7.59.0'})
                data = resp.headers.get("location", "")
            elif 'vcrypt.net' in url:
                req = requests.get(url, headers=headers)
                idata = req.content
                # The page sets its gate cookies client-side via
                # document.cookie; collect and replay them.
                cookie_patron = r"document.cookie\s=\s.*?'(.*)'"
                mcookie = {}
                for match in re.finditer(cookie_patron, idata, re.MULTILINE):
                    for c in match.group(1).split("; "):
                        c, v = c.split('=')
                        mcookie[c] = v
                try:
                    # Meta-refresh target; POST with the gate cookies.
                    dest = scrapertools.get_match(idata, r';URL=([^\"]+)\">')
                    r = requests.post(dest, cookies=mcookie)
                    url = r.url
                except:
                    # No meta-refresh: retry plainly; unchanged URL means
                    # the link is dead.
                    r = requests.get(req.url, headers=headers)
                    if r.url == url:
                        url = ""
                if "4snip" in url:
                    # 4snip needs a POST to its /outlink/ endpoint.
                    import os
                    desturl = url.replace("/out/", "/outlink/")
                    par = os.path.basename(desturl)
                    rdata = requests.post(desturl, data={'url': par})
                    url = rdata.url
                if "wstream" in url:
                    url = url.replace("/video/", "/")
                data = url
            else:
                # Generic shortener: follow Location headers until we leave
                # the shortener's own host (or run out of redirects).
                data = ""
                while host in url:
                    resp = httptools.downloadpage(url, follow_redirects=False)
                    url = resp.headers.get("location", "")
                    if not url:
                        data = resp.data
                    elif host not in url:
                        data = url
            if data:
                devuelve.append(data)
    # Classic ad-shorteners handled by expurl.
    patron = r"""(https?://(?:www\.)?(?:threadsphere\.bid|adf\.ly|q\.gs|j\.gs|u\.bb|ay\.gy|linkbucks\.com|any\.gs|cash4links\.co|cash4files\.co|dyo\.gs|filesonthe\.net|goneviral\.com|megaline\.co|miniurls\.co|qqc\.co|seriousdeals\.net|theseblogs\.com|theseforums\.com|tinylinks\.co|tubeviral\.com|ultrafiles\.net|urlbeat\.net|whackyvidz\.com|yyv\.co|adfoc\.us|lnx\.lu|sh\.st|href\.li|anonymz\.com|shrink-service\.it|rapidcrypt\.net)/[^"']+)"""
    logger.info(" find_videos #" + patron + "#")
    for url in re.compile(patron).findall(text):
        if url not in encontrados:
            logger.info(" url=" + url)
            encontrados.add(url)
            long_url = expurl.expand_url(url)
            if long_url:
                devuelve.append(long_url)
        else:
            logger.info(" url duplicada=" + url)
    ret = servertools.findvideos(str(devuelve)) if devuelve else []
    return ret
def videos(item):
    """List beeg.com videos.

    The page embeds two parallel JS arrays -- ``tumbid`` (numeric video ids)
    and ``tumbalt`` (titles with escaped quotes) -- plus the base video and
    thumbnail URLs, e.g.::

        var tumbid =[7208081,1022338,...];
        var tumbalt =['Title one','Title two',...];
        var URLthumb = 'http://beeg.com/';
        var IMGthumb = 'http://eu1.anythumb.com/236x177/';

    Returns one playable Item per id/title pair.
    """
    logger.info("[beeg.py] videos")
    data = scrapertools.downloadpageWithoutCookies(item.url)
    itemlist = []

    base_thumbnail_url = scrapertools.get_match(data, "var IMGthumb \= '([^']+)'")
    base_url = scrapertools.get_match(data, "var URLthumb \= '([^']+)'")
    base_url = urlparse.urljoin("http://beeg.com/", base_url)

    # Numeric video ids.
    id_string = scrapertools.get_match(data, "var tumbid =\[([^\]]+)\]")
    id_list = re.compile("(\d+)", re.DOTALL).findall(id_string)

    # Titles: the JS escapes single quotes; neutralize them before matching,
    # then restore a plain quote on each title below.
    title_string = scrapertools.get_match(data, "var tumbalt \=\[([^\]]+)\]")
    title_string = title_string.replace("\\'", '"')
    title_list = re.compile("'([^']+)'", re.DOTALL).findall(title_string)

    # zip() pairs the two arrays safely; the old index loop raised IndexError
    # whenever tumbalt was shorter than tumbid.
    for video_id, title in zip(id_list, title_list):
        try:
            # Python 2: recode the title for the platform skin.
            scrapedtitle = unicode(title, "utf-8").encode("iso-8859-1")
        except:
            scrapedtitle = title
        scrapedtitle = scrapedtitle.replace('"', "'")
        scrapedurl = base_url + video_id
        scrapedthumbnail = base_thumbnail_url + video_id + ".jpg"
        scrapedplot = ""
        # Depuracion
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="play", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, show=scrapedtitle,
                 viewmode="movie", folder=False))
    return itemlist
def filmaffinity(item, infoLabels):
    """Look up a title on FilmAffinity and return review/metadata strings.

    infoLabels: dict with "title", "sinopsis" and optionally "year".
    Returns a 4-tuple: (critica, rating_filma, year_f, sinopsis_f) where
    critica is a Kodi-formatted block of reviews (colored by sentiment),
    rating_filma the site rating (or "Sin puntuacion"), year_f the scraped
    year (or ""), and sinopsis_f the scraped synopsis.
    """
    title = infoLabels["title"].replace(" ", "+")
    try:
        year = infoLabels["year"]
    except:
        year = ""
    sinopsis = infoLabels["sinopsis"]
    if year == "":
        # No year available: find the FilmAffinity page through a Bing search.
        if item.contentType != "movie":
            tipo = "serie"
            url_bing = "http://www.bing.com/search?q=%s+Serie+de+tv+site:filmaffinity.com" % title
        else:
            tipo = "película"
            url_bing = "http://www.bing.com/search?q=%s+site:filmaffinity.com" % title
        try:
            data = browser(url_bing)
            data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
            # Bing may be reached through the ssl proxy; strip its URL prefix.
            if "myaddrproxy.php" in data:
                subdata_bing = scrapertools.get_match(data, 'li class="b_algo"><div class="b_title"><h2>(<a href="/myaddrproxy.php/http/www.filmaffinity.com/es/film.*?)"')
                subdata_bing = re.sub(r'\/myaddrproxy.php\/http\/', '', subdata_bing)
            else:
                subdata_bing = scrapertools.get_match(data, 'li class="b_algo"><h2>(<a href="http://www.filmaffinity.com/.*?/film.*?)"')
            url_filma = scrapertools.get_match(subdata_bing, '<a href="([^"]+)')
            # Fetch the film page; the except arm retries the identical call
            # once (the 1s timeout makes a transient failure likely).
            if not "http" in url_filma:
                try:
                    data = httptools.downloadpage("http://" + url_filma, cookies=False, timeout=1).data
                except:
                    data = httptools.downloadpage("http://" + url_filma, cookies=False, timeout=1).data
            else:
                try:
                    data = httptools.downloadpage(url_filma, cookies=False, timeout=1).data
                except:
                    data = httptools.downloadpage(url_filma, cookies=False, timeout=1).data
            data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
        except:
            # Best effort: fall through with whatever `data` holds.
            pass
    else:
        # Year known: use FilmAffinity's own advanced search first.
        tipo = "Pelicula"
        url = "http://www.filmaffinity.com/es/advsearch.php?stext={0}&stype%5B%5D=title&country=&genre=&fromyear={1}&toyear={1}".format(title, year)
        data = httptools.downloadpage(url, cookies=False).data
        url_filmaf = scrapertools.find_single_match(data, '<div class="mc-poster">\s*<a title="[^"]*" href="([^"]+)"')
        if url_filmaf:
            url_filmaf = "http://www.filmaffinity.com%s" % url_filmaf
            data = httptools.downloadpage(url_filmaf, cookies=False).data
        else:
            # Advanced search found nothing: fall back to the Bing search
            # (same flow as above, but without timeout/retry).
            if item.contentType != "movie":
                tipo = "serie"
                url_bing = "http://www.bing.com/search?q=%s+Serie+de+tv+site:filmaffinity.com" % title
            else:
                tipo = "película"
                url_bing = "http://www.bing.com/search?q=%s+site:filmaffinity.com" % title
            try:
                data = browser(url_bing)
                data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
                if "myaddrproxy.php" in data:
                    subdata_bing = scrapertools.get_match(data, 'li class="b_algo"><div class="b_title"><h2>(<a href="/myaddrproxy.php/http/www.filmaffinity.com/es/film.*?)"')
                    subdata_bing = re.sub(r'\/myaddrproxy.php\/http\/', '', subdata_bing)
                else:
                    subdata_bing = scrapertools.get_match(data, 'li class="b_algo"><h2>(<a href="http://www.filmaffinity.com/.*?/film.*?)"')
                url_filma = scrapertools.get_match(subdata_bing, '<a href="([^"]+)')
                if not "http" in url_filma:
                    data = httptools.downloadpage("http://" + url_filma, cookies=False).data
                else:
                    data = httptools.downloadpage(url_filma, cookies=False).data
                data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
            except:
                pass
    # Scrape synopsis, year, rating and reviews from the film page.
    sinopsis_f = scrapertools.find_single_match(data, '<dd itemprop="description">(.*?)</dd>')
    sinopsis_f = sinopsis_f.replace("<br><br />", "\n")
    sinopsis_f = re.sub(r"\(FILMAFFINITY\)<br />", "", sinopsis_f)
    try:
        year_f = scrapertools.get_match(data, '<dt>Año</dt>.*?>(\d+)</dd>')
    except:
        year_f = ""
    try:
        rating_filma = scrapertools.get_match(data, 'itemprop="ratingValue" content="(.*?)">')
    except:
        rating_filma = "Sin puntuacion"
    critica = ""
    patron = '<div itemprop="reviewBody">(.*?)</div>.*?itemprop="author">(.*?)\s*<i alt="([^"]+)"'
    matches_reviews = scrapertools.find_multiple_matches(data, patron)
    if matches_reviews:
        for review, autor, valoracion in matches_reviews:
            review = dhe(scrapertools.htmlclean(review))
            review += "\n" + autor + "[CR]"
            review = re.sub(r'Puntuac.*?\)', '', review)
            # Color by the review's sentiment icon (positiva/neutral/negativa).
            if "positiva" in valoracion:
                critica += "[COLOR green][B]%s[/B][/COLOR]\n" % review
            elif "neutral" in valoracion:
                critica += "[COLOR yellow][B]%s[/B][/COLOR]\n" % review
            else:
                critica += "[COLOR red][B]%s[/B][/COLOR]\n" % review
    else:
        critica = "[COLOR floralwhite][B]Esta %s no tiene críticas todavía...[/B][/COLOR]" % tipo
    return critica, rating_filma, year_f, sinopsis_f
def findvideos(item):
    """Build the mirror list for a title: one pass over the "watch online"
    page and one over its "direct download" counterpart. Both pages share the
    same <table class="parrillaDescargas"> markup, so the parsing lives in
    _parse_mirror_table() (previously the loop was duplicated inline).
    """
    logger.info("[seriesonlinetv.py] findvideos(%s)" % item.tostring())
    itemlist = []

    # Online mirrors.
    data = scrapertools.cachePage(item.url)
    itemlist.extend(_parse_mirror_table(data, "Ver online ", item))

    # Direct-download mirrors (same layout, different path).
    data = scrapertools.cachePage(item.url.replace("/serie/", "/descarga-directa/"))
    itemlist.extend(_parse_mirror_table(data, "Descarga directa ", item))

    return itemlist


def _parse_mirror_table(data, title_prefix, item):
    """Parse one "parrillaDescargas" mirror table.

    Each row looks like:
        <td class="numMirror">...</td>
        <td class="hostParrilla"><a target="_blank" href="/video/..."><img src="host-logo.jpg" ...
    Returns a list of playable Items titled "<prefix><host-slug>".
    """
    items = []
    table = scrapertools.get_match(data, '<table class="parrillaDescargas">(.*?)</table>')
    patron = '<td class="numMirror">.*?</td>[^<]+'
    patron += '<td class="hostParrilla"><a target="_blank" href="([^"]+)"><img src="([^"]+)"'
    for url, thumbnail in re.compile(patron, re.DOTALL).findall(table):
        scrapedurl = urlparse.urljoin(item.url, url)
        scrapedtitle = url
        try:
            # e.g. /video/40-putlocker/8238... -> "40-putlocker"
            scrapedtitle = scrapedtitle.split("/")[2]
        except:
            pass
        items.append(
            Item(channel=__channel__, action="play", title=title_prefix + scrapedtitle,
                 fulltitle=item.title, url=scrapedurl, thumbnail=thumbnail,
                 plot=item.plot, folder=False))
    return items
def fanart(item):
    # Builds artwork/metadata items for a movie or TV show: scrapes the page,
    # guesses title/year, queries FilmAffinity (filmaffinity()), TMDb (tmdb.Tmdb)
    # and fanart.tv (fanartv()), then appends one artwork item plus one "Info"
    # item to the returned list.
    # NOTE(review): reconstructed from minified source; the nesting of some
    # fallback branches below is a best-effort reading -- verify upstream.
    logger.info()
    itemlist = []
    # item.extra is a "|"-separated bundle: [0]=search title, [1]=display
    # title, [2]=year (presumably -- TODO confirm against the caller).
    year = item.extra.split("|")[2]
    if item.contentType != "movie":
        tipo_ps = "tv"
    else:
        tipo_ps = "movie"
    url = item.url
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    title = item.extra.split("|")[0]
    # Original-language title from the meta description (unused below).
    title_o = scrapertools.find_single_match(data, '<meta name="description"[^<]+original(.*?)&')
    item.title = item.extra.split("|")[1]
    # Strip "[...]" tags for the IMDb search title.
    title_imdb = re.sub(r'\[.*?\]', '', item.extra.split("|")[1])
    # Strip quality/edition noise from the TMDb search title.
    title = re.sub(r"\(.*?\)|-Remastered|Black And Chrome Edition|V.extendida|Version Extendida|V.Extendida|HEVC|X\d+|x\d+|LINE|HD|1080p|Screeener|V.O|Hdrip|.*?--|3D|SBS|HOU", "", title)
    sinopsis = scrapertools.find_single_match(data, 'Sinopsis<br \/>(.*?)<\/div>')
    if sinopsis == "":
        try:
            sinopsis = scrapertools.find_single_match(data, 'sinopsis\'>(.*?)<\/div>')
        except:
            sinopsis = ""
    if "Miniserie" in sinopsis:
        # Miniseries are filed as movies on the site but are "tv" on TMDb.
        tipo_ps = "tv"
        year = scrapertools.find_single_match(sinopsis, 'de TV \((\d+)\)')
    if year == "":
        if item.contentType != "movie":
            try:
                year = scrapertools.find_single_match(data, '<strong>Estreno:<\/strong>(\d+)<\/span>')
            except:
                year = ""
        else:
            # Try several page layouts until a 4-digit year turns up.
            year = scrapertools.find_single_match(data, '<br \/>A.*?(\d+)<br \/>')
            if year == "":
                year = scrapertools.find_single_match(data, 'Estreno.*?\d+/\d+/(\d+)')
            if year == "":
                year = scrapertools.find_single_match(data, '<div class=\'descripcion_top\'>.*?Año<br />.*?(\d\d\d\d)')
            if year == "":
                year = scrapertools.find_single_match(data, '<meta name="description"[^<]+Año[^<]+\d\d\d\d')
            if year == "":
                year = scrapertools.find_single_match(data, '<h1><strong>.*?(\d\d\d\d).*?<')
            if year == "":
                year = " "
    infoLabels = {'title': title, 'sinopsis': sinopsis, 'year': year}
    critica, rating_filma, year_f, sinopsis_f = filmaffinity(item, infoLabels)
    # FilmAffinity results fill whatever the page scrape could not.
    if sinopsis == "":
        sinopsis = sinopsis_f
    if year == "":
        year = year_f
    # TMDb lookup 1: title + year; lookup 2: title only if no poster came back.
    otmdb = tmdb.Tmdb(texto_buscado=title, year=year, tipo=tipo_ps)
    id = otmdb.result.get("id")
    posterdb = otmdb.result.get("poster_path")
    if posterdb == None:
        otmdb = tmdb.Tmdb(texto_buscado=title, tipo=tipo_ps)
        id = otmdb.result.get("id")
        posterdb = otmdb.result.get("poster_path")
    if posterdb == None:
        # Lookup 3: find the IMDb id through a Bing search and query TMDb by
        # external id.
        if item.contentType != "movie":
            urlbing_imdb = "http://www.bing.com/search?q=%s+%s+tv+series+site:imdb.com" % (title_imdb.replace(' ', '+'), year)
            data = browser(urlbing_imdb)
            data = re.sub(r"\n|\r|\t|\s{2}| |http://ssl-proxy.my-addr.org/myaddrproxy.php/", "", data)
            subdata_imdb = scrapertools.find_single_match(data, '<li class="b_algo">(.*?)h="ID.*?<strong>.*?TV Series')
        else:
            urlbing_imdb = "http://www.bing.com/search?q=%s+%s+site:imdb.com" % (title_imdb.replace(' ', '+'), year)
            data = browser(urlbing_imdb)
            data = re.sub(r"\n|\r|\t|\s{2}| |http://ssl-proxy.my-addr.org/myaddrproxy.php/", "", data)
            subdata_imdb = scrapertools.find_single_match(data, '<li class="b_algo">(.*?)h="ID.*?<strong>')
        try:
            imdb_id = scrapertools.get_match(subdata_imdb, '<a href=.*?http.*?imdb.com/title/(.*?)/.*?"')
        except:
            try:
                imdb_id = scrapertools.get_match(subdata_imdb, '<a href=.*?http.*?imdb.com/.*?/title/(.*?)/.*?"')
            except:
                imdb_id = ""
        otmdb = tmdb.Tmdb(external_id=imdb_id, external_source="imdb_id", tipo=tipo_ps, idioma_busqueda="es")
        id = otmdb.result.get("id")
        posterdb = otmdb.result.get("poster_path")
        if not posterdb:
            # Lookup 4: retry the Bing/IMDb route using only the "(...)"
            # portion of the title (often the original title).
            if "(" in title_imdb:
                title = scrapertools.find_single_match(title_imdb, '\(.*?\)')
            if item.contentType != "movie":
                urlbing_imdb = "http://www.bing.com/search?q=%s+%s+tv+series+site:imdb.com" % (title_imdb.replace(' ', '+'), year)
                data = browser(urlbing_imdb)
                data = re.sub(r"\n|\r|\t|\s{2}| |http://ssl-proxy.my-addr.org/myaddrproxy.php/", "", data)
                subdata_imdb = scrapertools.find_single_match(data, '<li class="b_algo">(.*?)h="ID.*?<strong>.*?TV Series')
            else:
                urlbing_imdb = "http://www.bing.com/search?q=%s+%s+site:imdb.com" % (title_imdb.replace(' ', '+'), year)
                data = browser(urlbing_imdb)
                data = re.sub(r"\n|\r|\t|\s{2}| |http://ssl-proxy.my-addr.org/myaddrproxy.php/", "", data)
                subdata_imdb = scrapertools.find_single_match(data, '<li class="b_algo">(.*?)h="ID.*?<strong>')
            try:
                imdb_id = scrapertools.get_match(subdata_imdb, '<a href=.*?http.*?imdb.com/title/(.*?)/.*?"')
            except:
                try:
                    imdb_id = scrapertools.get_match(subdata_imdb, '<a href=.*?http.*?imdb.com/.*?/title/(.*?)/.*?"')
                except:
                    imdb_id = ""
            otmdb = tmdb.Tmdb(external_id=imdb_id, external_source="imdb_id", tipo=tipo_ps, idioma_busqueda="es")
            id = otmdb.result.get("id")
            posterdb = otmdb.result.get("poster_path")
            if not posterdb:
                # Total failure: emit a plain item with the channel's own art.
                id = tiw = rating = tagline = id_tvdb = ""
                fanart_4 = fanart_2 = fanart_3 = item.fanart
                rating = "Sin Puntuación"
                posterdb = tvf = item.thumbnail
                fanart_info = item.fanart
                thumbnail_art = item.thumbnail
                extra = str(fanart_2) + "|" + str(fanart_3) + "|" + str(fanart_4) + "|" + str(id) + "|" + str(tvf) + "|" + str(id_tvdb) + "|" + str(tiw) + "|" + str(rating)
                itemlist.append(Item(channel=item.channel, title=item.title, url=item.url, action="findvideos", thumbnail=item.thumbnail, fanart=item.fanart, extra=extra, folder=True))
            else:
                # Poster found on the last retry: same fallback art, but with
                # the action resolved from the content type.
                if tipo_ps != "movie":
                    action = "findvideos"
                else:
                    action = "findvideos_enlaces"
                id = tiw = rating = tagline = id_tvdb = ""
                fanart_4 = fanart_2 = fanart_3 = item.fanart
                rating = "Sin Puntuación"
                posterdb = tvf = item.thumbnail
                fanart_info = item.fanart
                thumbnail_art = item.thumbnail
                extra = str(fanart_2) + "|" + str(fanart_3) + "|" + str(fanart_4) + "|" + str(id) + "|" + str(tvf) + "|" + str(id_tvdb) + "|" + str(tiw) + "|" + str(rating)
                itemlist.append(Item(channel=item.channel, title=item.title, url=item.url, action=action, thumbnail=item.thumbnail, fanart=item.fanart, extra=extra, contentType=item.contentType, folder=True))
    # NOTE(review): the fallback branches above fall through to the code below
    # with id reset to "" and posterdb == item.thumbnail -- looks like a
    # latent upstream flaw; confirm against the original channel source.
    if posterdb != item.thumbnail:
        if not "null" in posterdb:
            posterdb = "https://image.tmdb.org/t/p/original" + posterdb
        else:
            posterdb = item.thumbnail
    if otmdb.result.get("backdrop_path"):
        fanart = "https://image.tmdb.org/t/p/original" + otmdb.result.get("backdrop_path")
    else:
        fanart = item.fanart
    if sinopsis == "":
        # NOTE(review): the key "'overview'" (quotes included) looks wrong --
        # probably meant "overview"; this branch likely never fires.
        if otmdb.result.get("'overview'"):
            sinopsis = otmdb.result.get("'overview'")
        else:
            sinopsis = ""
    if otmdb.result.get("vote_average"):
        rating = otmdb.result.get("vote_average")
    else:
        rating = "Sin puntuacíon"
    imagenes = []
    # Pull every still/backdrop URL TMDb knows for this id.
    itmdb = tmdb.Tmdb(id_Tmdb=id, tipo=tipo_ps)
    images = itmdb.result.get("images")
    for key, value in images.iteritems():
        for detail in value:
            imagenes.append('https://image.tmdb.org/t/p/original' + detail["file_path"])
    if item.contentType != "movie":
        if itmdb.result.get("number_of_seasons"):
            season_number = itmdb.result.get("number_of_seasons")
        else:
            # NOTE(review): assigns season_episode, not season_number --
            # str(season_number) below would raise NameError on this path.
            season_episode = ""
        if itmdb.result.get("number_of_episodes"):
            season_episode = itmdb.result.get("number_of_episodes")
        else:
            season_episode = ""
        if itmdb.result.get("status"):
            status = itmdb.result.get("status")
        else:
            status = ""
        if status == "Ended":
            status = "Finalizada"
        else:
            status = "En emisión"
        tagline = str(status) + " (Temporadas:" + str(season_number) + ",Episodios:" + str(season_episode) + ")"
        if itmdb.result.get("external_ids").get("tvdb_id"):
            id_tvdb = itmdb.result.get("external_ids").get("tvdb_id")
        else:
            id_tvdb = ""
    else:
        id_tvdb = ""
        if itmdb.result.get("tagline"):
            tagline = itmdb.result.get("tagline")
        else:
            tagline = ""
    # Spread the collected stills over the four extra fanart slots.
    if len(imagenes) >= 5:
        fanart_info = imagenes[1]
        fanart_2 = imagenes[2]
        fanart_3 = imagenes[3]
        fanart_4 = imagenes[4]
        if fanart == item.fanart:
            fanart = fanart_info
    elif len(imagenes) == 4:
        fanart_info = imagenes[1]
        fanart_2 = imagenes[2]
        fanart_3 = imagenes[3]
        fanart_4 = imagenes[1]
        if fanart == item.fanart:
            fanart = fanart_info
    elif len(imagenes) == 3:
        fanart_info = imagenes[1]
        fanart_2 = imagenes[2]
        fanart_3 = imagenes[1]
        fanart_4 = imagenes[0]
        if fanart == item.fanart:
            fanart = fanart_info
    elif len(imagenes) == 2:
        fanart_info = imagenes[1]
        fanart_2 = imagenes[0]
        fanart_3 = imagenes[1]
        fanart_4 = imagenes[1]
        if fanart == item.fanart:
            fanart = fanart_info
    else:
        fanart_info = fanart
        fanart_2 = fanart
        fanart_3 = fanart
        fanart_4 = fanart
    # fanart.tv artwork (logos, banners, cleararts) keyed by tvdb/tmdb ids.
    images_fanarttv = fanartv(item, id_tvdb, id)
    if item.contentType != "movie":
        action = "findvideos"
        if images_fanarttv:
            try:
                thumbnail_art = images_fanarttv.get("hdtvlogo")[0].get("url")
            except:
                try:
                    thumbnail_art = images_fanarttv.get("clearlogo")[0].get("url")
                except:
                    thumbnail_art = posterdb
            if images_fanarttv.get("tvbanner"):
                tvf = images_fanarttv.get("tvbanner")[0].get("url")
            elif images_fanarttv.get("tvthumb"):
                tvf = images_fanarttv.get("tvthumb")[0].get("url")
            elif images_fanarttv.get("tvposter"):
                tvf = images_fanarttv.get("tvposter")[0].get("url")
            else:
                tvf = posterdb
            if images_fanarttv.get("tvthumb"):
                thumb_info = images_fanarttv.get("tvthumb")[0].get("url")
            else:
                thumb_info = thumbnail_art
            if images_fanarttv.get("hdclearart"):
                tiw = images_fanarttv.get("hdclearart")[0].get("url")
            elif images_fanarttv.get("characterart"):
                tiw = images_fanarttv.get("characterart")[0].get("url")
            elif images_fanarttv.get("hdtvlogo"):
                tiw = images_fanarttv.get("hdtvlogo")[0].get("url")
            else:
                tiw = ""
        else:
            tiw = ""
            # NOTE(review): thumbnail_info here vs thumb_info above -- both
            # appear unused afterwards; likely a leftover inconsistency.
            tvf = thumbnail_info = thumbnail_art = posterdb
    else:
        action = "findvideos_enlaces"
        if images_fanarttv:
            if images_fanarttv.get("hdmovielogo"):
                thumbnail_art = images_fanarttv.get("hdmovielogo")[0].get("url")
            elif images_fanarttv.get("moviethumb"):
                thumbnail_art = images_fanarttv.get("moviethumb")[0].get("url")
            elif images_fanarttv.get("moviebanner"):
                thumbnail_art = images_fanarttv.get("moviebanner")[0].get("url")
            else:
                thumbnail_art = posterdb
            if images_fanarttv.get("moviedisc"):
                tvf = images_fanarttv.get("moviedisc")[0].get("url")
            elif images_fanarttv.get("hdmovielogo"):
                tvf = images_fanarttv.get("hdmovielogo")[0].get("url")
            else:
                tvf = posterdb
            if images_fanarttv.get("hdmovieclearart"):
                tiw = images_fanarttv.get("hdmovieclearart")[0].get("url")
            elif images_fanarttv.get("hdmovielogo"):
                tiw = images_fanarttv.get("hdmovielogo")[0].get("url")
            else:
                tiw = ""
        else:
            tiw = ""
            tvf = thumbnail_art = posterdb
    # Artwork item: extra packs fanarts/ids for the next action.
    extra = str(fanart_2) + "|" + str(fanart_3) + "|" + str(fanart_4) + "|" + str(id) + "|" + str(tvf) + "|" + str(id_tvdb) + "|" + str(tiw) + "|" + str(rating) + "|" + tipo_ps
    itemlist.append(Item(channel=item.channel, title=item.title, url=item.url, action=action, thumbnail=thumbnail_art, fanart=fanart, extra=extra, contentType=item.contentType, folder=True))
    # "Info" item: extra packs ratings/tagline/synopsis/reviews for info().
    title_info = "[COLOR indianred][B]Info[/B][/COLOR]"
    extra = str(rating) + "|" + str(rating_filma) + "|" + str(id) + "|" + str(item.title) + "|" + str(id_tvdb) + "|" + str(tagline) + "|" + str(sinopsis) + "|" + str(critica) + "|" + str(thumbnail_art) + "|" + str(fanart_4)
    itemlist.append(Item(channel=item.channel, action="info", title=title_info, url=item.url, thumbnail=posterdb, fanart=fanart_info, extra=extra, contentType=item.contentType, folder=False))
    return itemlist
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Resolve a streamin.to page into a list of playable video URLs.

    The signature mirrors the shared pelisalacarta server-connector contract;
    only page_url is actually used here.

    Returns a list of [label, media_url] pairs, where the label is the last
    four characters of the media filename plus " [streaminto]".
    """
    logger.info("pelisalacarta.servers.streaminto url="+page_url)
    logger.info("### page_url-streaminto-find_videos : "+page_url)

    # Normalize the URL: turn any streamin.to link into its /embed- form.
    try:
        if not page_url.startswith("http://streamin.to/embed-"):
            videoid = scrapertools.get_match(page_url,"streamin.to/([a-z0-9A-Z]+)")
            page_url = "http://streamin.to/embed-"+videoid+".html"
    except:
        import traceback
        logger.info(traceback.format_exc())

    # First request.
    headers = [['User-Agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14']]
    data = scrapertools.cache_page( page_url , headers=headers )
    #logger.info("data="+data)

    # Sample payload fragments this connector parses:
    #file: "37/2640690613_n.flv?h=2ki7efbuztuzcg3h5gecfdpdy3es3m7wc5423nwgzsxybtapha4sna47txdq",
    #streamer: "rtmp://95.211.184.228:1935/vod?h=2ki7efbuztuzcg3h5gecfdpdy3es3m7wc5423nwgzsxybtapha4sna47txdq"
    #image: "http://95.211.184.228:8777/i/03/00130/p0uqfu1iecak.jpg"
    #http://95.211.184.228:8777/15/4045655336_n.flv?h=2ki7efbuztuzcg3h5gecfdpdy3es3m7wc5423nwgzsxybtapha447fe7txcq
    #15/4045655336_n.flv?h=
    #patron = ',\{file\: "([^"]+)"'
    patron_flv = 'file: "([^"]+)"'
    #patron_rtmp = 'streamer: "([^"]+)"'
    patron_jpg = 'image: "(http://[^/]+/)'

    try:
        # Direct case: the media info is already in the first response.
        host = scrapertools.get_match(data, patron_jpg)
        logger.info("pelisalacarta.servers.streaminto host="+host)
        flv_url = scrapertools.get_match(data, patron_flv)
        logger.info("pelisalacarta.servers.streaminto flv_url="+flv_url)
        flv = host+flv_url.split("=")[1]+"/v.flv"
        logger.info("pelisalacarta.servers.streaminto flv="+flv)
        #rtmp = scrapertools.get_match(data, patron_rtmp)
    except:
        # Fallback: replay the hidden "imhuman" form the site shows before
        # revealing the media URL.
        logger.info("pelisalacarta.servers.streaminto opcion 2")
        op = scrapertools.get_match(data,'<input type="hidden" name="op" value="([^"]+)"')
        logger.info("pelisalacarta.servers.streaminto op="+op)
        usr_login = ""
        id = scrapertools.get_match(data,'<input type="hidden" name="id" value="([^"]+)"')
        logger.info("pelisalacarta.servers.streaminto id="+id)
        fname = scrapertools.get_match(data,'<input type="hidden" name="fname" value="([^"]+)"')
        logger.info("pelisalacarta.servers.streaminto fname="+fname)
        referer = scrapertools.get_match(data,'<input type="hidden" name="referer" value="([^"]*)"')
        logger.info("pelisalacarta.servers.streaminto referer="+referer)
        hashstring = scrapertools.get_match(data,'<input type="hidden" name="hash" value="([^"]*)"')
        logger.info("pelisalacarta.servers.streaminto hashstring="+hashstring)
        imhuman = scrapertools.get_match(data,'<input type="submit" name="imhuman".*?value="([^"]+)"').replace(" ","+")
        logger.info("pelisalacarta.servers.streaminto imhuman="+imhuman)
        # The site enforces a countdown before accepting the form.
        import time
        time.sleep(10)

        # Second request, as if the user had clicked the banner.
        #op=download1&usr_login=&id=z3nnqbspjyne&fname=Coriolanus_DVDrip_Castellano_by_ARKONADA.avi&referer=&hash=nmnt74bh4dihf4zzkxfmw3ztykyfxb24&imhuman=Continue+to+Video
        # FIX: the post string was corrupted by a secret-scrubbing pass
        # ('"&usr_login="******"' is a syntax error); usr_login must be
        # concatenated like every other form field.
        post = "op="+op+"&usr_login="+usr_login+"&id="+id+"&fname="+fname+"&referer="+referer+"&hash="+hashstring+"&imhuman="+imhuman
        headers.append(["Referer",page_url])
        data = scrapertools.cache_page( page_url , post=post, headers=headers )
        logger.info("data="+data)

        # Extract the URL.
        host = scrapertools.get_match(data, patron_jpg)
        flv = host+scrapertools.get_match(data, patron_flv).split("=")[1]+"/v.flv"
        #rtmp = scrapertools.get_match(data, patron_rtmp)

    video_urls = []
    video_urls.append( [ scrapertools.get_filename_from_url(flv)[-4:]+" [streaminto]",flv])
    #video_urls.append( [ scrapertools.get_filename_from_url(rtmp)[-4:]+" [streaminto]",rtmp])

    for video_url in video_urls:
        logger.info("pelisalacarta.servers.streaminto %s - %s" % (video_url[0],video_url[1]))

    return video_urls
def peliculas(item):
    """Build the movie listing for the oranline channel.

    Scrapes the listing page at item.url and returns a list of Item objects
    with action "findvideos" (one per movie), plus a ">> Página siguiente"
    pager item when a next-page link is found.
    """
    logger.info("pelisalacarta.channels.oranline peliculas")
    itemlist = []

    # Download the page.
    data = get_main_page(item.url)

    # Extract the entries (folders). Sample of the HTML being matched:
    '''
    <div class="review-box review-box-compact" style="width: 140px;">
    <!--Begin Image1-->
    <div class="post-thumbnail">
    <a href="http://www.oranline.com/pelicula/metro-manila-2013-ver-online-y-descargar-gratis/" title="Metro Manila (2013) Ver Online Y Descargar Gratis">
    <img src="http://www.oranline.com/wp-content/uploads/2013/10/metro-manila-140x210.jpg" alt="Metro Manila (2013) Ver Online Y Descargar Gratis" />
    </a>
    <div id="mejor_calidad">
    <a href="http://www.oranline.com/pelicula/metro-manila-2013-ver-online-y-descargar-gratis/" title="Metro Manila (2013) Ver Online Y Descargar Gratis"><img id="espanol" src="http://www.oranline.com/wp-content/themes/reviewit/images/HD-R_calidad.png" class="idiomas" alt="Metro Manila (2013) Ver Online Y Descargar Gratis" />
    </a>
    <span>HD-R</span></div>
    </div>
    <!--End Image-->
    <div class="review-box-text">
    <h2><a href="http://www.oranline.com/pelicula/metro-manila-2013-ver-online-y-descargar-gratis/" title="Metro Manila (2013) Ver Online Y Descargar Gratis">Metro Manila (2013) Ver Online...</a></h2>
    <p>Sinopsis Buscando un futuro mejor, Óscar Ramírez y su familia dejan los campos de arroz del norte ...</p>
    </div>
    <div id="campos_idiomas">
    <img id="espanol" src="http://www.oranline.com/wp-content/themes/reviewit/images/s.png" class="idiomas" alt="" />
    <img id="latino" src="http://www.oranline.com/wp-content/themes/reviewit/images/lx.png" class="idiomas" alt="" />
    <img id="ingles" src="http://www.oranline.com/wp-content/themes/reviewit/images/ix.png" class="idiomas" alt="" />
    <img id="vose" src="http://www.oranline.com/wp-content/themes/reviewit/images/vx.png" class="idiomas" alt="" />
    </div>
    </div>
    <div class="clear"></div>
    '''
    patron = '<div class="review-box.*?'
    patron += '<a href="([^"]+)" title="([^"]+)"[^<]+'
    patron += '<img src="([^"]+)"[^<]+'
    patron += '</a[^<]+'
    patron += '<div id="mejor_calidad"[^<]+'
    patron += '<a[^<]+<img[^<]+'
    patron += '</a[^<]+'
    patron += '<span>([^<]+)</span></div[^<]+'
    patron += '</div[^<]+'
    patron += '<![^<]+'
    patron += '<div class="review-box-text"[^<]+'
    patron += '<h2[^<]+<a[^<]+</a></h2[^<]+'
    patron += '<p>([^<]+)</p[^<]+'
    patron += '</div[^<]+'
    patron += '<div id="campos_idiomas">(.*?)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle, scrapedthumbnail, calidad, scrapedplot, idiomas in matches:
        # Strip the SEO boilerplate from the title (three capitalisation variants
        # appear in the wild).
        scrapedtitle = scrapedtitle.replace("Ver Online Y Descargar Gratis", "").strip()
        scrapedtitle = scrapedtitle.replace("Ver Online Y Descargar gratis", "").strip()
        scrapedtitle = scrapedtitle.replace("Ver Online Y Descargar", "").strip()
        # Append quality and the available-language flags, producing e.g.
        # "Title (HD-R) (ESP,LAT)".  A language is available when its "active"
        # icon (s.png / l.png / i.png / v.png) appears in the idiomas block;
        # greyed-out icons use the *x.png names and do not match.
        title = scrapedtitle + " (" + calidad + ") ("
        if "s.png" in idiomas:
            title = title + "ESP,"
        if "l.png" in idiomas:
            title = title + "LAT,"
        if "i.png" in idiomas:
            title = title + "ING,"
        if "v.png" in idiomas:
            title = title + "VOSE,"
        # Drop the trailing comma (or the "(" when no language matched).
        title = title[:-1] + ")"
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = scrapedplot.strip()
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=title, url=url,
                 thumbnail=thumbnail, plot=plot, viewmode="movies_with_plot", folder=True))

    # Pager: first try the "›" (&rsaquo;) link, then fall back to the
    # numbered-pagination markup.
    try:
        next_page = scrapertools.get_match(data, "<a href='([^']+)'>\&rsaquo\;</a>")
        itemlist.append(
            Item(channel=__channel__, action="peliculas", title=">> Página siguiente",
                 url=urlparse.urljoin(item.url, next_page), folder=True))
    except:
        try:
            next_page = scrapertools.get_match(
                data, "<span class='current'>\d+</span><a href='([^']+)'")
            itemlist.append(
                Item(channel=__channel__, action="peliculas", title=">> Página siguiente",
                     url=urlparse.urljoin(item.url, next_page), folder=True))
        except:
            # No pager found: last page.
            pass
        pass

    return itemlist
def info(item):
    """Show the extended info window for a movie or series.

    item.extra is a "|"-separated record built by the listing code; the
    fields read here are:
      [0] TMDb/TVDB rating   [1] FilmAffinity rating  [2] TMDb id
      [3] title (with %20)   [5] tagline              [6] synopsis
      [7] critique           [8] thumbnail            [9] fanart
    (layout inferred from the reads below — confirm against the code that
    builds `extra`).

    Colours the ratings, fetches TMDb "recommendations" for the "you may
    also like" strip, and hands everything to channels.infoplus.start().
    Returns nothing.

    Improvements over the previous revision: removed leftover debug
    `print` statements and unused locals (itemlist, url, filma); behavior
    is otherwise unchanged.
    """
    logger.info()
    rating_tmdba_tvdb = item.extra.split("|")[0]
    # NOTE(review): this tests field 6 (the synopsis) to decide the rating
    # is missing; field 0 looks like the intended index — confirm before
    # changing.
    if item.extra.split("|")[6] == "":
        rating_tmdba_tvdb = "Sin puntuación"
    rating_filma = item.extra.split("|")[1]
    title = item.extra.split("|")[3]
    title = title.replace("%20", " ")

    # Colour the TMDb/TVDB rating: >=8 yellow, 5..7 green, else crimson.
    try:
        if "." in rating_tmdba_tvdb:
            # Keep only the integer part for the threshold comparison.
            check_rat_tmdba = scrapertools.get_match(rating_tmdba_tvdb, '(\d+).')
        else:
            check_rat_tmdba = rating_tmdba_tvdb
        if int(check_rat_tmdba) >= 5 and int(check_rat_tmdba) < 8:
            rating = "[COLOR springgreen][B]" + rating_tmdba_tvdb + "[/B][/COLOR]"
        # NOTE(review): `rating_tmdba_tvdb == 10` compares a str to an int
        # (always False in Py2); the >= 8 clause already covers "10.x".
        elif int(check_rat_tmdba) >= 8 or rating_tmdba_tvdb == 10:
            rating = "[COLOR yellow][B]" + rating_tmdba_tvdb + "[/B][/COLOR]"
        else:
            rating = "[COLOR crimson][B]" + rating_tmdba_tvdb + "[/B][/COLOR]"
    except:
        # Non-numeric rating (e.g. "Sin puntuación").
        rating = "[COLOR crimson][B]" + rating_tmdba_tvdb + "[/B][/COLOR]"
    if "10." in rating:
        # Display "10" instead of "10.0".
        rating = re.sub(r'10\.\d+', '10', rating)

    # Colour the FilmAffinity rating with the same thresholds (first digit).
    try:
        check_rat_filma = scrapertools.get_match(rating_filma, '(\d)')
        if int(check_rat_filma) >= 5 and int(check_rat_filma) < 8:
            rating_filma = "[COLOR springgreen][B]" + rating_filma + "[/B][/COLOR]"
        elif int(check_rat_filma) >= 8:
            rating_filma = "[COLOR yellow][B]" + rating_filma + "[/B][/COLOR]"
        else:
            rating_filma = "[COLOR crimson][B]" + rating_filma + "[/B][/COLOR]"
    except:
        rating_filma = "[COLOR crimson][B]" + rating_filma + "[/B][/COLOR]"

    plot = item.extra.split("|")[6]
    plot = "[COLOR moccasin][B]" + plot + "[/B][/COLOR]"
    plot = re.sub(r"\\|<br />", "", plot)
    if item.extra.split("|")[5] != "":
        tagline = item.extra.split("|")[5]
        if tagline == "\"\"":
            tagline = " "
        tagline = "[COLOR aquamarine][B]" + tagline + "[/B][/COLOR]"
    else:
        tagline = ""
    # Window icon: TVDB logo for series, TMDb logo for movies.
    if item.contentType != "movie":
        icon = "http://s6.postimg.org/hzcjag975/tvdb.png"
    else:
        icon = "http://imgur.com/SenkyxF.png"
    foto = item.extra.split("|")[9]
    if not "tmdb" in foto:
        foto = ""
    if item.extra.split("|")[7] != "":
        critica = item.extra.split("|")[7]
    else:
        critica = "Esta serie no tiene críticas..."
    photo = item.extra.split("|")[8].replace(" ", "%20")
    # NOTE(review): this discards the thumbnail when it IS a .jpg; the
    # inverted check (not ".jpg" in photo) looks like the intent — confirm.
    if ".jpg" in photo:
        photo = ""

    # "You may also like": TMDb recommendations for this title.
    peliculas = []
    if item.contentType != "movie":
        url_tpi = "http://api.themoviedb.org/3/tv/" + item.extra.split("|")[2] + "/recommendations?api_key=" + api_key + "&language=es"
        data_tpi = httptools.downloadpage(url_tpi).data
        tpi = scrapertools.find_multiple_matches(data_tpi, 'id":(.*?),.*?"original_name":"(.*?)",.*?"poster_path":(.*?),"popularity"')
    else:
        url_tpi = "http://api.themoviedb.org/3/movie/" + item.extra.split("|")[2] + "/recommendations?api_key=" + api_key + "&language=es"
        data_tpi = httptools.downloadpage(url_tpi).data
        tpi = scrapertools.find_multiple_matches(data_tpi, 'id":(.*?),.*?"original_title":"(.*?)",.*?"poster_path":(.*?),"popularity"')
    for idp, peli, thumb in tpi:
        thumb = re.sub(r'"|}', '', thumb)
        if "null" in thumb:
            # No poster in TMDb: use the generic "no poster" placeholder.
            thumb = "http://s6.postimg.org/tw1vhymj5/noposter.png"
        else:
            thumb = "https://image.tmdb.org/t/p/original" + thumb
        peliculas.append([idp, peli, thumb])

    extra = "" + "|" + item.extra.split("|")[2] + "|" + item.extra.split("|")[2] + "|" + item.extra.split("|")[6] + "|" + ""
    infoLabels = {'title': title, 'plot': plot, 'thumbnail': photo, 'fanart': foto, 'tagline': tagline, 'rating': rating}
    item_info = item.clone(info=infoLabels, icon=icon, extra=extra, rating=rating, rating_filma=rating_filma,
                           critica=critica, contentType=item.contentType,
                           thumb_busqueda="http://imgur.com/j0A9lnu.png")
    from channels import infoplus
    infoplus.start(item_info, peliculas)
def getlist(item):
    """List torrents (movies / series / documentaries) from mejortorrent.

    Selects the scraping patterns from item.url, builds one Item per entry
    (title initially derived from the URL slug), replaces those titles with
    richer ones from a second regex pass, and appends a pager item.
    """
    logger.info("pelisalacarta.mejortorrent seriesydocs")
    itemlist = []
    data = scrapertools.cachePage(item.url)

    # movies
    # <a href="/peli-descargar-torrent-9578-Presentimientos.html">
    # <img src="/uploads/imagenes/peliculas/Presentimientos.jpg" border="1"></a
    #
    # series
    #
    #<a href="/serie-descargar-torrents-11589-11590-Ahora-o-nunca-4-Temporada.html">
    #<img src="/uploads/imagenes/series/Ahora o nunca4.jpg" border="1"></a>
    #
    # docs
    #
    #<a href="/doc-descargar-torrent-1406-1407-El-sueno-de-todos.html">
    #<img border="1" src="/uploads/imagenes/documentales/El sueno de todos.jpg"></a>

    if item.url.find("peliculas") > -1:
        patron = '<a href="(/peli-descargar-torrent[^"]+)">[^<]+'
        patron += '<img src="([^"]+)"[^<]+</a>'
        patron_enlace = "/peli-descargar-torrent-\d+(.*?)\.html"
        patron_title = '<a href="/peli-descargar-torrent[^"]+">([^<]+)</a>(\s*<b>([^>]+)</b>)?'
        action = "show_movie_info"
        folder = True
        extra = ""
    elif item.url.find("series-letra") > -1:
        # Letter-index pages use single quotes and have no thumbnail; the
        # empty group keeps the 2-tuple unpacking below working.
        patron = "<a href='(/serie-descargar-torrent[^']+)'>()"
        patron_enlace = "/serie-descargar-torrents-\d+-\d+-(.*?)\.html"
        patron_title = '<a href="/serie-descargar-torrent[^"]+">([^<]+)</a>(\s*<b>([^>]+)</b>)?'
        action = "episodios"
        folder = True
        extra = "series"
    elif item.url.find("series") > -1:
        patron = '<a href="(/serie-descargar-torrent[^"]+)">[^<]+'
        patron += '<img src="([^"]+)"[^<]+</a>'
        patron_enlace = "/serie-descargar-torrents-\d+-\d+-(.*?)\.html"
        patron_title = '<a href="/serie-descargar-torrent[^"]+">([^<]+)</a>(\s*<b>([^>]+)</b>)?'
        action = "episodios"
        folder = True
        extra = "series"
    else:
        patron = '<a href="(/doc-descargar-torrent[^"]+)">[^<]+'
        patron += '<img src="([^"]+)"[^<]+</a>'
        patron_enlace = "/doc-descargar-torrent-\d+-\d+-(.*?)\.html"
        patron_title = '<a href="/doc-descargar-torrent[^"]+">([^<]+)</a>(\s*<b>([^>]+)</b>)?'
        action = "episodios"
        folder = True
        extra = "docus"

    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedthumbnail in matches:
        # Provisional title from the URL slug; replaced below.
        title = scrapertools.get_match(scrapedurl, patron_enlace)
        title = title.replace("-", " ")
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, urllib.quote(scrapedthumbnail))
        plot = ""
        logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=item.channel, action=action, title=title, url=url,
                 thumbnail=thumbnail, plot=plot, folder=folder, extra=extra))

    matches = re.compile(patron_title, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    # Replace the URL-derived titles with richer ones.
    # This implementation assumes the second pass finds the same entries, in
    # the same order, as the first — technically fragile, but it holds while
    # the page format does not change.
    cnt = 0
    for scrapedtitle, notused, scrapedinfo in matches:
        title = re.sub(
            '\r\n', '', scrapedtitle).decode('iso-8859-1').encode('utf8').strip()
        if title.endswith('.'):
            title = title[:-1]
        info = scrapedinfo.decode('iso-8859-1').encode('utf8')
        if info != "":
            title = '{0} {1}'.format(title, info)
        itemlist[cnt].title = title
        cnt += 1

    if len(itemlist) == 0:
        itemlist.append(
            Item(channel=item.channel, action="mainlist",
                 title="No se ha podido cargar el listado"))
    else:
        # Extract the pager.
        patronvideos = "<a href='([^']+)' class='paginar'> Siguiente >>"
        matches = re.compile(patronvideos, re.DOTALL).findall(data)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedurl = urlparse.urljoin(item.url, matches[0])
            itemlist.append(
                Item(channel=item.channel, action="getlist",
                     title="Pagina siguiente >>", url=scrapedurl, folder=True))
    return itemlist
def findvideos(item):
    """List the playable mirrors of a yaske.net movie page.

    Parses each <tr> of the mirrors table, labels every link with server,
    language and quality, and returns one playable Item per http link.
    """
    logger.info("pelisalacarta.yaske findvideos url="+item.url)

    # Download the page.
    data = scrapertools.cache_page(item.url,headers=HEADER)
    item.plot = scrapertools.find_single_match(data,'<meta name="sinopsis" content="([^"]+)"')
    item.plot = scrapertools.htmlclean(item.plot)
    item.contentPlot = item.plot

    # Extract the entries. Sample row:
    '''
    <tr bgcolor="">
    <td height="32" align="center"><a class="btn btn-mini enlace_link" style="text-decoration:none;" rel="nofollow" target="_blank" title="Ver..." href="http://www.yaske.net/es/reproductor/pelicula/2141/44446/"><i class="icon-play"></i><b> Opcion 04</b></a></td>
    <td align="left"><img src="http://www.google.com/s2/favicons?domain=played.to"/>played</td>
    <td align="center"><img src="http://www.yaske.net/theme/01/data/images/flags/la_la.png" width="21">Lat.</td>
    <td align="center" class="center"><span title="" style="text-transform:capitalize;">hd real 720</span></td>
    <td align="center"><div class="star_rating" title="HD REAL 720 ( 5 de 5 )">
    <ul class="star"><li class="curr" style="width: 100%;"></li></ul>
    </div>
    </td>
    <td align="center" class="center">2553</td>
    </tr>
    '''
    patron = '<tr bgcolor=(.*?)</tr>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    itemlist = []

    #n = 1
    for tr in matches:
        logger.info("tr="+tr)
        try:
            title = scrapertools.get_match(tr,'<b>([^<]+)</b>')
            # Server name comes from the favicon domain.
            server = scrapertools.get_match(tr,'"http\://www.google.com/s2/favicons\?domain\=([^"]+)"')
            # <td align="center"><img src="http://www.yaske.net/theme/01/data/images/flags/la_la.png" width="19">Lat.</td>
            idioma = scrapertools.get_match(tr,'<img src="http://www.yaske.[a-z]+/theme/01/data/images/flags/([a-z_]+).png"[^>]+>[^<]*<')
            subtitulos = scrapertools.get_match(tr,'<img src="http://www.yaske.[a-z]+/theme/01/data/images/flags/[^"]+"[^>]+>([^<]*)<')
            calidad = scrapertools.get_match(tr,'<td align="center" class="center"[^<]+<span title="[^"]*" style="text-transform.capitalize.">([^<]+)</span></td>')
            #<a [....] href="http://api.ysk.pe/noref/?u=< URL Vídeo >">
            # The real video URL is the ?u= query parameter of the noref link.
            url = scrapertools.get_match(tr,'<a.*?href="([^"]+)"').split("=")[1]

            # Extracting netutv currently requires walking several extra pages,
            # which slows loading a lot, so it is disabled; those links will
            # show "nothing to play".
            '''
            if "/netu/tv/" in url:
                import base64
                ###################################################
                # Añadido 17-09-14
                ###################################################
                try:
                    data = scrapertools.cache_page(url,headers=getSetCookie(url1))
                except:
                    data = scrapertools.cache_page(url)
                ###################################################
                match_b64_1 = 'base64,([^"]+)"'
                b64_1 = scrapertools.get_match(data, match_b64_1)
                utf8_1 = base64.decodestring(b64_1)
                match_b64_inv = "='([^']+)';"
                b64_inv = scrapertools.get_match(utf8_1, match_b64_inv)
                b64_2 = b64_inv[::-1]
                utf8_2 = base64.decodestring(b64_2).replace("%","\\").decode('unicode-escape')
                id_video = scrapertools.get_match(utf8_2,'<input name="vid" id="text" value="([^"]+)">')
                url = "http://netu.tv/watch_video.php?v="+id_video
            '''

            title = title.replace(" ","")
            # Tag the title with the flag-derived language code.
            if "es_es" in idioma:
                scrapedtitle = title + " en "+server.strip()+" [ESP]["+calidad+"]"
            elif "la_la" in idioma:
                scrapedtitle = title + " en "+server.strip()+" [LAT]["+calidad+"]"
            elif "en_es" in idioma:
                scrapedtitle = title + " en "+server.strip()+" [SUB]["+calidad+"]"
            elif "en_en" in idioma:
                scrapedtitle = title + " en "+server.strip()+" [ENG]["+calidad+"]"
            else:
                scrapedtitle = title + " en "+server.strip()+" ["+idioma+" / "+subtitulos+"]["+calidad+"]"
            scrapedtitle = scrapertools.entityunescape(scrapedtitle)
            scrapedtitle = scrapedtitle.strip()
            scrapedurl = url
            scrapedthumbnail = servertools.guess_server_thumbnail(scrapedtitle)
            logger.info("server="+server+", scrapedurl="+scrapedurl)
            # Only keep absolute http links (filters the disabled netutv case).
            if scrapedurl.startswith("http"):
                itemlist.append( Item(channel=__channel__, action="play", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , folder=False, parentContent=item) )
        except:
            # A malformed row must not abort the whole listing.
            import traceback
            logger.info("Excepcion: "+traceback.format_exc())

    return itemlist
def episodios(item):
    """List the episodes of a series (newpct / pelisyseries-style sites).

    Walks every pagination page, extracts the episode entries, and parses
    each entry's info text in one of two formats ("new style" with <span>
    markup, or the older bracketed "[Cap.XYZ]" style) to build the title,
    season/episode numbers, language and quality.  Returns the resulting
    Items sorted by (season, episode), plus an "add to videolibrary" entry
    when the videolibrary is enabled.
    """
    logger.info()
    itemlist = []
    infoLabels = item.infoLabels

    data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(item.url).data)
    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
    # Preserve the original quality: item.quality is mutated per-episode below.
    calidad = item.quality

    # Collect all pagination pages (probe each one; keep only those that load).
    pattern = '<ul class="%s">(.*?)</ul>' % "pagination"  # item.pattern
    pagination = scrapertools.find_single_match(data, pattern)
    if pagination:
        pattern = '<li><a href="([^"]+)">Last<\/a>'
        full_url = scrapertools.find_single_match(pagination, pattern)
        url, last_page = scrapertools.find_single_match(
            full_url, r'(.*?\/pg\/)(\d+)')
        list_pages = [item.url]
        for x in range(2, int(last_page) + 1):
            response = httptools.downloadpage('%s%s' % (url, x))
            # NOTE: "sucess" is the attribute name used by this project's
            # httptools response object.
            if response.sucess:
                list_pages.append("%s%s" % (url, x))
    else:
        list_pages = [item.url]

    for index, page in enumerate(list_pages):
        data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(page).data)
        data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
        data = data.replace(
            "chapters", "buscar-list")  # Compatibility with mispelisy.series.com
        pattern = '<ul class="%s">(.*?)</ul>' % "buscar-list"  # item.pattern
        if scrapertools.find_single_match(data, pattern):
            data = scrapertools.get_match(data, pattern)
        else:
            # Episode list not found: log and bail out with what we have.
            logger.debug(item)
            logger.debug("data: " + data)
            return itemlist

        # pelisyseries.com swaps the url/thumb capture order.
        if "pelisyseries.com" in host:
            pattern = '<li[^>]*><div class.*?src="(?P<thumb>[^"]+)?".*?<a class.*?href="(?P<url>[^"]+).*?<h3[^>]+>(?P<info>.*?)?<\/h3>.*?<\/li>'
        else:
            pattern = '<li[^>]*><a href="(?P<url>[^"]+).*?<img.*?src="(?P<thumb>[^"]+)?".*?<h2[^>]+>(?P<info>.*?)?<\/h2>'
        matches = re.compile(pattern, re.DOTALL).findall(data)
        #logger.debug("patron: " + pattern)
        #logger.debug(matches)

        # Season carried over from the previous entry when an entry omits it.
        season = "1"
        for url, thumb, info in matches:
            if "pelisyseries.com" in host:
                # Undo the swapped capture order (see pattern above).
                interm = url
                url = thumb
                thumb = interm

            if "<span" in info:  # new style
                pattern = ".*?[^>]+>.*?Temporada\s*(?P<season>\d+)?.*?Capitulo(?:s)?\s*(?P<episode>\d+)?" \
                          "(?:.*?(?P<episode2>\d+)?)<.+?<span[^>]+>(?P<lang>.*?)?<\/span>\s*Calidad\s*<span[^>]+>" \
                          "[\[]\s*(?P<quality>.*?)?\s*[\]]<\/span>"
                if "Especial" in info:  # Special episodes
                    pattern = ".*?[^>]+>.*?Temporada.*?\[.*?(?P<season>\d+).*?\].*?Capitulo.*?\[\s*(?P<episode>\d+).*?\]?(?:.*?(?P<episode2>\d+)?)<.+?<span[^>]+>(?P<lang>.*?)?<\/span>\s*Calidad\s*<span[^>]+>[\[]\s*(?P<quality>.*?)?\s*[\]]<\/span>"
                if not scrapertools.find_single_match(
                        info, pattern
                ):  # on format error, synthesize a basic info string
                    logger.debug("patron episodioNEW: " + pattern)
                    logger.debug(info)
                    info = '><strong>%sTemporada %s Capitulo 0</strong> - <span >Español Castellano</span> Calidad <span >[%s]</span>' % (
                        item.contentTitle, season, item.infoLabels['quality'])
                r = re.compile(pattern)
                match = [m.groupdict() for m in r.finditer(info)][0]
                if match['season'] is None:
                    match['season'] = season
                if match['episode'] is None:
                    match['episode'] = "0"
                if match['quality']:
                    item.quality = match['quality']
                if match["episode2"]:
                    # Double episode, e.g. "1x01-02".
                    multi = True
                    title = "%s (%sx%s-%s) [%s]" % (
                        item.show, match["season"], str(
                            match["episode"]).zfill(2), str(
                                match["episode2"]).zfill(2), match["lang"])
                    if not config.get_setting("unify") and match[
                            "quality"]:  # if smart titles are NOT selected
                        title = "%s[%s]" % (title, match["quality"])
                else:
                    multi = False
                    title = "%s (%sx%s) [%s]" % (
                        item.show, match["season"], str(
                            match["episode"]).zfill(2), match["lang"])
                    if not config.get_setting("unify") and match[
                            "quality"]:  # if smart titles are NOT selected
                        title = "%s[%s]" % (title, match["quality"])
            else:  # old style
                # Normalize the bracketed forms before matching.
                if scrapertools.find_single_match(info, '\[\d{3}\]'):
                    info = re.sub(r'\[(\d{3}\])', r'[Cap.\1', info)
                elif scrapertools.find_single_match(info, '\[Cap.\d{2}_\d{2}\]'):
                    info = re.sub(r'\[Cap.(\d{2})_(\d{2})\]', r'[Cap.1\1_1\2]', info)
                elif scrapertools.find_single_match(info, '\[Cap.([A-Za-z]+)\]'):
                    info = re.sub(r'\[Cap.([A-Za-z]+)\]', '[Cap.100]', info)
                if scrapertools.find_single_match(info, '\[Cap.\d{2,3}'):
                    pattern = "\[(?P<quality>.*?)\].*?\[Cap.(?P<season>\d).*?(?P<episode>\d{2})(?:_(?P<season2>\d+)" \
                              "(?P<episode2>\d{2}))?.*?\].*?(?:\[(?P<lang>.*?)\])?"
                elif scrapertools.find_single_match(info, 'Cap.\d{2,3}'):
                    pattern = ".*?Temp.*?\s(?P<quality>.*?)\s.*?Cap.(?P<season>\d).*?(?P<episode>\d{2})(?:_(?P<season2>\d+)(?P<episode2>\d{2}))?.*?\s(?P<lang>.*)?"
                if not scrapertools.find_single_match(
                        info, pattern
                ):  # on format error, synthesize a basic info string
                    logger.debug("patron episodioOLD: " + pattern)
                    logger.debug(info)
                    info = '%s [%s][Cap.%s00][Español]' % (
                        item.contentTitle, item.infoLabels['quality'], season)
                r = re.compile(pattern)
                match = [m.groupdict() for m in r.finditer(info)][0]
                str_lang = ""
                if match['quality']:
                    item.quality = match['quality']
                if match["lang"] is not None:
                    str_lang = "[%s]" % match["lang"]
                    item.quality = "%s %s" % (item.quality, match['lang'])
                if match["season2"] and match["episode2"]:
                    # Double episode, possibly spanning two seasons.
                    multi = True
                    if match["season"] == match["season2"]:
                        title = "%s (%sx%s-%s) %s" % (
                            item.show, match["season"], match["episode"],
                            match["episode2"], str_lang)
                        if not config.get_setting("unify") and match[
                                "quality"]:  # if smart titles are NOT selected
                            title = "%s[%s]" % (title, match["quality"])
                    else:
                        title = "%s (%sx%s-%sx%s) %s" % (
                            item.show, match["season"], match["episode"],
                            match["season2"], match["episode2"], str_lang)
                        if not config.get_setting("unify") and match[
                                "quality"]:  # if smart titles are NOT selected
                            title = "%s[%s]" % (title, match["quality"])
                else:
                    title = "%s (%sx%s) %s" % (item.show, match["season"],
                                               match["episode"], str_lang)
                    if not config.get_setting("unify") and match[
                            "quality"]:  # if smart titles are NOT selected
                        title = "%s[%s]" % (title, match["quality"])
                    multi = False

            # Carry the season forward for entries that omit it.
            season = match['season']
            episode = match['episode']
            logger.debug("title: " + title + " / url: " + url + " / calidad: " +
                         item.quality + " / multi: " + str(multi) +
                         " / Season: " + str(season) + " / EpisodeNumber: " +
                         str(episode))
            itemlist.append(
                Item(channel=item.channel,
                     action="findvideos",
                     title=title,
                     url=url,
                     thumbnail=thumb,
                     quality=item.quality,
                     multi=multi,
                     contentSeason=season,
                     contentEpisodeNumber=episode,
                     infoLabels=infoLabels))

    # order list
    #tmdb.set_infoLabels(itemlist, True)
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if len(itemlist) > 1:
        itemlist = sorted(
            itemlist,
            key=lambda it: (int(it.contentSeason), int(it.contentEpisodeNumber)))

    if config.get_videolibrary_support() and len(itemlist) > 0:
        itemlist.append(
            item.clone(title="Añadir esta serie a la videoteca",
                       action="add_serie_to_library",
                       extra="episodios",
                       quality=calidad))

    return itemlist
def episodios(item):
    """List the episodes of a mejortorrent series/documentary page.

    Reads the hidden fields of the download form (needed later as POST data
    for the "play" action), extracts each episode row, enriches titles,
    thumbnails and plots from Tmdb when possible, and returns one playable
    Item per episode with the form POST stored in item.extra.
    """
    logger.info("pelisalacarta.mejortorrent episodios")
    itemlist = []

    # Download the page.
    data = scrapertools.cachePage(item.url)
    # Hidden form fields reused verbatim in the POST built below.
    total_capis = scrapertools.get_match(
        data, "<input type='hidden' name='total_capis' value='(\d+)'>")
    tabla = scrapertools.get_match(
        data, "<input type='hidden' name='tabla' value='([^']+)'>")
    titulo = scrapertools.get_match(
        data, "<input type='hidden' name='titulo' value='([^']+)'>")
    item.thumbnail = scrapertools.find_single_match(
        data, "src='http://www\.mejortorrent\.com(/uploads/imagenes/" + tabla +
        "/[a-zA-Z0-9_ ]+.jpg)'")
    item.thumbnail = 'http://www.mejortorrent.com' + urllib.quote(
        item.thumbnail)

    #<form name='episodios' action='secciones.php?sec=descargas&ap=contar_varios' method='post'>
    data = scrapertools.get_match(
        data,
        "<form name='episodios' action='secciones.php\?sec=descargas\&ap=contar_varios' method='post'>(.*?)</form>"
    )
    # Sample of the rows being matched:
    '''
    <td bgcolor='#C8DAC8' style='border-bottom:1px solid black;'><a href='/serie-episodio-descargar-torrent-18741-Juego-de-tronos-4x01.html'>4x01 - Episodio en V.O. Sub Esp.</a></td>
    <td width='120' bgcolor='#C8DAC8' align='right' style='border-right:1px solid black; border-bottom:1px solid black;'><div style='color:#666666; font-size:9px; margin-right:5px;'>Fecha: 2014-04-07</div></td>
    <td width='60' bgcolor='#F1F1F1' align='center' style='border-bottom:1px solid black;'>
    <input type='checkbox' name='episodios[1]' value='18741'>
    '''
    # Series rows wrap the title in a link; movie/doc rows do not.
    if item.extra == "series":
        patron = "<td bgcolor[^>]+><a[^>]+>([^>]+)</a></td>[^<]+"
    else:
        patron = "<td bgcolor[^>]+>([^>]+)</td>[^<]+"
    patron += "<td[^<]+<div[^>]+>Fecha: ([^<]+)</div></td>[^<]+"
    patron += "<td[^<]+"
    patron += "<input type='checkbox' name='([^']+)' value='([^']+)'"
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    # Strip season/miniseries suffixes and bracketed notes to get a clean
    # title for the Tmdb search.
    tmdb_title = re.sub(
        r'(\s*-\s*)?\d+.*?\s*Temporada|(\s*-\s*)?\s*Miniserie\.?|\(.*\)|\[.*\]',
        '', item.title).strip()
    logger.debug('pelisalacarta.mejortorrent episodios tmdb_title=' +
                 tmdb_title)
    if item.extra == "series":
        oTmdb = Tmdb(texto_buscado=tmdb_title.strip(),
                     tipo='tv',
                     idioma_busqueda="es")
    else:
        oTmdb = Tmdb(texto_buscado=tmdb_title.strip(), idioma_busqueda="es")

    for scrapedtitle, fecha, name, value in matches:
        scrapedtitle = scrapedtitle.strip()
        if scrapedtitle.endswith('.'):
            scrapedtitle = scrapedtitle[:-1]
        title = scrapedtitle + " (" + fecha + ")"
        url = "http://www.mejortorrent.com/secciones.php?sec=descargas&ap=contar_varios"
        # The play action replays the site's download form, e.g.:
        #"episodios%5B1%5D=11744&total_capis=5&tabla=series&titulo=Sea+Patrol+-+2%AA+Temporada"
        post = urllib.urlencode({
            name: value,
            "total_capis": total_capis,
            "tabla": tabla,
            "titulo": titulo
        })
        logger.debug("post=" + post)
        if item.extra == "series":
            epi = scrapedtitle.split("x")
            # Only query Tmdb when the title is in seasonXepisode format.
            if len(epi) > 1:
                temporada = re.sub("\D", "", epi[0])
                capitulo = re.sub("\D", "", epi[1])
                epi_data = oTmdb.get_episodio(temporada, capitulo)
                logger.debug("epi_data=" + str(epi_data))
                if epi_data:
                    # NOTE: mutates the shared `item`; later rows inherit
                    # these values when their own Tmdb lookup fails.
                    item.thumbnail = epi_data["temporada_poster"]
                    item.fanart = epi_data["episodio_imagen"]
                    item.plot = epi_data["episodio_sinopsis"]
                    epi_title = epi_data["episodio_titulo"]
                    if epi_title != "":
                        title = scrapedtitle + " " + epi_title + " (" + fecha + ")"
        else:
            try:
                item.fanart = oTmdb.get_backdrop()
            except:
                # Best effort: keep whatever fanart the item already has.
                pass
            item.plot = oTmdb.get_sinopsis()
        logger.debug("title=[" + title + "], url=[" + url + "], item=[" +
                     str(item) + "]")
        itemlist.append(
            Item(channel=item.channel,
                 action="play",
                 title=title,
                 url=url,
                 thumbnail=item.thumbnail,
                 plot=item.plot,
                 fanart=item.fanart,
                 extra=post,
                 folder=False))
    return itemlist