def videos(item):
    logger.info("[telemundo.py] videos")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage("http://msnlatino.telemundo.com/module/video_extras_content" + item.url)
    data = data.replace("\n", " ")
    data = data.replace("\r", " ")
    data = " ".join(data.split())
    logger.info(data)

    '''
    <div class="thumb">
    <a href="/videos/12_corazones/especial_de_dia_de_vacaciones_15_032113/487c3def-6a00-42bc-83d2-84d05ac81d80" class="img156x90">
    <img src="http://img2.catalog.video.msn.com/image.aspx?uuid=487c3def-6a00-42bc-83d2-84d05ac81d80&w=160&h=90&so=4" alt="" /></a>
    </div>
    <div class="tooltip">
    <div class="c_tip">
    <div class="arrow_tip"></div>
    <h3><a href="/videos/12_corazones/especial_de_dia_de_vacaciones_15_032113/487c3def-6a00-42bc-83d2-84d05ac81d80">12 Corazones</a></h3>
    <p><span class='tp_black'>Especial de dia de Vacaciones (1/5) (03-21-13)</span></p>
    <p><span class='tp_grey'>Descripción:</span> <br> Diviértete con las declaraciones de amor de nuestros participantes.</p>
    <p><span class='tp_grey'>Duración:</span> <span class='tp_black'>9:22</span></p>
    <p><span class='tp_grey'>Vistas:</span> <span class='tp_black'>115</span></p>
    </div>
    <div class="b_tip"> </div>
    </div>
    <div class="video-description">
    <h3><a href="/videos/12_corazones/especial_de_dia_de_vacaciones_15_032113/487c3def-6a00-42bc-83d2-84d05ac81d80">12 Corazones</a></h3>
    <p>Especial de dia de...</p>
    </div>
    </div>
    '''
    patron  = '<div class="thumb"> <a href="(.*?)" class="img156x90"> <img src="(.*?)" alt="" /></a> </div> <div class="tooltip">.*?'
    patron += '<span class=\'tp_black\'>(.*?)</span></p> <p><span class=\'tp_grey\'>Descripción:</span> <br>(.*?)</p>.*?</div>'
    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedurl,scrapedthumbnail,scrapedtitle,scrapeddescription in matches:
        title = scrapertools.htmlclean(scrapedtitle) + " - " + scrapertools.htmlclean(scrapeddescription)
        thumbnail = urlparse.urljoin("http://msnlatino.telemundo.com",scrapedthumbnail)
        # The last 36 characters of the href are the video GUID
        url = scrapedurl[-36:]
        itemlist.append( Item(channel=__channel__, action="play", title=title, url=url, thumbnail=thumbnail, folder=False))

    patron = '<a href="([^"]+)" class="next"></a>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if matches:
        itemlist.append( Item(channel=__channel__, action="videos", title=">> Página siguiente", url=matches[0], thumbnail="", folder=True))

    return itemlist
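# --- Standalone sketch (not part of the channel): exercises the "thumb/tooltip"
# pattern from videos() against a trimmed copy of the sample markup in its
# docstring. The whitespace collapse mirrors what videos() does before matching;
# the sample below is a hypothetical, shortened fragment. Call by hand to test.
def _demo_videos_pattern():
    sample = '''
    <div class="thumb">
    <a href="/videos/12_corazones/especial_487c3def-6a00-42bc-83d2-84d05ac81d80" class="img156x90">
    <img src="http://img2.catalog.video.msn.com/image.aspx?uuid=487c3def" alt="" /></a>
    </div>
    <div class="tooltip">
    <p><span class='tp_black'>Especial de dia de Vacaciones (1/5)</span></p>
    <p><span class='tp_grey'>Descripción:</span> <br> Diviértete con las declaraciones.</p>
    </div>
    '''
    sample = " ".join(sample.split())
    patron  = '<div class="thumb"> <a href="(.*?)" class="img156x90"> <img src="(.*?)" alt="" /></a> </div> <div class="tooltip">.*?'
    patron += '<span class=\'tp_black\'>(.*?)</span></p> <p><span class=\'tp_grey\'>Descripción:</span> <br>(.*?)</p>.*?</div>'
    for url, thumb, titulo, descripcion in re.compile(patron, re.DOTALL).findall(sample):
        # On the real site, url[-36:] would be the 36-character GUID
        print("url=%s titulo=%s" % (url, titulo))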
def fichas(item):
    itemlist = []

    texto = scrapertools.get_match( item.url, "search...([^&]+)&" )

    data = jsontools.load_json( scrapertools.cache_page( item.url ) )

    nextPageToken = data.get('nextPageToken')

    # 'items' is a list of video entries (default to an empty list, not a dict)
    _items = data.get('items', [])
    for _item in _items:
        url = page_url % _item['id']['videoId']
        title = _item['snippet']['title']
        plot = _item['snippet']['description']
        thumbnail = art_thumbnail % _item['id']['videoId']
        fanart = art_thumbnail % _item['id']['videoId']

        fulltitle = title
        title = scrapertools.htmlclean( title )
        show = library.title_to_folder_name( title )
        plot = scrapertools.htmlclean( plot )

        itemlist.append( Item( channel=__channel__, title=title, url=url, action="play", thumbnail=thumbnail, fanart=fanart, plot=plot, server="youtube", fulltitle=fulltitle, viewmode="movie_with_plot", show=show, folder=False ) )

    ## Pagination
    url = api_search % (texto, "50", nextPageToken)
    itemlist.append( Item( channel=__channel__, title="Pagina successiva >>", url=url, action="fichas", folder=True ) )

    return itemlist
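# --- Standalone sketch (not part of the channel): the JSON shape that fichas()
# consumes, consistent with a YouTube Data API v3 search response. page_url and
# art_thumbnail are module-level format strings not shown in this excerpt, so
# the values below are hypothetical stand-ins, as is the sample data.
def _demo_fichas_json():
    page_url = "https://www.youtube.com/watch?v=%s"            # assumed format
    art_thumbnail = "https://i.ytimg.com/vi/%s/hqdefault.jpg"  # assumed format
    data = {
        "nextPageToken": "CAUQAA",
        "items": [
            {"id": {"videoId": "abc123"},
             "snippet": {"title": "Demo", "description": "Hypothetical entry"}},
        ],
    }
    for _item in data.get('items', []):
        print(page_url % _item['id']['videoId'])
        print(art_thumbnail % _item['id']['videoId'])
    # The token is fed back into api_search to request the next page of 50
    print("next page token: %s" % data.get('nextPageToken'))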
def episodios(item):
    logger.info("tvalacarta.channels.rtpa episodios")
    itemlist = []

    # Cap the result set if the URL does not already do it
    if "&fin=" not in item.url:
        item.url = item.url + "&fin=1000"

    data = scrapertools.cache_page(item.url)
    json_object = jsontools.load_json(data)
    #logger.info("json_object="+repr(json_object))
    #logger.info("VOD="+repr(json_object["VOD"]))

    for vod in json_object["VOD"]:
        logger.info("vod="+repr(vod))
        title = vod["nombre_programa"]
        if vod["titulo"]!="":
            title = title + " - " + vod["titulo"]
        if vod["fecha_emision"]!="":
            title = title + " ("+scrapertools.htmlclean(vod["fecha_emision"])+")"

        url = "http://www.rtpa.es/video:"+urllib.quote(vod["nombre_programa"])+"_"+vod["id_generado"]+".html"

        try:
            url_imagen = vod["url_imagen"]
            thumbnail = urllib.quote(url_imagen).replace("//","/").replace("http%3A/","http://")
        except:
            thumbnail = ""

        plot = scrapertools.htmlclean(vod["sinopsis"])

        itemlist.append( Item(channel=CHANNELNAME, title=title, url=url, thumbnail=thumbnail, plot=plot, fanart=thumbnail, server="rtpa", action="play", show=item.title, viewmode="movie_with_plot", folder=False) )

    return itemlist
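# --- Standalone sketch (not part of the channel): the minimal "VOD" record that
# episodios() reads, with the same title/URL assembly. The record below is
# hypothetical sample data, not a real RTPA response; urllib.quote is used as in
# the function above (Python 2, assuming the module already imports urllib).
def _demo_rtpa_vod():
    vod = {"nombre_programa": "Conexion Asturias", "titulo": "Programa 1",
           "fecha_emision": "01/01/2014", "id_generado": "12345",
           "url_imagen": "http://www.rtpa.es/imagen.jpg", "sinopsis": ""}
    title = vod["nombre_programa"]
    if vod["titulo"] != "":
        title = title + " - " + vod["titulo"]
    url = "http://www.rtpa.es/video:" + urllib.quote(vod["nombre_programa"]) + "_" + vod["id_generado"] + ".html"
    print(title)
    print(url)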
def series_buscar(item):
    logger.info("pelisalacarta.channels.cinehanwer series_buscar")
    itemlist = []

    # Download the page
    headers = DEFAULT_HEADERS[:]
    headers.append(["Referer",item.extra])
    headers.append(["X-Requested-With","XMLHttpRequest"])
    data = scrapertools.cache_page(item.url,headers=headers)
    logger.info("data="+data)

    # Extract the entries (folders)
    '''
    {"searchTerms":"yes","results":[
    {"ID":4501,"post_author":"1","post_date":"mayo 23, 2014","post_date_gmt":"2014-05-23 17:56:47","post_title":"4x06 - Leyes de dioses y hombres","post_excerpt":"<p>La historia de Canci\u00f3n de Hielo y Fuego se sit\u00faa en un mundo ficticio medieval. Hay tres l\u00edneas [...]<\/p>\n","post_status":"publish","comment_status":"open","ping_status":"open","post_password":"","post_name":"4x06-leyes-de-dioses-y-hombres","to_ping":"","pinged":"","post_modified":"2014-05-23 19:56:47","post_modified_gmt":"2014-05-23 17:56:47","post_content_filtered":"","post_parent":0,"guid":"http:\/\/series.cinehanwer.com\/?p=4501","menu_order":0,"post_type":"post","post_mime_type":"","comment_count":"0","filter":"raw","post_author_nicename":"admin","permalink":"http:\/\/series.cinehanwer.com\/4x06-leyes-de-dioses-y-hombres\/","attachment_thumbnail":"http:\/\/series.cinehanwer.com\/wp-content\/uploads\/2013\/04\/\u00edndice-150x150.jpg","show_more":true},
    {"ID":4424,"post_author":"1","post_date":"mayo 16, 2014","post_date_gmt":"2014-05-16 09:02:06","post_title":"1x20 - El hacedor de reyes","post_excerpt":"<p>El criminal m\u00e1s buscado del mundo, Thomas Raymond Reddington (James Spader, se entrega [...]<\/p>\n","post_status":"publish","comment_status":"open","ping_status":"open","post_password":"","post_name":"1x20-el-hacedor-de-reyes","to_ping":"","pinged":"","post_modified":"2014-05-16 11:02:06","post_modified_gmt":"2014-05-16 09:02:06","post_content_filtered":"","post_parent":0,"guid":"http:\/\/series.cinehanwer.com\/?p=4424","menu_order":0,"post_type":"post","post_mime_type":"","comment_count":"0","filter":"raw","post_author_nicename":"admin","permalink":"http:\/\/series.cinehanwer.com\/1x20-el-hacedor-de-reyes\/","attachment_thumbnail":"http:\/\/series.cinehanwer.com\/wp-content\/uploads\/2014\/01\/The-Blacklist-128x128.jpeg","show_more":true}
    ],"displayPostMeta":true}
    '''
    json_object = jsontools.load_json(data)
    logger.info("results="+repr(json_object["results"]))
    data = json_object["results"]

    for entries in data:
        title = scrapertools.htmlclean(entries["post_title"])
        thumbnail = scrapertools.htmlclean(entries["attachment_thumbnail"])
        url = scrapertools.htmlclean(entries["permalink"])
        plot = ""

        itemlist.append( Item(channel=__channel__, action="findvideos_series", title=title, url=url, thumbnail=thumbnail, plot=plot, fulltitle=title, viewmode="movie"))

    return itemlist
def findvideos(item):
    logger.info("pelisalacarta.channels.zampaseries findvideos")

    # Download the page
    data = scrapertools.cache_page(item.url)
    #logger.info("data="+data)

    # Extract the entries (folders)
    patron  = '<tr[^<]+'
    patron += '<td>([^<]*)</td[^<]+'
    patron += '<td>([^<]*)</td[^<]+'
    patron += '<td>([^<]*)</td[^<]+'
    patron += '<td>([^<]*)</td[^<]+'
    patron += '<td>[^<]*</td[^<]+'
    patron += '<td>[^<]*</td[^<]+'
    patron += '<td class="descarga"><a href="([^"]+)"'
    matches = re.compile(patron,re.DOTALL).findall(data)

    itemlist = []
    for nombre_servidor,idioma,subs,calidad,scrapedurl in matches:
        if subs.strip()=="":
            subtitulos = ""
        else:
            subtitulos = scrapertools.htmlclean(" sub "+subs)

        title = "Ver en "+nombre_servidor+" ("+scrapertools.htmlclean(idioma)+subtitulos+") (Calidad "+calidad.strip()+")"
        url = urlparse.urljoin(item.url,scrapedurl)
        thumbnail = ""
        plot = ""
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        itemlist.append( Item(channel=__channel__, action="play", title=title, url=url, extra=item.url, folder=False))

    return itemlist
def infosinopsis(item):
    logger.info("pelisalacarta.channels.pordede infosinopsis")

    url_aux = item.url.replace("/links/view/slug/", "/peli/").replace("/what/peli", "")

    # Download the page
    headers = DEFAULT_HEADERS[:]
    #headers.append(["Referer",item.extra])
    #headers.append(["X-Requested-With","XMLHttpRequest"])
    data = scrapertools.cache_page(url_aux,headers=headers)
    if (DEBUG): logger.info("data="+data)

    scrapedtitle = scrapertools.find_single_match(data,'<h1>([^<]+)</h1>')
    scrapedvalue = scrapertools.find_single_match(data,'<span class="puntuationValue" data-value="([^"]+)"')
    scrapedyear = scrapertools.find_single_match(data,'<h2 class="info">[^<]+</h2>\s*<p class="info">([^<]+)</p>')
    # Same pattern as the year; index 1 picks the second occurrence (the duration)
    scrapedduration = scrapertools.find_single_match(data,'<h2 class="info">[^<]+</h2>\s*<p class="info">([^<]+)</p>', 1)
    scrapedplot = scrapertools.find_single_match(data,'<div class="info text"[^>]+>([^<]+)</div>')
    #scrapedthumbnail = scrapertools.find_single_match(data,'<meta property="og:image" content="([^"]+)"')
    #thumbnail = scrapedthumbnail.replace("http://www.pordede.comhttp://", "http://").replace("mediacover", "mediathumb")
    scrapedgenres = re.compile('href="/pelis/index/genre/[^"]+">([^<]+)</a>',re.DOTALL).findall(data)
    scrapedcasting = re.compile('href="/star/[^"]+">([^<]+)</a><br/><span>([^<]+)</span>',re.DOTALL).findall(data)

    title = scrapertools.htmlclean(scrapedtitle)
    plot = "Año: [B]"+scrapedyear+"[/B]"
    plot += " , Duración: [B]"+scrapedduration+"[/B]"
    plot += " , Puntuación usuarios: [B]"+scrapedvalue+"[/B]"
    plot += "\nGéneros: "+", ".join(scrapedgenres)
    plot += "\n\nSinopsis:\n"+scrapertools.htmlclean(scrapedplot)
    plot += "\n\nCasting:\n"
    for actor,papel in scrapedcasting:
        plot += actor+" ("+papel+"). "

    tbd = TextBox("DialogTextViewer.xml", os.getcwd(), "Default")
    tbd.ask(title, plot)
    del tbd
    return
def peliculas(item):
    logger.info("[italiafilm.py] peliculas")
    itemlist = []

    data = scrapertools.cachePage(item.url)

    patron = '<article(.*?)</article>'
    matches = re.compile(patron,re.DOTALL).findall(data)

    for match in matches:
        title = scrapertools.find_single_match(match,'<h3[^<]+<a href="[^"]+"[^<]+>([^<]+)</a>')
        title = scrapertools.htmlclean(title).strip()
        url = scrapertools.find_single_match(match,'<h3[^<]+<a href="([^"]+)"')
        plot = scrapertools.find_single_match(match,'<p class="summary">(.*?)</p>')
        plot = scrapertools.htmlclean(plot).strip()
        thumbnail = scrapertools.find_single_match(match,'data-echo="([^"]+)"')
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        # Add to the XBMC listing
        itemlist.append( Item(channel=__channel__, action='findvideos', title=title, url=url, thumbnail=thumbnail, fanart=thumbnail, plot=plot, viewmode="movie_with_plot", folder=True) )

    # Next page
    try:
        pagina_siguiente = scrapertools.get_match(data,'<a class="next page-numbers" href="([^"]+)"')
        itemlist.append( Item(channel=__channel__, action="peliculas", title=">> Pagina seguente", url=pagina_siguiente, folder=True) )
    except:
        pass

    return itemlist
def findvideos(item):
    logger.info("pelisalacarta.channels.hdfull findvideos")
    itemlist=[]

    ## Load statuses
    status = jsontools.load_json(scrapertools.cache_page(host+'/a/status/all'))

    url_targets = item.url

    ## Videos: the content id and type travel appended to the URL as "###id;type"
    id = ""
    type = ""
    if "###" in item.url:
        id = item.url.split("###")[1].split(";")[0]
        type = item.url.split("###")[1].split(";")[1]
        item.url = item.url.split("###")[0]

    if type == "2" and account and item.category != "Cine":
        title = bbcode_kodi2html(" ( [COLOR orange][B]Agregar a Favoritos[/B][/COLOR] )")
        if "Favorito" in item.title:
            title = bbcode_kodi2html(" ( [COLOR red][B]Quitar de Favoritos[/B][/COLOR] )")
        if (config.get_platform().startswith("xbmc") or config.get_platform().startswith("boxee")):
            title_label = bbcode_kodi2html(" ( [COLOR gray][B]" + item.show + "[/B][/COLOR] )")
            itemlist.append( Item( channel=__channel__, action="findvideos", title=title_label, fulltitle=title_label, url=url_targets, thumbnail=item.thumbnail, show=item.show, folder=False ) )
        itemlist.append( Item( channel=__channel__, action="set_status", title=title, fulltitle=title, url=url_targets, thumbnail=item.thumbnail, show=item.show, folder=True ) )

    data = agrupa_datos( scrapertools.cache_page(item.url) )

    patron  = '<div class="embed-selector"[^<]+'
    patron += '<h5 class="left"[^<]+'
    patron += '<span[^<]+<b class="key">\s*Idioma.\s*</b>([^<]+)</span[^<]+'
    patron += '<span[^<]+<b class="key">\s*Servidor.\s*</b><b[^>]+>([^<]+)</b[^<]+</span[^<]+'
    patron += '<span[^<]+<b class="key">\s*Calidad.\s*</b>([^<]+)</span[^<]+</h5.*?'
    patron += '<a href="(http[^"]+)".*?'
    patron += '</i>([^<]+)</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)

    for idioma,servername,calidad,url,opcion in matches:
        opcion = opcion.strip()
        if opcion != "Descargar":
            opcion = "Ver"
        title = opcion+": "+servername.strip()+" ("+calidad.strip()+")"+" ("+idioma.strip()+")"
        title = scrapertools.htmlclean(title)
        thumbnail = item.thumbnail
        plot = item.title+"\n\n"+scrapertools.find_single_match(data,'<meta property="og:description" content="([^"]+)"')
        plot = scrapertools.htmlclean(plot)
        fanart = scrapertools.find_single_match(data,'<div style="background-image.url. ([^\s]+)')
        url += "###" + id + ";" + type

        itemlist.append( Item( channel=__channel__, action="play", title=title, fulltitle=title, url=url, thumbnail=thumbnail, plot=plot, fanart=fanart, show=item.show, folder=True ) )

    ## type 2 = movie
    if type == "2" and item.category != "Cine":
        ## STRM for all available server links
        ## If the movie's STRM file does not exist, show the ">> Añadir a la biblioteca..." item
        try:
            itemlist.extend( file_cine_library(item,url_targets) )
        except:
            pass

    return itemlist
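# --- Standalone sketch (not part of the channel): the "url###id;type" packing
# convention that findvideos() relies on. These helpers are hypothetical,
# written here only to document the format the splits above assume.
def _pack_target(url, id, type):
    # A single "###" separates the real URL from "id;type"
    return url + "###" + id + ";" + type

def _unpack_target(packed):
    if "###" not in packed:
        return packed, "", ""
    url, extra = packed.split("###", 1)
    id, type = extra.split(";", 1)
    return url, id, type

# _unpack_target("http://hdfull.tv/pelicula/x###99;2") -> ("http://hdfull.tv/pelicula/x", "99", "2")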
def peliculas(item):
    logger.info("[peliculasfull.py] peliculas")
    itemlist = []

    data = scrapertools.cache_page(item.url)

    patron  = "<div class='post bar hentry'>[^<]+<h2 class='post-title entry-title'>[^<]+<a href='([^']+)'>([^<]+)</a>.*?"
    patron += "<div><span style='color: #fec700;'>calidad : </span>([^<]+)</div>.*?"
    patron += '<div id=.*?summary.*?><div style="float: left; margin-bottom: 1em; margin-right: 1em;">[^<]+<a href="([^"]+)".*?target="_blank">.*?<span class="ficha">SINOPSIS : :</span>(.*?)</div>'
    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle, scrapedquality, scrapedthumbnail, scrapedplot in matches:
        url = scrapedurl
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        scrapedquality = scrapedquality.replace('\n','')
        title = scrapedtitle + " - Calidad:" + scrapedquality
        thumbnail = scrapedthumbnail
        plot = scrapertools.htmlclean(scrapedplot)

        # Add to the listing
        itemlist.append( Item(channel=__channel__, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot, folder=True) )

    # Extract the next-page link
    patron = "<a class='blog-pager-older-link'.*?href='([^']+)' id='Blog1_blog-pager-older-link' title='Next Post'>Next Movie"
    matches = re.compile(patron,re.DOTALL).findall(data)
    if matches:
        url = scrapertools.htmlclean(matches[0])
        title = ">> Página Siguiente"

        # Add to the listing
        itemlist.append( Item(channel=__channel__, action="peliculas", title=title, url=url, thumbnail="", plot="", folder=True) )

    return itemlist
def findvideos(item):
    logger.info("[verseries.py] findvideos")
    itemlist=[]

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Online links
    patron = '<li class="lcc">[^<]+<a target="repro" rel="nofollow" href="([^"]+)"[^>]+>(.*?)</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl,scrapedtitle in matches:
        scrapedurl = urlparse.urljoin(item.url,scrapedurl)
        scrapedtitle = unicode(scrapedtitle,"iso-8859-1").encode("utf-8").strip()
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        itemlist.append( Item(channel=item.channel, action="play", title="Online "+scrapedtitle, fulltitle=item.fulltitle, url=scrapedurl, thumbnail=item.thumbnail, category="series", plot=item.plot, show=item.show) )

    # MU (download) links
    patron = '<li class="lcc">[^<]+<a rel="nofollow" target="blank" href="([^"]+)" class="lcc">(.*?)</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl,scrapedtitle in matches:
        scrapedurl = urlparse.urljoin(item.url,scrapedurl)
        scrapedtitle = unicode(scrapedtitle,"iso-8859-1").encode("utf-8").strip()
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        itemlist.append( Item(channel=item.channel, action="play", title="Descarga "+scrapedtitle, fulltitle=item.fulltitle, url=scrapedurl, thumbnail=item.thumbnail, category="series", plot=item.plot, show=item.show) )

    return itemlist
def detalle_programa(item):
    logger.info("tvalacarta.channels.dwspan detalle_programa")

    id_programa = scrapertools.find_single_match(item.url,"programm=(\d+)")
    url = "http://www.dw.com/es/programa/a/s-"+id_programa+"-1"

    try:
        item.page = scrapertools.get_header_from_response(url,header_to_get="location")
        data = scrapertools.cache_page(item.page)

        item.plot = scrapertools.find_single_match(data,'<div class="longText">(.*?)</div>')
        item.plot = scrapertools.htmlclean( item.plot ).strip()
        if item.plot=="":
            item.plot = scrapertools.find_single_match(data,'<div class="news"[^<]+<h2[^<]+</h2>(.*?)</div>')
            item.plot = scrapertools.htmlclean( item.plot ).strip()

        item.thumbnail = scrapertools.find_single_match(data,'<input type="hidden" name="preview_image" value="([^"]+)"')
        if item.thumbnail.strip()=="":
            item.thumbnail = scrapertools.find_single_match(data,'<img class="stillImage" src="([^"]+)"')
        item.thumbnail = urlparse.urljoin(item.page,item.thumbnail)
    except:
        import traceback
        logger.info(traceback.format_exc())

    return item
def findvideos(item):
    #show = item.title.replace("Añadir esta serie a la biblioteca de XBMC","")
    #logger.info("[megaspain.py] findvideos show "+ show)
    itemlist=[]

    data = scrapertools.cache_page(item.url)

    if 'thank_you_button' in data:
        item.url = item.url.replace("php?topic=","php?action=thankyou;topic=")
        item.url = item.url + item.plot
        data = scrapertools.cache_page(item.url)

    if 'MegaSpain' in data:
        patronimage = '<div class="inner" id="msg_\d{1,9}".*?<img src="([^"]+)".*?mega.co.nz/\#\![A-Za-z0-9\-\_]+\![A-Za-z0-9\-\_]+'
        matches = re.compile(patronimage,re.DOTALL).findall(data)
        if len(matches)>0:
            thumbnail = matches[0]
            thumbnail = scrapertools.htmlclean(thumbnail)
            thumbnail = unicode( thumbnail, "iso-8859-1", errors="replace" ).encode("utf-8")
            item.thumbnail = thumbnail

        patronplot = '<div class="inner" id="msg_\d{1,9}".*?<img src="[^"]+"[^/]+/>(.*?)lgf_facebook_share'
        matches = re.compile(patronplot,re.DOTALL).findall(data)
        if len(matches)>0:
            plot = matches[0]
            plot = re.sub('&nbsp;', '', plot)
            plot = re.sub('\s\s', '', plot)
            plot = scrapertools.htmlclean(plot)
        item.plot = ""

        from servers import servertools
        itemlist.extend(servertools.find_video_items(data=data))
        for videoitem in itemlist:
            videoitem.channel=__channel__
            videoitem.action="play"
            videoitem.folder=False
            videoitem.thumbnail=item.thumbnail
            videoitem.plot = item.plot
            videoitem.title = "["+videoitem.server+"] "+videoitem.title + " " + item.title
            #videoitem.show = show

        # if config.get_platform().startswith("xbmc") or config.get_platform().startswith("boxee"):
        #     itemlist.append( Item(channel=item.channel, title=show + " Añadir esta serie a la biblioteca de XBMC", url=item.url, action="add_serie_to_library", extra="findvideos") )

        return itemlist
    else:
        item.thumbnail = ""
        item.plot = ""

        from servers import servertools
        itemlist.extend(servertools.find_video_items(data=data))
        for videoitem in itemlist:
            videoitem.channel=__channel__
            videoitem.action="play"
            videoitem.folder=False
            videoitem.thumbnail=item.thumbnail
            videoitem.plot = item.plot
            videoitem.title = "["+videoitem.server+"] "+videoitem.title + " " + item.title

        return itemlist
def episodios(item):
    import urllib
    logger.info("tvalacarta.channels.xiptv episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    data = scrapertools.find_single_match(data,'(<li[^<]+<div class="item">.*?<div class="pager">.*?</div>)')

    '''
    <li>
    <div class="item">
    <div class="image drop-shadow curved curved-hz-1 ">
    <a href="/la-setmana-catalunya-central/capitol/capitol-30"><img alt="Imatge_pgm30" src="/media/asset_publics/resources/000/180/341/video/imatge_pgm30.jpg?1396620287" /></a>
    </div>
    <div class="content">
    <span class="date"> 04/04/2014 </span>
    <h4> <a href="/la-setmana-catalunya-central/capitol/capitol-30">Capítol 30</a> </h4>
    <p><h5><a href="/la-setmana-catalunya-central" target="_blank">La setmana Catalunya central</a> </h5> </p>
    <span class="duration">25:02</span>
    <span class="views">0 reproduccions</span>
    <p>Al llarg dels segle XIX el Seminari de Vic va anar forjant una col·lecció de Ciències Naturals que representa, a dia d’avui, un valuós testimoni històric. Al programa d’avui coneixerem el nou destí de les peces que integren aquesta col·lecció i quin és el seu nou destí: integrar-se al fons del Museu del Ter de Manlleu. En aquesta edició de ‘La S...</p>
    <div class="related">
    '''
    patron = '<li[^<]+<div class="item">(.*?)<div class="related">'
    matches = re.compile(patron,re.DOTALL).findall(data)

    for match in matches:
        fecha = scrapertools.find_single_match(match,'<span class="date">([^<]+)</span>').strip()
        duracion = scrapertools.find_single_match(match,'<span class="duration">([^<]+)</span>').strip()
        titulo_programa = scrapertools.find_single_match(match,'<p><h5><a[^>]+>([^<]+)</a>').strip()
        titulo_episodio = scrapertools.find_single_match(match,'<h4[^<]+<a[^>]+>([^<]+)</a>').strip()
        scrapedurl = scrapertools.find_single_match(match,'<h4[^<]+<a href="([^"]+)"')
        scrapedthumbnail = scrapertools.find_single_match(match,'<img alt="[^"]+" src="([^"]+)"')
        scrapedplot = scrapertools.find_single_match(match,'<p>([^<]+)</p>').strip()

        title = scrapertools.htmlclean(titulo_episodio)  # + " (" + fecha + ") (" + duracion + ")"
        url = urlparse.urljoin( item.url , scrapedurl )
        thumbnail = urlparse.urljoin( item.url , scrapedthumbnail )
        plot = scrapertools.htmlclean(scrapedplot)
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        # Add to the XBMC listing
        itemlist.append( Item(channel=CHANNELNAME, title=title, action="play", server="xiptv", url=url, thumbnail=thumbnail, fanart=thumbnail, plot=plot, show=item.show, category=item.category, viewmode="movie_with_plot", folder=False) )

    # Next page
    patron = '<a href="([^"]+)">next</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    for match in matches:
        itemlist.append( Item(channel=CHANNELNAME, title=">> Página siguiente", action="episodios", extra="pager", url=urlparse.urljoin(item.url,match), show=item.show, folder=True) )

    return itemlist
def episodios(item):
    logger.info("[seriespepito.py] episodios")

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Fill in plot and thumbnail
    item = detalle_programa(item,data)

    data = scrapertools.get_match(data,'<div class="accordion"(.*?)<div class="subtitulo">')
    logger.info(data)

    # Extract the episodes
    '''
    <tbody>
    <tr>
    <td>
    <a class="asinenlaces" title=" 0x01 - Battlestar Galactica 2003 - Capitulo 1" href="http://battlestar-galactica-2003.seriespepito.com/temporada-0/capitulo-1/">
    <i class="icon-film"></i>
    <strong>0x01</strong>
    - Battlestar Galactica 2003 - Capitulo 1
    </a><button id="capvisto_121_0_1" class="btn btn-warning btn-mini sptt pull-right bcapvisto ctrl_over" data-tt_my="left center" data-tt_at="right center" data-tt_titulo="Marca del último capítulo visto" data-tt_texto="Este es el último capítulo que has visto de esta serie." data-id="121" data-tem="0" data-cap="1" type="button"><i class="icon-eye-open"></i></button></td></tr><tr><td><a title=" 0x02 - Battlestar Galactica 2003 - Capitulo 2" href="http://battlestar-galactica-2003.seriespepito.com/temporada-0/capitulo-2/"><i class="icon-film"></i> <strong>0x02</strong> - Battlestar Galactica 2003 - Capitulo 2 <span class="flag flag_0"></span></a><button id="capvisto_121_0_2" class="btn btn-warning btn-mini sptt pull-right bcapvisto ctrl_over" data-tt_my="left center" data-tt_at="right center" data-tt_titulo="Marca del último capítulo visto" data-tt_texto="Este es el último capítulo que has visto de esta serie." data-id="121" data-tem="0" data-cap="2" type="button"><i class="icon-eye-open"></i></button></td></tr></tbody>
    '''
    patron  = '<tr>'
    patron += '<td>'
    patron += '<a.*?href="([^"]+)"[^<]+'
    patron += '<i[^<]+</i[^<]+'
    patron += '<strong>([^<]+)</strong>'
    patron += '([^<]+)<(.*?)<button'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist = []
    for scrapedurl,scrapedepisode,scrapedtitle,idiomas in matches:
        title = scrapertools.htmlclean(scrapedepisode) + " " + scrapertools.htmlclean(scrapedtitle).strip()
        if "flag_0" in idiomas: title = title + " (Español)"
        if "flag_1" in idiomas: title = title + " (Latino)"
        if "flag_2" in idiomas: title = title + " (VO)"
        if "flag_3" in idiomas: title = title + " (VOS)"
        url = scrapedurl
        thumbnail = item.thumbnail
        plot = item.plot
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        itemlist.append( Item(channel=__channel__, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot, show=item.show, viewmode="movie_with_plot"))

    if (config.get_platform().startswith("xbmc") or config.get_platform().startswith("boxee")) and len(itemlist)>0:
        itemlist.append( Item(channel=item.channel, title="Añadir esta serie a la biblioteca de XBMC", url=item.url, action="add_serie_to_library", extra="episodios", show=item.show, fanart="http://pelisalacarta.mimediacenter.info/fanart/seriespepito.jpg"))
        itemlist.append( Item(channel=item.channel, title="Descargar todos los episodios de la serie", url=item.url, action="download_all_episodes", extra="episodios", show=item.show, fanart="http://pelisalacarta.mimediacenter.info/fanart/seriespepito.jpg"))

    return itemlist
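# --- Standalone sketch (not part of the channel): the flag_N -> language tag
# mapping used above and by other Pepito channels in this file. The helper is
# hypothetical; the flag class names and suffixes come straight from the code.
_LANG_FLAGS = [("flag_0", " (Español)"), ("flag_1", " (Latino)"),
               ("flag_2", " (VO)"), ("flag_3", " (VOS)")]

def _add_language_tags(title, idiomas):
    # idiomas is the raw HTML fragment captured between the title and <button>;
    # the order of the list preserves the flag_0..flag_3 append order
    for flag, suffix in _LANG_FLAGS:
        if flag in idiomas:
            title += suffix
    return title

# _add_language_tags("0x01 Capitulo 1", '<span class="flag flag_0">') -> "0x01 Capitulo 1 (Español)"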
def episodios_serie(item,data):
    logger.info("tvalacarta.cctvspan episodios_serie")

    '''
    <div class="list_box" style="display:block">
    <div class="lanmugwZ10483_con01" style="background:block;width:357;height:381;">
    <div class="box03">
    <div class="text"><A guid="07c3ff69b66f412287de977c6ac58fb5" onclick="loadvideo('07c3ff69b66f412287de977c6ac58fb5');" href="javascript:void(0);" name="">1</A><A guid="05bcab043fed4032a2194051a4dd1c47" onclick="loadvideo('05bcab043fed4032a2194051a4dd1c47');" href="javascript:void(0);" name="">2</A>
    ... (one <A guid="..."> entry per episode, up to) ...
    <A guid="d6122e3e570a45a48eb3aba3918d74a6" onclick="loadvideo('d6122e3e570a45a48eb3aba3918d74a6');" href="javascript:void(0);" name="">25</A></div>
    </div>
    </div>
    </div>
    </div>
    '''
    # The page is already downloaded by the caller
    #data = scrapertools.cachePage(item.url)

    bloque = scrapertools.find_single_match(data,'<div class="box03">(.*?)</div>')
    patron = '<A guid="([^"]+)"[^>]+>([^<]+)</A>'
    matches = re.compile(patron,re.DOTALL).findall(bloque)
    if DEBUG: scrapertools.printMatches(matches)

    itemlist = []
    for scrapedurl,scrapedtitle in matches:
        title = scrapertools.htmlclean(scrapedtitle)
        url = scrapedurl
        thumbnail = ""
        itemlist.append( Item(channel=__channel__, action="play", server="cntv", title=title, url=url, thumbnail=thumbnail, show=item.show, folder=False) )

    if len(itemlist)>0:
        return itemlist

    '''
    <li>
    <div class="image"><a href="http://espanol.cntv.cn/program/Telenovela/20130121/105582.shtml" target="_blank" title=""><img src="http://p3.img.cctvpic.com/fmspic/2013/01/21/c496264b7c4546e2a99f6bc33c7a8c86-180.jpg" width="106" alt="La contrasena de la felicidad Capítulo 27" /></a></div>
    <div class="text">
    <a href="http://espanol.cntv.cn/program/Telenovela/20130121/105582.shtml" target="_blank" title="">La contrasena de la felicidad Capítulo 27</a>
    </div>
    </li>
    '''
    bloque = scrapertools.find_single_match(data,'<div class="image_list_box"(.*?)</ul[^<]+</div')
    logger.info("bloque="+bloque)
    patron = '<a href="([^"]+)"[^<]+<img src="([^"]+)" width="[^"]+"\s+alt="([^"]+)"'
    matches = re.compile(patron,re.DOTALL).findall(bloque)
    if DEBUG: scrapertools.printMatches(matches)

    for scrapedurl,scrapedthumbnail,scrapedtitle in matches:
        title = scrapertools.htmlclean(scrapedtitle)
        url = scrapedurl
        thumbnail = scrapedthumbnail
        itemlist.append( Item(channel=__channel__, action="play", server="cntv", title=title, url=url, thumbnail=thumbnail, show=item.show, folder=False) )

    return itemlist
def findvideos(item):
    logger.info("pelisalacarta.channels.divxatope findvideos")
    itemlist=[]

    # Request the streaming page instead of the download page
    item.url = item.url.replace("divxatope.com/descargar/","divxatope.com/ver-online/")

    '''
    <div class="embed-selector" style="background-image: url('http://hdfull.tv/templates/hdfull/images/lat.png')" onclick="changeEmbed(29124,countdown);">
    <h5 class="left">
    <span> <b class="key"> Idioma: </b> Audio Latino </span>
    <span> <b class="key">Servidor:</b><b class="provider" style="background-image: url(http://www.google.com/s2/favicons?domain=powvideo.net)">Powvideo</b> </span>
    <span> <b class="key">Calidad: </b> HD720 </span>
    </h5>
    <ul class="filter action-buttons">
    <li class="current right" style="float:right">
    <a href="javascript:void(0);" onclick="reportMovie(29124)" class="danger" title="Reportar"><i class="icon-warning-sign icon-white"></i> </a>
    <a href="http://powvideo.net/q87l85llcifz" target="_blank"><i class="icon-share-alt icon-white"></i> Enlace externo</a>
    </li>
    </ul>
    </div>
    '''
    # Download the page
    data = scrapertools.cachePage(item.url)

    patron  = '<div class="embed-selector"[^<]+'
    patron += '<h5 class="left"[^<]+'
    patron += '<span[^<]+<b class="key">\s*Idioma.\s*</b>([^<]+)</span[^<]+'
    patron += '<span[^<]+<b class="key">\s*Servidor.\s*</b><b[^>]+>([^<]+)</b[^<]+</span[^<]+'
    patron += '<span[^<]+<b class="key">\s*Calidad.\s*</b>([^<]+)</span[^<]+</h5[^<]+'
    patron += '<ul class="filter action-buttons">(.*?)</ul>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for idioma,servername,calidad,bloqueurl in matches:
        title = "Mirror en "+servername.strip()+" ("+calidad.strip()+")"+" ("+idioma.strip()+")"
        title = scrapertools.htmlclean(title)
        # Note: url carries the raw <ul> block rather than a direct link
        url = bloqueurl
        thumbnail = item.thumbnail
        plot = item.title+"\n\n"+scrapertools.find_single_match(data,'<meta property="og:description" content="([^"]+)"')
        plot = scrapertools.htmlclean(plot)
        fanart = scrapertools.find_single_match(data,'<div style="background-image.url. ([^\s]+)')
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        itemlist.append(Item(channel=__channel__, action="play", title=title, fulltitle=title, url=url, thumbnail=thumbnail, plot=plot, fanart=fanart, folder=True, viewmode="movie_with_plot"))

    return itemlist
def listmirrors(item):
    logger.info("[yotix.py] listmirrors")

    title = item.title
    url = item.url
    thumbnail = item.thumbnail
    plot = item.plot
    itemlist = []

    # Download the detail page
    data = scrapertools.cachePage(url)
    #logger.info(data)

    # Extract the synopsis
    patronvideos = '<div class="texto-sinopsis">(.*?)<div'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches)>0:
        plot = scrapertools.htmlclean(matches[0].strip())

    # Extract the links if the video is on the same page
    patron = 'so.addParam\(\'flashvars\',\'.*?file\=([^\&]+)\&'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if len(matches)>0:
        url = matches[0]
        newurl = findnewlocation(url)
        if newurl!="":
            url = newurl
        itemlist.append( Item(channel=CHANNELNAME, action="play", title=title, url=url, thumbnail=thumbnail, plot=plot, server="Directo", folder=False))

    # Extract the video links (Megavideo)
    patronvideos = '<a.*?href="(http://yotix.tv/flash/[^"]+)"[^>]*>(.*?)</a>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for match in matches:
        # Add to the XBMC listing
        scrapedtitle = scrapertools.htmlclean(match[1].replace("–","-")).strip()
        scrapedurl = match[0]
        itemlist.append( Item(channel=CHANNELNAME, action="play", title=scrapedtitle, url=scrapedurl, thumbnail=thumbnail, plot=plot, server="Megavideo", folder=False))

    # Extract the video links (Directo)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/sitio/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/media/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/video/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/ver/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/rt/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/anime/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/gb/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/online/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/4s/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)

    return itemlist
def peliculas(item):
    logger.info("pelisalacarta.yaske peliculas")

    data = scrapertools.cache_page(item.url,headers=HEADER)
    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","",data)

    # Extract the entries
    '''
    <li class="item-movies c8"><a class="image-block" href="http://www.yaske.to/es/pelicula/0005346/ver-transformers-4-online.html" title="Transformers 4: La era de la extinción"><img src="http://www.yaske.to/upload/images/59481937cedbdd789cec00aab9f7ed8b.jpg" width="140" height="200" /></a>
    <ul class="bottombox"><li title="Transformers 4: La era de la extinción"><a href="http://www.yaske.to/es/pelicula/0005346/ver-transformers-4-online.html" title="Transformers 4: La era de la extinción">Transformers 4: La…</a></li>
    <li>Accion, ciencia Ficcion</li>
    <li><img src='http://www.yaske.to/theme/01/data/images/flags/es_es.png' title='Spanish ' width='25'/> <img src='http://www.yaske.to/theme/01/data/images/flags/en_es.png' title='English SUB Spanish' width='25'/> <img src='http://www.yaske.to/theme/01/data/images/flags/la_la.png' title='Latino ' width='25'/> </li>
    <li><a rel="lyteframe" rev="width: 600px; height: 380px; scrolling: no;" youtube="trailer" href="http://www.youtube.com/v/&hl&autoplay=1" target="_blank"><img src="http://2.bp.blogspot.com/-hj7moVFACQU/UBoi0HAFeyI/AAAAAAAAA9o/2I2KPisYtsk/s1600/vertrailer.png" height="22" border="0"></a></li></ul>
    <div class="quality">Hd Real 720</div><div class="view"><span>view: 335482</span></div></li>
    '''
    patron  = '<li class="item-movies[^"]+"><div class="tooltipyk">'
    patron += '<a class="image-block" href="([^"]+)" title="([^"]+)">'
    patron += '<img src="([^"]+)"[^/]+/.*?'
    patron += '<ul class="bottombox">.*?<li>(<img.*?)</li>.*?</ul>'
    patron += '<div class="quality">([^<]+)</div>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    logger.debug(repr(matches))

    itemlist = []
    for scrapedurl, scrapedtitle, scrapedthumbnail, idiomas, calidad in matches:
        patronidiomas = "<img src='[^']+' title='([^']+)'"
        matchesidiomas = re.compile(patronidiomas,re.DOTALL).findall(idiomas)

        idiomas_disponibles = ""
        for idioma in matchesidiomas:
            idiomas_disponibles = idiomas_disponibles + idioma.strip() + "/"
        if len(idiomas_disponibles)>0:
            idiomas_disponibles = "["+idiomas_disponibles[:-1]+"]"

        title = scrapedtitle.strip()+" "+idiomas_disponibles+"["+calidad+"]"
        title = scrapertools.htmlclean(title)
        contentTitle = scrapertools.htmlclean(scrapedtitle.strip())
        url = scrapedurl
        thumbnail = scrapedthumbnail
        scrapedplot = ""

        itemlist.append( Item(channel=item.channel, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=scrapedplot, fulltitle=contentTitle, viewmode="movie", folder=True, hasContentDetails="true", contentTitle=contentTitle, contentThumbnail=thumbnail) )

    # Extract the paginator
    patronvideos = "<a href='([^']+)'>&raquo;</a>"
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if len(matches)>0:
        scrapedurl = urlparse.urljoin(item.url,matches[0])
        itemlist.append( Item(channel=item.channel, action="peliculas", title=">> Página siguiente", url=scrapedurl, folder=True) )

    return itemlist
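# --- Standalone sketch (not part of the channel): how the language flags are
# pulled out of the <img title='...'> block above. The snippet is taken from
# the sample markup in the docstring; the helper itself is hypothetical and
# assumes the module-level "import re" this file already relies on.
def _demo_yaske_idiomas():
    idiomas = ("<img src='http://www.yaske.to/theme/01/data/images/flags/es_es.png' title='Spanish ' width='25'/> "
               "<img src='http://www.yaske.to/theme/01/data/images/flags/la_la.png' title='Latino ' width='25'/>")
    patronidiomas = "<img src='[^']+' title='([^']+)'"
    matchesidiomas = re.compile(patronidiomas, re.DOTALL).findall(idiomas)
    idiomas_disponibles = "/".join(idioma.strip() for idioma in matchesidiomas)
    if idiomas_disponibles:
        idiomas_disponibles = "[" + idiomas_disponibles + "]"
    print(idiomas_disponibles)  # -> [Spanish/Latino]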
def programas(item):
    logger.info("[acbtv.py] programas")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Extract the shows
    """
    <div class="caja-1">
    <div class="contentCaja">
    <p class="resumen-programa">¿Conoces la música popular de tu región? Podrás descubrirla en '<strong>El Templete</strong>', un programa donde bandas de música, rondas y rondallas serán las encargadas de enseñarnos lo mejor de nuestros pueblos con la música como hilo conductor. Conoceremos a sus componentes y nos colaremos en sus ensayos, para acabar acompañándoles en sus rondas, en sus pasacalles y en sus actuaciones.</p>
    <div></div>
    <h6><strong>Entretenimiento</strong></h6>
    <h5>Martes a las 20:30<br>en CMT</h5>
    <a href="detail.php?id=7623"><img src="http://media.rtvcm.es/media//0000048500/0000048975.jpg" alt="" width="150" height="124" /></a>
    <h3><a href="detail.php?id=7623">El Templete</a></h3>
    <h4></h4>
    </div>
    </div>
    """
    patron  = '<div class="caja-1"[^<]+'
    patron += '<div class="contentCaja"[^<]+'
    patron += '<p class="resumen-programa">(.*?)</p>.*?'
    patron += '<a href="([^"]+)"><img src="([^"]+)"[^<]+</a[^<]+'
    patron += '<h3><a href="[^"]+">([^<]+)</a></h3>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedplot, scrapedurl, scrapedthumbnail, scrapedtitle in matches:
        title = scrapertools.htmlclean(scrapedtitle.strip())
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = scrapertools.htmlclean(scrapedplot).strip()

        itemlist.append( Item(channel=CHANNELNAME, title=title, url=url, thumbnail=thumbnail, action="episodios", show=item.title, extra="1", folder=True) )

    return itemlist
def peliculas(item):
    logger.info("pelisalacarta.channels.quebajamos peliculas")

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info("data="+data)

    # Extract the entries (folders)
    '''
    <div id="caja_portada" onmouseover="{overlib('<div id=cucu><div class=head>Her (2013) 1080p ONLINE <font color=white>|</font> <font color=green> HD 5.1 </font> </div> <div class=sinopsis>La película nos sitúa en un futuro no muy lejano donde vive Theodore (Joaquin Phoenix), un hombre solitario que trabaja como escritor y que está pasando por las últimas etapas de un traumático divorcio. La vida de Theodore no es demasiado emocionante, cuando no está trabajando se pasa las horas jugando a videojuegos y, de vez en cuando, sale con sus amigos. Pero todo va a cambiar cuando el escritor decide adquirir un nuevo sistema operativo para su teléfono y su ordenador, y este sistema tiene como nombre Samantha (voz de Scarlett Johansson).</div> <div class=info><b>REPARTO:</b> Joaquin Phoenix, Scarlett Johansson, Rooney Mara, Amy Adams, Olivia Wilde, Chris Pratt, Portia Doubleday, Sam Jaeger, Katherine Boecher, Kelly Sarah, Spike Jonze, Bill Hader, Kristen Wiig, Brian Cox <br /><b>GENERO:</b> Comedia, Drama, Ciencia ficción, Romance <br /><b>DURACION:</b> 126 min.</div></div>', WIDTH, 150, DELAY, 100);}" onmouseout="return nd();">
    <div class='hd_icon'></div>
    <div class="wrap_img">
    <a href="/stream/7734">
    <img src="http://image.tmdb.org/t/p/w185/zu5oyq47nMyz6JNA2SJJT1eOeyR.jpg" />
    </a>
    </div>
    <div class="info">
    <p class="titulo">Her (2013) 1080p ...</p>
    <p class="info">Categoria: <span>PeliculasHD</span></p>
    <p class="info">Calidad: <span> <font style="color:#0e3714">HD 5.1</font></span></p>
    <p class="info">Visto <span>30 veces</span></p>
    </div>
    </div>
    '''
    patron  = '<div id="caja_portada" onmouseover=".overlib.\''
    patron += '<div id=cucu><div class=head>(.*?)</div> <div class=sinopsis>(.*?)</div>.*?'
    patron += '<div class="wrap_img"[^<]+'
    patron += '<a href="([^"]+)"[^<]+'
    patron += '<img src="([^"]+)"'
    matches = re.compile(patron,re.DOTALL).findall(data)

    itemlist = []
    for scrapedtitle,scrapedplot,scrapedurl,scrapedthumbnail in matches:
        title = scrapertools.htmlclean(unicode( scrapedtitle, "iso-8859-1", errors="replace" ).encode("utf-8")).strip()
        thumbnail = urlparse.urljoin(item.url,scrapedthumbnail)
        plot = scrapertools.htmlclean(unicode( scrapedplot, "iso-8859-1", errors="replace" ).encode("utf-8")).strip()
        url = urlparse.urljoin(item.url,scrapedurl)

        itemlist.append( Item(channel=__channel__, action="play", title=title, url=url, extra=item.url, thumbnail=thumbnail, plot=plot, viewmode="movie_with_plot"))
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

    next_page = scrapertools.find_single_match(data,'class="current"[^<]+</a[^<]+<a class="paginate" title="[^"]+" href="([^"]+)"')
    if next_page!="":
        itemlist.append( Item(channel=__channel__, action="peliculas", title=">> Página siguiente", url=urlparse.urljoin(item.url,next_page), folder=True))

    return itemlist
def peliculas(item):
    logger.info("[peliculaspepito.py] peliculas")

    # Download the page
    data = scrapertools.cachePage(item.url)
    data = scrapertools.get_match(data,'<ul class="lista_peliculas">(.*?)</ul>')

    '''
    <li><a class="tilcelpel" title="Emperador" href="http://emperador.peliculaspepito.com/"><img id="img_13266" data-id="13266" alt="Emperador" src="http://s.peliculaspepito.com/peliculas/13266-emperador-thumb.jpg" /></a><div class="pfestrenoportada"><span class="text-warning">07-03-2014</span></div><div id="imgtilinfo13266" class="til_info"><p><a title="Emperador" href="http://emperador.peliculaspepito.com/">Emperador</a></p><p class="pcalidi"><span class="flag flag_0"></span></p><p class="pidilis">DVD/BR Screener</p></div><a title="Emperador" href="http://emperador.peliculaspepito.com/"><div data-id="13266" id="til_info_sensor13266" data-on="0" data-an="0" class="til_info_sensor"></div></a></li>
    '''
    patron  = '<li[^<]+'
    patron += '<a class="tilcelpel" title="([^"]+)" href="([^"]+)"[^<]+'
    patron += '<img id="[^"]+" data-id="[^"]+" alt="[^"]+" src="([^"]+)"[^>]+>.*?'
    patron += '<p class="pcalidi"><span class="([^"]+)"></span></p><p class="pidilis">([^<]+)</p>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist = []
    for scrapedtitle,scrapedurl,scrapedthumbnail,idiomas,calidad in matches:
        title = scrapertools.htmlclean(scrapedtitle).strip()
        title = title.replace("\r","").replace("\n","")
        calidad = scrapertools.htmlclean(calidad).strip()
        if "flag_0" in idiomas: title = title + " (Español)"
        if "flag_1" in idiomas: title = title + " (Latino)"
        if "flag_2" in idiomas: title = title + " (VO)"
        if "flag_3" in idiomas: title = title + " (VOS)"
        title = title + "["+calidad+"]"
        url = scrapedurl
        thumbnail = scrapedthumbnail
        plot = ""
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        itemlist.append( Item(channel=__channel__, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot, show=title, viewmode="movie"))

    return itemlist
def siguientes(item):
    logger.info("pelisalacarta.channels.pordede siguientes")

    # Download the page
    headers = DEFAULT_HEADERS[:]
    #headers.append(["Referer",item.extra])
    headers.append(["X-Requested-With","XMLHttpRequest"])
    data = scrapertools.cache_page(item.url,headers=headers)
    if (DEBUG): logger.info("data="+data)

    # Extract the entries (folders) from the JSON payload
    json_object = jsontools.load_json(data)
    if (DEBUG): logger.info("html2="+json_object["html"])
    data = json_object["html"]

    patron  = '<div class="coverMini shadow tiptip" title="([^"]+)">[^<]+'
    patron += '<img class="centeredPic centeredPicFalse" onerror="[^"]+" src="([^"]+)"[^<]+'
    patron += '<img src="/images/loading-mini.gif" class="loader"/>[^<]+'
    patron += '<div class="extra-info"><span class="year">[^<]+'
    patron += '</span><span class="value"><i class="icon-star"></i>[^<]+'
    patron += '</span></div>[^<]+'
    patron += '</div>[^<]+'
    patron += '</a>[^<]+'
    patron += '<a class="userepiinfo defaultLink" href="([^"]+)">(\d+)x(\d+)'
    matches = re.compile(patron,re.DOTALL).findall(data)

    itemlist = []
    for scrapedtitle,scrapedthumbnail,scrapedurl,scrapedseason,scrapedepisode in matches:
        title = scrapertools.htmlclean(scrapedtitle)
        season = scrapertools.htmlclean(scrapedseason)
        episode = scrapertools.htmlclean(scrapedepisode)
        thumbnail = urlparse.urljoin(item.url,scrapedthumbnail)
        fanart = thumbnail.replace("mediathumb","mediabigcover")
        plot = ""
        title = season + "x" + episode + " - " + title

        #http://www.pordede.com/peli/the-lego-movie
        #http://www.pordede.com/links/view/slug/the-lego-movie/what/peli?popup=1
        referer = urlparse.urljoin(item.url,scrapedurl)
        url = referer

        itemlist.append( Item(channel=item.channel, action="episodio", title=title, url=url, thumbnail=thumbnail, plot=plot, fulltitle=title, show=title, fanart=fanart, extra=season+"|"+episode))
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

    return itemlist
def lomasvisto(item):
    logger.info("[seriespepito.py] lomasvisto")

    # Download the page
    data = scrapertools.cachePage(item.url)
    data = scrapertools.get_match(data,'s visto de esta semana en Pepito</div><ul(.*?)</ul>')

    patron  = '<a title="([^"]+)" href="([^"]+)"[^<]+'
    patron += '<img.*?src="([^"]+)"'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist = []
    for scrapedtitle,scrapedurl,scrapedthumbnail in matches:
        title = scrapertools.htmlclean(scrapedtitle).strip()
        title = title.replace("\r","").replace("\n","")
        title = re.compile("\s+",re.DOTALL).sub(" ",title)
        url = scrapedurl
        thumbnail = scrapedthumbnail
        plot = ""
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        itemlist.append( Item(channel=__channel__, action="episodios", title=title, url=url, thumbnail=thumbnail, plot=plot, show=title, viewmode="movie", fanart="http://pelisalacarta.mimediacenter.info/fanart/seriespepito.jpg"))

    return itemlist
def mainlist(item):
    logger.info("tvalacarta.channels.telemundo mainlist")
    itemlist = []

    '''
    <div class="grid-collection-item--MAIN">
    <a href="http://www.telemundo.com/novelas/el-fantasma-de-elena/videos" class="grid-collection-item--link">
    <span class="grid-collection-item--aspect-ratio-412x137">
    <img class="grid-collection-item--image" data-original="http://www.telemundo.com/sites/nbcutelemundo/files/styles/show_brand_grid/public/sites/nbcutelemundo/files/images/tv_show/06_ElFantasmaDeElena_596x200_2.jpg?itok=blnz-UOw" width="412" height="137" /><noscript><img class="grid-collection-item--image" src="http://www.telemundo.com/sites/nbcutelemundo/files/styles/show_brand_grid/public/sites/nbcutelemundo/files/images/tv_show/06_ElFantasmaDeElena_596x200_2.jpg?itok=blnz-UOw" width="412" height="137" /></noscript>
    </span>
    <span class="grid-collection-item--name">El Fantasma de Elena</span>
    </a>
    </div>
    '''
    data = scrapertools.cachePage("http://msnlatino.telemundo.com/videos/allprograms/")

    patron  = '<div class="grid-collec[^<]+'
    patron += '[^0-9]+'
    patron += '<a href="([^"]+)"[^<]+'
    patron += '<span[^<]+'
    patron += '<img class="[^"]+"[^0-9]+data-original="([^"]+)"[^<]+'
    patron += '<noscript><img[^<]+<\/noscript[^<]+<\/span>[^<]+'
    patron += '<span class="[^"]+">([^<]+)<\/span>'
    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedurl,scrapedthumbnail,scrapedtitle in matches:
        title = scrapertools.htmlclean(scrapedtitle)
        thumbnail = scrapedthumbnail
        url = scrapedurl.replace("videos","capitulos")
        itemlist.append( Item(channel=__channel__, action="episodios", title=title, url=url, thumbnail=thumbnail, viewmode="movie", folder=True))

    return itemlist
def detalle_programa(item,data=""): logger.info("[shurweb.py] detalle_programa") # Descarga la página url = item.url if data=="": data = scrapertools.cache_page(url) # Obtiene el thumbnail try: item.thumbnail = scrapertools.get_match(data,'<div class="serie_thumb"><img src="([^"]+)"/>') except: pass plot = scrapertools.get_match(data,'<div class="synopsis clearfix">(.*?)</div>') plot = re.compile("<strong>Idiom[^<]+</strong>[^<]+<br />",re.DOTALL).sub("",plot) plot = re.compile("<strong>Calid[^<]+</strong>[^<]+<br />",re.DOTALL).sub("",plot) plot = re.compile("Sinopsis\:",re.DOTALL).sub("",plot) item.plot = scrapertools.htmlclean(plot).strip() try: item.title = scrapertools.get_match(data,'<h1 class="cat_head">([^<]+)</h1>').strip() except: pass return item
def episodios(item):
    logger.info("[mundonick.py] episodios")

    data = scrapertools.cachePage(item.url)
    #logger.info(data)

    #<a href=""><img class="linkImgTurbo" src="/shared/media/images/shows/l/legend_of_korra/101_3_82x55.jpg" alt="" title=""></a><a href="/nickturbo/?gid=2418&cid=1696825&vid=853875"><img class="linkImgTurbo" src="images/thumbsLitleFrame.png" alt="Legend Of Korra | Episodio 01" title="Legend Of Korra | Episodio 01"
    patron = '<a href=""><img class="linkImgTurbo" src="([^"]+)" alt="" title=""></a><a href="([^"]+)"><img class="linkImgTurbo" src="images/thumbsLitleFrame.png" alt="(.*?)" title="(.*?)"'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        # The vid value is the last "="-separated token of the link
        scrapedurl = match[1].split('=')[3]
        scrapedthumbnail = __urlbase__ + match[0]
        scrapedtitle = scrapertools.htmlclean(match[3]).decode('iso-8859-1').encode("utf8","ignore")
        itemlist.append( Item(channel=__channel__, action="play", title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, folder=False) )

    return itemlist
def buscar(item):
    keyboard = xbmc.Keyboard()
    keyboard.doModal()
    busqueda = keyboard.getText()

    data = scrapertools.cachePage("http://www.peliculaspepito.com/buscador/" + busqueda + "/")
    data = scrapertools.get_match(data,'<ul class="lp">(.*?)</ul>')

    patron  = '<li>'
    patron += '<a.*?href="([^"]+)">'
    patron += '<img.*?alt="([^"]+)" src="([^"]+)"[^>]+>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist = []
    for scrapedurl,scrapedtitle,scrapedthumbnail in matches:
        title = scrapertools.htmlclean(scrapedtitle).strip()
        title = title.replace("\r","").replace("\n","")
        title = re.compile("\s+",re.DOTALL).sub(" ",title)
        url = scrapedurl
        thumbnail = scrapedthumbnail
        plot = ""
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        itemlist.append( Item(channel=__channel__, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot, show=title, viewmode="movie", fanart="http://pelisalacarta.mimediacenter.info/fanart/seriespepito.jpg"))

    return itemlist
def episodios(item):
    logger.info("[vertelenovelas.py] episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(item.url)
    data = scrapertools.get_match(data, '<ul class="anime_episodios"(.*?)</ul>')

    # <li><a href="ver/rafaela-119.html">Capitulo 119</a></li>
    patron = '<li><a href="([^"]+)">([^<]+)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle in matches:
        title = scrapertools.htmlclean(scrapedtitle)
        plot = ""
        thumbnail = ""
        url = urlparse.urljoin(item.url, scrapedurl)
        if DEBUG: logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")

        itemlist.append( Item(channel=__channel__, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot, folder=True) )

    return itemlist
def search(item, texto):
    logger.info()
    itemlist = []
    item.url = urlparse.urljoin(HOST, "search_suggest")
    texto = texto.replace(" ", "+")
    post = "value=%s" % texto
    data = httptools.downloadpage(item.url, post=post).data
    dict_data = jsontools.load_json(data)
    for e in dict_data:
        title = clean_title(scrapertools.htmlclean(e["name"]))
        url = e["url"]
        plot = e["description"]
        thumbnail = HOST + e["thumb"]
        new_item = item.clone(action="episodios", title=title, url=url, plot=plot, thumbnail=thumbnail)
        if "Pelicula" in e["genre"]:
            new_item.contentType = "movie"
            new_item.contentTitle = title
        else:
            new_item.show = title
            new_item.context = renumbertools.context
        itemlist.append(new_item)
    return itemlist
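# A minimal sketch of the search_suggest flow above. The JSON shape (a list of
# dicts with "name", "url", "description", "thumb" and "genre" keys) is inferred
# from the field accesses in search(); the payload and HOST below are invented.
import json

HOST = "http://example.com"
raw = '[{"name": "Serie X", "url": "/serie-x", "description": "...", "thumb": "/t.jpg", "genre": "Pelicula"}]'
for e in json.loads(raw):
    is_movie = "Pelicula" in e["genre"]
    print("%s -> %s (movie=%s)" % (e["name"], HOST + e["thumb"], is_movie))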
def listado_busqueda(item):
    logger.info()
    itemlist = []

    data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(item.url, post=item.post).data)
    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
    list_chars = [["ñ", "ñ"]]
    for el in list_chars:
        data = re.sub(r"%s" % el[0], el[1], data)

    try:
        get, post = scrapertools.find_single_match(data, '<ul class="pagination">.*?<a class="current" href.*?'
                                                         '<a\s*href="([^"]+)"(?:\s*onClick=".*?\'([^"]+)\'.*?")')
    except:
        post = False
    if post:
        if "pg" in item.post:
            item.post = re.sub(r"pg=(\d+)", "pg=%s" % post, item.post)
        else:
            item.post += "&pg=%s" % post

    pattern = '<ul class="%s">(.*?)</ul>' % item.pattern
    data = scrapertools.get_match(data, pattern)
    pattern = '<li[^>]*><a href="(?P<url>[^"]+).*?<img.*?src="(?P<thumb>[^"]+)?".*?<h2.*?>(?P<title>.*?)?<\/h2>'
    matches = re.compile(pattern, re.DOTALL).findall(data)

    for url, thumb, title in matches:
        real_title = scrapertools.find_single_match(title, r'<strong.*?>(.*?)Temporada.*?<\/strong>')  # series
        if real_title == "":
            real_title = scrapertools.find_single_match(title, r'(.*?)\[.*?]')  # movies
        real_title = scrapertools.remove_htmltags(real_title).decode('iso-8859-1').encode('utf-8')
        real_title = scrapertools.htmlclean(real_title)
        calidad = scrapertools.find_single_match(title, r'.*?\s*Calidad.*?<span[^>]+>[\[]\s*(?P<quality>.*?)\s*[\]]<\/span>')  # series
        if calidad == "":
            calidad = scrapertools.find_single_match(title, r'..*?(\[.*?.*\])')  # movies
        year = scrapertools.find_single_match(thumb, r'-(\d{4})')

        # fix encoding for title
        title = scrapertools.htmlclean(title)
        title = title.replace("�", "ñ").replace("Temp", " Temp").replace("Esp", " Esp").replace("Ing", " Ing").replace("Eng", " Eng")
        title = re.sub(r'(Calidad.*?\])', '', title)
        if real_title == "":
            real_title = title
        if calidad == "":
            calidad = title
        context = "movie"
        url_real = True

        # Skip anything that is not a video
        if "juego/" in url:
            continue

        # Recover what we can from pelisyseries.com series for the library: the URL
        # points to the episode, not the series, and the series name is often blank.
        # It is recovered from the thumb, along with the series id.
        if ("/serie" in url or "-serie" in url) and "pelisyseries.com" in host:
            calidad_mps = "series/"
            if "seriehd" in url:
                calidad_mps = "series-hd/"
            if "serievo" in url:
                calidad_mps = "series-vo/"
            if "serie-vo" in url:
                calidad_mps = "series-vo/"

            real_title_mps = re.sub(r'.*?\/\d+_', '', thumb)
            real_title_mps = re.sub(r'\.\w+.*?', '', real_title_mps)
            if "/0_" not in thumb:
                serieid = scrapertools.find_single_match(thumb, r'.*?\/\w\/(?P<serieid>\d+).*?.*')
                if len(serieid) > 5:
                    serieid = ""
            else:
                serieid = ""

            # Check whether the tvshow URL built this way is valid, or fall back
            url_tvshow = host + calidad_mps + real_title_mps + "/"
            url_id = host + calidad_mps + real_title_mps + "/" + serieid
            data_serie = data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(url_id).data)
            data_serie = unicode(data_serie, "iso-8859-1", errors="replace").encode("utf-8")
            data_serie = data_serie.replace("chapters", "buscar-list")
            pattern = '<ul class="%s">(.*?)</ul>' % "buscar-list"  # item.pattern
            if not scrapertools.find_single_match(data_serie, pattern):
                data_serie = data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(url_tvshow).data)
                data_serie = unicode(data_serie, "iso-8859-1", errors="replace").encode("utf-8")
                data_serie = data_serie.replace("chapters", "buscar-list")
                if not scrapertools.find_single_match(data_serie, pattern):
                    context = "movie"
                    url_real = False
                    if not config.get_setting("unify"):  # if smart titles are NOT selected
                        if calidad:
                            title = title + '[' + calidad + "]"
                else:
                    url = url_tvshow
            else:
                url = url_id

            real_title_mps = real_title_mps.replace("-", " ")
            logger.debug("url: " + url + " / title: " + title + " / real_title: " + real_title + " / real_title_mps: " + real_title_mps + " / calidad_mps : " + calidad_mps + " / context : " + context)
            real_title = real_title_mps

        show = real_title
        if ".com/serie" in url and "/miniseries" not in url and url_real:
            if not config.get_setting("unify"):  # if smart titles are NOT selected
                if calidad:
                    title = title + '[' + calidad + "]"
            context = "tvshow"
            itemlist.append(Item(channel=item.channel, action="episodios", title=title, url=url, thumbnail=thumb, quality=calidad, show=show, extra="serie", context=["buscar_trailer"], contentType=context, contentTitle=real_title, contentSerieName=real_title, infoLabels={'year': year}))
        else:
            if config.get_setting("unify"):  # if smart titles ARE selected
                title = real_title
            itemlist.append(Item(channel=item.channel, action="findvideos", title=title, url=url, thumbnail=thumb, quality=calidad, show=show, context=["buscar_trailer"], contentType=context, contentTitle=real_title, infoLabels={'year': year}))
        logger.debug("url: " + url + " / title: " + title + " / real_title: " + real_title + " / show: " + show + " / calidad: " + calidad)

    tmdb.set_infoLabels(itemlist, True)

    if post:
        itemlist.append(item.clone(channel=item.channel, action="listado_busqueda", title=">> Página siguiente", text_color='yellow', text_bold=True, thumbnail=get_thumb("next.png")))

    return itemlist
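# Sketch of the pagination bookkeeping above: the "pg" field of the POST payload is
# either rewritten in place or appended. The payload values are invented for the demo.
import re

post = "categoria=peliculas&pg=2"
next_pg = "3"
if "pg" in post:
    post = re.sub(r"pg=(\d+)", "pg=%s" % next_pg, post)
else:
    post += "&pg=%s" % next_pg
# -> "categoria=peliculas&pg=3"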
def findvideos(item):
    logger.info()
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url).data
    item.plot = scrapertools.find_single_match(data, '<div class="post-entry" style="height:300px;">(.*?)</div>')
    item.plot = scrapertools.htmlclean(item.plot).strip()
    item.contentPlot = item.plot

    link = scrapertools.find_single_match(data, 'href="http://(?:tumejorserie|tumejorjuego).*?link=([^"]+)"')
    if link != "":
        link = "http://www.divxatope1.com/" + link
        logger.info("torrent=" + link)
        itemlist.append(Item(channel=item.channel, action="play", server="torrent", title="Vídeo en torrent", fulltitle=item.title, url=link, thumbnail=servertools.guess_server_thumbnail("torrent"), plot=item.plot, folder=False, parentContent=item))

    patron = "<div class=\"box1\"[^<]+<img[^<]+</div[^<]+"
    patron += '<div class="box2">([^<]+)</div[^<]+'
    patron += '<div class="box3">([^<]+)</div[^<]+'
    patron += '<div class="box4">([^<]+)</div[^<]+'
    patron += '<div class="box5">(.*?)</div[^<]+'
    patron += '<div class="box6">([^<]+)<'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist_ver = []
    itemlist_descargar = []
    for servername, idioma, calidad, scrapedurl, comentarios in matches:
        title = "Mirror en " + servername + " (" + calidad + ")" + " (" + idioma + ")"
        servername = servername.replace("uploaded", "uploadedto").replace("1fichier", "onefichier")
        if comentarios.strip() != "":
            title = title + " (" + comentarios.strip() + ")"
        url = urlparse.urljoin(item.url, scrapedurl)
        mostrar_server = servertools.is_server_enabled(servername)
        if mostrar_server:
            thumbnail = servertools.guess_server_thumbnail(title)
            plot = ""
            logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
            action = "play"
            if "partes" in title:
                action = "extract_url"
            new_item = Item(channel=item.channel, action=action, title=title, fulltitle=title, url=url, thumbnail=thumbnail, plot=plot, parentContent=item)
            if comentarios.startswith("Ver en"):
                itemlist_ver.append(new_item)
            else:
                itemlist_descargar.append(new_item)

    for new_item in itemlist_ver:
        itemlist.append(new_item)
    for new_item in itemlist_descargar:
        itemlist.append(new_item)

    return itemlist
def lista(item):
    logger.info()
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url, post=item.extra).data
    # logger.info("data="+data)

    bloque = scrapertools.find_single_match(data, '(?:<ul class="pelilist">|<ul class="buscar-list">)(.*?)</ul>')

    patron = '<li[^<]+'
    patron += '<a href="([^"]+)".*?'
    patron += 'src="([^"]+)".*?'
    patron += '<h2[^>]*>(.*?)</h2.*?'
    patron += '(?:<strong[^>]*>|<span[^>]*>)(.*?)(?:</strong>|</span>)'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedthumbnail, scrapedtitle, calidad in matches:
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        title = scrapedtitle.strip()
        if scrapertools.htmlclean(calidad):
            title += " (" + scrapertools.htmlclean(calidad) + ")"
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = ""
        logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
        contentTitle = scrapertools.htmlclean(scrapedtitle).strip()

        # Break the quality block into quality / language / episode lines
        patron = '([^<]+)<br>'
        matches_info = re.compile(patron, re.DOTALL).findall(calidad + '<br>')
        idioma = ''
        if "divxatope1.com/serie" in url:
            contentTitle = re.sub('\s+-|\.{3}$', '', contentTitle)
            capitulo = ''
            temporada = 0
            episodio = 0
            if len(matches_info) == 3:
                calidad = matches_info[0].strip()
                idioma = matches_info[1].strip()
                capitulo = matches_info[2].replace('Cap', 'x').replace('Temp', '').replace(' ', '')
                temporada, episodio = capitulo.strip().split('x')
            itemlist.append(Item(channel=item.channel, action="episodios", title=title, fulltitle=title, url=url, thumbnail=thumbnail, plot=plot, folder=True, contentTitle=contentTitle, language=idioma, contentSeason=int(temporada), contentEpisodeNumber=int(episodio), contentQuality=calidad))
        else:
            if len(matches_info) == 2:
                calidad = matches_info[0].strip()
                idioma = matches_info[1].strip()
            itemlist.append(Item(channel=item.channel, action="findvideos", title=title, fulltitle=title, url=url, thumbnail=thumbnail, plot=plot, folder=True, contentTitle=contentTitle, language=idioma, contentThumbnail=thumbnail, contentQuality=calidad))

    next_page_url = scrapertools.find_single_match(data, '<li><a href="([^"]+)">Next</a></li>')
    if next_page_url != "":
        itemlist.append(Item(channel=item.channel, action="lista", title=">> Página siguiente", url=urlparse.urljoin(item.url, next_page_url), folder=True))
    else:
        next_page_url = scrapertools.find_single_match(data, '<li><input type="button" class="btn-submit" value="Siguiente" onClick="paginar..(\d+)')
        if next_page_url != "":
            itemlist.append(Item(channel=item.channel, action="lista", title=">> Página siguiente", url=item.url, extra=item.extra + "&pg=" + next_page_url, folder=True))

    return itemlist
def series(item):
    logger.info("pelisalacarta.channels.reyanime series")

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info("data=" + data)

    # Extract the entries
    '''
    <a href="/anime/akane-iro-ni-somaru-saka/">
    <div class="anim-list">
    <div id="a49" class="anim-sinop-estilos-iz">
    <div class="anim-sinopsis-dos-iz">
    <div class="anim-list-genr-iz">
    comedia , drama , ecchi , recuentos de la vida , romance </div>
    <div class="line-title"></div>
    Juunichi es un joven estudiante con una vida escolar muy normal junto a sus amigos y amigas en la escuela. Sin embargo, cierto día, una chica nueva llega transferida a su salón y se presenta como su "prometida". Juunichi, que no sabe nada de esto, discute con ella acerca de lo que ha dicho y, fin...
    </div><div class="anim-sinopsis-uno-iz"></div>
    </div>
    <!-- test -->
    <img onmousemove="MM_showHideLayers('a49','','show')" onmouseout="MM_showHideLayers('a49','','hide')" src="/media/cache/8e/e0/8ee04c67c17286efb07a771d48beae76.jpg" width="131" height="" class="img-til"/>
    <div onmousemove="MM_showHideLayers('a49','','show')" onmouseout="MM_showHideLayers('a49','','hide')" class="anime-titulo">
    Akane Iro Ni Somaru Saka </div>
    </div>
    </a>
    '''
    patron = '(<a href="[^"]+"[^<]+'
    patron += '<span[^<]+</span[^<]+'
    patron += '<div id="[^<]+<div[^<]+</div[^<]+<h5.*?</a)'
    matches = re.compile(patron, re.DOTALL).findall(data)

    itemlist = []
    for match in matches:
        scrapedurl = scrapertools.find_single_match(match, '<a href="([^"]+)"')
        scrapedplot = scrapertools.find_single_match(match, '</h6>([^<]+)</div>')
        scrapedthumbnail = scrapertools.find_single_match(match, 'src="([^"]+)"')
        scrapedtitle = scrapertools.find_single_match(match, '<spa[^>]+>([^<]+)</spa')
        title = scrapedtitle.strip()
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = scrapertools.htmlclean(scrapedplot).strip()
        show = title
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
        itemlist.append(Item(channel=item.channel, action="episodios", title=title, url=url, thumbnail=thumbnail, plot=plot, show=show, fulltitle=title, fanart=thumbnail, viewmode="movies_with_plot", folder=True))

    next_page = scrapertools.find_single_match(data, '<a href="([^"]+)" class="next">siguiente >>')
    if next_page != "":
        itemlist.append(Item(channel=item.channel, action="series", title=">> Página siguiente", url=urlparse.urljoin(item.url, next_page), viewmode="movie_with_plot", folder=True))

    return itemlist
def buscar(item):
    logger.info("pelisalacarta.channels.cinehanwer buscar")

    # Download the page
    headers = DEFAULT_HEADERS[:]
    headers.append(["Referer", item.extra])
    headers.append(["X-Requested-With", "XMLHttpRequest"])
    data = scrapertools.cache_page(item.url, headers=headers)
    logger.info("data=" + data)

    # Extract the entries (folders). Abridged sample of the JSON response; the
    # second result block has the same structure as the first and is omitted here:
    '''
    {"html":"\n\t<div class=\"ddItemContainer modelContainer\" data-model=\"serie\" data-id=\"13051\">\n\n\t\t<div data-action=\"status\" class=\"dropdownContainer desplegableAbstract\">...<\/div>\n\n <a class=\"defaultLink extended\" href=\"\/serie\/attack-on-titan\">\n\n\t\t\t<div class=\"coverMini shadow tiptip\" title=\"Ataque a los Titanes\">\n\n\n\t\t\t\t\t<img class=\"centeredPic centeredPicFalse\" onerror=\"this.src='\/images\/cover-notfound.png';\" src=\"\/content\/covers\/mediathumb-13051-5.png\"\/>\n\n <img src=\"\/images\/loading-mini.gif\" class=\"loader\"\/>\n\t\t\t<\/div>\n\t\t\t<span class=\"title\">Ataque a los Titanes<\/span>\n <\/a>\n\n \t<\/div>\n\n ... \n\n<div class=\"loadingBar\" data-url=\"\/series\/loadmedia\/offset\/30\/showlist\/all\/years\/1950\/query\/titanes\/on\/undefined\">\n <span class=\"text\">Cargar m\u00e1s <\/span><i class=\"icon-caret-down text\"><\/i>\n <img src=\"\/images\/loading.gif\">\n<\/div>","ready":"\n\t\tcontroller.userStatus(\"serie\", \"13051\", \"0\");\n\t\n\t\tcontroller.userStatus(\"serie\", \"4901\", \"0\");\n\t","error":"","title":"cinehanwer.us - Search Series - cinehanwer.us","data":[],"facets":"...","session":"1v1jo5vqu64g3obvnt44cdtl07","screenId":"screen-1739968202"}
    '''
    json_object = jsontools.load_json(data)
    logger.info("html=" + json_object["html"])
    data = json_object["html"]

    '''
    <a class="defaultLink extended" href="/serie/huntik-secrets-&-seekers">
        <div class="coverMini shadow tiptip" title="Huntik: Secrets & Seekers">
            <img class="centeredPic centeredPicFalse" onerror="this.src='/images/cover-notfound.png';" src="/content/covers/mediathumb-4901-5.png"/>
            <img src="/images/loading-mini.gif" class="loader"/>
        </div>
        <span class="title">Huntik: Secrets & Seekers</span>
    </a>
    '''
    patron = '<a class="defaultLink extended" href="([^"]+)"[^<]+'
    patron += '<div class="coverMini shadow tiptip" title="([^"]+)"[^<]+'
    patron += '<img class="centeredPic.*?src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)

    itemlist = []
    for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
        title = scrapertools.htmlclean(scrapedtitle)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = ""
        #http://www.cinehanwer.us/peli/the-lego-movie
        #http://www.cinehanwer.us/links/view/slug/the-lego-movie/what/peli?popup=1
        if "/peli/" in scrapedurl:
            referer = urlparse.urljoin(item.url, scrapedurl)
            url = referer.replace("/peli/", "/links/view/slug/") + "/what/peli"
            if DEBUG:
                logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
            itemlist.append(Item(channel=__channel__, action="findvideos", title=title, extra=referer, url=url, thumbnail=thumbnail, plot=plot, fulltitle=title, viewmode="movie"))
        else:
            referer = item.url
            url = urlparse.urljoin(item.url, scrapedurl)
            itemlist.append(Item(channel=__channel__, action="episodios", title=title, extra=referer, url=url, thumbnail=thumbnail, plot=plot, fulltitle=title, viewmode="movie"))

    return itemlist
def findvideos(item):
    show = item.title.replace("Añadir esta serie a la biblioteca de XBMC", "")
    logger.info("[megaforo.py] findvideos show " + show)
    itemlist = []
    data = scrapertools.cache_page(item.url)

    if 'mega-foro' in data:
        patronimage = '<div class="inner" id="msg_\d{1,9}".*?<img src="([^"]+)".*?mega.co.nz/\#\![A-Za-z0-9\-\_]+\![A-Za-z0-9\-\_]+'
        matches = re.compile(patronimage, re.DOTALL).findall(data)
        if len(matches) > 0:
            thumbnail = matches[0]
            thumbnail = scrapertools.htmlclean(thumbnail)
            thumbnail = unicode(thumbnail, "iso-8859-1", errors="replace").encode("utf-8")
            item.thumbnail = thumbnail

        patronplot = '<div class="inner" id="msg_\d{1,9}".*?<img src="[^"]+"[^/]+/>(.*?)lgf_facebook_share'
        matches = re.compile(patronplot, re.DOTALL).findall(data)
        if len(matches) > 0:
            plot = matches[0]
            title = item.title
            plot = re.sub('&nbsp;', '', plot)
            plot = re.sub('\s\s', '', plot)
            plot = scrapertools.htmlclean(plot)
            item.plot = ""

        from servers import servertools
        itemlist.extend(servertools.find_video_items(data=data))
        for videoitem in itemlist:
            videoitem.channel = __channel__
            videoitem.action = "play"
            videoitem.folder = False
            videoitem.thumbnail = item.thumbnail
            videoitem.plot = item.plot
            videoitem.title = "[" + videoitem.server + "] " + videoitem.title + " " + item.title
            videoitem.show = show

        if config.get_platform().startswith("xbmc") or config.get_platform().startswith("boxee"):
            itemlist.append(Item(channel=item.channel, title="Añadir esta serie a la biblioteca de XBMC", url=item.url, action="add_serie_to_library", extra="findvideos"))
        return itemlist
    else:
        item.thumbnail = ""
        item.plot = ""
        from servers import servertools
        itemlist.extend(servertools.find_video_items(data=data))
        for videoitem in itemlist:
            videoitem.channel = __channel__
            videoitem.action = "play"
            videoitem.folder = False
            videoitem.thumbnail = item.thumbnail
            videoitem.plot = item.plot
            videoitem.title = "[" + videoitem.server + "] " + videoitem.title + " " + item.title
        return itemlist
def peliculas(item):
    logger.info("[streamondemand-pureita cineblog01] peliculas")
    itemlist = []

    if item.url == "":
        item.url = sito

    # Download the page
    data = httptools.downloadpage(item.url, headers=headers).data

    # Extract the entries (folders)
    patronvideos = '<div class="span4".*?<a.*?<p><img src="([^"]+)".*?'
    patronvideos += '<div class="span8">.*?<a href="([^"]+)"> <h1>([^"]+)</h1></a>.*?'
    patronvideos += '<strong>([^<]*)</strong>.*?<br />([^<+]+)'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = urlparse.urljoin(item.url, match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))
        scrapedthumbnail = scrapedthumbnail.replace(" ", "%20")
        scrapedplot = scrapertools.unescape("[COLOR orange]" + match.group(4) + "[/COLOR]\n" + match.group(5).strip())
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        scrapedtitle = scrapedtitle.replace("–", "-").replace("×", "").replace("[Sub-ITA]", "(Sub Ita)")
        scrapedtitle = scrapedtitle.replace("/", " - ").replace("’", "'").replace("…", "...").replace("#", "# ")
        itemlist.append(Item(channel=__channel__, action="findvideos", contentType="movie", fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, extra=item.extra, viewmode="movie_with_plot"))

    # Next page mark
    try:
        bloque = scrapertools.get_match(data, "<div id='wp_page_numbers'>(.*?)</div>")
        patronvideos = '<a href="([^"]+)">></a></li>'
        matches = re.compile(patronvideos, re.DOTALL).findall(bloque)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedtitle = "[COLOR orange]Successivi >>[/COLOR]"
            scrapedurl = matches[0]
            scrapedthumbnail = ""
            scrapedplot = ""
            itemlist.append(Item(channel=__channel__, action="peliculas", title=scrapedtitle, url=scrapedurl, thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png", extra=item.extra, plot=scrapedplot))
    except:
        pass

    return itemlist
def search_links_abando(item):
    logger.info()
    data = httptools.downloadpage(item.url).data
    itemlist = []

    if "Lo sentimos, no tenemos trailer" in data:
        itemlist.append(item.clone(title=config.get_localized_string(70503), action="", text_color=""))
    else:
        if item.contextual:
            progreso = platformtools.dialog_progress(config.get_localized_string(70512), config.get_localized_string(70504))
            progreso.update(10)
            i = 0
            message = config.get_localized_string(70504)
        patron = '<div class="col-md-3 col-xs-6"><a href="([^"]+)".*?' \
                 'Images/(\d+).gif.*?</div><small>(.*?)</small>'
        matches = scrapertools.find_multiple_matches(data, patron)
        if len(matches) == 0:
            trailer_url = scrapertools.find_single_match(data, '<iframe.*?src="([^"]+)"')
            if trailer_url != "":
                trailer_url = trailer_url.replace("embed/", "watch?v=")
                code = scrapertools.find_single_match(trailer_url, 'v=([A-z0-9\-_]+)')
                thumbnail = "https://img.youtube.com/vi/%s/0.jpg" % code
                itemlist.append(item.clone(title="Trailer  [youtube]", url=trailer_url, server="youtube", thumbnail=thumbnail, action="play"))
        else:
            for scrapedurl, language, scrapedtitle in matches:
                if language == "1":
                    idioma = " (ITA)"
                else:
                    idioma = " (V.O)"
                scrapedurl = urlparse.urljoin("http://www.abandomoviez.net/%s" % item.prefix, scrapedurl)
                scrapedtitle = scrapertools.htmlclean(scrapedtitle) + idioma + "  [youtube]"
                if item.contextual:
                    i += 1
                    message += ".."
                    progreso.update(10 + (old_div(90 * i, len(matches))), message)
                    scrapedtitle = "%s" % scrapedtitle
                data_trailer = httptools.downloadpage(scrapedurl).data
                trailer_url = scrapertools.find_single_match(data_trailer, 'iframe.*?src="([^"]+)"')
                trailer_url = trailer_url.replace("embed/", "watch?v=")
                code = scrapertools.find_single_match(trailer_url, 'v=([A-z0-9\-_]+)')
                thumbnail = "https://img.youtube.com/vi/%s/0.jpg" % code
                itemlist.append(item.clone(title=scrapedtitle, url=trailer_url, server="youtube", action="play", thumbnail=thumbnail))
        if item.contextual:
            progreso.close()

    if keyboard:
        if item.contextual:
            title = "%s"
        else:
            title = "%s"
        itemlist.append(item.clone(title=title % config.get_localized_string(70511), action="manual_search", thumbnail="", extra="abandomoviez"))

    return itemlist
def abandomoviez_search(item):
    logger.info()

    # Check whether this is a fresh search or comes from the "Next" option
    if item.page != "":
        data = httptools.downloadpage(item.page).data
    else:
        titulo = item.contentTitle.decode('utf-8').encode('iso-8859-1')
        post = urllib.urlencode({'query': titulo, 'searchby': '1', 'posicion': '1', 'orden': '1', 'anioin': item.year, 'anioout': item.year, 'orderby': '1'})
        url = "http://www.abandomoviez.net/db/busca_titulo.php?busco2=%s" % item.contentTitle
        item.prefix = "db/"
        data = httptools.downloadpage(url, post=post).data
        if "No hemos encontrado ninguna" in data:
            url = "http://www.abandomoviez.net/indie/busca_titulo.php?busco2=%s" % item.contentTitle
            item.prefix = "indie/"
            data = httptools.downloadpage(url, post=post).data.decode("iso-8859-1").encode('utf-8')

    itemlist = []
    patron = '(?:<td width="85"|<div class="col-md-2 col-sm-2 col-xs-3">).*?<img src="([^"]+)"' \
             '.*?href="([^"]+)">(.*?)(?:<\/td>|<\/small>)'
    matches = scrapertools.find_multiple_matches(data, patron)

    # With a single result, look up its trailers directly; otherwise list all results
    if len(matches) == 1:
        item.url = urlparse.urljoin("http://www.abandomoviez.net/%s" % item.prefix, matches[0][1])
        item.thumbnail = matches[0][0]
        itemlist = search_links_abando(item)
    elif len(matches) > 1:
        for scrapedthumbnail, scrapedurl, scrapedtitle in matches:
            scrapedurl = urlparse.urljoin("http://www.abandomoviez.net/%s" % item.prefix, scrapedurl)
            scrapedtitle = scrapertools.htmlclean(scrapedtitle)
            itemlist.append(item.clone(title=scrapedtitle, action="search_links_abando", url=scrapedurl, thumbnail=scrapedthumbnail))
        next_page = scrapertools.find_single_match(data, '<a href="([^"]+)">Siguiente')
        if next_page != "":
            next_page = urlparse.urljoin("http://www.abandomoviez.net/%s" % item.prefix, next_page)
            itemlist.append(item.clone(title=config.get_localized_string(70502), action="abandomoviez_search", page=next_page, thumbnail="", text_color=""))

    if not itemlist:
        itemlist.append(item.clone(title=config.get_localized_string(70501), action="", thumbnail="", text_color=""))

    if keyboard:
        if item.contextual:
            title = "%s"
        else:
            title = "%s"
        itemlist.append(item.clone(title=title % config.get_localized_string(70511), action="manual_search", thumbnail="", extra="abandomoviez"))

    return itemlist
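# Sketch of the POST payload built in abandomoviez_search(), with the same field
# names as the code above; the title and year values are invented. On Python 2 the
# title is re-encoded to ISO-8859-1 first, as the site expects.
import urllib

titulo = "Cronos".decode('utf-8').encode('iso-8859-1')
post = urllib.urlencode({'query': titulo, 'searchby': '1', 'posicion': '1', 'orden': '1',
                         'anioin': '1993', 'anioout': '1993', 'orderby': '1'})
# -> "searchby=1&posicion=1&query=Cronos&orden=1&orderby=1&anioin=1993&anioout=1993" (key order may vary)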
def episodios(item):
    logger.info()
    itemlist = []

    # Download the page
    idserie = ''
    data = httptools.downloadpage(item.url).data
    logger.debug("data=" + data)

    patrontemporada = '<div class="checkSeason"[^>]+>([^<]+)<div class="right" onclick="controller.checkSeason(.*?)\s+</div></div>'
    matchestemporadas = re.compile(patrontemporada, re.DOTALL).findall(data)
    idserie = scrapertools.find_single_match(data, '<div id="layout4" class="itemProfile modelContainer" data-model="serie" data-id="(\d+)"')

    for nombre_temporada, bloque_episodios in matchestemporadas:
        logger.debug("nombre_temporada=" + nombre_temporada)
        logger.debug("bloque_episodios=" + bloque_episodios)

        # Extract the episodes
        patron = '<span class="title defaultPopup" href="([^"]+)"><span class="number">([^<]+)</span>([^<]+)</span>(\s*</div>\s*<span[^>]*><span[^>]*>[^<]*</span><span[^>]*>[^<]*</span></span><div[^>]*><button[^>]*><span[^>]*>[^<]*</span><span[^>]*>[^<]*</span></button><div class="action([^"]*)" data-action="seen">)?'
        matches = re.compile(patron, re.DOTALL).findall(bloque_episodios)
        for scrapedurl, numero, scrapedtitle, info, visto in matches:
            # visto_string = "[visto] " if visto.strip()=="active" else ""
            if visto.strip() == "active":
                visto_string = "[visto] "
            else:
                visto_string = ""
            title = visto_string + nombre_temporada.replace("Temporada ", "").replace("Extras", "Extras 0") + "x" + numero + " " + scrapertools.htmlclean(scrapedtitle)
            thumbnail = item.thumbnail
            fanart = item.fanart
            plot = ""
            # http://www.pordede.com/peli/the-lego-movie
            # http://www.pordede.com/links/view/slug/the-lego-movie/what/peli?popup=1
            # http://www.pordede.com/links/viewepisode/id/475011?popup=1
            epid = scrapertools.find_single_match(scrapedurl, "id/(\d+)")
            url = "http://www.pordede.com/links/viewepisode/id/" + epid
            itemlist.append(Item(channel=item.channel, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot, fulltitle=title, fanart=fanart, show=item.show))
            logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")

    if config.get_videolibrary_support():
        # With year and rating the series cannot be updated correctly; if the rating
        # also changes, another folder would be created.
        # Without year and without rating:
        show = re.sub(r"\s\(\d+\)\s\(\d+\.\d+\)", "", item.show)
        # Without year:
        # show = re.sub(r"\s\(\d+\)", "", item.show)
        # Without rating:
        # show = re.sub(r"\s\(\d+\.\d+\)", "", item.show)
        itemlist.append(Item(channel='pordede', title="Añadir esta serie a la videoteca", url=item.url, action="add_serie_to_library", extra="episodios###", show=show))
        itemlist.append(Item(channel='pordede', title="Descargar todos los episodios de la serie", url=item.url, action="download_all_episodes", extra="episodios", show=show))
        itemlist.append(Item(channel='pordede', title="Marcar como Pendiente", tipo="serie", idtemp=idserie, valor="1", action="pordede_check", show=show))
        itemlist.append(Item(channel='pordede', title="Marcar como Siguiendo", tipo="serie", idtemp=idserie, valor="2", action="pordede_check", show=show))
        itemlist.append(Item(channel='pordede', title="Marcar como Finalizada", tipo="serie", idtemp=idserie, valor="3", action="pordede_check", show=show))
        itemlist.append(Item(channel='pordede', title="Marcar como Favorita", tipo="serie", idtemp=idserie, valor="4", action="pordede_check", show=show))
        itemlist.append(Item(channel='pordede', title="Quitar marca", tipo="serie", idtemp=idserie, valor="0", action="pordede_check", show=show))

    return itemlist
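# Sketch of how the "2x05 Title" labels above are assembled from the season header
# and the episode number; the sample values are invented.
nombre_temporada = "Temporada 2"
numero = "05"
scrapedtitle = "El episodio de ejemplo"
title = nombre_temporada.replace("Temporada ", "").replace("Extras", "Extras 0") + "x" + numero + " " + scrapedtitle
# -> "2x05 El episodio de ejemplo"; an "Extras" header would become season 0: "Extras 0x..."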
def programas(item, load_all_pages=False):
    logger.info("tvalacarta.channels.xiptv programas")
    itemlist = []

    if item.url == "":
        item.url = PROGRAMAS_URL

    # Extract the programs
    data = scrapertools.cache_page(item.url)
    data = scrapertools.find_single_match(data, '(<li[^<]+<div class="item">.*?<div class="pager">.*?</div>)')

    '''
    <li>
    <div class="item">
    <div class="image drop-shadow curved curved-hz-1">
    <a href="/sex-toy-ficcions"><img alt="Frame_sex_toy_ficcions" src="/media/asset_publics/resources/000/106/321/program/FRAME_SEX_TOY_FICCIONS.JPG?1350386776" /></a>
    </div>
    <div class="archived"><em>Històric</em></div>
    <div class="content">
    <h4><a href="/sex-toy-ficcions">Sex Toy Ficcions</a></h4>
    <h5>
    <a href="/programes/page/10?model_type=Program&program%5Bprogram_categories%5D=Nous+formats">Nous formats</a>
    </h5>
    <p>Sèrie en clau de comèdia, que gira al voltant de reunions cada cop més habituals conegudes com a "tupper sex", trobades a domicili per millorar la vida sexual de les persones que hi participen. La intenció de Sex Toy Ficcions és aconseguir que l'espectador s'identifiqui amb les conductes i frustracions sexuals dels protagonistes d'aquesta ficció...</p>
    <span class="chapters">
    13 capítols
    </span>
    <dl>
    <dt>TV responsable</dt>
    <dd>La Xarxa</dd>
    <dt>Categoria</dt>
    <dd>
    <a href="/programes/page/10?model_type=Program&program%5Bprogram_categories%5D=Nous+formats">Nous formats</a>
    '''
    patron = '<li>[^<]+<div class="item">(.*?)</li>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for match in matches:
        scrapedurl = scrapertools.find_single_match(match, '<a href="([^"]+)"')
        scrapedthumbnail = scrapertools.find_single_match(match, '<img alt="[^"]+" src="([^"]+)"')
        scrapedtitle = scrapertools.find_single_match(match, '<h4[^<]+<a href="[^"]+">([^<]+)</a>')
        scrapedcategory = scrapertools.find_single_match(match, '<h5[^<]+<a href="[^"]+">([^<]+)</a>')
        scrapedplot = scrapertools.find_single_match(match, '<p>(.*?)</p>')
        title = scrapertools.htmlclean(scrapedtitle)
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = scrapertools.htmlclean(scrapedcategory + "\n" + scrapedplot).strip()
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(Item(channel=CHANNELNAME, title=title, action="episodios", url=url, page=url, thumbnail=thumbnail, fanart=thumbnail, plot=plot, show=title, category="programas", viewmode="movie_with_plot", folder=True))

    # Next page
    next_page_url = scrapertools.find_single_match(data, '<a href="([^"]+)">next</a>')
    if next_page_url != "":
        next_page_url = urlparse.urljoin(item.url, next_page_url)
        logger.info("next_page_url=" + next_page_url)
        next_page_item = Item(channel=CHANNELNAME, title=">> Página siguiente", action="programas", extra="pager", url=next_page_url, folder=True)
        if load_all_pages:
            itemlist.extend(programas(next_page_item, load_all_pages))
        else:
            itemlist.append(next_page_item)

    return itemlist
def parse_mixed_results(item, data):
    patron = '<a class="defaultLink extended" href="([^"]+)"[^<]+'
    patron += '<div class="coverMini shadow tiptip" title="([^"]+)"[^<]+'
    patron += '<img class="centeredPic.*?src="([^"]+)"'
    patron += '[^<]+<img[^<]+<div class="extra-info">'
    patron += '<span class="year">([^<]+)</span>'
    patron += '<span class="value"><i class="icon-star"></i>([^<]+)</span>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    itemlist = []
    for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedyear, scrapedvalue in matches:
        title = scrapertools.htmlclean(scrapedtitle)
        if scrapedyear != '':
            title += " (" + scrapedyear + ")"
        fulltitle = title
        if scrapedvalue != '':
            title += " (" + scrapedvalue + ")"
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        fanart = thumbnail.replace("mediathumb", "mediabigcover")
        plot = ""
        # http://www.pordede.com/peli/the-lego-movie
        # http://www.pordede.com/links/view/slug/the-lego-movie/what/peli?popup=1
        if "/peli/" in scrapedurl or "/docu/" in scrapedurl:
            # sectionStr = "peli" if "/peli/" in scrapedurl else "docu"
            if "/peli/" in scrapedurl:
                sectionStr = "peli"
            else:
                sectionStr = "docu"
            referer = urlparse.urljoin(item.url, scrapedurl)
            url = referer.replace("/{0}/".format(sectionStr), "/links/view/slug/") + "/what/{0}".format(sectionStr)
            logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
            itemlist.append(Item(channel=item.channel, action="findvideos", title=title, extra=referer, url=url, thumbnail=thumbnail, plot=plot, fulltitle=fulltitle, fanart=fanart, contentTitle=scrapedtitle, contentType="movie", context=["buscar_trailer"]))
        else:
            referer = item.url
            url = urlparse.urljoin(item.url, scrapedurl)
            itemlist.append(Item(channel=item.channel, action="episodios", title=title, extra=referer, url=url, thumbnail=thumbnail, plot=plot, fulltitle=fulltitle, show=title, fanart=fanart, contentTitle=scrapedtitle, contentType="tvshow", context=["buscar_trailer"]))

    next_page = scrapertools.find_single_match(data, '<div class="loadingBar" data-url="([^"]+)"')
    if next_page != "":
        url = urlparse.urljoin("http://www.pordede.com", next_page)
        itemlist.append(Item(channel=item.channel, action="lista", title=">> Página siguiente", extra=item.extra, url=url))

    try:
        import xbmcplugin
        xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
        xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    except:
        pass

    return itemlist
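# The /peli/ -> /links/view/slug/ rewrite above, shown standalone. The example URL
# comes from the comment in the code.
referer = "http://www.pordede.com/peli/the-lego-movie"
sectionStr = "peli"
url = referer.replace("/{0}/".format(sectionStr), "/links/view/slug/") + "/what/{0}".format(sectionStr)
# -> "http://www.pordede.com/links/view/slug/the-lego-movie/what/peli"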
def episodios(item):
    logger.info()
    itemlist = []

    # Download the page
    idserie = ''
    data = httptools.downloadpage(item.url).data

    patrontemporada = '<ul.*?<li class="season-header" >([^<]+)<(.*?)\s+</ul>'
    matchestemporadas = re.compile(patrontemporada, re.DOTALL).findall(data)
    idserie = scrapertools.find_single_match(data, 'data-model="5" data-id="(\d+)"')
    token = scrapertools.find_single_match(data, '_token" content="([^"]+)"')

    if config.get_platform().startswith("xbmc") or config.get_platform().startswith("kodi"):
        itemlist.append(Item(channel=item.channel, action="infosinopsis", title="INFO / SINOPSIS", url=item.url, thumbnail=item.thumbnail, fanart=item.fanart, folder=False))

    for nombre_temporada, bloque_episodios in matchestemporadas:
        # Extract the episodes
        patron_episodio = '<li><a href="#"(.*?)</a></li>'
        # patron = '<li><a href="#" data-id="([^"]*)".*?data-href="([^"]+)">\s*<div class="name">\s*<span class="num">([^<]+)</span>\s*([^<]+)\s*</div>.*?"show-close-footer episode model([^"]+)"'
        matches = re.compile(patron_episodio, re.DOTALL).findall(bloque_episodios)
        for data_episodio in matches:
            scrapeid = scrapertools.find_single_match(data_episodio, '<li><a href="#" data-id="([^"]*)"')
            scrapedurl = scrapertools.find_single_match(data_episodio, 'data-href="([^"]+)">\s*<div class="name">')
            numero = scrapertools.find_single_match(data_episodio, '<span class="num">([^<]+)</span>')
            scrapedtitle = scrapertools.find_single_match(data_episodio, '<span class="num">.*?</span>\s*([^<]+)\s*</div>')
            visto = scrapertools.find_single_match(data_episodio, '"show-close-footer episode model([^"]+)"')
            title = nombre_temporada.replace("Temporada ", "").replace("Extras de la serie", "Extras 0").replace(" ", "") + "x" + numero + " " + scrapertools.htmlclean(scrapedtitle)
            if visto.strip() == "seen":
                title = "[visto] " + title
            thumbnail = item.thumbnail
            fanart = item.fanart
            plot = ""
            # https://www.plusdede.com/peli/the-lego-movie
            # https://www.plusdede.com/links/view/slug/the-lego-movie/what/peli?popup=1
            # https://www.plusdede.com/links/viewepisode/id/475011?popup=1
            # epid = scrapertools.find_single_match(scrapedurl,"id/(\d+)")
            url = "https://www.plusdede.com" + scrapedurl
            itemlist.append(Item(channel=item.channel, action="findvideos", nom_serie=item.title, tipo="5", title=title, url=url, thumbnail=thumbnail, plot=plot, fulltitle=title, fanart=fanart, show=item.show))

    if config.get_videolibrary_support():
        # With year and rating the series cannot be updated correctly; if the rating
        # also changes, another folder would be created.
        # Without year and without rating:
        show = re.sub(r"\s\(\d+\)\s\(\d+\.\d+\)", "", item.show)
        # Without year:
        # show = re.sub(r"\s\(\d+\)", "", item.show)
        # Without rating:
        # show = re.sub(r"\s\(\d+\.\d+\)", "", item.show)
        itemlist.append(Item(channel='plusdede', title="Añadir esta serie a la biblioteca de XBMC", url=item.url, token=token, action="add_serie_to_library", extra="episodios###", show=show))
        itemlist.append(Item(channel='plusdede', title="Descargar todos los episodios de la serie", url=item.url, token=token, action="download_all_episodes", extra="episodios", show=show))
        itemlist.append(Item(channel='plusdede', title="Marcar como Pendiente", tipo="5", idtemp=idserie, token=token, valor="pending", action="plusdede_check", show=show))
        itemlist.append(Item(channel='plusdede', title="Marcar como Siguiendo", tipo="5", idtemp=idserie, token=token, valor="following", action="plusdede_check", show=show))
        itemlist.append(Item(channel='plusdede', title="Marcar como Finalizada", tipo="5", idtemp=idserie, token=token, valor="seen", action="plusdede_check", show=show))
        itemlist.append(Item(channel='plusdede', title="Marcar como Favorita", tipo="5", idtemp=idserie, token=token, valor="favorite", action="plusdede_check", show=show))
        itemlist.append(Item(channel='plusdede', title="Quitar marca", tipo="5", idtemp=idserie, token=token, valor="nothing", action="plusdede_check", show=show))
        itemlist.append(Item(channel='plusdede', title="Añadir a lista", tipo="5", tipo_esp="lista", idtemp=idserie, token=token, action="plusdede_check", show=show))

    return itemlist
def parse_mixed_results(item, data):
    itemlist = []
    patron = '<div class="media-dropdown mini dropdown model" data-value="([^"]+)"+'
    patron += '.*?<a href="([^"]+)"[^<]data-toggle="tooltip" data-container="body"+'
    patron += ' data-delay="500" title="([^"]+)"[^<]+'
    patron += '.*?src="([^"]+)"+'
    patron += '.*?<div class="year">([^<]+)</div>+'
    patron += '.*?<div class="value"><i class="fa fa-star"></i> ([^<]+)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    if item.tipo == "lista":
        following = scrapertools.find_single_match(data, '<div class="follow-lista-buttons ([^"]+)">')
        data_id = scrapertools.find_single_match(data, 'data-model="10" data-id="([^"]+)">')
        if following.strip() == "following":
            itemlist.append(Item(channel='plusdede', title="Dejar de seguir", idtemp=data_id, token=item.token, valor="unfollow", action="plusdede_check", url=item.url, tipo=item.tipo))
        else:
            itemlist.append(Item(channel='plusdede', title="Seguir esta lista", idtemp=data_id, token=item.token, valor="follow", action="plusdede_check", url=item.url, tipo=item.tipo))

    for visto, scrapedurl, scrapedtitle, scrapedthumbnail, scrapedyear, scrapedvalue in matches:
        title = ""
        if visto.strip() == "seen":
            title += "[visto] "
        title += scrapertools.htmlclean(scrapedtitle)
        if scrapedyear != '':
            title += " (" + scrapedyear + ")"
        fulltitle = title
        if scrapedvalue != '':
            title += " (" + scrapedvalue + ")"
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        fanart = thumbnail.replace("mediathumb", "mediabigcover")
        plot = ""
        # https://www.plusdede.com/peli/the-lego-movie
        # https://www.plusdede.com/links/view/slug/the-lego-movie/what/peli?popup=1
        if "/peli/" in scrapedurl or "/docu/" in scrapedurl:
            # sectionStr = "peli" if "/peli/" in scrapedurl else "docu"
            if "/peli/" in scrapedurl:
                sectionStr = "peli"
            else:
                sectionStr = "docu"
            referer = urlparse.urljoin(item.url, scrapedurl)
            url = urlparse.urljoin(item.url, scrapedurl)
            if item.tipo != "series":
                itemlist.append(Item(channel=item.channel, action="findvideos", title=title, extra=referer, url=url, thumbnail=thumbnail, plot=plot, fulltitle=fulltitle, fanart=fanart, contentTitle=scrapedtitle, contentType="movie", context=["buscar_trailer"]))
        else:
            referer = item.url
            url = urlparse.urljoin(item.url, scrapedurl)
            if item.tipo != "pelis":
                itemlist.append(Item(channel=item.channel, action="episodios", title=title, extra=referer, url=url, thumbnail=thumbnail, plot=plot, fulltitle=fulltitle, show=title, fanart=fanart, contentTitle=scrapedtitle, contentType="tvshow", context=["buscar_trailer"]))

    next_page = scrapertools.find_single_match(data, '<div class="onclick load-more-icon no-json" data-action="replace" data-url="([^"]+)">')
    if next_page != "":
        url = urlparse.urljoin("https://www.plusdede.com", next_page).replace("amp;", "")
        itemlist.append(Item(channel=item.channel, action="pag_sig", token=item.token, title=">> Página siguiente", extra=item.extra, url=url))

    try:
        import xbmcplugin
        xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
        xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    except:
        pass

    return itemlist
def findvideos(item):
    logger.info()
    itemlist = []

    if item.contentType == "movie":
        # Download the page
        data = httptools.downloadpage(item.url).data
        data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;|<br>", "", data)

        if not item.infoLabels["tmdb_id"]:
            item.infoLabels["tmdb_id"] = scrapertools.find_single_match(data, '<a href="https://www.themoviedb.org/'
                                                                              '[^/]+/(\d+)')
            item.infoLabels["year"] = scrapertools.find_single_match(data, 'class="e_new">(\d{4})')
        if __modo_grafico__:
            tmdb.set_infoLabels_item(item, __modo_grafico__)
        if not item.infoLabels["plot"]:
            item.infoLabels["plot"] = scrapertools.find_single_match(data, 'itemprop="description">([^<]+)</div>')
        if not item.infoLabels["genre"]:
            item.infoLabels["genre"] = ", ".join(scrapertools.find_multiple_matches(data, '<a itemprop="genre"[^>]+>'
                                                                                          '([^<]+)</a>'))
        ficha = scrapertools.find_single_match(item.url, '-f(\d+)-')
        if not ficha:
            ficha = scrapertools.find_single_match(item.url, 'f=(\d+)')
        cid = "0"
    else:
        ficha, cid = scrapertools.find_single_match(item.url, 'ficha=(\d+)&c_id=(\d+)')

    url = "https://playmax.mx/c_enlaces_n.php?apikey=%s&sid=%s&ficha=%s&cid=%s" % (apikey, sid, ficha, cid)
    data = httptools.downloadpage(url).data
    data = xml2dict(data)

    for k, v in data["Data"].items():
        try:
            if type(v) is dict:
                if k == "Online":
                    order = 1
                elif k == "Download":
                    order = 0
                else:
                    order = 2
                itemlist.append(item.clone(action="", title=k, text_color=color3, order=order))
                if type(v["Item"]) is str:
                    continue
                elif type(v["Item"]) is dict:
                    v["Item"] = [v["Item"]]
                for it in v["Item"]:
                    try:
                        thumbnail = "%s/styles/prosilver/imageset/%s.png" % (host, it['Host'])
                        title = "  %s - %s/%s" % (it['Host'].capitalize(), it['Quality'], it['Lang'])
                        calidad = int(scrapertools.find_single_match(it['Quality'], '(\d+)p'))
                        calidadaudio = it['QualityA'].replace("...", "")
                        subtitulos = it['Subtitles'].replace("Sin subtítulos", "")
                        if subtitulos:
                            title += " (%s)" % subtitulos
                        if calidadaudio:
                            title += " [Audio:%s]" % calidadaudio
                        likes = 0
                        if it["Likes"] != "0" or it["Dislikes"] != "0":
                            likes = int(it["Likes"]) - int(it["Dislikes"])
                            title += " (%s ok, %s ko)" % (it["Likes"], it["Dislikes"])
                        if type(it["Url"]) is dict:
                            for i, enlace in enumerate(it["Url"]["Item"]):
                                titulo = title + " (Parte %s)" % (i + 1)
                                itemlist.append(item.clone(title=titulo, url=enlace, action="play", calidad=calidad, thumbnail=thumbnail, order=order, like=likes, ficha=ficha, cid=cid, folder=False))
                        else:
                            url = it["Url"]
                            itemlist.append(item.clone(title=title, url=url, action="play", calidad=calidad, thumbnail=thumbnail, order=order, like=likes, ficha=ficha, cid=cid, folder=False))
                    except:
                        pass
        except:
            pass

    if not config.get_setting("order_web", "playmax"):
        itemlist.sort(key=lambda it: (it.order, it.calidad, it.like), reverse=True)
    else:
        itemlist.sort(key=lambda it: it.order, reverse=True)
    if itemlist:
        itemlist.extend(acciones_fichas(item, sid, ficha))

    if not itemlist and item.contentType != "movie":
        url = url.replace("apikey=%s&" % apikey, "")
        data = httptools.downloadpage(url).data
        data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;|<br>", "", data)
        patron = '<div id="f_fde_c"[^>]+>(.*?update_fecha\(\d+\)">)</div>'
        estrenos = scrapertools.find_multiple_matches(data, patron)
        for info in estrenos:
            info = "Estreno en " + scrapertools.htmlclean(info)
            itemlist.append(item.clone(action="", title=info))

    if not itemlist:
        itemlist.append(item.clone(action="", title="No hay enlaces disponibles"))

    return itemlist
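# Sketch of the two sort orders used above: by (order, quality, likes) descending,
# or by order alone when the "order_web" setting is on. The items are stand-ins.
from collections import namedtuple

It = namedtuple("It", "title order calidad like")
items = [It("a", 1, 720, 2), It("b", 0, 1080, 5), It("c", 1, 1080, 0)]
items.sort(key=lambda it: (it.order, it.calidad, it.like), reverse=True)
# -> c (online, 1080p) first, then a, then b (the download section sorts last)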
def mainlist(item):
    logger.info("deportesalacarta.livesportsws lista")
    itemlist = []
    import xbmc
    check = xbmc.getInfoLabel('ListItem.Title')

    if item.channel != __channel__:
        item.channel = __channel__
    else:
        if not xbmc.Player().isPlaying():
            xbmc.executebuiltin('xbmc.PlayMedia(' + song + ')')

    """
    What happens with url = http://translate.googleusercontent.com/translate_c?depth=1&nv=1&rurl=translate.google.com&sl=ru&tl=es&u=http://lfootball.ws/&usg=ALkJrhgzJfI1TDn3BxGgPbjgAHHS7J0i9g

    Redirects:
    1. http://translate.google.com/translate?depth=1&nv=1&rurl=translate.google.com&sl=ru&tl=es&u=http://lfootball.ws/
    2. http://translate.googleusercontent.com/translate_p?nv=1&rurl=translate.google.com&sl=ru&tl=es&u=http://lfootball.ws/&depth=2&usg=ALkJrhgAAAAAVupk4tLINTbmU7JrcQdl0G4V3LtnRM1n
    3. http://translate.googleusercontent.com/translate_c?depth=2&nv=1&rurl=translate.google.com&sl=ru&tl=es&u=http://lfootball.ws/&usg=ALkJrhhhRDwHSDRDN4t27cX5CYZLFFQtmA

    Which means a fresh key is needed each time in the "usg" argument, and to reach
    url 3 requests 1 and 2 must be made with 'follow_redirects=False', or with the
    combination of 'follow_redirects=False' and 'header_to_get="location"'
    """
    #### Option 1: 'follow_redirects=False'
    ## Request 1
    url = "http://translate.google.com/translate?depth=1&nv=1&rurl=translate.google.com&sl=ru&tl=es&u=http://livesport.ws/football"
    data = dhe(httptools.downloadpage(url, follow_redirects=False).data)  # .decode('cp1251').encode('utf8')
    ## Request 2
    url = scrapertools.get_match(data, ' src="([^"]+)" name=c ')
    data = dhe(httptools.downloadpage(url, follow_redirects=False).data)  # .decode('cp1251').encode('utf8')
    ## Request 3
    url = scrapertools.get_match(data, 'URL=([^"]+)"')
    data = dhe(httptools.downloadpage(url).data)  # .decode('cp1251').encode('utf8')

    """
    #### Option 2: 'follow_redirects=False' and 'header_to_get="location"'
    ## Request 1
    url = "http://translate.google.com/translate?depth=1&nv=1&rurl=translate.google.com&sl=ru&tl=es&u=http://lfootball.ws/"
    data = dhe( scrapertools.downloadpage(url,follow_redirects=False) )#.decode('cp1251').encode('utf8')
    ## Request 2
    url = scrapertools.get_match(data, ' src="([^"]+)" name=c ')
    url = scrapertools.get_header_from_response(url, header_to_get="location")
    ## Request 3
    data = dhe( scrapertools.cachePage(url ) )#.decode('cp1251').encode('utf8')
    """
    patrondata = '</h1></div>(.*?)</h2>'
    matchesdata = re.compile(patrondata, re.DOTALL).findall(data)
    for bloque_data in matchesdata:

        patrondaygame = '<span class=text>.*?<span class=text>(.*?)</span></a>(.*?)</span> --></li></ul></div>'
        matchesdaygame = re.compile(patrondaygame, re.DOTALL).findall(bloque_data)

        for day, bloque_games in matchesdaygame:
            day = re.sub(r"</span>|<i class=ico><span>de</span></i>|<span class=text>|de", "", day)
            day = day.replace("actuales", "Hoy")
            day = scrapertools.htmlclean(day)
            dia = scrapertools.get_match(day, '(\d+)')
            mes = re.sub(r"(?i)de |hoy |ayer |mañana |el |día ", "", day)
            mes_ = scrapertools.find_single_match(mes, '\d+\s*([A-z]+)')
            if not mes_:
                mes_ = scrapertools.find_single_match(mes, '([A-z]+)\s*\d+,')
            mes = mes_.title()
            mes = month_convert(mes)
            mes = str(mes).zfill(2)

            if "hoy" in day or "Hoy" in day:
                day = day.replace(day, "[COLOR yellow][B]" + day + "[/B][/COLOR]")
            elif "Ayer" in day or "ayer" in day:
                day = day.replace(day, "[COLOR darkgoldenrod][B]" + day + "[/B][/COLOR]")
            else:
                day = day.replace(day, "[COLOR greenyellow][B]" + day + "[/B][/COLOR]")
            itemlist.append(Item(channel=__channel__, title=day, action="mainlist", url="", fanart="http://www.easywallprints.com/upload/designs/background-with-soccer-balls-zoom-1.jpg", thumbnail="http://s6.postimg.org/3yl2y4adt/livesportagenda.png", folder=False))

            patron = 'es&u=(.*?)&usg.*?id=event-(.*?)>(.*?)</i>.*?<span class=competition>.*?<span class=competition>(.*?)</span></a>.*?<i class="separator">.*?</span>(.*?)</span>.*?src=(.*?)>.*?src=(.*?)>.*?text-align: left">.*?</span>(.*?)</span>.*?<i class="live-broadcasting-status-(\d)"'  # '<a class="link" href="([^"]+)" title="(.*?)".*?<span class="liga"><span>(.*?)</span></span>.*?<span class="date"><span>(.*?)</span></span>'
            matches = re.compile(patron, re.DOTALL).findall(bloque_games)
            for url_info, id_event, hora, competition, team1, thumbnail, fanart, team2, status in matches:
                team1 = re.sub(r"-", " ", team1)
                team2 = re.sub(r"-", " ", team2)
                competition = re.sub(r"\.", "", competition)
                if status == "4":
                    continue
                if "00:" in hora:
                    hora = hora.replace("00:", "24:")
                if not "LIVE" in hora:
                    time = re.compile('(\d+):(\d+)', re.DOTALL).findall(hora)
                    for horas, minutos in time:
                        wrong_time = int(horas)
                        value = 1
                        correct_time = wrong_time - value
                        correct_time = str(correct_time)
                        hora = correct_time + ":" + minutos

                if "OFFLINE" in hora:
                    extra = hora
                    title = team1 + "-" + team2 + "____"
                    title = title.title()
                    fulltitle = title.replace(title, "[COLOR burlywood][B]" + title + "[/B][/COLOR]")
                    title = title.replace(title, "[COLOR burlywood]" + title + "[/COLOR]")
                    action = "mainlist"
                    folder = False
                    evento = ""
                    time = ""
                    fecha = ""
                else:
                    if "hoy" in day or "Hoy" in day:
                        title = team1 + " - " + team2
                        title = title.title()
                        fulltitle = title.replace(title, "[COLOR deepskyblue][B]" + title + "[/B][/COLOR]")
                        if "LIVE" in hora:
                            import time
                            time = "live"
                            fecha = dia + "/" + str(mes)
                            fecha = fecha.strip()
                            evento = team1 + " vs " + team2
                            extra = hora
                            hora = u'\u006C\u0456\u0475\u04BC!!'.encode('utf-8')
                            hora = hora.replace(hora, "[COLOR crimson][B]" + hora + "[/B][/COLOR]")
                        else:
                            evento = team1 + " vs " + team2
                            time = hora.strip()
                            fecha = dia + "/" + str(mes)
                            fecha = fecha.strip()
                            extra = hora
                            hora = hora.replace(hora, "[COLOR aquamarine][B]" + hora + "[/B][/COLOR]")
                        title = hora + "  " + title.replace(title, "[COLOR deepskyblue]" + title + "[/COLOR]") + "[COLOR floralwhite]" + " " + "(" + competition + ")" + "[/COLOR]"
                        action = "enlaces"
                        folder = True
                    else:
                        title = team1 + " - " + team2
                        evento = team1 + " vs " + team2
                        time = hora
                        fecha = dia + "/" + mes
                        title = title.title()
                        fulltitle = title.replace(title, "[COLOR mediumaquamarine][B]" + title + "[/B][/COLOR]")
                        title = "[COLOR aquamarine][B]" + hora + "[/B][/COLOR]" + "  " + title.replace(title, "[COLOR mediumaquamarine]" + title + "[/COLOR]") + "[COLOR paleturquoise]" + " " + "(" + competition + ")" + "[/COLOR]"
                        action = "enlaces"
                        folder = True
                        extra = hora

                post_id = scrapertools.get_match(url_info, 'http.*?livesport.ws\/(.*?)-')
                url = "http://livesport.ws/engine/modules/sports/sport_refresh.php?from=event&event_id=" + id_event + "&tab_id=0&post_id=" + post_id
                itemlist.append(Item(channel=__channel__, title="    " + title, action=action, url=url, thumbnail=urlparse.urljoin(host, thumbnail), fanart=urlparse.urljoin(host, fanart), fulltitle=fulltitle, extra=extra, date=fecha, time=time, evento=evento, context="info_partido", deporte="futbol", folder=folder))

    return itemlist
def entradas(item):
    logger.info("deportesalacarta.channels.tugoleada entradas")
    itemlist = []
    data = scrapertools.downloadpage(host)
    bloque = scrapertools.find_single_match(data, '<div class="col-md-12">(.*?)</div>')

    try:
        matches = scrapertools.find_multiple_matches(bloque, '(?i)<p.*?>(?:<img.*?>|)(.*?CANAL\s*(\d+))</p>')
        for scrapedtitle, canal in matches:
            url = host + "canal" + canal
            scrapedtitle = "[COLOR green]%s[/COLOR]" % scrapedtitle
            itemlist.append(item.clone(title=scrapedtitle, url=url, action="play"))
    except:
        import traceback
        logger.info(traceback.format_exc())
        matches = []

    if not itemlist:
        matches = scrapertools.find_multiple_matches(data, 'src="(https://i.gyazo.com[^"]+)"')
        for i, imagen in enumerate(matches):
            title = "Agenda: Imagen " + str(i + 1) + " (Click para agrandar)"
            itemlist.append(item.clone(title=title, url=imagen, thumbnail=imagen, action="abrir_imagen", folder=False))

    if not matches and re.search(r"(?i)elitegol", data):
        data = scrapertools.downloadpage(item.url)
        data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;", "", data)
        patron = '<div class="litd fecha">(.*?)\s*-\s*(\d+:\d+).*?</div>.*?src=".*?(\d+).png"' \
                 '.*?<div class="litd competicion">(.*?)</div>.*?href=[^>]+>(.*?)</a>' \
                 '.*?javascript:abrir_evento\((\d+)\)(.*?)</li>'
        matches = scrapertools.find_multiple_matches(data, patron)
        lista = []
        urls = []
        sports = {"1": "futbol", "2": "baloncesto", "3": "F1", "4": "tenis", "5": "ciclismo", "6": "otro", "7": "béisbol", "8": "rugby", "9": "voleybol"}
        for fecha, hora, sport, torneo, evento, id, check_live in matches:
            fecha = scrapertools.htmlclean(fecha)
            urls.append("http://www.elitegol.com/ajax/abrir_evento.php?id=%s" % id)
            partido = "[COLOR darkorange][B]" + evento + "[/B][/COLOR]"
            torneo = " [COLOR blue]" + torneo + "[/COLOR]"
            if "EN JUEGO" in check_live:
                scrapedtitle = "[COLOR red][B]" + fecha + "-" + hora + "[/B][/COLOR] " + partido + torneo
            else:
                scrapedtitle = "[COLOR green][B]" + fecha + "-" + hora + "[/B][/COLOR] " + partido + torneo
            if re.search(r'(?i)hoy', fecha):
                date = datetime.datetime.today()
                date = date.strftime("%d/%m")
            elif re.search(r'(?i)mañana', fecha):
                date = datetime.datetime.today() + datetime.timedelta(days=1)
                date = date.strftime("%d/%m")
            else:
                date = fecha
            try:
                deporte = sports[sport]
            except:
                deporte = "otro"
            lista.append(Item(channel=__channel__, title=scrapedtitle, action="", url="", date=date, time=hora, deporte=deporte, evento=evento))

        try:
            from multiprocessing.dummy import Pool as ThreadPool
            thread = ThreadPool()
            results = thread.map(scrapertools.downloadpageWithoutCookies, urls)
            thread.close()
            thread.join()
        except:
            results = []
            for url_ajax in urls:
                data_result = scrapertools.downloadpageWithoutCookies(url_ajax)
                results.append(data_result)

        prox_eventos = []
        for i, data in enumerate(results):
            busqueda = re.search(r'(?i)tumarcador', data, flags=re.DOTALL)
            if busqueda:
                canal = scrapertools.find_single_match(data, '(?i)>(?:\w+|\s*|)tumarcador.*?(\d+).*?</a>')
                fulltitle = lista[i].fulltitle
                scrapedurl = host + "canal" + canal
                itemlist.append(lista[i].clone(action="play", url=scrapedurl))
            else:
                prox_eventos.append(lista[i])

        itemlist.append(Item(channel=__channel__, action="", title="", folder=False))
        itemlist.append(Item(channel=__channel__, action="", title="[COLOR magenta][B]Posibles próximos eventos (No confirmados)[/B][/COLOR]", folder=False))
        for evento in prox_eventos:
            itemlist.append(evento)

    return itemlist
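# Sketch of the thread-pool fan-out used above to fetch all the ajax event pages at
# once, with a plain function standing in for scrapertools.downloadpageWithoutCookies.
from multiprocessing.dummy import Pool as ThreadPool

def fetch(url):
    return "data for " + url  # stand-in for the real HTTP download

urls = ["http://www.elitegol.com/ajax/abrir_evento.php?id=%d" % i for i in range(3)]
pool = ThreadPool()
results = pool.map(fetch, urls)  # one result per URL, in the same order as `urls`
pool.close()
pool.join()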
def peliculas(item): logger.info("[cineblog01.py] mainlist") itemlist = [] if item.url == "": item.url = sito # Descarga la p�gina data = scrapertools.cache_page(item.url) if DEBUG: logger.info(data) # Extrae las entradas (carpetas) ''' <div class="span4"> <a href="http://www.cb01.eu/testament-of-youth-sub-ita-2014/"><p><img src="http://www.locandinebest.net/imgk/testament_of_youth.jpg"></p> </a> <!--<img src="http://www.cb01.eu/wp-content/themes/cb01-new_2015/images/film-img1.png" alt=""/>--> </div> <div class="span8"> <!--<div class="index_post_content">--> <a href="http://www.cb01.eu/testament-of-youth-sub-ita-2014/"> <h1>Testament of Youth [Sub-ITA] (2014)</h1></a> <!--<p>COMEDY - DURATION 92 '- USA<br>--> <p><strong>BIOGRAFICO – DURATA 132′ – USA</strong> <br /> L’incontenibile e intelligente Vera Brittain sfida i pregiudizi della famiglia e della citt� natale per ottenere una borsa di studio a Oxford. Mentre persegue i suoi sogni letterari, Vera si innamora di Roland Leighton, il migliore amico del fratello… +Info » ... <div class="rating"> ''' ''' <div class="span4"> <a href="http://www.cb01.eu/serietv/under-the-dome/"><p><img src="http://www.locandinebest.net/imgk/under_the_dome.jpg" alt="" width="350" height="" /></p> </a> <!--<img src="http://www.cb01.eu/serietv/wp-content/themes/cb01-new_2015/images/film-img1.png" alt=""/>--> </div> <div class="span8"> <!--<div class="index_post_content">--> <a href="http://www.cb01.eu/serietv/under-the-dome/"> <h1>Under the Dome</h1></a> <!--<p>COMEDY - DURATION 92 '- USA<br>--> FANTASCIENZA / MISTERO / DRAMMATICO (2013-) � una tiepida mattina d’autunno a Chester’s Mill, nel Maine, una mattina come tante altre. All’improvviso, una specie di cilindro trasparente cala sulla cittadina, tranciando in due tutto quello che si trova lungo il suo perimetro: cose, animali, persone. Come se dal cielo fosse scesa l <br><a href="http://www.cb01.eu/serietv/under-the-dome/">+ info � ...</a><br><br> <!--</div>--> <!--<div class="info">--> <div class="rating"> ''' patronvideos = '<div class="span4"[^<]+' patronvideos += '<a href="([^"]+)"><p><img src="([^"]+)"[^<]+</p[^<]+' patronvideos += '</a[^<]+' patronvideos += '<!--<img[^>]+>--[^<]+' patronvideos += '</div[^<]+' patronvideos += '<div class="span8"[^<]+' patronvideos += '<!--<div class="index_post_content">--[^<]+' patronvideos += '<a[^<]+<h1>([^<]+)</h1></a>(.*?)<div class="rating">' #patronvideos += '<div id="description"><p>(.?*)</div>' matches = re.compile(patronvideos, re.DOTALL).findall(data) scrapertools.printMatches(matches) for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedplot in matches: title = scrapedtitle url = urlparse.urljoin(item.url, scrapedurl) thumbnail = urlparse.urljoin(item.url, scrapedthumbnail) plot = scrapertools.htmlclean(scrapedplot).strip() if (DEBUG): logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]") itemlist.append( Item(channel=item.channel, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot, viewmode="movie_with_plot", fanart=thumbnail)) # Next page mark next_page_url = scrapertools.find_single_match( data, '<li><a href="([^"]+)">></a></li>') if next_page_url != "": itemlist.append( Item(channel=item.channel, action="peliculas", title=">> Next page", url=next_page_url, viewmode="movie_with_plot")) return itemlist
def calendario(item): logger.info() itemlist = [] data = httptools.downloadpage(item.url).data patron = '<div class="specific-date">.*?datetime="\d+-(\d+)-(\d+).*?class="day-name">.*?>\s*([^<]+)</time>(.*?)</section>' bloques = scrapertools.find_multiple_matches(data, patron) for mes, dia, title, b in bloques: patron = 'class="available-time">([^<]+)<.*?<cite itemprop="name">(.*?)</cite>.*?href="([^"]+)"' \ '.*?>\s*(.*?)\s*</a>(.*?)</article>' matches = scrapertools.find_multiple_matches(b, patron) if matches: title = "%s/%s - %s" % (dia, mes, title.strip()) itemlist.append(item.clone(action="", title=title)) for hora, title, url, subt, datos in matches: subt = subt.replace("Available", "Disponible").replace( "Episode", "Episodio").replace("in ", "en ") subt = re.sub(r"\s{2,}", " ", subt) if "<time" in subt: subt = re.sub(r"<time.*?>", "", subt).replace("</time>", "") scrapedtitle = " [%s] %s - %s" % ( hora, scrapertools.htmlclean(title), subt) scrapedtitle = re.sub( r"\[email protected\]|\[email\xc2\xa0protected\]", "Idolm@ster", scrapedtitle) if "Disponible" in scrapedtitle: if item.proxy == "spain": url = urllib.unquote( url.replace("/browse.php?u=", "").replace("&b=12", "")) action = "play" server = "crunchyroll" else: action = "" server = "" thumb = scrapertools.find_single_match( datos, '<img class="thumbnail" src="([^"]+)"') if not thumb: thumb = scrapertools.find_single_match(datos, 'src="([^"]+)"') if thumb: thumb = urllib.unquote(thumb.replace("/browse.php?u=", "").replace("_thumb", "_full") \ .replace("&b=12", "").replace("_large", "_full")) itemlist.append( item.clone(action=action, url=url, title=scrapedtitle, contentTitle=title, thumbnail=thumb, text_color=color2, contentSerieName=title, server=server)) next = scrapertools.find_single_match( data, 'js-pagination-next"\s*href="([^"]+)"') if next: if item.proxy == "spain": next = "http://proxyanonimo.es" + next.replace("&b=12", "") else: next = host + next itemlist.append( item.clone(action="calendario", url=next, title=">> Siguiente Semana")) prev = scrapertools.find_single_match( data, 'js-pagination-last"\s*href="([^"]+)"') if prev: if item.proxy == "spain": prev = "http://proxyanonimo.es" + prev.replace("&b=12", "") else: prev = host + prev itemlist.append( item.clone(action="calendario", url=prev, title="<< Semana Anterior")) return itemlist
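# Illustrative sketch (not part of the channel) of the proxy-URL cleanup used in
# calendario() above: when browsing through the anonymising proxy, real URLs
# arrive wrapped as /browse.php?u=<percent-encoded target>&b=12 and have to be
# unwrapped before use.
try:
    from urllib import unquote        # Python 2
except ImportError:
    from urllib.parse import unquote  # Python 3

def unwrap_proxy(url):
    return unquote(url.replace("/browse.php?u=", "").replace("&b=12", ""))

# unwrap_proxy("/browse.php?u=http%3A%2F%2Fexample.com%2Fep1&b=12")
# -> "http://example.com/ep1"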
def filmaffinity_search(item): logger.info() if item.filmaffinity: item.url = item.filmaffinity return search_links_filmaff(item) # Comprueba si es una búsqueda de cero o viene de la opción Siguiente if item.page != "": data = httptools.downloadpage(item.page).data else: params = urllib.urlencode([('stext', item.contentTitle), ('stype%5B%5D', 'title'), ('country', ''), ('genre', ''), ('fromyear', item.year), ('toyear', item.year)]) url = "http://www.filmaffinity.com/es/advsearch.php?%s" % params data = httptools.downloadpage(url).data itemlist = [] patron = '<div class="mc-poster">.*?<img.*?src="([^"]+)".*?' \ '<div class="mc-title"><a href="/es/film(\d+).html"[^>]+>(.*?)<img' matches = scrapertools.find_multiple_matches(data, patron) # Si solo hay un resultado, busca directamente los trailers, si no lista todos los resultados if len(matches) == 1: item.url = "http://www.filmaffinity.com/es/evideos.php?movie_id=%s" % matches[0][1] item.thumbnail = matches[0][0] if not item.thumbnail.startswith("http"): item.thumbnail = "http://www.filmaffinity.com" + item.thumbnail itemlist = search_links_filmaff(item) elif len(matches) > 1: for scrapedthumbnail, id, scrapedtitle in matches: if not scrapedthumbnail.startswith("http"): scrapedthumbnail = "http://www.filmaffinity.com" + scrapedthumbnail scrapedurl = "http://www.filmaffinity.com/es/evideos.php?movie_id=%s" % id if not PY3: scrapedtitle = unicode(scrapedtitle, encoding="utf-8", errors="ignore") scrapedtitle = scrapertools.htmlclean(scrapedtitle) itemlist.append( item.clone(title=scrapedtitle, url=scrapedurl, action="search_links_filmaff", thumbnail=scrapedthumbnail)) next_page = scrapertools.find_single_match( data, '<a href="([^"]+)">>></a>') if next_page != "": next_page = urlparse.urljoin("http://www.filmaffinity.com/es/", next_page) itemlist.append( item.clone(title=config.get_localized_string(70502), page=next_page, action="filmaffinity_search", thumbnail="", text_color="")) if not itemlist: itemlist.append( item.clone(title=config.get_localized_string(70501) % item.contentTitle, action="", thumbnail="", text_color="")) if keyboard: if item.contextual: title = "%s" else: title = "%s" itemlist.append( item.clone(title=title % config.get_localized_string(70513), action="manual_search", thumbnail="", extra="filmaffinity")) return itemlist
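# Sketch (not part of the channel) of the advanced-search request built in
# filmaffinity_search() above. Note that urlencode() percent-escapes field
# names itself, so passing the raw form name "stype[]" already yields
# "stype%5B%5D" on the wire.
try:
    from urllib import urlencode        # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

def build_advsearch_url(title, year):
    params = urlencode([('stext', title), ('stype[]', 'title'),
                        ('fromyear', year), ('toyear', year)])
    return "http://www.filmaffinity.com/es/advsearch.php?%s" % params

# build_advsearch_url("Blade Runner", "1982")
# -> ".../advsearch.php?stext=Blade+Runner&stype%5B%5D=title&fromyear=1982&toyear=1982"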
def colecciones(item): logger.info() from core import jsontools itemlist = [] usuario = False data = httptools.downloadpage(item.url).data if "Ver colecciones del usuario" not in item.title and not item.index: data = jsontools.load_json(data)["Data"] content = data["Content"] content = re.sub(r"\n|\r|\t|\s{2}| |<br>", "", content) else: usuario = True if item.follow: content = scrapertools.find_single_match(data, 'id="followed_collections"(.*?)<div id="recommended_collections"') else: content = scrapertools.find_single_match(data, '<div id="collections".*?<div class="collections_list(.*?)<div class="collections_list') content = re.sub(r"\n|\r|\t|\s{2}| |<br>", "", content) patron = '<a class="name" href="([^"]+)".*?>([^<]+)<.*?src="([^"]+)".*?<p class="info">(.*?)</p>' matches = scrapertools.find_multiple_matches(content, patron) index = "" if item.index and item.index != "0": matches = matches[item.index:item.index + 20] if len(matches) > item.index + 20: index = item.index + 20 elif len(matches) > 20: matches = matches[:20] index = 20 folder = filetools.join(config.get_data_path(), 'thumbs_copiapop') for url, scrapedtitle, thumb, info in matches: url = item.extra + url + "/gallery,1,1?ref=pager" title = "%s (%s)" % (scrapedtitle, scrapertools.htmlclean(info)) try: scrapedthumbnail = filetools.join(folder, "%s.jpg" % thumb.split("e=", 1)[1][-20:]) except: try: scrapedthumbnail = filetools.join(folder, "%s.jpg" % thumb.split("/thumbnail/", 1)[1][-20:]) thumb = thumb.replace("/thumbnail/", "/") except: scrapedthumbnail = "" if scrapedthumbnail: t = threading.Thread(target=download_thumb, args=[scrapedthumbnail, thumb]) t.setDaemon(True) t.start() else: scrapedthumbnail = thumb itemlist.append(Item(channel=item.channel, action="listado", title=title, url=url, thumbnail=scrapedthumbnail, text_color=color2, extra=item.extra, foldername=scrapedtitle)) if not usuario and data.get("NextPageUrl"): url = item.extra + data["NextPageUrl"] itemlist.append(item.clone(title=">> Página Siguiente", url=url, text_color="")) elif index: itemlist.append(item.clone(title=">> Página Siguiente", url=item.url, index=index, text_color="")) return itemlist
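# Illustrative sketch (not part of the channel) of the windowed pagination in
# colecciones() above: serve 20 matches per call and hand the next offset back
# through item.index. Note that the "more pages left?" check here runs against
# the full list before slicing.
def paginate(matches, index=0, page_size=20):
    window = matches[index:index + page_size]
    next_index = index + page_size if len(matches) > index + page_size else None
    return window, next_index

# paginate(list(range(45)))     -> (first 20 items, 20)
# paginate(list(range(45)), 40) -> (last 5 items, None)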
def episodios(item): logger.info("[rtve.py] episodios") # En la paginación la URL vendrá fijada, si no se construye aquí la primera página if item.url=="": # El ID del programa está en item.extra (ej: 42610) # La URL de los vídeos de un programa es # http://www.rtve.es/alacarta/interno/contenttable.shtml?ctx=42610&pageSize=20&pbq=1 item.url = "http://www.rtve.es/alacarta/interno/contenttable.shtml?ctx="+item.extra+"&pageSize=20&pbq=1" data = scrapertools.cachePage(item.url) itemlist = [] # Extrae los vídeos patron = '<li class="[^"]+">.*?' patron += '<span class="col_tit"[^>]+>[^<]+' patron += '<a href="([^"]+)">(.*?)</a>[^<]+' patron += '</span>[^<]+' patron += '<span class="col_tip">([^<]+)</span>[^<]+' patron += '<span class="col_dur">([^<]+)</span>.*?' patron += '<span class="col_fec">([^<]+)</span>.*?' patron += '<span class="detalle">([^>]+)</span>' matches = re.findall(patron,data,re.DOTALL) if DEBUG: scrapertools.printMatches(matches) # Crea una lista con las entradas for match in matches: if not "developer" in config.get_platform(): scrapedtitle = match[1]+" ("+match[2].strip()+") ("+match[3].strip()+") ("+match[4]+")" else: scrapedtitle = match[1] scrapedtitle = scrapedtitle.replace("<em>Nuevo</em> ","") scrapedtitle = scrapertools.unescape(scrapedtitle) scrapedtitle = scrapedtitle.strip() scrapedurl = urlparse.urljoin(item.url,match[0]) scrapedthumbnail = item.thumbnail scrapedplot = scrapertools.unescape(match[5].strip()) scrapedplot = scrapertools.htmlclean(scrapedplot).strip() scrapedextra = match[2] if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]") itemlist.append( Item(channel=CHANNELNAME, title=scrapedtitle , action="play" , server="rtve" , url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot , show=item.show, category = item.category, extra=scrapedextra, folder=False) ) if len(itemlist)>0: # Extrae la paginación patron = '<a name="paginaIR" href="([^"]+)"><span>Siguiente</span></a>' matches = re.findall(patron,data,re.DOTALL) if DEBUG: scrapertools.printMatches(matches) # Crea una lista con las entradas for match in matches: scrapedtitle = "!Página siguiente" scrapedurl = urlparse.urljoin(item.url,match).replace("&","&") #http://www.rtve.es/alacarta/interno/contenttable.shtml?pbq=2&modl=TOC&locale=es&pageSize=15&ctx=36850&advSearchOpen=false if not scrapedurl.endswith("&advSearchOpen=false"): scrapedurl = scrapedurl + "&advSearchOpen=false" scrapedthumbnail = "" scrapedplot = "" scrapedextra = item.extra if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]") itemlist.append( Item(channel=CHANNELNAME, title=scrapedtitle , action="episodios" , url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot , extra = scrapedextra, category = item.category, show=item.show) ) if (config.get_platform().startswith("xbmc") or config.get_platform().startswith("boxee")) and len(itemlist)>0: itemlist.append( Item(channel=item.channel, title=">> Opciones para esta serie", url=item.url, action="serie_options##episodios", thumbnail=item.thumbnail, extra = item.extra , show=item.show, folder=False)) else: # Extrae los vídeos patron = '<div class="mark"[^<]+' patron += '<a href="([^"]+)" title="([^"]+)"[^<]+' patron += '<span class="[^<]+' patron += '<img src="([^"]+)".*?' 
patron += '<div class="apiCall summary"[^<]+' patron += '<p[^<]+' patron += '<span class="time">([^<]+)</span[^<]+' patron += '<span class="date">([^<]+)</span>([^<]+)<' matches = re.findall(patron,data,re.DOTALL) if DEBUG: scrapertools.printMatches(matches) # Crea una lista con las entradas for scrapedurl,scrapedtitle,scrapedthumbnail,duracion,fecha,plot in matches: title = scrapedtitle+" ("+duracion+")("+fecha+")" url = urlparse.urljoin(item.url,scrapedurl) plot = plot thumbnail = scrapedthumbnail if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]") itemlist.append( Item(channel=CHANNELNAME, title=title , action="play" , server="rtve" , url=url, thumbnail=thumbnail, plot=plot , show=item.show, category = item.category, fanart=thumbnail, viewmode="movie_with_plot", folder=False) ) if (config.get_platform().startswith("xbmc") or config.get_platform().startswith("boxee")) and len(itemlist)>0: itemlist.append( Item(channel=item.channel, title=">> Opciones para esta serie", url=item.url, action="serie_options##episodios", thumbnail=item.thumbnail, extra = item.extra , show=item.show, folder=False)) return itemlist
def lista(item): logger.info("pelisalacarta.channels.divxatope lista") itemlist = [] ''' <li style="width:136px;height:263px;margin:0px 15px 0px 0px;"> <a href="http://www.divxatope.com/descargar/374639_ahi-os-quedais-web-screener-r6-español-castellano-2014.html" title="Descargar Ahi Os Quedais Web en DVD-Screener torrent gratis"><div class='ribbon-estreno' ></div> <img class="torrent-image" src="http://www.divxatope.com/uploads/torrents/images/thumbnails2/6798_ahi--os--quedais.jpg" alt="Descargar Ahi Os Quedais Web en DVD-Screener torrent gratis" style="width:130px;height:184px;" /> <h2 style="float:left;width:100%;margin:3px 0px 0px 0px;padding:0px 0px 3px 0px;line-height:12px;font-size:12px;height:23px;border-bottom:solid 1px #C2D6DB;">Ahi Os Quedais Web </h2> <strong style="float:left;width:100%;text-align:center;color:#000;margin:0px;padding:3px 0px 0px 0px;font-size:11px;line-height:12px;">DVD-Screener<br>Español Castellano </strong> </a> </li> ''' # Descarga la pagina if item.extra == "": data = scrapertools.cachePage(item.url) else: data = scrapertools.cachePage(item.url, post=item.extra) #logger.info("data="+data) patron = '<li [^<]+' patron += '<a href="([^"]+)".*?' patron += '<img class="[^"]+" src="([^"]+)"[^<]+' patron += '<h2[^>]+">([^<]+)</h2[^<]+' patron += '<strong[^>]+>(.*?)</strong>' matches = re.compile(patron, re.DOTALL).findall(data) scrapertools.printMatches(matches) for scrapedurl, scrapedthumbnail, scrapedtitle, calidad in matches: title = scrapedtitle.strip() + " (" + scrapertools.htmlclean( calidad) + ")" url = urlparse.urljoin(item.url, scrapedurl) thumbnail = urlparse.urljoin(item.url, scrapedthumbnail) plot = "" if (DEBUG): logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]") itemlist.append( Item(channel=__channel__, action="findvideos", title=title, fulltitle=title, url=url, thumbnail=thumbnail, plot=plot, folder=True)) next_page_url = scrapertools.find_single_match( data, '<li><a href="([^"]+)">Next</a></li>') if next_page_url != "": itemlist.append( Item(channel=__channel__, action="lista", title=">> Página siguiente", url=urlparse.urljoin(item.url, next_page_url), folder=True)) else: next_page_url = scrapertools.find_single_match( data, '<li><input type="button" class="btn-submit" value="Siguiente" onClick="paginar..(\d+)' ) if next_page_url != "": itemlist.append( Item(channel=__channel__, action="lista", title=">> Página siguiente", url=item.url, extra=item.extra + "&pg=" + next_page_url, folder=True)) return itemlist
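# Illustrative sketch (not part of the channel) of the two pagination styles
# handled by lista() above: normal listings expose a plain "Next" href, while
# filtered listings paginate through a javascript paginar(n) button, so the
# captured page number is appended to the POST data carried in item.extra.
def next_request(url, extra, next_href, next_post_page):
    if next_href:              # plain link: follow it, keep the same POST data
        return next_href, extra
    if next_post_page:         # JS button: same URL, POST data grows "&pg=n"
        return url, extra + "&pg=" + next_post_page
    return None, None          # no further pages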
def episodios(item): logger.info("tvalacarta.channels.eltrece episodios") itemlist = [] ''' <div about="/la-noche-de-mirtha/programa-38_074529" typeof="sioc:Item foaf:Document" class="ds-1col node node--capitulo-completo view-mode-c13_capitulo_completo node--c13-capitulo-completo node--capitulo-completo--c13-capitulo-completo clearfix"> <figure data-desktop="217x122" data-tabletlandscape="217x122" data-tabletportrait="217x122" data-mobilelandscape="217x122" data-mobileportrait="217x122" alt="Programa 38 (10-01-15)" data-width="90" data-height="90" data-timestamp="1421945563" data-uri="public://2015/01/11/mirthascioli.jpg" class="field field--name-field-images field--type-image field--label-hidden" ><a href="/la-noche-de-mirtha/programa-38_074529" data-pagetype="capitulo_completo"><span class="hasvideo"></span><noscript><img src='public://styles/90x90/public/2015/01/11/mirthascioli.jpg?t=1421945563' width='90' height='90' alt='Programa 38 (10-01-15)' /></noscript></a><figcaption></figcaption></figure> <h2><a data-pagetype="capitulo_completo" href="/la-noche-de-mirtha/programa-38_074529">Programa 38 (10-01-15)</a></h2> <p>Invitados del programa de hoy: Daniel Scioli, Alejandra Maglietti, Facundo...</p></div> ''' # Descarga la página data = scrapertools.cache_page(item.url) item.url = urlparse.urljoin( item.url, scrapertools.find_single_match( data, 'href="(/[^\/]+/capitulos-completos)">Cap')) # Busca la opción de "Capítulos completos" data = scrapertools.cache_page(item.url) matches = re.compile('<figure(.*?)</div>', re.DOTALL).findall(data) for match in matches: logger.info("tvalacarta.channels.eltrece programas match=" + match) title = scrapertools.find_single_match( match, '<a data-pagetype="capitulo_completo" href="[^"]+">([^<]+)</a>') if title == "": title = scrapertools.find_single_match( match, "<figcaption>([^<]+)</figcaption>") if title == "": title = scrapertools.find_single_match(match, 'alt="([^"]+)"') title = scrapertools.htmlclean(title) url = urlparse.urljoin( item.url, scrapertools.find_single_match(match, 'a href="([^"]+)"')) thumbnail = scrapertools.find_single_match( match, 'data-uri="public\:\/\/([^"]+)"') thumbnail = "http://eltrecetv.cdncmd.com/sites/default/files/styles/298x168/public/" + thumbnail plot = scrapertools.find_single_match(match, '<p>([^<]+)</p>') if (DEBUG): logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]") # Añade al listado itemlist.append( Item(channel=CHANNEL, action="play", server="eltrece", title=title, url=url, thumbnail=thumbnail, plot=plot, fanart=thumbnail, viewmode="movie_with_plot", folder=False)) # Paginación current_page = scrapertools.find_single_match(item.url, "page\=(\d+)") logger.info("tvalacarta.channels.eltrece programas current_page=" + current_page) if current_page == "": next_page_url = item.url + "?page=1" else: next_page_url = item.url.replace("page=" + current_page, "page=" + str(int(current_page) + 1)) logger.info("tvalacarta.channels.eltrece programas next_page_url=" + next_page_url) itemlist.append( Item(channel=CHANNEL, action="episodios", title=">> Página siguiente", url=next_page_url, folder=True)) return itemlist
def series(item): logger.info("pelisalacarta.channels.seriesflv series") # Descarga la pagina headers = DEFAULT_HEADERS[:] headers.append(["Referer", "http://www.seriesflv.net/series/"]) headers.append(["X-Requested-With", "XMLHttpRequest"]) post = item.extra data = scrapertools.cache_page(item.url, headers=headers, post=post) logger.info("data=" + data) # Extrae las entradas (carpetas) ''' <ul><li> <a href="http://www.seriesflv.net/serie/game-of-thrones.html" class="on over"> <div class="left"> <img src="http://http-s.ws/ysk/img/data/11a1a46bca5c4cca2cac0d0711225feb-size-90x120-a.jpg" width="50" height="60" /> Game of Thrones (Juego de tronos)</div> <div class="rigth over"> <div class="left op"> <span>4</span> <p>Temporadas</p> </div> ''' patron = '<a.*?href="([^"]+)"[^<]+' patron += '<div class="left"[^<]+' patron += '<img.*?src="([^"]+)"[^>]*>([^<]+)</div[^<]+' patron += '<div class="rigth over"[^<]+' patron += '<div class="left op"[^<]+' patron += '<span>([^<]+)</span' matches = re.compile(patron, re.DOTALL).findall(data) itemlist = [] for scrapedurl, scrapedthumbnail, scrapedtitle, numtemporadas in matches: title = scrapertools.htmlclean( scrapedtitle).strip() + " (" + numtemporadas + " temporadas)" thumbnail = urlparse.urljoin(item.url, scrapedthumbnail) plot = "" url = urlparse.urljoin(item.url, scrapedurl) itemlist.append( Item(channel=__channel__, action="episodios", title=title, url=url, thumbnail=thumbnail, plot=plot, show=title)) if (DEBUG): logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]") #grupo_no=0&type=series&order=titulo old_offset = scrapertools.find_single_match(item.extra, "grupo_no\=(\d+)") new_offset = str(int(old_offset) + 1) newextra = item.extra.replace("grupo_no=" + old_offset, "grupo_no=" + new_offset) itemlist.append( Item(channel=__channel__, action="series", title=">> Página siguiente", extra=newextra, url=item.url)) return itemlist
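# Illustrative sketch (not part of the channel) of the XHR paging used by
# series() above: the listing offset travels in the POST body as grupo_no=N,
# so the "next page" item simply rewrites that field in item.extra.
import re

def bump_offset(post):
    old = re.search(r"grupo_no=(\d+)", post).group(1)
    return post.replace("grupo_no=" + old, "grupo_no=" + str(int(old) + 1))

# bump_offset("grupo_no=0&type=series&order=titulo")
# -> "grupo_no=1&type=series&order=titulo"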
def filmaffinity(item,infoLabels): title = infoLabels["title"].replace(" ", "+") try: year = infoLabels["year"] except: year="" sinopsis = infoLabels["sinopsis"] if year== "": if item.contentType!="movie": tipo = "serie" url_bing="http://www.bing.com/search?q=%s+Serie+de+tv+site:filmaffinity.com" % title else: tipo= "película" url_bing="http://www.bing.com/search?q=%s+site:filmaffinity.com" % title try: data = browser (url_bing) data = re.sub(r"\n|\r|\t|\s{2}| ","",data) if "myaddrproxy.php" in data: subdata_bing = scrapertools.get_match(data,'li class="b_algo"><div class="b_title"><h2>(<a href="/myaddrproxy.php/http/www.filmaffinity.com/es/film.*?)"') subdata_bing = re.sub(r'\/myaddrproxy.php\/http\/','',subdata_bing) else: subdata_bing = scrapertools.get_match(data,'li class="b_algo"><h2>(<a href="http://www.filmaffinity.com/.*?/film.*?)"') url_filma = scrapertools.get_match(subdata_bing,'<a href="([^"]+)') if not "http" in url_filma: try: data=httptools.downloadpage("http://"+url_filma,cookies=False,timeout=1).data except: data=httptools.downloadpage("http://"+url_filma,cookies=False,timeout=1).data else: try: data=httptools.downloadpage(url_filma,cookies=False,timeout=1).data except: data=httptools.downloadpage(url_filma,cookies=False,timeout=1).data data = re.sub(r"\n|\r|\t|\s{2}| ","",data) except: pass else: tipo = "Pelicula" url = "http://www.filmaffinity.com/es/advsearch.php?stext={0}&stype%5B%5D=title&country=&genre=&fromyear={1}&toyear={1}".format(title, year) data = httptools.downloadpage(url,cookies=False).data url_filmaf = scrapertools.find_single_match(data, '<div class="mc-poster">\s*<a title="[^"]*" href="([^"]+)"') if url_filmaf: url_filmaf = "http://www.filmaffinity.com%s" % url_filmaf data=httptools.downloadpage(url_filmaf,cookies=False).data else : if item.contentType!="movie": tipo = "serie" url_bing="http://www.bing.com/search?q=%s+Serie+de+tv+site:filmaffinity.com" % title else: tipo= "película" url_bing="http://www.bing.com/search?q=%s+site:filmaffinity.com" % title try: data = browser (url_bing) data = re.sub(r"\n|\r|\t|\s{2}| ","",data) if "myaddrproxy.php" in data: subdata_bing = scrapertools.get_match(data,'li class="b_algo"><div class="b_title"><h2>(<a href="/myaddrproxy.php/http/www.filmaffinity.com/es/film.*?)"') subdata_bing = re.sub(r'\/myaddrproxy.php\/http\/','',subdata_bing) else: subdata_bing = scrapertools.get_match(data,'li class="b_algo"><h2>(<a href="http://www.filmaffinity.com/.*?/film.*?)"') url_filma = scrapertools.get_match(subdata_bing,'<a href="([^"]+)') if not "http" in url_filma: data=httptools.downloadpage("http://"+url_filma,cookies=False).data else: data=httptools.downloadpage(url_filma,cookies=False).data data = re.sub(r"\n|\r|\t|\s{2}| ","",data) except: pass sinopsis_f = scrapertools.find_single_match(data, '<dd itemprop="description">(.*?)</dd>') sinopsis_f = sinopsis_f.replace("<br><br />", "\n") sinopsis_f =re.sub(r"\(FILMAFFINITY\)<br />","",sinopsis_f) try: year_f = scrapertools.get_match(data,'<dt>Año</dt>.*?>(\d+)</dd>') except: year_f= "" try: rating_filma=scrapertools.get_match(data,'itemprop="ratingValue" content="(.*?)">') except: rating_filma = "Sin puntuacion" critica="" patron = '<div itemprop="reviewBody">(.*?)</div>.*?itemprop="author">(.*?)\s*<i alt="([^"]+)"' matches_reviews = scrapertools.find_multiple_matches(data, patron) if matches_reviews: for review, autor, valoracion in matches_reviews: review = dhe(scrapertools.htmlclean(review)) review += "\n" + autor +"[CR]" review = re.sub(r'Puntuac.*?\)','',review) if 
"positiva" in valoracion: critica += "[COLOR green][B]%s[/B][/COLOR]\n" % review elif "neutral" in valoracion: critica += "[COLOR yellow][B]%s[/B][/COLOR]\n" % review else: critica += "[COLOR red][B]%s[/B][/COLOR]\n" % review else: critica = "[COLOR floralwhite][B]Esta %s no tiene críticas todavía...[/B][/COLOR]" % tipo return critica,rating_filma, year_f,sinopsis_f
# -*- coding: utf-8 -*-
def peliculas(item): logger.info("streamondemand.piratestreaming peliculas") itemlist = [] # Descarga la pagina data = scrapertools.cache_page(item.url) # Extrae las entradas (carpetas) patron = '<div class="featuredItem">.*?<a href="([^"]+)".*?<img src="([^"]+)".*?<a href=[^>]*>(.*?)</a>' matches = re.compile(patron, re.DOTALL).findall(data) for scrapedurl, scrapedthumbnail, scrapedtitle in matches: scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle).strip() try: daa = scrapertools.cache_page(scrapedurl) da = daa.split('justify;">') da = da[1].split('</p>') scrapedplot = scrapertools.htmlclean(da[0]).strip() except: scrapedplot = "Trama non disponibile" if DEBUG: logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") tmdbtitle1 = scrapedtitle.split("Sub ")[0] tmdbtitle = tmdbtitle1.split("(")[0] try: plot, fanart, poster, extrameta = info(tmdbtitle) itemlist.append( Item(channel=__channel__, thumbnail=poster, fanart=fanart if fanart != "" else poster, extrameta=extrameta, plot=str(plot), action="episodios" if item.extra == "serie" else "findvideos", title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl, fulltitle=scrapedtitle, show=scrapedtitle, folder=True)) except: itemlist.append( Item(channel=__channel__, action="episodios" if item.extra == "serie" else "findvideos", fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True)) # Extrae el paginador patronvideos = '<td align="center">[^<]+</td>[^<]+<td align="center">\s*<a href="([^"]+)">[^<]+</a>' matches = re.compile(patronvideos, re.DOTALL).findall(data) if len(matches) > 0: scrapedurl = urlparse.urljoin(item.url, matches[0]) itemlist.append( Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True)), itemlist.append( Item( channel=__channel__, action="peliculas", title="[COLOR orange]Successivo >>[/COLOR]", url=scrapedurl, thumbnail= "http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png", folder=True)) return itemlist
def peliculas(item): logger.info("pelisalacarta.channels.elsenordelanillo peliculas") itemlist = [] # Descarga la pagina data = scrapertools.cache_page(item.url) #logger.info("data="+data) # Extrae las entradas ''' <!--<pelicula>--> <li class="peli_bx br1px brdr10px ico_a"> <h2 class="titpeli bold ico_b"><a target="_top" href="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/pelicula/1077/el-jardinero-fiel.html" title="El Jardinero Fiel">El Jardinero Fiel</a></h2> <div class="peli_img p_relative"> <div class="peli_img_img"> <a target="_top" href="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/pelicula/1077/el-jardinero-fiel.html" title="El Jardinero Fiel"> <img src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/files/uploads/1077.jpg" alt="El Jardinero Fiel" /></a> </div> <div> <center><table border="5" bordercolor="#000000"><tr><td> <img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/lat.png"> </td><td> <img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/sub.png"> </td><td> <img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/no-cam.png"> </td><td> <img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/dvd.png"> </td><td> <img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/no-hd.png"> </td></tr></table></center> </div> <div class="peli_txt bgdeg8 brdr10px bxshd2 ico_b p_absolute pd15px white"> <div class="plt_tit bold fs14px mgbot10px"><h2 class="bold d_inline fs14px"><font color="black"><b>El Jardinero Fiel</b></font></h2></div> <div class="plt_ft clf mgtop10px"> <div class="stars f_left pdtop10px"><strong>Genero</strong>: Suspenso, Drama, 2005</div> <br><br> <div class="stars f_left pdtop10px"><table><tr><td><strong>Idioma</strong>:</td><td><img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/lat.png"></td><td><img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/sub.png"></td></tr></table></div> <br /><br /> <div class="stars f_left pdtop10px"><table><tr><td><strong>Calidad</strong>:</td><td><img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/no-cam.png"></td><td><img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/dvd.png"></td><td><img width="26" heigth="17" src="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/Temas/default/img/idioma/no-hd.png"></td></tr></table></div> <br /><br> <div class="stars f_left pdtop10px"><strong>Visualizada</strong>: 629 Veces</div> <a target="_top" class="vrfich bold ico f_right" href="http://www.xn--elseordelanillo-1qb.com/pelisdelanillo/pelicula/1077/el-jardinero-fiel.html" title=""></a> </div> </div> </div> </li> <!--</pelicula>--> ''' patronbloque = "<!--<pelicula>--[^<]+<li(.*?)</li>" bloques = re.compile(patronbloque,re.DOTALL).findall(data) for bloque in bloques: scrapedurl = scrapertools.find_single_match(bloque,'<a.*?href="([^"]+)"') scrapedtitle = scrapertools.find_single_match(bloque,'<a.*?title="([^"]+)"') scrapedthumbnail = scrapertools.find_single_match(bloque,'<img src="([^"]+)"') title = unicode( scrapedtitle, "iso-8859-1" , errors="replace" ).encode("utf-8") title = title.strip() title = 
scrapertools.htmlclean(title) thumbnail = urlparse.urljoin(item.url,scrapedthumbnail) plot = "" url = urlparse.urljoin(item.url,scrapedurl) if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]") itemlist.append( Item(channel=__channel__, action="findvideos" , title=title , url=url, thumbnail=thumbnail, plot=plot, fulltitle=title, viewmode="movie")) #</b></span></a></li[^<]+<li><a href="?page=2"> next_page = scrapertools.find_single_match(data,'</b></span></a></li[^<]+<li><a target="_top" href="([^"]+)">') if next_page!="": itemlist.append( Item(channel=__channel__, action="peliculas" , title=">> Página siguiente" , url=item.url+next_page, folder=True)) return itemlist
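# Illustrative sketch (not part of the channel) of the charset handling in
# peliculas() above: the site serves Latin-1, so titles are decoded from
# iso-8859-1 (with errors="replace") and re-encoded as UTF-8, which is what the
# unicode(...).encode("utf-8") call does under Python 2.
def to_utf8(texto):
    if isinstance(texto, bytes):  # Python 2 str / Python 3 bytes
        return texto.decode("iso-8859-1", "replace").encode("utf-8")
    return texto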
def findvideos(item): logger.info() if item.contentSeason != '': return episode_links(item) itemlist = [] item.text_color = color3 data = get_data(item.url) item.plot = scrapertools.find_single_match(data, 'SINOPSIS(?:</span>|</strong>):(.*?)</p>') year = scrapertools.find_single_match(data, '(?:<span class="bold">|<strong>)AÑO(?:</span>|</strong>):\s*(\d+)') if year: try: from core import tmdb item.infoLabels['year'] = year tmdb.set_infoLabels_item(item, __modo_grafico__) except: pass old_format = False # Patron torrent antiguo formato if "Enlaces de descarga</div>" in data: old_format = True matches = scrapertools.find_multiple_matches(data, 'class="separate3 magnet".*?href="([^"]+)"') for scrapedurl in matches: scrapedurl = scrapertools.find_single_match(scrapedurl, '(magnet.*)') scrapedurl = urllib.unquote(re.sub(r'&b=4', '', scrapedurl)) title = "[Torrent] " title += urllib.unquote(scrapertools.find_single_match(scrapedurl, 'dn=(.*?)(?i)WWW.DescargasMix')) itemlist.append(item.clone(action="play", server="torrent", title=title, url=scrapedurl, text_color="green")) # Patron online data_online = scrapertools.find_single_match(data, 'Ver online</div>(.*?)<div class="section-box related-posts">') if data_online: title = "Enlaces Online" if '"l-latino2"' in data_online: title += " [LAT]" elif '"l-esp2"' in data_online: title += " [ESP]" elif '"l-vose2"' in data_online: title += " [VOSE]" patron = 'make_links.*?,[\'"]([^"\']+)["\']' matches = scrapertools.find_multiple_matches(data_online, patron) for i, code in enumerate(matches): enlace = show_links(code) links = servertools.findvideos(data=enlace[0]) if links and "peliculas.nu" not in links: if i == 0: extra_info = scrapertools.find_single_match(data_online, '<span class="tooltiptext">(.*?)</span>') size = scrapertools.find_single_match(data_online, '(?i)TAMAÑO:\s*(.*?)<').strip() if size: title += " [%s]" % size new_item = item.clone(title=title, action="", text_color=color1) if extra_info: extra_info = scrapertools.htmlclean(extra_info) new_item.infoLabels["plot"] = extra_info new_item.title += " +INFO" itemlist.append(new_item) title = " Ver vídeo en " + links[0][2] itemlist.append(item.clone(action="play", server=links[0][2], title=title, url=links[0][1])) scriptg = scrapertools.find_single_match(data, "<script type='text/javascript'>str='([^']+)'") if scriptg: gvideo = urllib.unquote_plus(scriptg.replace("@", "%")) url = scrapertools.find_single_match(gvideo, 'src="([^"]+)"') if url: itemlist.append(item.clone(action="play", server="directo", url=url, extra=item.url, title=" Ver vídeo en Googlevideo (Máxima calidad)")) # Patron descarga patron = '<div class="(?:floatLeft |)double(?:nuevo|)">(.*?)</div>(.*?)' \ '(?:<div(?: id="mirrors"|) class="(?:contentModuleSmall |)mirrors">|<div class="section-box related-' \ 'posts">)' bloques_descarga = scrapertools.find_multiple_matches(data, patron) for title_bloque, bloque in bloques_descarga: if title_bloque == "Ver online": continue if '"l-latino2"' in bloque: title_bloque += " [LAT]" elif '"l-esp2"' in bloque: title_bloque += " [ESP]" elif '"l-vose2"' in bloque: title_bloque += " [VOSE]" extra_info = scrapertools.find_single_match(bloque, '<span class="tooltiptext">(.*?)</span>') size = scrapertools.find_single_match(bloque, '(?i)TAMAÑO:\s*(.*?)<').strip() if size: title_bloque += " [%s]" % size new_item = item.clone(title=title_bloque, action="", text_color=color1) if extra_info: extra_info = scrapertools.htmlclean(extra_info) new_item.infoLabels["plot"] = extra_info new_item.title += " 
+INFO" itemlist.append(new_item) if '<div class="subiendo">' in bloque: itemlist.append(item.clone(title=" Los enlaces se están subiendo", action="")) continue patron = 'class="separate.*? ([^"]+)".*?(?:make_links.*?,|href=)[\'"]([^"\']+)["\']' matches = scrapertools.find_multiple_matches(bloque, patron) for scrapedserver, scrapedurl in matches: if (scrapedserver == "ul") | (scrapedserver == "uploaded"): scrapedserver = "uploadedto" titulo = unicode(scrapedserver, "utf-8").capitalize().encode("utf-8") if titulo == "Magnet" and old_format: continue elif titulo == "Magnet" and not old_format: title = " Enlace Torrent" scrapedurl = scrapertools.find_single_match(scrapedurl, '(magnet.*)') scrapedurl = urllib.unquote(re.sub(r'&b=4', '', scrapedurl)) itemlist.append(item.clone(action="play", server="torrent", title=title, url=scrapedurl, text_color="green")) continue if servertools.is_server_enabled(scrapedserver): try: # servers_module = __import__("servers." + scrapedserver) # Saca numero de enlaces urls = show_links(scrapedurl) numero = str(len(urls)) titulo = " %s - Nº enlaces: %s" % (titulo, numero) itemlist.append(item.clone(action="enlaces", title=titulo, extra=scrapedurl, server=scrapedserver)) except: pass itemlist.append(item.clone(channel="trailertools", title="Buscar Tráiler", action="buscartrailer", context="", text_color="magenta")) if item.extra != "findvideos" and config.get_videolibrary_support(): itemlist.append(Item(channel=item.channel, title="Añadir a la videoteca", action="add_pelicula_to_library", extra="findvideos", url=item.url, infoLabels={'title': item.fulltitle}, fulltitle=item.fulltitle, text_color="green")) return itemlist